diff --git a/.CMake/alg_support.cmake b/.CMake/alg_support.cmake index b313206163..f2d2a85920 100644 --- a/.CMake/alg_support.cmake +++ b/.CMake/alg_support.cmake @@ -232,6 +232,32 @@ endif() endif() +option(OQS_ENABLE_KEM_ML_KEM "Enable ml_kem algorithm family" ON) +cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_512_ipd "" ON "OQS_ENABLE_KEM_ML_KEM" OFF) +cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_512 "" ON "OQS_ENABLE_KEM_ML_KEM" OFF) +if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin") +if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_BMI2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS)) + cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_512_ipd_avx2 "" ON "OQS_ENABLE_KEM_ml_kem_512_ipd" OFF) +endif() +endif() + +cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_768_ipd "" ON "OQS_ENABLE_KEM_ML_KEM" OFF) +cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_768 "" ON "OQS_ENABLE_KEM_ML_KEM" OFF) +if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin") +if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_BMI2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS)) + cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_768_ipd_avx2 "" ON "OQS_ENABLE_KEM_ml_kem_768_ipd" OFF) +endif() +endif() + +cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_1024_ipd "" ON "OQS_ENABLE_KEM_ML_KEM" OFF) +cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_1024 "" ON "OQS_ENABLE_KEM_ML_KEM" OFF) +if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin") +if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_BMI2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS)) + cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_1024_ipd_avx2 "" ON "OQS_ENABLE_KEM_ml_kem_1024_ipd" OFF) +endif() +endif() + + option(OQS_ENABLE_SIG_DILITHIUM "Enable dilithium algorithm family" ON) cmake_dependent_option(OQS_ENABLE_SIG_dilithium_2 "" ON "OQS_ENABLE_SIG_DILITHIUM" OFF) if(CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux") @@ -273,6 +299,32 @@ endif() endif() +option(OQS_ENABLE_SIG_ML_DSA "Enable ml_dsa algorithm family" ON) 
+cmake_dependent_option(OQS_ENABLE_SIG_ml_dsa_44_ipd "" ON "OQS_ENABLE_SIG_ML_DSA" OFF) +cmake_dependent_option(OQS_ENABLE_SIG_ml_dsa_44 "" ON "OQS_ENABLE_SIG_ML_DSA" OFF) +if(CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux") +if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS)) + cmake_dependent_option(OQS_ENABLE_SIG_ml_dsa_44_ipd_avx2 "" ON "OQS_ENABLE_SIG_ml_dsa_44_ipd" OFF) +endif() +endif() + +cmake_dependent_option(OQS_ENABLE_SIG_ml_dsa_65_ipd "" ON "OQS_ENABLE_SIG_ML_DSA" OFF) +cmake_dependent_option(OQS_ENABLE_SIG_ml_dsa_65 "" ON "OQS_ENABLE_SIG_ML_DSA" OFF) +if(CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux") +if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS)) + cmake_dependent_option(OQS_ENABLE_SIG_ml_dsa_65_ipd_avx2 "" ON "OQS_ENABLE_SIG_ml_dsa_65_ipd" OFF) +endif() +endif() + +cmake_dependent_option(OQS_ENABLE_SIG_ml_dsa_87_ipd "" ON "OQS_ENABLE_SIG_ML_DSA" OFF) +cmake_dependent_option(OQS_ENABLE_SIG_ml_dsa_87 "" ON "OQS_ENABLE_SIG_ML_DSA" OFF) +if(CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux") +if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS)) + cmake_dependent_option(OQS_ENABLE_SIG_ml_dsa_87_ipd_avx2 "" ON "OQS_ENABLE_SIG_ml_dsa_87_ipd" OFF) +endif() +endif() + + option(OQS_ENABLE_SIG_FALCON "Enable falcon algorithm family" ON) cmake_dependent_option(OQS_ENABLE_SIG_falcon_512 "" ON "OQS_ENABLE_SIG_FALCON" OFF) if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS)) @@ -396,7 +448,7 @@ if(NOT ((OQS_MINIMAL_BUILD STREQUAL "") OR (OQS_MINIMAL_BUILD STREQUAL "OFF"))) filter_algs("${OQS_MINIMAL_BUILD}") elseif (${OQS_ALGS_ENABLED} STREQUAL "STD") ##### OQS_COPY_FROM_UPSTREAM_FRAGMENT_LIST_STANDARDIZED_ALGS_START - 
filter_algs("KEM_kyber_512;KEM_kyber_768;KEM_kyber_1024;SIG_dilithium_2;SIG_dilithium_3;SIG_dilithium_5;SIG_falcon_512;SIG_falcon_1024;SIG_sphincs_sha2_128f_simple;SIG_sphincs_sha2_128s_simple;SIG_sphincs_sha2_192f_simple;SIG_sphincs_sha2_192s_simple;SIG_sphincs_sha2_256f_simple;SIG_sphincs_sha2_256s_simple;SIG_sphincs_shake_128f_simple;SIG_sphincs_shake_128s_simple;SIG_sphincs_shake_192f_simple;SIG_sphincs_shake_192s_simple;SIG_sphincs_shake_256f_simple;SIG_sphincs_shake_256s_simple") + filter_algs("KEM_ml_kem_512_ipd;KEM_ml_kem_768_ipd;KEM_ml_kem_1024_ipd;SIG_dilithium_2;SIG_dilithium_3;SIG_dilithium_5;SIG_ml_dsa_44_ipd;SIG_ml_dsa_65_ipd;SIG_ml_dsa_87_ipd;SIG_falcon_512;SIG_falcon_1024;SIG_sphincs_sha2_128f_simple;SIG_sphincs_sha2_128s_simple;SIG_sphincs_sha2_192f_simple;SIG_sphincs_sha2_192s_simple;SIG_sphincs_sha2_256f_simple;SIG_sphincs_sha2_256s_simple;SIG_sphincs_shake_128f_simple;SIG_sphincs_shake_128s_simple;SIG_sphincs_shake_192f_simple;SIG_sphincs_shake_192s_simple;SIG_sphincs_shake_256f_simple;SIG_sphincs_shake_256s_simple") ##### OQS_COPY_FROM_UPSTREAM_FRAGMENT_LIST_STANDARDIZED_ALGS_END elseif(${OQS_ALGS_ENABLED} STREQUAL "NIST_R4") filter_algs("KEM_classic_mceliece_348864;KEM_classic_mceliece_348864f;KEM_classic_mceliece_460896;KEM_classic_mceliece_460896f;KEM_classic_mceliece_6688128;KEM_classic_mceliece_6688128f;KEM_classic_mceliece_6960119;KEM_classic_mceliece_6960119f;KEM_classic_mceliece_8192128;KEM_classic_mceliece_8192128f;KEM_hqc_128;KEM_hqc_192;KEM_hqc_256;KEM_bike_l1;KEM_bike_l3") diff --git a/CMakeLists.txt b/CMakeLists.txt index e1f070b3a7..06e5bd193c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -167,9 +167,15 @@ endif() if(OQS_ENABLE_KEM_KYBER) set(PUBLIC_HEADERS ${PUBLIC_HEADERS} ${PROJECT_SOURCE_DIR}/src/kem/kyber/kem_kyber.h) endif() +if(OQS_ENABLE_KEM_ML_KEM) + set(PUBLIC_HEADERS ${PUBLIC_HEADERS} ${PROJECT_SOURCE_DIR}/src/kem/ml_kem/kem_ml_kem.h) +endif() if(OQS_ENABLE_SIG_DILITHIUM) set(PUBLIC_HEADERS ${PUBLIC_HEADERS} 
${PROJECT_SOURCE_DIR}/src/sig/dilithium/sig_dilithium.h) endif() +if(OQS_ENABLE_SIG_ML_DSA) + set(PUBLIC_HEADERS ${PUBLIC_HEADERS} ${PROJECT_SOURCE_DIR}/src/sig/ml_dsa/sig_ml_dsa.h) +endif() if(OQS_ENABLE_SIG_FALCON) set(PUBLIC_HEADERS ${PUBLIC_HEADERS} ${PROJECT_SOURCE_DIR}/src/sig/falcon/sig_falcon.h) endif() diff --git a/README.md b/README.md index 69edc15e68..aebb3dbae0 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ The list below indicates all algorithms supported by liboqs, but not all those a - **FrodoKEM**: FrodoKEM-640-AES, FrodoKEM-640-SHAKE, FrodoKEM-976-AES, FrodoKEM-976-SHAKE, FrodoKEM-1344-AES, FrodoKEM-1344-SHAKE - **HQC**: HQC-128, HQC-192, HQC-256 - **Kyber**: Kyber512, Kyber768, Kyber1024 +- **ML-KEM**: ML-KEM-512-ipd (alias: ML-KEM-512), ML-KEM-768-ipd (alias: ML-KEM-768), ML-KEM-1024-ipd (alias: ML-KEM-1024) - **NTRU-Prime**: sntrup761 @@ -54,6 +55,7 @@ The list below indicates all algorithms supported by liboqs, but not all those a - **CRYSTALS-Dilithium**: Dilithium2, Dilithium3, Dilithium5 - **Falcon**: Falcon-512, Falcon-1024 +- **ML-DSA**: ML-DSA-44-ipd (alias: ML-DSA-44), ML-DSA-65-ipd (alias: ML-DSA-65), ML-DSA-87-ipd (alias: ML-DSA-87) - **SPHINCS+-SHA2**: SPHINCS+-SHA2-128f-simple, SPHINCS+-SHA2-128s-simple, SPHINCS+-SHA2-192f-simple, SPHINCS+-SHA2-192s-simple, SPHINCS+-SHA2-256f-simple, SPHINCS+-SHA2-256s-simple - **SPHINCS+-SHAKE**: SPHINCS+-SHAKE-128f-simple, SPHINCS+-SHAKE-128s-simple, SPHINCS+-SHAKE-192f-simple, SPHINCS+-SHAKE-192s-simple, SPHINCS+-SHAKE-256f-simple, SPHINCS+-SHAKE-256s-simple @@ -176,8 +178,10 @@ liboqs includes some third party libraries or modules that are licensed differen - `src/kem/classic_mceliece/pqclean_*`: public domain - `src/kem/kyber/pqcrystals-*`: public domain (CC0) or Apache License v2.0 - `src/kem/kyber/pqclean_*`: public domain (CC0), and public domain (CC0) or Apache License v2.0, and public domain (CC0) or MIT, and MIT +- `src/kem/ml_kem/pqcrystals-*`: public domain (CC0) or Apache 
License v2.0 - `src/sig/dilithium/pqcrystals-*`: public domain (CC0) or Apache License v2.0 - `src/sig/dilithium/pqclean_*`: public domain (CC0), and public domain (CC0) or Apache License v2.0, and public domain (CC0) or MIT, and MIT +- `src/sig/ml_dsa/pqcrystals-*`: public domain (CC0) or Apache License v2.0 - `src/sig/sphincs/pqclean_*`: CC0 (public domain) ## Acknowledgements diff --git a/docs/algorithms/kem/bike.md b/docs/algorithms/kem/bike.md index 10741ad398..841993739c 100644 --- a/docs/algorithms/kem/bike.md +++ b/docs/algorithms/kem/bike.md @@ -13,11 +13,11 @@ ## Parameter set summary -| Parameter set | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) | -|:---------------:|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:| -| BIKE-L1 | IND-CPA | 1 | 1541 | 5223 | 1573 | 32 | -| BIKE-L3 | IND-CPA | 3 | 3083 | 10105 | 3115 | 32 | -| BIKE-L5 | IND-CPA | 5 | 5122 | 16494 | 5154 | 32 | +| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) | +|:---------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:| +| BIKE-L1 | NA | IND-CPA | 1 | 1541 | 5223 | 1573 | 32 | +| BIKE-L3 | NA | IND-CPA | 3 | 3083 | 10105 | 3115 | 32 | +| BIKE-L5 | NA | IND-CPA | 5 | 5122 | 16494 | 5154 | 32 | ## BIKE-L1 implementation characteristics diff --git a/docs/algorithms/kem/classic_mceliece.md b/docs/algorithms/kem/classic_mceliece.md index 29ba093075..68840c4b00 100644 --- a/docs/algorithms/kem/classic_mceliece.md +++ b/docs/algorithms/kem/classic_mceliece.md @@ -18,18 +18,18 @@ ## Parameter set summary -| Parameter set | 
Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) | -|:-------------------------:|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:| -| Classic-McEliece-348864 | IND-CCA2 | 1 | 261120 | 6492 | 96 | 32 | -| Classic-McEliece-348864f | IND-CCA2 | 1 | 261120 | 6492 | 96 | 32 | -| Classic-McEliece-460896 | IND-CCA2 | 3 | 524160 | 13608 | 156 | 32 | -| Classic-McEliece-460896f | IND-CCA2 | 3 | 524160 | 13608 | 156 | 32 | -| Classic-McEliece-6688128 | IND-CCA2 | 5 | 1044992 | 13932 | 208 | 32 | -| Classic-McEliece-6688128f | IND-CCA2 | 5 | 1044992 | 13932 | 208 | 32 | -| Classic-McEliece-6960119 | IND-CCA2 | 5 | 1047319 | 13948 | 194 | 32 | -| Classic-McEliece-6960119f | IND-CCA2 | 5 | 1047319 | 13948 | 194 | 32 | -| Classic-McEliece-8192128 | IND-CCA2 | 5 | 1357824 | 14120 | 208 | 32 | -| Classic-McEliece-8192128f | IND-CCA2 | 5 | 1357824 | 14120 | 208 | 32 | +| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) | +|:-------------------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:| +| Classic-McEliece-348864 | NA | IND-CCA2 | 1 | 261120 | 6492 | 96 | 32 | +| Classic-McEliece-348864f | NA | IND-CCA2 | 1 | 261120 | 6492 | 96 | 32 | +| Classic-McEliece-460896 | NA | IND-CCA2 | 3 | 524160 | 13608 | 156 | 32 | +| Classic-McEliece-460896f | NA | IND-CCA2 | 3 | 524160 | 13608 | 156 | 32 | +| Classic-McEliece-6688128 | NA | IND-CCA2 | 5 | 1044992 | 13932 | 208 | 32 | +| Classic-McEliece-6688128f | NA | IND-CCA2 | 5 | 1044992 | 13932 | 208 | 32 | +| Classic-McEliece-6960119 | NA | IND-CCA2 | 5 | 1047319 | 13948 | 
194 | 32 | +| Classic-McEliece-6960119f | NA | IND-CCA2 | 5 | 1047319 | 13948 | 194 | 32 | +| Classic-McEliece-8192128 | NA | IND-CCA2 | 5 | 1357824 | 14120 | 208 | 32 | +| Classic-McEliece-8192128f | NA | IND-CCA2 | 5 | 1357824 | 14120 | 208 | 32 | ## Classic-McEliece-348864 implementation characteristics diff --git a/docs/algorithms/kem/frodokem.md b/docs/algorithms/kem/frodokem.md index fbf5366b04..07f216a5a5 100644 --- a/docs/algorithms/kem/frodokem.md +++ b/docs/algorithms/kem/frodokem.md @@ -12,14 +12,14 @@ ## Parameter set summary -| Parameter set | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) | -|:-------------------:|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:| -| FrodoKEM-640-AES | IND-CCA2 | 1 | 9616 | 19888 | 9720 | 16 | -| FrodoKEM-640-SHAKE | IND-CCA2 | 1 | 9616 | 19888 | 9720 | 16 | -| FrodoKEM-976-AES | IND-CCA2 | 3 | 15632 | 31296 | 15744 | 24 | -| FrodoKEM-976-SHAKE | IND-CCA2 | 3 | 15632 | 31296 | 15744 | 24 | -| FrodoKEM-1344-AES | IND-CCA2 | 5 | 21520 | 43088 | 21632 | 32 | -| FrodoKEM-1344-SHAKE | IND-CCA2 | 5 | 21520 | 43088 | 21632 | 32 | +| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) | +|:-------------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:| +| FrodoKEM-640-AES | NA | IND-CCA2 | 1 | 9616 | 19888 | 9720 | 16 | +| FrodoKEM-640-SHAKE | NA | IND-CCA2 | 1 | 9616 | 19888 | 9720 | 16 | +| FrodoKEM-976-AES | NA | IND-CCA2 | 3 | 15632 | 31296 | 15744 | 24 | +| FrodoKEM-976-SHAKE | NA | IND-CCA2 | 3 | 15632 | 31296 | 15744 | 24 | +| FrodoKEM-1344-AES | NA | 
IND-CCA2 | 5 | 21520 | 43088 | 21632 | 32 | +| FrodoKEM-1344-SHAKE | NA | IND-CCA2 | 5 | 21520 | 43088 | 21632 | 32 | ## FrodoKEM-640-AES implementation characteristics diff --git a/docs/algorithms/kem/hqc.md b/docs/algorithms/kem/hqc.md index 84dab7f6c5..58d0834815 100644 --- a/docs/algorithms/kem/hqc.md +++ b/docs/algorithms/kem/hqc.md @@ -14,11 +14,11 @@ ## Parameter set summary -| Parameter set | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) | -|:---------------:|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:| -| HQC-128 | IND-CCA2 | 1 | 2249 | 2305 | 4433 | 64 | -| HQC-192 | IND-CCA2 | 3 | 4522 | 4586 | 8978 | 64 | -| HQC-256 | IND-CCA2 | 5 | 7245 | 7317 | 14421 | 64 | +| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) | +|:---------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:| +| HQC-128 | NA | IND-CCA2 | 1 | 2249 | 2305 | 4433 | 64 | +| HQC-192 | NA | IND-CCA2 | 3 | 4522 | 4586 | 8978 | 64 | +| HQC-256 | NA | IND-CCA2 | 5 | 7245 | 7317 | 14421 | 64 | ## HQC-128 implementation characteristics diff --git a/docs/algorithms/kem/kyber.md b/docs/algorithms/kem/kyber.md index 9f2ad1f85d..a75c144a2d 100644 --- a/docs/algorithms/kem/kyber.md +++ b/docs/algorithms/kem/kyber.md @@ -17,11 +17,11 @@ ## Parameter set summary -| Parameter set | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) | 
-|:---------------:|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:| -| Kyber512 | IND-CCA2 | 1 | 800 | 1632 | 768 | 32 | -| Kyber768 | IND-CCA2 | 3 | 1184 | 2400 | 1088 | 32 | -| Kyber1024 | IND-CCA2 | 5 | 1568 | 3168 | 1568 | 32 | +| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) | +|:---------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:| +| Kyber512 | NA | IND-CCA2 | 1 | 800 | 1632 | 768 | 32 | +| Kyber768 | NA | IND-CCA2 | 3 | 1184 | 2400 | 1088 | 32 | +| Kyber1024 | NA | IND-CCA2 | 5 | 1568 | 3168 | 1568 | 32 | ## Kyber512 implementation characteristics diff --git a/docs/algorithms/kem/ml_kem.md b/docs/algorithms/kem/ml_kem.md new file mode 100644 index 0000000000..92d1a5b4bd --- /dev/null +++ b/docs/algorithms/kem/ml_kem.md @@ -0,0 +1,53 @@ +# ML-KEM + +- **Algorithm type**: Key encapsulation mechanism. +- **Main cryptographic assumption**: Module LWE+R with base ring Z[x]/(3329, x^256+1). +- **Principal submitters**: Peter Schwabe. +- **Auxiliary submitters**: Roberto Avanzi, Joppe Bos, Léo Ducas, Eike Kiltz, Tancrède Lepoint, Vadim Lyubashevsky, John M. Schanck, Gregor Seiler, Damien Stehlé. +- **Authors' website**: https://pq-crystals.org/kyber/ and https://csrc.nist.gov/pubs/fips/203/ipd +- **Specification version**: ML-KEM-ipd. 
+- **Primary Source**: + - **Source**: https://github.com/pq-crystals/kyber/commit/11d00ff1f20cfca1f72d819e5a45165c1e0a2816 with copy_from_upstream patches + - **Implementation license (SPDX-Identifier)**: CC0-1.0 or Apache-2.0 + + +## Parameter set summary + +| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) | +|:---------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:| +| ML-KEM-512-ipd | ML-KEM-512 | IND-CCA2 | 1 | 800 | 1632 | 768 | 32 | +| ML-KEM-768-ipd | ML-KEM-768 | IND-CCA2 | 3 | 1184 | 2400 | 1088 | 32 | +| ML-KEM-1024-ipd | ML-KEM-1024 | IND-CCA2 | 5 | 1568 | 3168 | 1568 | 32 | + +## ML-KEM-512-ipd implementation characteristics + +| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage?‡ | +|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:----------------------| +| [Primary Source](#primary-source) | ref | All | All | None | True | True | False | +| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False | + +Are implementations chosen based on runtime CPU feature detection? **Yes**. + + ‡For an explanation of what this denotes, consult the [Explanation of Terms](#explanation-of-terms) section at the end of this file. 
+ +## ML-KEM-768-ipd implementation characteristics + +| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? | +|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------| +| [Primary Source](#primary-source) | ref | All | All | None | True | True | False | +| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False | + +Are implementations chosen based on runtime CPU feature detection? **Yes**. + +## ML-KEM-1024-ipd implementation characteristics + +| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? | +|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------| +| [Primary Source](#primary-source) | ref | All | All | None | True | True | False | +| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False | + +Are implementations chosen based on runtime CPU feature detection? **Yes**. + +## Explanation of Terms + +- **Large Stack Usage**: Implementations identified as having such may cause failures when running in threads or in constrained environments. 
\ No newline at end of file diff --git a/docs/algorithms/kem/ml_kem.yml b/docs/algorithms/kem/ml_kem.yml new file mode 100644 index 0000000000..38b0a3ef24 --- /dev/null +++ b/docs/algorithms/kem/ml_kem.yml @@ -0,0 +1,125 @@ +name: ML-KEM +type: kem +principal-submitters: +- Peter Schwabe +auxiliary-submitters: +- Roberto Avanzi +- Joppe Bos +- Léo Ducas +- Eike Kiltz +- Tancrède Lepoint +- Vadim Lyubashevsky +- John M. Schanck +- Gregor Seiler +- Damien Stehlé +crypto-assumption: Module LWE+R with base ring Z[x]/(3329, x^256+1) +website: https://pq-crystals.org/kyber/ and https://csrc.nist.gov/pubs/fips/203/ipd +nist-round: ipd +spec-version: ML-KEM-ipd +primary-upstream: + source: https://github.com/pq-crystals/kyber/commit/11d00ff1f20cfca1f72d819e5a45165c1e0a2816 + with copy_from_upstream patches + spdx-license-identifier: CC0-1.0 or Apache-2.0 +parameter-sets: +- name: ML-KEM-512-ipd + alias: ML-KEM-512 + claimed-nist-level: 1 + claimed-security: IND-CCA2 + length-public-key: 800 + length-ciphertext: 768 + length-secret-key: 1632 + length-shared-secret: 32 + implementations-switch-on-runtime-cpu-features: true + implementations: + - upstream: primary-upstream + upstream-id: ref + supported-platforms: all + common-crypto: + - SHA3: liboqs + no-secret-dependent-branching-claimed: true + no-secret-dependent-branching-checked-by-valgrind: true + large-stack-usage: false + - upstream: primary-upstream + upstream-id: avx2 + supported-platforms: + - architecture: x86_64 + operating_systems: + - Linux + - Darwin + required_flags: + - avx2 + - bmi2 + - popcnt + common-crypto: + - SHA3: liboqs + no-secret-dependent-branching-claimed: true + no-secret-dependent-branching-checked-by-valgrind: true + large-stack-usage: false +- name: ML-KEM-768-ipd + alias: ML-KEM-768 + claimed-nist-level: 3 + claimed-security: IND-CCA2 + length-public-key: 1184 + length-ciphertext: 1088 + length-secret-key: 2400 + length-shared-secret: 32 + implementations-switch-on-runtime-cpu-features: 
true + implementations: + - upstream: primary-upstream + upstream-id: ref + supported-platforms: all + common-crypto: + - SHA3: liboqs + no-secret-dependent-branching-claimed: true + no-secret-dependent-branching-checked-by-valgrind: true + large-stack-usage: false + - upstream: primary-upstream + upstream-id: avx2 + supported-platforms: + - architecture: x86_64 + operating_systems: + - Linux + - Darwin + required_flags: + - avx2 + - bmi2 + - popcnt + common-crypto: + - SHA3: liboqs + no-secret-dependent-branching-claimed: true + no-secret-dependent-branching-checked-by-valgrind: true + large-stack-usage: false +- name: ML-KEM-1024-ipd + alias: ML-KEM-1024 + claimed-nist-level: 5 + claimed-security: IND-CCA2 + length-public-key: 1568 + length-ciphertext: 1568 + length-secret-key: 3168 + length-shared-secret: 32 + implementations-switch-on-runtime-cpu-features: true + implementations: + - upstream: primary-upstream + upstream-id: ref + supported-platforms: all + common-crypto: + - SHA3: liboqs + no-secret-dependent-branching-claimed: true + no-secret-dependent-branching-checked-by-valgrind: true + large-stack-usage: false + - upstream: primary-upstream + upstream-id: avx2 + supported-platforms: + - architecture: x86_64 + operating_systems: + - Linux + - Darwin + required_flags: + - avx2 + - bmi2 + - popcnt + common-crypto: + - SHA3: liboqs + no-secret-dependent-branching-claimed: true + no-secret-dependent-branching-checked-by-valgrind: true + large-stack-usage: false diff --git a/docs/algorithms/kem/ntruprime.md b/docs/algorithms/kem/ntruprime.md index 07a7ca899d..5ff56716ff 100644 --- a/docs/algorithms/kem/ntruprime.md +++ b/docs/algorithms/kem/ntruprime.md @@ -14,9 +14,9 @@ ## Parameter set summary -| Parameter set | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) | 
-|:---------------:|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:| -| sntrup761 | IND-CCA2 | 2 | 1158 | 1763 | 1039 | 32 | +| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) | +|:---------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:| +| sntrup761 | NA | IND-CCA2 | 2 | 1158 | 1763 | 1039 | 32 | ## sntrup761 implementation characteristics diff --git a/docs/algorithms/sig/dilithium.md b/docs/algorithms/sig/dilithium.md index cd4ecb7336..d26daa2854 100644 --- a/docs/algorithms/sig/dilithium.md +++ b/docs/algorithms/sig/dilithium.md @@ -17,11 +17,11 @@ ## Parameter set summary -| Parameter set | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Signature size (bytes) | -|:---------------:|:-----------------|---------------------:|--------------------------:|--------------------------:|-------------------------:| -| Dilithium2 | EUF-CMA | 2 | 1312 | 2528 | 2420 | -| Dilithium3 | EUF-CMA | 3 | 1952 | 4000 | 3293 | -| Dilithium5 | EUF-CMA | 5 | 2592 | 4864 | 4595 | +| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Signature size (bytes) | +|:---------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|-------------------------:| +| Dilithium2 | NA | EUF-CMA | 2 | 1312 | 2528 | 2420 | +| Dilithium3 | NA | EUF-CMA | 3 | 1952 | 4000 | 3293 | +| Dilithium5 | NA | EUF-CMA | 5 | 2592 | 4864 | 4595 | ## Dilithium2 implementation characteristics diff --git a/docs/algorithms/sig/falcon.md 
b/docs/algorithms/sig/falcon.md index 1221110624..df05809687 100644 --- a/docs/algorithms/sig/falcon.md +++ b/docs/algorithms/sig/falcon.md @@ -13,10 +13,10 @@ ## Parameter set summary -| Parameter set | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Signature size (bytes) | -|:---------------:|:-----------------|---------------------:|--------------------------:|--------------------------:|-------------------------:| -| Falcon-512 | EUF-CMA | 1 | 897 | 1281 | 666 | -| Falcon-1024 | EUF-CMA | 5 | 1793 | 2305 | 1280 | +| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Signature size (bytes) | +|:---------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|-------------------------:| +| Falcon-512 | NA | EUF-CMA | 1 | 897 | 1281 | 666 | +| Falcon-1024 | NA | EUF-CMA | 5 | 1793 | 2305 | 1280 | ## Falcon-512 implementation characteristics diff --git a/docs/algorithms/sig/ml_dsa.md b/docs/algorithms/sig/ml_dsa.md new file mode 100644 index 0000000000..ab2b43488e --- /dev/null +++ b/docs/algorithms/sig/ml_dsa.md @@ -0,0 +1,53 @@ +# ML-DSA + +- **Algorithm type**: Digital signature scheme. +- **Main cryptographic assumption**: hardness of lattice problems over module lattices. +- **Principal submitters**: Vadim Lyubashevsky. +- **Auxiliary submitters**: Shi Bai, Léo Ducas, Eike Kiltz, Tancrède Lepoint, Peter Schwabe, Gregor Seiler, Damien Stehlé. +- **Authors' website**: https://pq-crystals.org/dilithium/ and https://csrc.nist.gov/pubs/fips/204/ipd +- **Specification version**: ML-DSA-ipd. 
+- **Primary Source**: + - **Source**: https://github.com/pq-crystals/dilithium/commit/e7bed6258b9a3703ce78d4ec38021c86382ce31c with copy_from_upstream patches + - **Implementation license (SPDX-Identifier)**: CC0-1.0 or Apache-2.0 + + +## Parameter set summary + +| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Signature size (bytes) | +|:---------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|-------------------------:| +| ML-DSA-44-ipd | ML-DSA-44 | EUF-CMA | 2 | 1312 | 2560 | 2420 | +| ML-DSA-65-ipd | ML-DSA-65 | EUF-CMA | 3 | 1952 | 4032 | 3309 | +| ML-DSA-87-ipd | ML-DSA-87 | EUF-CMA | 5 | 2592 | 4896 | 4627 | + +## ML-DSA-44-ipd implementation characteristics + +| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage?‡ | +|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:----------------------| +| [Primary Source](#primary-source) | ref | All | All | None | True | True | False | +| [Primary Source](#primary-source) | avx2 | x86\_64 | Darwin,Linux | AVX2,POPCNT | True | True | False | + +Are implementations chosen based on runtime CPU feature detection? **Yes**. + + ‡For an explanation of what this denotes, consult the [Explanation of Terms](#explanation-of-terms) section at the end of this file. + +## ML-DSA-65-ipd implementation characteristics + +| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? 
| No branching-on-secrets checked by valgrind? | Large stack usage? | +|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------| +| [Primary Source](#primary-source) | ref | All | All | None | True | True | False | +| [Primary Source](#primary-source) | avx2 | x86\_64 | Darwin,Linux | AVX2,POPCNT | True | True | False | + +Are implementations chosen based on runtime CPU feature detection? **Yes**. + +## ML-DSA-87-ipd implementation characteristics + +| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? | +|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------| +| [Primary Source](#primary-source) | ref | All | All | None | True | True | False | +| [Primary Source](#primary-source) | avx2 | x86\_64 | Darwin,Linux | AVX2,POPCNT | True | True | False | + +Are implementations chosen based on runtime CPU feature detection? **Yes**. + +## Explanation of Terms + +- **Large Stack Usage**: Implementations identified as having such may cause failures when running in threads or in constrained environments. 
\ No newline at end of file diff --git a/docs/algorithms/sig/ml_dsa.yml b/docs/algorithms/sig/ml_dsa.yml new file mode 100644 index 0000000000..c936883588 --- /dev/null +++ b/docs/algorithms/sig/ml_dsa.yml @@ -0,0 +1,117 @@ +name: ML-DSA +type: signature +principal-submitters: +- Vadim Lyubashevsky +auxiliary-submitters: +- Shi Bai +- Léo Ducas +- Eike Kiltz +- Tancrède Lepoint +- Peter Schwabe +- Gregor Seiler +- Damien Stehlé +crypto-assumption: hardness of lattice problems over module lattices +website: https://pq-crystals.org/dilithium/ and https://csrc.nist.gov/pubs/fips/204/ipd +nist-round: ipd +spec-version: ML-DSA-ipd +primary-upstream: + source: https://github.com/pq-crystals/dilithium/commit/e7bed6258b9a3703ce78d4ec38021c86382ce31c + with copy_from_upstream patches + spdx-license-identifier: CC0-1.0 or Apache-2.0 +parameter-sets: +- name: ML-DSA-44-ipd + alias: ML-DSA-44 + claimed-nist-level: 2 + claimed-security: EUF-CMA + length-public-key: 1312 + length-secret-key: 2560 + length-signature: 2420 + implementations-switch-on-runtime-cpu-features: true + implementations: + - upstream: primary-upstream + upstream-id: ref + supported-platforms: all + common-crypto: + - SHA3: liboqs + no-secret-dependent-branching-claimed: true + no-secret-dependent-branching-checked-by-valgrind: true + large-stack-usage: false + - upstream: primary-upstream + upstream-id: avx2 + supported-platforms: + - architecture: x86_64 + operating_systems: + - Darwin + - Linux + required_flags: + - avx2 + - popcnt + common-crypto: + - SHA3: liboqs + no-secret-dependent-branching-claimed: true + no-secret-dependent-branching-checked-by-valgrind: true + large-stack-usage: false +- name: ML-DSA-65-ipd + alias: ML-DSA-65 + claimed-nist-level: 3 + claimed-security: EUF-CMA + length-public-key: 1952 + length-secret-key: 4032 + length-signature: 3309 + implementations-switch-on-runtime-cpu-features: true + implementations: + - upstream: primary-upstream + upstream-id: ref + 
supported-platforms: all + common-crypto: + - SHA3: liboqs + no-secret-dependent-branching-claimed: true + no-secret-dependent-branching-checked-by-valgrind: true + large-stack-usage: false + - upstream: primary-upstream + upstream-id: avx2 + supported-platforms: + - architecture: x86_64 + operating_systems: + - Darwin + - Linux + required_flags: + - avx2 + - popcnt + common-crypto: + - SHA3: liboqs + no-secret-dependent-branching-claimed: true + no-secret-dependent-branching-checked-by-valgrind: true + large-stack-usage: false +- name: ML-DSA-87-ipd + alias: ML-DSA-87 + claimed-nist-level: 5 + claimed-security: EUF-CMA + length-public-key: 2592 + length-secret-key: 4896 + length-signature: 4627 + implementations-switch-on-runtime-cpu-features: true + implementations: + - upstream: primary-upstream + upstream-id: ref + supported-platforms: all + common-crypto: + - SHA3: liboqs + no-secret-dependent-branching-claimed: true + no-secret-dependent-branching-checked-by-valgrind: true + large-stack-usage: false + - upstream: primary-upstream + upstream-id: avx2 + supported-platforms: + - architecture: x86_64 + operating_systems: + - Darwin + - Linux + required_flags: + - avx2 + - popcnt + common-crypto: + - SHA3: liboqs + no-secret-dependent-branching-claimed: true + no-secret-dependent-branching-checked-by-valgrind: true + large-stack-usage: false diff --git a/docs/algorithms/sig/sphincs.md b/docs/algorithms/sig/sphincs.md index 3ce2b0ae96..a1660e483d 100644 --- a/docs/algorithms/sig/sphincs.md +++ b/docs/algorithms/sig/sphincs.md @@ -17,20 +17,20 @@ ## Parameter set summary -| Parameter set | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Signature size (bytes) | -|:--------------------------:|:-----------------|---------------------:|--------------------------:|--------------------------:|-------------------------:| -| SPHINCS+-SHA2-128f-simple | EUF-CMA | 1 | 32 | 64 | 17088 | -| SPHINCS+-SHA2-128s-simple | EUF-CMA | 1 | 32 
| 64 | 7856 | -| SPHINCS+-SHA2-192f-simple | EUF-CMA | 3 | 48 | 96 | 35664 | -| SPHINCS+-SHA2-192s-simple | EUF-CMA | 3 | 48 | 96 | 16224 | -| SPHINCS+-SHA2-256f-simple | EUF-CMA | 5 | 64 | 128 | 49856 | -| SPHINCS+-SHA2-256s-simple | EUF-CMA | 5 | 64 | 128 | 29792 | -| SPHINCS+-SHAKE-128f-simple | EUF-CMA | 1 | 32 | 64 | 17088 | -| SPHINCS+-SHAKE-128s-simple | EUF-CMA | 1 | 32 | 64 | 7856 | -| SPHINCS+-SHAKE-192f-simple | EUF-CMA | 3 | 48 | 96 | 35664 | -| SPHINCS+-SHAKE-192s-simple | EUF-CMA | 3 | 48 | 96 | 16224 | -| SPHINCS+-SHAKE-256f-simple | EUF-CMA | 5 | 64 | 128 | 49856 | -| SPHINCS+-SHAKE-256s-simple | EUF-CMA | 5 | 64 | 128 | 29792 | +| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Signature size (bytes) | +|:--------------------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|-------------------------:| +| SPHINCS+-SHA2-128f-simple | NA | EUF-CMA | 1 | 32 | 64 | 17088 | +| SPHINCS+-SHA2-128s-simple | NA | EUF-CMA | 1 | 32 | 64 | 7856 | +| SPHINCS+-SHA2-192f-simple | NA | EUF-CMA | 3 | 48 | 96 | 35664 | +| SPHINCS+-SHA2-192s-simple | NA | EUF-CMA | 3 | 48 | 96 | 16224 | +| SPHINCS+-SHA2-256f-simple | NA | EUF-CMA | 5 | 64 | 128 | 49856 | +| SPHINCS+-SHA2-256s-simple | NA | EUF-CMA | 5 | 64 | 128 | 29792 | +| SPHINCS+-SHAKE-128f-simple | NA | EUF-CMA | 1 | 32 | 64 | 17088 | +| SPHINCS+-SHAKE-128s-simple | NA | EUF-CMA | 1 | 32 | 64 | 7856 | +| SPHINCS+-SHAKE-192f-simple | NA | EUF-CMA | 3 | 48 | 96 | 35664 | +| SPHINCS+-SHAKE-192s-simple | NA | EUF-CMA | 3 | 48 | 96 | 16224 | +| SPHINCS+-SHAKE-256f-simple | NA | EUF-CMA | 5 | 64 | 128 | 49856 | +| SPHINCS+-SHAKE-256s-simple | NA | EUF-CMA | 5 | 64 | 128 | 29792 | ## SPHINCS+-SHA2-128f-simple implementation characteristics diff --git a/docs/cbom.json b/docs/cbom.json index 87a7be3227..02d2d59ca4 100644 --- a/docs/cbom.json +++ b/docs/cbom.json @@ 
-1,23 +1,23 @@ { "bomFormat": "CBOM", "specVersion": "1.4-cbom-1.0", - "serialNumber": "urn:uuid:043e6cd4-f2a6-4828-ae97-7cbdbd372414", + "serialNumber": "urn:uuid:c25dad99-ad00-48b6-aa9e-25d4f7c3c8c5", "version": 1, "metadata": { - "timestamp": "2023-10-19T08:58:49.361520", + "timestamp": "2023-12-13T17:05:36.137517", "component": { "type": "library", - "bom-ref": "pkg:github/open-quantum-safe/liboqs@4846f81a98232e6c90f08578e8f122146550be8d", + "bom-ref": "pkg:github/open-quantum-safe/liboqs@5f83324a6c464448b70b1e57b3cd161b6832e0e0", "name": "liboqs", - "version": "4846f81a98232e6c90f08578e8f122146550be8d" + "version": "5f83324a6c464448b70b1e57b3cd161b6832e0e0" } }, "components": [ { "type": "library", - "bom-ref": "pkg:github/open-quantum-safe/liboqs@4846f81a98232e6c90f08578e8f122146550be8d", + "bom-ref": "pkg:github/open-quantum-safe/liboqs@5f83324a6c464448b70b1e57b3cd161b6832e0e0", "name": "liboqs", - "version": "4846f81a98232e6c90f08578e8f122146550be8d" + "version": "5f83324a6c464448b70b1e57b3cd161b6832e0e0" }, { "type": "crypto-asset", @@ -959,6 +959,126 @@ "nistQuantumSecurityLevel": 5 } }, + { + "type": "crypto-asset", + "bom-ref": "alg:ML-KEM-512-ipd:generic", + "name": "ML-KEM", + "cryptoProperties": { + "assetType": "algorithm", + "algorithmProperties": { + "variant": "ML-KEM-512-ipd", + "primitive": "kem", + "implementationLevel": "softwarePlainRam", + "cryptoFunctions": [ + "keygen", + "encapsulate", + "decapsulate" + ], + "implementationPlatform": "generic" + }, + "nistQuantumSecurityLevel": 1 + } + }, + { + "type": "crypto-asset", + "bom-ref": "alg:ML-KEM-512-ipd:x86_64", + "name": "ML-KEM", + "cryptoProperties": { + "assetType": "algorithm", + "algorithmProperties": { + "variant": "ML-KEM-512-ipd", + "primitive": "kem", + "implementationLevel": "softwarePlainRam", + "cryptoFunctions": [ + "keygen", + "encapsulate", + "decapsulate" + ], + "implementationPlatform": "x86_64" + }, + "nistQuantumSecurityLevel": 1 + } + }, + { + "type": "crypto-asset", + 
"bom-ref": "alg:ML-KEM-768-ipd:generic", + "name": "ML-KEM", + "cryptoProperties": { + "assetType": "algorithm", + "algorithmProperties": { + "variant": "ML-KEM-768-ipd", + "primitive": "kem", + "implementationLevel": "softwarePlainRam", + "cryptoFunctions": [ + "keygen", + "encapsulate", + "decapsulate" + ], + "implementationPlatform": "generic" + }, + "nistQuantumSecurityLevel": 3 + } + }, + { + "type": "crypto-asset", + "bom-ref": "alg:ML-KEM-768-ipd:x86_64", + "name": "ML-KEM", + "cryptoProperties": { + "assetType": "algorithm", + "algorithmProperties": { + "variant": "ML-KEM-768-ipd", + "primitive": "kem", + "implementationLevel": "softwarePlainRam", + "cryptoFunctions": [ + "keygen", + "encapsulate", + "decapsulate" + ], + "implementationPlatform": "x86_64" + }, + "nistQuantumSecurityLevel": 3 + } + }, + { + "type": "crypto-asset", + "bom-ref": "alg:ML-KEM-1024-ipd:generic", + "name": "ML-KEM", + "cryptoProperties": { + "assetType": "algorithm", + "algorithmProperties": { + "variant": "ML-KEM-1024-ipd", + "primitive": "kem", + "implementationLevel": "softwarePlainRam", + "cryptoFunctions": [ + "keygen", + "encapsulate", + "decapsulate" + ], + "implementationPlatform": "generic" + }, + "nistQuantumSecurityLevel": 5 + } + }, + { + "type": "crypto-asset", + "bom-ref": "alg:ML-KEM-1024-ipd:x86_64", + "name": "ML-KEM", + "cryptoProperties": { + "assetType": "algorithm", + "algorithmProperties": { + "variant": "ML-KEM-1024-ipd", + "primitive": "kem", + "implementationLevel": "softwarePlainRam", + "cryptoFunctions": [ + "keygen", + "encapsulate", + "decapsulate" + ], + "implementationPlatform": "x86_64" + }, + "nistQuantumSecurityLevel": 5 + } + }, { "type": "crypto-asset", "bom-ref": "alg:sntrup761:generic", @@ -1299,6 +1419,126 @@ "nistQuantumSecurityLevel": 5 } }, + { + "type": "crypto-asset", + "bom-ref": "alg:ML-DSA-44-ipd:generic", + "name": "ML-DSA", + "cryptoProperties": { + "assetType": "algorithm", + "algorithmProperties": { + "variant": "ML-DSA-44-ipd", + 
"primitive": "signature", + "implementationLevel": "softwarePlainRam", + "cryptoFunctions": [ + "keygen", + "sign", + "verify" + ], + "implementationPlatform": "generic" + }, + "nistQuantumSecurityLevel": 2 + } + }, + { + "type": "crypto-asset", + "bom-ref": "alg:ML-DSA-44-ipd:x86_64", + "name": "ML-DSA", + "cryptoProperties": { + "assetType": "algorithm", + "algorithmProperties": { + "variant": "ML-DSA-44-ipd", + "primitive": "signature", + "implementationLevel": "softwarePlainRam", + "cryptoFunctions": [ + "keygen", + "sign", + "verify" + ], + "implementationPlatform": "x86_64" + }, + "nistQuantumSecurityLevel": 2 + } + }, + { + "type": "crypto-asset", + "bom-ref": "alg:ML-DSA-65-ipd:generic", + "name": "ML-DSA", + "cryptoProperties": { + "assetType": "algorithm", + "algorithmProperties": { + "variant": "ML-DSA-65-ipd", + "primitive": "signature", + "implementationLevel": "softwarePlainRam", + "cryptoFunctions": [ + "keygen", + "sign", + "verify" + ], + "implementationPlatform": "generic" + }, + "nistQuantumSecurityLevel": 3 + } + }, + { + "type": "crypto-asset", + "bom-ref": "alg:ML-DSA-65-ipd:x86_64", + "name": "ML-DSA", + "cryptoProperties": { + "assetType": "algorithm", + "algorithmProperties": { + "variant": "ML-DSA-65-ipd", + "primitive": "signature", + "implementationLevel": "softwarePlainRam", + "cryptoFunctions": [ + "keygen", + "sign", + "verify" + ], + "implementationPlatform": "x86_64" + }, + "nistQuantumSecurityLevel": 3 + } + }, + { + "type": "crypto-asset", + "bom-ref": "alg:ML-DSA-87-ipd:generic", + "name": "ML-DSA", + "cryptoProperties": { + "assetType": "algorithm", + "algorithmProperties": { + "variant": "ML-DSA-87-ipd", + "primitive": "signature", + "implementationLevel": "softwarePlainRam", + "cryptoFunctions": [ + "keygen", + "sign", + "verify" + ], + "implementationPlatform": "generic" + }, + "nistQuantumSecurityLevel": 5 + } + }, + { + "type": "crypto-asset", + "bom-ref": "alg:ML-DSA-87-ipd:x86_64", + "name": "ML-DSA", + 
"cryptoProperties": { + "assetType": "algorithm", + "algorithmProperties": { + "variant": "ML-DSA-87-ipd", + "primitive": "signature", + "implementationLevel": "softwarePlainRam", + "cryptoFunctions": [ + "keygen", + "sign", + "verify" + ], + "implementationPlatform": "x86_64" + }, + "nistQuantumSecurityLevel": 5 + } + }, { "type": "crypto-asset", "bom-ref": "alg:SPHINCS+-SHA2-128f-simple:generic", @@ -1808,7 +2048,7 @@ ], "dependencies": [ { - "ref": "pkg:github/open-quantum-safe/liboqs@4846f81a98232e6c90f08578e8f122146550be8d", + "ref": "pkg:github/open-quantum-safe/liboqs@5f83324a6c464448b70b1e57b3cd161b6832e0e0", "dependsOn": [ "alg:BIKE-L1:x86_64", "alg:BIKE-L3:x86_64", @@ -1857,6 +2097,12 @@ "alg:Kyber1024:generic", "alg:Kyber1024:x86_64", "alg:Kyber1024:armv8-a", + "alg:ML-KEM-512-ipd:generic", + "alg:ML-KEM-512-ipd:x86_64", + "alg:ML-KEM-768-ipd:generic", + "alg:ML-KEM-768-ipd:x86_64", + "alg:ML-KEM-1024-ipd:generic", + "alg:ML-KEM-1024-ipd:x86_64", "alg:sntrup761:generic", "alg:sntrup761:x86_64", "alg:Dilithium2:generic", @@ -1874,6 +2120,12 @@ "alg:Falcon-1024:generic", "alg:Falcon-1024:x86_64", "alg:Falcon-1024:armv8-a", + "alg:ML-DSA-44-ipd:generic", + "alg:ML-DSA-44-ipd:x86_64", + "alg:ML-DSA-65-ipd:generic", + "alg:ML-DSA-65-ipd:x86_64", + "alg:ML-DSA-87-ipd:generic", + "alg:ML-DSA-87-ipd:x86_64", "alg:SPHINCS+-SHA2-128f-simple:generic", "alg:SPHINCS+-SHA2-128f-simple:x86_64", "alg:SPHINCS+-SHA2-128s-simple:generic", @@ -2262,6 +2514,48 @@ ], "dependencyType": "uses" }, + { + "ref": "alg:ML-KEM-512-ipd:generic", + "dependsOn": [ + "alg:sha3" + ], + "dependencyType": "uses" + }, + { + "ref": "alg:ML-KEM-512-ipd:x86_64", + "dependsOn": [ + "alg:sha3" + ], + "dependencyType": "uses" + }, + { + "ref": "alg:ML-KEM-768-ipd:generic", + "dependsOn": [ + "alg:sha3" + ], + "dependencyType": "uses" + }, + { + "ref": "alg:ML-KEM-768-ipd:x86_64", + "dependsOn": [ + "alg:sha3" + ], + "dependencyType": "uses" + }, + { + "ref": "alg:ML-KEM-1024-ipd:generic", + 
"dependsOn": [ + "alg:sha3" + ], + "dependencyType": "uses" + }, + { + "ref": "alg:ML-KEM-1024-ipd:x86_64", + "dependsOn": [ + "alg:sha3" + ], + "dependencyType": "uses" + }, { "ref": "alg:sntrup761:generic", "dependsOn": [ @@ -2381,6 +2675,48 @@ ], "dependencyType": "uses" }, + { + "ref": "alg:ML-DSA-44-ipd:generic", + "dependsOn": [ + "alg:sha3" + ], + "dependencyType": "uses" + }, + { + "ref": "alg:ML-DSA-44-ipd:x86_64", + "dependsOn": [ + "alg:sha3" + ], + "dependencyType": "uses" + }, + { + "ref": "alg:ML-DSA-65-ipd:generic", + "dependsOn": [ + "alg:sha3" + ], + "dependencyType": "uses" + }, + { + "ref": "alg:ML-DSA-65-ipd:x86_64", + "dependsOn": [ + "alg:sha3" + ], + "dependencyType": "uses" + }, + { + "ref": "alg:ML-DSA-87-ipd:generic", + "dependsOn": [ + "alg:sha3" + ], + "dependencyType": "uses" + }, + { + "ref": "alg:ML-DSA-87-ipd:x86_64", + "dependsOn": [ + "alg:sha3" + ], + "dependencyType": "uses" + }, { "ref": "alg:SPHINCS+-SHAKE-128f-simple:generic", "dependsOn": [ diff --git a/scripts/copy_from_upstream/.CMake/alg_support.cmake/add_enable_by_alg.fragment b/scripts/copy_from_upstream/.CMake/alg_support.cmake/add_enable_by_alg.fragment index b600b1e51f..cb3b7d15dd 100644 --- a/scripts/copy_from_upstream/.CMake/alg_support.cmake/add_enable_by_alg.fragment +++ b/scripts/copy_from_upstream/.CMake/alg_support.cmake/add_enable_by_alg.fragment @@ -2,6 +2,9 @@ option(OQS_ENABLE_KEM_{{ family['name']|upper }} "Enable {{ family['name'] }} algorithm family" ON) {%- for scheme in family['schemes'] %} cmake_dependent_option(OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['scheme'] }} "" ON "OQS_ENABLE_KEM_{{ family['name']|upper }}" OFF) +{%- if 'alias_scheme' in scheme %} +cmake_dependent_option(OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['alias_scheme'] }} "" ON "OQS_ENABLE_KEM_{{ family['name']|upper }}" OFF) +{%- endif -%} {%- for impl in scheme['metadata']['implementations'] if impl['name'] != family['default_implementation'] and impl['supported_platforms'] 
-%} {%- for platform in impl['supported_platforms'] if platform['architecture'] == 'x86_64' %} {% if platform['operating_systems'] %}if(CMAKE_SYSTEM_NAME MATCHES "{{ platform['operating_systems']|join('|') }}") @@ -37,6 +40,9 @@ endif() option(OQS_ENABLE_SIG_{{ family['name']|upper }} "Enable {{ family['name'] }} algorithm family" ON) {%- for scheme in family['schemes'] %} cmake_dependent_option(OQS_ENABLE_SIG_{{ family['name'] }}_{{ scheme['scheme'] }} "" ON "OQS_ENABLE_SIG_{{ family['name']|upper }}" OFF) +{%- if 'alias_scheme' in scheme %} +cmake_dependent_option(OQS_ENABLE_SIG_{{ family['name'] }}_{{ scheme['alias_scheme'] }} "" ON "OQS_ENABLE_SIG_{{ family['name']|upper }}" OFF) +{%- endif -%} {%- for impl in scheme['metadata']['implementations'] if impl['name'] != family['default_implementation'] and impl['supported_platforms'] -%} {%- for platform in impl['supported_platforms'] if platform['architecture'] == 'x86_64' %} {% if platform['operating_systems'] %}if(CMAKE_SYSTEM_NAME MATCHES "{{ platform['operating_systems']|join('|') }}") diff --git a/scripts/copy_from_upstream/.CMake/alg_support.cmake/list_standardized_algs.fragment b/scripts/copy_from_upstream/.CMake/alg_support.cmake/list_standardized_algs.fragment index fc5a4fecbc..d0bf2a61b2 100644 --- a/scripts/copy_from_upstream/.CMake/alg_support.cmake/list_standardized_algs.fragment +++ b/scripts/copy_from_upstream/.CMake/alg_support.cmake/list_standardized_algs.fragment @@ -1,6 +1,6 @@ filter_algs(" -{%- for family in instructions['kems'] if family['name'] == 'kyber' -%} +{%- for family in instructions['kems'] if family['name'] in ['ml_kem'] -%} {%- for scheme in family['schemes'] -%} KEM_{{ family['name'] }}_{{ scheme['scheme'] }}; {%- endfor -%} @@ -8,7 +8,7 @@ {%- for family in instructions['sigs'] -%} {%- set outer_loop = loop -%} {%- for scheme in family['schemes'] -%} - SIG_{{ family['name'] }}_{{ scheme['scheme'] }}{%- if not (outer_loop.last and loop.last) -%};{%- endif -%} + SIG_{{ 
family['name'] }}_{{ scheme['scheme'] if family['name'] in ['ml_dsa', 'falcon', 'sphincs'] }}{%- if not (outer_loop.last and loop.last) -%};{%- endif -%} {%- endfor -%} {%- endfor -%} ") diff --git a/scripts/copy_from_upstream/copy_from_upstream.py b/scripts/copy_from_upstream/copy_from_upstream.py index 1f3cdc4bed..32d897cdf8 100755 --- a/scripts/copy_from_upstream/copy_from_upstream.py +++ b/scripts/copy_from_upstream/copy_from_upstream.py @@ -550,6 +550,8 @@ def process_families(instructions, basedir, with_kat, with_generator): print("Adding new KAT for %s" % (scheme['pretty_name_full'])) pass kats['kem'][scheme['pretty_name_full']]['single'] = scheme['metadata']['nistkat-sha256'] + if 'alias_pretty_name_full' in scheme: + kats['kem'][scheme['alias_pretty_name_full']]['single'] = scheme['metadata']['nistkat-sha256'] else: try: if kats['sig'][scheme['pretty_name_full']]['single'] != scheme['metadata']['nistkat-sha256']: @@ -558,6 +560,8 @@ def process_families(instructions, basedir, with_kat, with_generator): print("Adding new KAT for %s" % (scheme['pretty_name_full'])) pass kats['sig'][scheme['pretty_name_full']]['single'] = scheme['metadata']['nistkat-sha256'] + if 'alias_pretty_name_full' in scheme: + kats['sig'][scheme['alias_pretty_name_full']]['single'] = scheme['metadata']['nistkat-sha256'] if with_generator: generator( diff --git a/scripts/copy_from_upstream/copy_from_upstream.yml b/scripts/copy_from_upstream/copy_from_upstream.yml index 8c1313ac1e..f55b8798ba 100644 --- a/scripts/copy_from_upstream/copy_from_upstream.yml +++ b/scripts/copy_from_upstream/copy_from_upstream.yml @@ -29,6 +29,14 @@ upstreams: kem_meta_path: '{pretty_name_full}_META.yml' kem_scheme_path: '.' 
patches: [pqcrystals-kyber-yml.patch, pqcrystals-kyber-ref-shake-aes.patch, pqcrystals-kyber-avx2-shake-aes.patch] + - + name: pqcrystals-kyber-standard + git_url: https://github.com/pq-crystals/kyber.git + git_branch: standard + git_commit: 11d00ff1f20cfca1f72d819e5a45165c1e0a2816 + kem_meta_path: '{pretty_name_full}_META.yml' + kem_scheme_path: '.' + patches: [pqcrystals-ml_kem_ipd.patch] - name: pqcrystals-dilithium git_url: https://github.com/pq-crystals/dilithium.git @@ -37,6 +45,14 @@ upstreams: sig_meta_path: '{pretty_name_full}_META.yml' sig_scheme_path: '.' patches: [pqcrystals-dilithium-yml.patch, pqcrystals-dilithium-ref-shake-aes.patch, pqcrystals-dilithium-avx2-shake-aes.patch] + - + name: pqcrystals-dilithium-standard + git_url: https://github.com/pq-crystals/dilithium.git + git_branch: standard + git_commit: e7bed6258b9a3703ce78d4ec38021c86382ce31c + sig_meta_path: '{pretty_name_full}_META.yml' + sig_scheme_path: '.' + patches: [pqcrystals-ml_dsa_ipd.patch] kems: - name: classic_mceliece @@ -121,6 +137,29 @@ kems: scheme: "1024" pqclean_scheme: kyber1024 pretty_name_full: Kyber1024 + - + name: ml_kem + default_implementation: ref + upstream_location: pqcrystals-kyber-standard + schemes: + - + scheme: "512_ipd" + pqclean_scheme: ml-kem-512-ipd + pretty_name_full: ML-KEM-512-ipd + alias_scheme: "512" + alias_pretty_name_full: ML-KEM-512 + - + scheme: "768_ipd" + pqclean_scheme: ml-kem-768-ipd + pretty_name_full: ML-KEM-768-ipd + alias_scheme: "768" + alias_pretty_name_full: ML-KEM-768 + - + scheme: "1024_ipd" + pqclean_scheme: ml-kem-1024-ipd + pretty_name_full: ML-KEM-1024-ipd + alias_scheme: "1024" + alias_pretty_name_full: ML-KEM-1024 sigs: - name: dilithium @@ -146,6 +185,32 @@ sigs: pqclean_scheme: dilithium5 pretty_name_full: Dilithium5 signed_msg_order: sig_then_msg + - + name: ml_dsa + default_implementation: ref + upstream_location: pqcrystals-dilithium-standard + schemes: + - + scheme: "44_ipd" + pqclean_scheme: ml-dsa-44-ipd + 
pretty_name_full: ML-DSA-44-ipd + signed_msg_order: sig_then_msg + alias_scheme: "44" + alias_pretty_name_full: ML-DSA-44 + - + scheme: "65_ipd" + pqclean_scheme: ml-dsa-65-ipd + pretty_name_full: ML-DSA-65-ipd + signed_msg_order: sig_then_msg + alias_scheme: "65" + alias_pretty_name_full: ML-DSA-65 + - + scheme: "87_ipd" + pqclean_scheme: ml-dsa-87-ipd + pretty_name_full: ML-DSA-87-ipd + signed_msg_order: sig_then_msg + alias_scheme: "87" + alias_pretty_name_full: ML-DSA-87 - name: falcon default_implementation: clean diff --git a/scripts/copy_from_upstream/patches/pqcrystals-ml_dsa_ipd.patch b/scripts/copy_from_upstream/patches/pqcrystals-ml_dsa_ipd.patch new file mode 100644 index 0000000000..58e1cf34de --- /dev/null +++ b/scripts/copy_from_upstream/patches/pqcrystals-ml_dsa_ipd.patch @@ -0,0 +1,842 @@ +diff --git a/Dilithium2_META.yml b/ML-DSA-44-ipd_META.yml +index 0e2e6fc..d99edb5 100644 +--- a/Dilithium2_META.yml ++++ b/ML-DSA-44-ipd_META.yml +@@ -1,11 +1,11 @@ +-name: Dilithium2 ++name: ML-DSA-44-ipd + type: signature + claimed-nist-level: 2 + length-public-key: 1312 +-length-secret-key: 2528 ++length-secret-key: 2560 + length-signature: 2420 +-nistkat-sha256: 26ae9c1224171e957dbe38672942d31edb7dffbe700825e0cb52128cdb45280a +-testvectors-sha256: b56155479f5643a3cb3d73260ba2b1fd7e772a49b6f4cebcf742cd860fbf6879 ++nistkat-sha256: e6f3ec4dc0b02dd3bcbbc6b105190e1890ca0bb3f802e2b571f0d70f3993a2e1 ++testvectors-sha256: aff4dbcb0c5ad52c840036907661efd2cafd6c1cba95ed052184f45adf30f365 + principal-submitters: + - Vadim Lyubashevsky + auxiliary-submitters: +@@ -18,22 +18,20 @@ auxiliary-submitters: + - Damien Stehlé + implementations: + - name: ref +- version: https://github.com/pq-crystals/dilithium/commit/d9c885d3f2e11c05529eeeb7d70d808c972b8409 ++ version: https://github.com/pq-crystals/dilithium/tree/standard + folder_name: ref +- compile_opts: -DDILITHIUM_MODE=2 -DDILITHIUM_RANDOMIZED_SIGNING +- signature_keypair: pqcrystals_dilithium2_ref_keypair +- 
signature_signature: pqcrystals_dilithium2_ref_signature +- signature_verify: pqcrystals_dilithium2_ref_verify +- sources: ../LICENSE api.h config.h params.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.c ntt.h reduce.c reduce.h rounding.c rounding.h symmetric.h fips202.h symmetric-shake.c +- common_dep: common_ref ++ compile_opts: -DDILITHIUM_MODE=2 ++ signature_keypair: pqcrystals_ml_dsa_44_ipd_ref_keypair ++ signature_signature: pqcrystals_ml_dsa_44_ipd_ref_signature ++ signature_verify: pqcrystals_ml_dsa_44_ipd_ref_verify ++ sources: ../LICENSE api.h config.h params.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.c ntt.h reduce.c reduce.h rounding.c rounding.h symmetric.h symmetric-shake.c + - name: avx2 +- version: https://github.com/pq-crystals/dilithium/commit/d9c885d3f2e11c05529eeeb7d70d808c972b8409 +- compile_opts: -DDILITHIUM_MODE=2 -DDILITHIUM_RANDOMIZED_SIGNING +- signature_keypair: pqcrystals_dilithium2_avx2_keypair +- signature_signature: pqcrystals_dilithium2_avx2_signature +- signature_verify: pqcrystals_dilithium2_avx2_verify +- sources: ../LICENSE api.h config.h params.h align.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.S invntt.S pointwise.S ntt.h shuffle.S shuffle.inc consts.c consts.h rejsample.c rejsample.h rounding.c rounding.h symmetric.h fips202.h fips202x4.h symmetric-shake.c +- common_dep: common_avx2 ++ version: https://github.com/pq-crystals/dilithium/tree/standard ++ compile_opts: -DDILITHIUM_MODE=2 ++ signature_keypair: pqcrystals_ml_dsa_44_ipd_avx2_keypair ++ signature_signature: pqcrystals_ml_dsa_44_ipd_avx2_signature ++ signature_verify: pqcrystals_ml_dsa_44_ipd_avx2_verify ++ sources: ../LICENSE api.h config.h params.h align.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.S invntt.S pointwise.S ntt.h shuffle.S shuffle.inc consts.c consts.h rejsample.c rejsample.h rounding.c rounding.h symmetric.h symmetric-shake.c + 
supported_platforms: + - architecture: x86_64 + operating_systems: +diff --git a/Dilithium3_META.yml b/ML-DSA-65-ipd_META.yml +index d1bca64..72a43e7 100644 +--- a/Dilithium3_META.yml ++++ b/ML-DSA-65-ipd_META.yml +@@ -1,11 +1,11 @@ +-name: Dilithium3 ++name: ML-DSA-65-ipd + type: signature + claimed-nist-level: 3 + length-public-key: 1952 +-length-secret-key: 4000 +-length-signature: 3293 +-nistkat-sha256: eea584803c3d6991a4acbf9f117147bbdd246faf822cfb1a17effe20b2052ba9 +-testvectors-sha256: a237032c7840a0d2f922951f806c2199f8f86b8a8947f6f6f1b856c925222958 ++length-secret-key: 4032 ++length-signature: 3309 ++nistkat-sha256: 7225c4531086d88c9b7fa18101b0f78dda2d38df88812c65ddc1ae94fe3c01a7 ++testvectors-sha256: e0a98c0a29137dcbeb12104ccaa6a0555a9bdb4dcfbc2b0fc9a959dd8b6c8699 + principal-submitters: + - Vadim Lyubashevsky + auxiliary-submitters: +@@ -18,22 +18,20 @@ auxiliary-submitters: + - Damien Stehlé + implementations: + - name: ref +- version: https://github.com/pq-crystals/dilithium/commit/d9c885d3f2e11c05529eeeb7d70d808c972b8409 ++ version: https://github.com/pq-crystals/dilithium/tree/standard + folder_name: ref +- compile_opts: -DDILITHIUM_MODE=3 -DDILITHIUM_RANDOMIZED_SIGNING +- signature_keypair: pqcrystals_dilithium3_ref_keypair +- signature_signature: pqcrystals_dilithium3_ref_signature +- signature_verify: pqcrystals_dilithium3_ref_verify +- sources: ../LICENSE api.h config.h params.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.c ntt.h reduce.c reduce.h rounding.c rounding.h symmetric.h fips202.h symmetric-shake.c +- common_dep: common_ref ++ compile_opts: -DDILITHIUM_MODE=3 ++ signature_keypair: pqcrystals_ml_dsa_65_ipd_ref_keypair ++ signature_signature: pqcrystals_ml_dsa_65_ipd_ref_signature ++ signature_verify: pqcrystals_ml_dsa_65_ipd_ref_verify ++ sources: ../LICENSE api.h config.h params.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.c ntt.h reduce.c reduce.h rounding.c rounding.h symmetric.h 
symmetric-shake.c + - name: avx2 +- version: https://github.com/pq-crystals/dilithium/commit/d9c885d3f2e11c05529eeeb7d70d808c972b8409 +- compile_opts: -DDILITHIUM_MODE=3 -DDILITHIUM_RANDOMIZED_SIGNING +- signature_keypair: pqcrystals_dilithium3_avx2_keypair +- signature_signature: pqcrystals_dilithium3_avx2_signature +- signature_verify: pqcrystals_dilithium3_avx2_verify +- sources: ../LICENSE api.h config.h params.h align.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.S invntt.S pointwise.S ntt.h shuffle.S shuffle.inc consts.c consts.h rejsample.c rejsample.h rounding.c rounding.h symmetric.h fips202.h fips202x4.h symmetric-shake.c +- common_dep: common_avx2 ++ version: https://github.com/pq-crystals/dilithium/tree/standard ++ compile_opts: -DDILITHIUM_MODE=3 ++ signature_keypair: pqcrystals_ml_dsa_65_ipd_avx2_keypair ++ signature_signature: pqcrystals_ml_dsa_65_ipd_avx2_signature ++ signature_verify: pqcrystals_ml_dsa_65_ipd_avx2_verify ++ sources: ../LICENSE api.h config.h params.h align.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.S invntt.S pointwise.S ntt.h shuffle.S shuffle.inc consts.c consts.h rejsample.c rejsample.h rounding.c rounding.h symmetric.h symmetric-shake.c + supported_platforms: + - architecture: x86_64 + operating_systems: +diff --git a/Dilithium5_META.yml b/ML-DSA-87-ipd_META.yml +index a4dbdbf..bf68590 100644 +--- a/Dilithium5_META.yml ++++ b/ML-DSA-87-ipd_META.yml +@@ -1,11 +1,11 @@ +-name: Dilithium5 ++name: ML-DSA-87-ipd + type: signature + claimed-nist-level: 5 + length-public-key: 2592 +-length-secret-key: 4864 +-length-signature: 4595 +-nistkat-sha256: 3f6e58603a38be57cf08d79b01fcfd0ccc1129a09e14a6122c6fe22c906ddc3b +-testvectors-sha256: ddeb95f4a743562010bce527ea7c99fed4ce1234bafd5ed6f44eea0f065ba49c ++length-secret-key: 4896 ++length-signature: 4627 ++nistkat-sha256: f5cb5ed44a261a4118f9cfd5d55b4210939cb5b8531968a10c37060551a8927f ++testvectors-sha256: 
9a1985c10b13efefee50067edf3432ed8ab48a62965743feb45a317485980883 + principal-submitters: + - Vadim Lyubashevsky + auxiliary-submitters: +@@ -18,22 +18,20 @@ auxiliary-submitters: + - Damien Stehlé + implementations: + - name: ref +- version: https://github.com/pq-crystals/dilithium/commit/d9c885d3f2e11c05529eeeb7d70d808c972b8409 ++ version: https://github.com/pq-crystals/dilithium/tree/standard + folder_name: ref +- compile_opts: -DDILITHIUM_MODE=5 -DDILITHIUM_RANDOMIZED_SIGNING +- signature_keypair: pqcrystals_dilithium5_ref_keypair +- signature_signature: pqcrystals_dilithium5_ref_signature +- signature_verify: pqcrystals_dilithium5_ref_verify +- sources: ../LICENSE api.h config.h params.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.c ntt.h reduce.c reduce.h rounding.c rounding.h symmetric.h fips202.h symmetric-shake.c +- common_dep: common_ref ++ compile_opts: -DDILITHIUM_MODE=5 ++ signature_keypair: pqcrystals_ml_dsa_87_ipd_ref_keypair ++ signature_signature: pqcrystals_ml_dsa_87_ipd_ref_signature ++ signature_verify: pqcrystals_ml_dsa_87_ipd_ref_verify ++ sources: ../LICENSE api.h config.h params.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.c ntt.h reduce.c reduce.h rounding.c rounding.h symmetric.h symmetric-shake.c + - name: avx2 +- version: https://github.com/pq-crystals/dilithium/commit/d9c885d3f2e11c05529eeeb7d70d808c972b8409 +- compile_opts: -DDILITHIUM_MODE=5 -DDILITHIUM_RANDOMIZED_SIGNING +- signature_keypair: pqcrystals_dilithium5_avx2_keypair +- signature_signature: pqcrystals_dilithium5_avx2_signature +- signature_verify: pqcrystals_dilithium5_avx2_verify +- sources: ../LICENSE api.h config.h params.h align.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.S invntt.S pointwise.S ntt.h shuffle.S shuffle.inc consts.c consts.h rejsample.c rejsample.h rounding.c rounding.h symmetric.h fips202.h fips202x4.h symmetric-shake.c +- common_dep: common_avx2 ++ version: 
https://github.com/pq-crystals/dilithium/tree/standard ++ compile_opts: -DDILITHIUM_MODE=5 ++ signature_keypair: pqcrystals_ml_dsa_87_ipd_avx2_keypair ++ signature_signature: pqcrystals_ml_dsa_87_ipd_avx2_signature ++ signature_verify: pqcrystals_ml_dsa_87_ipd_avx2_verify ++ sources: ../LICENSE api.h config.h params.h align.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.S invntt.S pointwise.S ntt.h shuffle.S shuffle.inc consts.c consts.h rejsample.c rejsample.h rounding.c rounding.h symmetric.h symmetric-shake.c + supported_platforms: + - architecture: x86_64 + operating_systems: +diff --git a/README.md b/README.md +index 5a5d48d..d6b337a 100644 +--- a/README.md ++++ b/README.md +@@ -18,9 +18,9 @@ brew install openssl + ``` + Then, run + ```sh +-export CFLAGS="-I/usr/local/opt/openssl@1.1/include" +-export NISTFLAGS="-I/usr/local/opt/openssl@1.1/include" +-export LDFLAGS="-L/usr/local/opt/openssl@1.1/lib" ++export CFLAGS="-I/opt/homebrew/opt/openssl@1.1/include" ++export NISTFLAGS="-I/opt/homebrew/opt/openssl@1.1/include" ++export LDFLAGS="-L/opt/homebrew/opt/openssl@1.1/lib" + ``` + before compilation to add the OpenSSL header and library locations to the respective search paths. + +@@ -60,11 +60,11 @@ Our Dilithium implementations are contained in the [SUPERCOP](https://bench.cr.y + + ## Randomized signing + +-By default our code implements Dilithium's deterministic signing mode. To change this to the randomized signing mode, define the `DILITHIUM_RANDOMIZED_SIGNING` preprocessor macro at compilation by either uncommenting the line ++By default our code implements Dilithium's randomized signing mode. 
To change this to the deterministic signing mode, undefine the `DILITHIUM_RANDOMIZED_SIGNING` preprocessor macro at compilation by commenting the line + ```sh +-//#define DILITHIUM_RANDOMIZED_SIGNING ++#define DILITHIUM_RANDOMIZED_SIGNING + ``` +-in config.h, or adding `-DDILITHIUM_RANDOMIZED_SIGNING` to the compiler flags in the environment variable `CFLAGS`. ++in config.h. + + ## Shared libraries + +diff --git a/avx2/api.h b/avx2/api.h +index 1948a96..55b6376 100644 +--- a/avx2/api.h ++++ b/avx2/api.h +@@ -5,7 +5,7 @@ + #include + + #define pqcrystals_dilithium2_PUBLICKEYBYTES 1312 +-#define pqcrystals_dilithium2_SECRETKEYBYTES 2528 ++#define pqcrystals_dilithium2_SECRETKEYBYTES 2560 + #define pqcrystals_dilithium2_BYTES 2420 + + #define pqcrystals_dilithium2_avx2_PUBLICKEYBYTES pqcrystals_dilithium2_PUBLICKEYBYTES +@@ -32,8 +32,8 @@ int pqcrystals_dilithium2_avx2_open(uint8_t *m, size_t *mlen, + + + #define pqcrystals_dilithium3_PUBLICKEYBYTES 1952 +-#define pqcrystals_dilithium3_SECRETKEYBYTES 4000 +-#define pqcrystals_dilithium3_BYTES 3293 ++#define pqcrystals_dilithium3_SECRETKEYBYTES 4032 ++#define pqcrystals_dilithium3_BYTES 3309 + + #define pqcrystals_dilithium3_avx2_PUBLICKEYBYTES pqcrystals_dilithium3_PUBLICKEYBYTES + #define pqcrystals_dilithium3_avx2_SECRETKEYBYTES pqcrystals_dilithium3_SECRETKEYBYTES +@@ -59,8 +59,8 @@ int pqcrystals_dilithium3_avx2_open(uint8_t *m, size_t *mlen, + + + #define pqcrystals_dilithium5_PUBLICKEYBYTES 2592 +-#define pqcrystals_dilithium5_SECRETKEYBYTES 4864 +-#define pqcrystals_dilithium5_BYTES 4595 ++#define pqcrystals_dilithium5_SECRETKEYBYTES 4896 ++#define pqcrystals_dilithium5_BYTES 4627 + + #define pqcrystals_dilithium5_avx2_PUBLICKEYBYTES pqcrystals_dilithium5_PUBLICKEYBYTES + #define pqcrystals_dilithium5_avx2_SECRETKEYBYTES pqcrystals_dilithium5_SECRETKEYBYTES +diff --git a/avx2/config.h b/avx2/config.h +index ba5caa8..e59f81a 100644 +--- a/avx2/config.h ++++ b/avx2/config.h +@@ -2,7 +2,7 @@ + #define CONFIG_H + + 
//#define DILITHIUM_MODE 2 +-//#define DILITHIUM_RANDOMIZED_SIGNING ++#define DILITHIUM_RANDOMIZED_SIGNING + //#define USE_RDPMC + //#define DBENCH + +@@ -11,17 +11,17 @@ + #endif + + #if DILITHIUM_MODE == 2 +-#define CRYPTO_ALGNAME "Dilithium2" +-#define DILITHIUM_NAMESPACETOP pqcrystals_dilithium2_avx2 +-#define DILITHIUM_NAMESPACE(s) pqcrystals_dilithium2_avx2_##s ++#define CRYPTO_ALGNAME "ML-DSA-44-ipd" ++#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_44_ipd_avx2 ++#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_44_ipd_avx2_##s + #elif DILITHIUM_MODE == 3 +-#define CRYPTO_ALGNAME "Dilithium3" +-#define DILITHIUM_NAMESPACETOP pqcrystals_dilithium3_avx2 +-#define DILITHIUM_NAMESPACE(s) pqcrystals_dilithium3_avx2_##s ++#define CRYPTO_ALGNAME "ML-DSA-65-ipd" ++#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_65_ipd_avx2 ++#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_65_ipd_avx2_##s + #elif DILITHIUM_MODE == 5 +-#define CRYPTO_ALGNAME "Dilithium5" +-#define DILITHIUM_NAMESPACETOP pqcrystals_dilithium5_avx2 +-#define DILITHIUM_NAMESPACE(s) pqcrystals_dilithium5_avx2_##s ++#define CRYPTO_ALGNAME "ML-DSA-87-ipd" ++#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_87_ipd_avx2 ++#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_87_ipd_avx2_##s + #endif + + #endif +diff --git a/avx2/poly.c b/avx2/poly.c +index c1b21c1..25d3682 100644 +--- a/avx2/poly.c ++++ b/avx2/poly.c +@@ -401,6 +401,7 @@ void poly_uniform(poly *a, const uint8_t seed[SEEDBYTES], uint16_t nonce) + stream128_state state; + stream128_init(&state, seed, nonce); + poly_uniform_preinit(a, &state); ++ stream128_release(&state); + } + + void poly_uniform_4x(poly *a0, +@@ -415,7 +416,7 @@ void poly_uniform_4x(poly *a0, + { + unsigned int ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_BUFLEN+8) buf[4]; +- keccakx4_state state; ++ shake128x4incctx state; + __m256i f; + + f = _mm256_loadu_si256((__m256i *)seed); +@@ -433,6 +434,7 @@ void poly_uniform_4x(poly *a0, + buf[3].coeffs[SEEDBYTES+0] = nonce3; + 
buf[3].coeffs[SEEDBYTES+1] = nonce3 >> 8; + ++ shake128x4_inc_init(&state); + shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, SEEDBYTES + 2); + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_NBLOCKS, &state); + +@@ -449,6 +451,7 @@ void poly_uniform_4x(poly *a0, + ctr2 += rej_uniform(a2->coeffs + ctr2, N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_uniform(a3->coeffs + ctr3, N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } ++ shake128x4_inc_ctx_release(&state); + } + + /************************************************* +@@ -530,6 +533,7 @@ void poly_uniform_eta(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce) + stream256_state state; + stream256_init(&state, seed, nonce); + poly_uniform_eta_preinit(a, &state); ++ stream256_release(&state); + } + + void poly_uniform_eta_4x(poly *a0, +@@ -546,7 +550,7 @@ void poly_uniform_eta_4x(poly *a0, + ALIGNED_UINT8(REJ_UNIFORM_ETA_BUFLEN) buf[4]; + + __m256i f; +- keccakx4_state state; ++ shake256x4incctx state; + + f = _mm256_loadu_si256((__m256i *)&seed[0]); + _mm256_store_si256(&buf[0].vec[0],f); +@@ -568,6 +572,7 @@ void poly_uniform_eta_4x(poly *a0, + buf[3].coeffs[64] = nonce3; + buf[3].coeffs[65] = nonce3 >> 8; + ++ shake256x4_inc_init(&state); + shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 66); + shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_ETA_NBLOCKS, &state); + +@@ -584,6 +589,7 @@ void poly_uniform_eta_4x(poly *a0, + ctr2 += rej_eta(a2->coeffs + ctr2, N - ctr2, buf[2].coeffs, SHAKE256_RATE); + ctr3 += rej_eta(a3->coeffs + ctr3, N - ctr3, buf[3].coeffs, SHAKE256_RATE); + } ++ shake256x4_inc_ctx_release(&state); + } + + /************************************************* +@@ -611,6 +617,7 @@ void poly_uniform_gamma1(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce) + stream256_state state; + stream256_init(&state, 
seed, nonce); + poly_uniform_gamma1_preinit(a, &state); ++ stream256_release(&state); + } + + void poly_uniform_gamma1_4x(poly *a0, +@@ -624,7 +631,7 @@ void poly_uniform_gamma1_4x(poly *a0, + uint16_t nonce3) + { + ALIGNED_UINT8(POLY_UNIFORM_GAMMA1_NBLOCKS*STREAM256_BLOCKBYTES+14) buf[4]; +- keccakx4_state state; ++ shake256x4incctx state; + __m256i f; + + f = _mm256_loadu_si256((__m256i *)&seed[0]); +@@ -647,8 +654,10 @@ void poly_uniform_gamma1_4x(poly *a0, + buf[3].coeffs[64] = nonce3; + buf[3].coeffs[65] = nonce3 >> 8; + ++ shake256x4_inc_init(&state); + shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 66); + shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, POLY_UNIFORM_GAMMA1_NBLOCKS, &state); ++ shake256x4_inc_ctx_release(&state); + + polyz_unpack(a0, buf[0].coeffs); + polyz_unpack(a1, buf[1].coeffs); +@@ -670,12 +679,12 @@ void poly_challenge(poly * restrict c, const uint8_t seed[SEEDBYTES]) { + unsigned int i, b, pos; + uint64_t signs; + ALIGNED_UINT8(SHAKE256_RATE) buf; +- keccak_state state; ++ shake256incctx state; + +- shake256_init(&state); +- shake256_absorb(&state, seed, SEEDBYTES); +- shake256_finalize(&state); +- shake256_squeezeblocks(buf.coeffs, 1, &state); ++ shake256_inc_init(&state); ++ shake256_inc_absorb(&state, seed, SEEDBYTES); ++ shake256_inc_finalize(&state); ++ shake256_inc_squeeze(buf.coeffs, SHAKE256_RATE, &state); + + memcpy(&signs, buf.coeffs, 8); + pos = 8; +@@ -695,6 +704,7 @@ void poly_challenge(poly * restrict c, const uint8_t seed[SEEDBYTES]) { + c->coeffs[b] = 1 - 2*(signs & 1); + signs >>= 1; + } ++ shake256_inc_ctx_release(&state); + } + + /************************************************* +diff --git a/avx2/sign.c b/avx2/sign.c +index c8f2398..a39f851 100644 +--- a/avx2/sign.c ++++ b/avx2/sign.c +@@ -161,7 +161,7 @@ int crypto_sign_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t + polyvecl y; + polyveck w0; + } tmpv; +- 
keccak_state state; ++ shake256incctx state; + + rho = seedbuf; + tr = rho + SEEDBYTES; +@@ -172,11 +172,11 @@ int crypto_sign_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t + unpack_sk(rho, tr, key, &t0, &s1, &s2, sk); + + /* Compute CRH(tr, msg) */ +- shake256_init(&state); +- shake256_absorb(&state, tr, TRBYTES); +- shake256_absorb(&state, m, mlen); +- shake256_finalize(&state); +- shake256_squeeze(mu, CRHBYTES, &state); ++ shake256_inc_init(&state); ++ shake256_inc_absorb(&state, tr, TRBYTES); ++ shake256_inc_absorb(&state, m, mlen); ++ shake256_inc_finalize(&state); ++ shake256_inc_squeeze(mu, CRHBYTES, &state); + + #ifdef DILITHIUM_RANDOMIZED_SIGNING + randombytes(rnd, RNDBYTES); +@@ -223,11 +223,11 @@ rej: + polyveck_decompose(&w1, &tmpv.w0, &w1); + polyveck_pack_w1(sig, &w1); + +- shake256_init(&state); +- shake256_absorb(&state, mu, CRHBYTES); +- shake256_absorb(&state, sig, K*POLYW1_PACKEDBYTES); +- shake256_finalize(&state); +- shake256_squeeze(sig, CTILDEBYTES, &state); ++ shake256_inc_ctx_reset(&state); ++ shake256_inc_absorb(&state, mu, CRHBYTES); ++ shake256_inc_absorb(&state, sig, K*POLYW1_PACKEDBYTES); ++ shake256_inc_finalize(&state); ++ shake256_inc_squeeze(sig, CTILDEBYTES, &state); + poly_challenge(&c, sig); + poly_ntt(&c); + +@@ -272,6 +272,7 @@ rej: + hint[OMEGA + i] = pos = pos + n; + } + ++ shake256_inc_ctx_release(&state); + /* Pack z into signature */ + for(i = 0; i < L; i++) + polyz_pack(sig + CTILDEBYTES + i*POLYZ_PACKEDBYTES, &z.vec[i]); +@@ -329,18 +330,19 @@ int crypto_sign_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size + polyvecl *row = rowbuf; + polyvecl z; + poly c, w1, h; +- keccak_state state; ++ shake256incctx state; + + if(siglen != CRYPTO_BYTES) + return -1; + + /* Compute CRH(H(rho, t1), msg) */ + shake256(mu, CRHBYTES, pk, CRYPTO_PUBLICKEYBYTES); +- shake256_init(&state); +- shake256_absorb(&state, mu, CRHBYTES); +- shake256_absorb(&state, m, mlen); +- shake256_finalize(&state); +- 
shake256_squeeze(mu, CRHBYTES, &state); ++ shake256_inc_init(&state); ++ shake256_inc_absorb(&state, mu, CRHBYTES); ++ shake256_inc_absorb(&state, m, mlen); ++ shake256_inc_finalize(&state); ++ shake256_inc_squeeze(mu, CRHBYTES, &state); ++ shake256_inc_ctx_release(&state); + + /* Expand challenge */ + poly_challenge(&c, sig); +@@ -390,11 +392,12 @@ int crypto_sign_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size + if(hint[j]) return -1; + + /* Call random oracle and verify challenge */ +- shake256_init(&state); +- shake256_absorb(&state, mu, CRHBYTES); +- shake256_absorb(&state, buf.coeffs, K*POLYW1_PACKEDBYTES); +- shake256_finalize(&state); +- shake256_squeeze(buf.coeffs, CTILDEBYTES, &state); ++ shake256_inc_init(&state); ++ shake256_inc_absorb(&state, mu, CRHBYTES); ++ shake256_inc_absorb(&state, buf.coeffs, K*POLYW1_PACKEDBYTES); ++ shake256_inc_finalize(&state); ++ shake256_inc_squeeze(buf.coeffs, CTILDEBYTES, &state); ++ shake256_inc_ctx_release(&state); + for(i = 0; i < CTILDEBYTES; ++i) + if(buf.coeffs[i] != sig[i]) + return -1; +diff --git a/avx2/symmetric.h b/avx2/symmetric.h +index 8f3c3c5..fa49963 100644 +--- a/avx2/symmetric.h ++++ b/avx2/symmetric.h +@@ -6,21 +6,23 @@ + + #include "fips202.h" + +-typedef keccak_state stream128_state; +-typedef keccak_state stream256_state; ++typedef shake128incctx stream128_state; ++typedef shake256incctx stream256_state; + + #define dilithium_shake128_stream_init DILITHIUM_NAMESPACE(dilithium_shake128_stream_init) +-void dilithium_shake128_stream_init(keccak_state *state, const uint8_t seed[SEEDBYTES], uint16_t nonce); ++void dilithium_shake128_stream_init(shake128incctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce); + + #define dilithium_shake256_stream_init DILITHIUM_NAMESPACE(dilithium_shake256_stream_init) +-void dilithium_shake256_stream_init(keccak_state *state, const uint8_t seed[CRHBYTES], uint16_t nonce); ++void dilithium_shake256_stream_init(shake256incctx *state, const uint8_t 
seed[CRHBYTES], uint16_t nonce); + + #define STREAM128_BLOCKBYTES SHAKE128_RATE + #define STREAM256_BLOCKBYTES SHAKE256_RATE + + #define stream128_init(STATE, SEED, NONCE) dilithium_shake128_stream_init(STATE, SEED, NONCE) + #define stream128_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) ++#define stream128_release(STATE) shake128_inc_ctx_release(STATE) + #define stream256_init(STATE, SEED, NONCE) dilithium_shake256_stream_init(STATE, SEED, NONCE) + #define stream256_squeezeblocks(OUT, OUTBLOCKS, STATE) shake256_squeezeblocks(OUT, OUTBLOCKS, STATE) ++#define stream256_release(STATE) shake256_inc_ctx_release(STATE) + + #endif +diff --git a/ref/api.h b/ref/api.h +index cc5c6fe..78caa5c 100644 +--- a/ref/api.h ++++ b/ref/api.h +@@ -33,7 +33,7 @@ int pqcrystals_dilithium2_ref_open(uint8_t *m, size_t *mlen, + + #define pqcrystals_dilithium3_PUBLICKEYBYTES 1952 + #define pqcrystals_dilithium3_SECRETKEYBYTES 4032 +-#define pqcrystals_dilithium3_BYTES 3293 ++#define pqcrystals_dilithium3_BYTES 3309 + + #define pqcrystals_dilithium3_ref_PUBLICKEYBYTES pqcrystals_dilithium3_PUBLICKEYBYTES + #define pqcrystals_dilithium3_ref_SECRETKEYBYTES pqcrystals_dilithium3_SECRETKEYBYTES +@@ -60,7 +60,7 @@ int pqcrystals_dilithium3_ref_open(uint8_t *m, size_t *mlen, + + #define pqcrystals_dilithium5_PUBLICKEYBYTES 2592 + #define pqcrystals_dilithium5_SECRETKEYBYTES 4896 +-#define pqcrystals_dilithium5_BYTES 4595 ++#define pqcrystals_dilithium5_BYTES 4627 + + #define pqcrystals_dilithium5_ref_PUBLICKEYBYTES pqcrystals_dilithium5_PUBLICKEYBYTES + #define pqcrystals_dilithium5_ref_SECRETKEYBYTES pqcrystals_dilithium5_SECRETKEYBYTES +diff --git a/ref/config.h b/ref/config.h +index 5ddcd8c..eddf13f 100644 +--- a/ref/config.h ++++ b/ref/config.h +@@ -2,7 +2,7 @@ + #define CONFIG_H + + //#define DILITHIUM_MODE 2 +-//#define DILITHIUM_RANDOMIZED_SIGNING ++#define DILITHIUM_RANDOMIZED_SIGNING + //#define USE_RDPMC + //#define DBENCH + +@@ -11,17 +11,17 @@ + 
#endif + + #if DILITHIUM_MODE == 2 +-#define CRYPTO_ALGNAME "Dilithium2" +-#define DILITHIUM_NAMESPACETOP pqcrystals_dilithium2_ref +-#define DILITHIUM_NAMESPACE(s) pqcrystals_dilithium2_ref_##s ++#define CRYPTO_ALGNAME "ML-DSA-44-ipd" ++#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_44_ipd_ref ++#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_44_ipd_ref_##s + #elif DILITHIUM_MODE == 3 +-#define CRYPTO_ALGNAME "Dilithium3" +-#define DILITHIUM_NAMESPACETOP pqcrystals_dilithium3_ref +-#define DILITHIUM_NAMESPACE(s) pqcrystals_dilithium3_ref_##s ++#define CRYPTO_ALGNAME "ML-DSA-65-ipd" ++#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_65_ipd_ref ++#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_65_ipd_ref_##s + #elif DILITHIUM_MODE == 5 +-#define CRYPTO_ALGNAME "Dilithium5" +-#define DILITHIUM_NAMESPACETOP pqcrystals_dilithium5_ref +-#define DILITHIUM_NAMESPACE(s) pqcrystals_dilithium5_ref_##s ++#define CRYPTO_ALGNAME "ML-DSA-87-ipd" ++#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_87_ipd_ref ++#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_87_ipd_ref_##s + #endif + + #endif +diff --git a/ref/packing.h b/ref/packing.h +index 1e8e9e7..8e47728 100644 +--- a/ref/packing.h ++++ b/ref/packing.h +@@ -18,7 +18,7 @@ void pack_sk(uint8_t sk[CRYPTO_SECRETKEYBYTES], + const polyveck *s2); + + #define pack_sig DILITHIUM_NAMESPACE(pack_sig) +-void pack_sig(uint8_t sig[CRYPTO_BYTES], const uint8_t c[SEEDBYTES], const polyvecl *z, const polyveck *h); ++void pack_sig(uint8_t sig[CRYPTO_BYTES], const uint8_t c[CTILDEBYTES], const polyvecl *z, const polyveck *h); + + #define unpack_pk DILITHIUM_NAMESPACE(unpack_pk) + void unpack_pk(uint8_t rho[SEEDBYTES], polyveck *t1, const uint8_t pk[CRYPTO_PUBLICKEYBYTES]); +@@ -33,6 +33,6 @@ void unpack_sk(uint8_t rho[SEEDBYTES], + const uint8_t sk[CRYPTO_SECRETKEYBYTES]); + + #define unpack_sig DILITHIUM_NAMESPACE(unpack_sig) +-int unpack_sig(uint8_t c[SEEDBYTES], polyvecl *z, polyveck *h, const uint8_t sig[CRYPTO_BYTES]); ++int 
unpack_sig(uint8_t c[CTILDEBYTES], polyvecl *z, polyveck *h, const uint8_t sig[CRYPTO_BYTES]); + + #endif +diff --git a/ref/poly.c b/ref/poly.c +index fe3b787..7983aac 100644 +--- a/ref/poly.c ++++ b/ref/poly.c +@@ -365,6 +365,7 @@ void poly_uniform(poly *a, + buflen = STREAM128_BLOCKBYTES + off; + ctr += rej_uniform(a->coeffs + ctr, N - ctr, buf, buflen); + } ++ stream128_release(&state); + } + + /************************************************* +@@ -450,6 +451,7 @@ void poly_uniform_eta(poly *a, + stream256_squeezeblocks(buf, 1, &state); + ctr += rej_eta(a->coeffs + ctr, N - ctr, buf, STREAM256_BLOCKBYTES); + } ++ stream256_release(&state); + } + + /************************************************* +@@ -473,6 +475,7 @@ void poly_uniform_gamma1(poly *a, + + stream256_init(&state, seed, nonce); + stream256_squeezeblocks(buf, POLY_UNIFORM_GAMMA1_NBLOCKS, &state); ++ stream256_release(&state); + polyz_unpack(a, buf); + } + +@@ -490,11 +493,11 @@ void poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]) { + unsigned int i, b, pos; + uint64_t signs; + uint8_t buf[SHAKE256_RATE]; +- keccak_state state; ++ shake256incctx state; + +- shake256_init(&state); +- shake256_absorb(&state, seed, SEEDBYTES); +- shake256_finalize(&state); ++ shake256_inc_init(&state); ++ shake256_inc_absorb(&state, seed, SEEDBYTES); ++ shake256_inc_finalize(&state); + shake256_squeezeblocks(buf, 1, &state); + + signs = 0; +@@ -518,6 +521,7 @@ void poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]) { + c->coeffs[b] = 1 - 2*(signs & 1); + signs >>= 1; + } ++ shake256_inc_ctx_release(&state); + } + + /************************************************* +diff --git a/ref/sign.c b/ref/sign.c +index d25a399..9298ad2 100644 +--- a/ref/sign.c ++++ b/ref/sign.c +@@ -90,7 +90,7 @@ int crypto_sign_signature(uint8_t *sig, + polyvecl mat[K], s1, y, z; + polyveck t0, s2, w1, w0, h; + poly cp; +- keccak_state state; ++ shake256incctx state; + + rho = seedbuf; + tr = rho + SEEDBYTES; +@@ -102,11 +102,11 @@ 
int crypto_sign_signature(uint8_t *sig, + + + /* Compute mu = CRH(tr, msg) */ +- shake256_init(&state); +- shake256_absorb(&state, tr, TRBYTES); +- shake256_absorb(&state, m, mlen); +- shake256_finalize(&state); +- shake256_squeeze(mu, CRHBYTES, &state); ++ shake256_inc_init(&state); ++ shake256_inc_absorb(&state, tr, TRBYTES); ++ shake256_inc_absorb(&state, m, mlen); ++ shake256_inc_finalize(&state); ++ shake256_inc_squeeze(mu, CRHBYTES, &state); + + #ifdef DILITHIUM_RANDOMIZED_SIGNING + randombytes(rnd, RNDBYTES); +@@ -138,11 +138,11 @@ rej: + polyveck_decompose(&w1, &w0, &w1); + polyveck_pack_w1(sig, &w1); + +- shake256_init(&state); +- shake256_absorb(&state, mu, CRHBYTES); +- shake256_absorb(&state, sig, K*POLYW1_PACKEDBYTES); +- shake256_finalize(&state); +- shake256_squeeze(sig, CTILDEBYTES, &state); ++ shake256_inc_ctx_reset(&state); ++ shake256_inc_absorb(&state, mu, CRHBYTES); ++ shake256_inc_absorb(&state, sig, K*POLYW1_PACKEDBYTES); ++ shake256_inc_finalize(&state); ++ shake256_inc_squeeze(sig, CTILDEBYTES, &state); + poly_challenge(&cp, sig); /* uses only the first SEEDBYTES bytes of sig */ + poly_ntt(&cp); + +@@ -175,6 +175,8 @@ rej: + if(n > OMEGA) + goto rej; + ++ shake256_inc_ctx_release(&state); ++ + /* Write signature */ + pack_sig(sig, sig, &z, &h); + *siglen = CRYPTO_BYTES; +@@ -240,7 +242,7 @@ int crypto_sign_verify(const uint8_t *sig, + poly cp; + polyvecl mat[K], z; + polyveck t1, w1, h; +- keccak_state state; ++ shake256incctx state; + + if(siglen != CRYPTO_BYTES) + return -1; +@@ -253,11 +255,11 @@ int crypto_sign_verify(const uint8_t *sig, + + /* Compute CRH(H(rho, t1), msg) */ + shake256(mu, CRHBYTES, pk, CRYPTO_PUBLICKEYBYTES); +- shake256_init(&state); +- shake256_absorb(&state, mu, CRHBYTES); +- shake256_absorb(&state, m, mlen); +- shake256_finalize(&state); +- shake256_squeeze(mu, CRHBYTES, &state); ++ shake256_inc_init(&state); ++ shake256_inc_absorb(&state, mu, CRHBYTES); ++ shake256_inc_absorb(&state, m, mlen); ++ 
shake256_inc_finalize(&state); ++ shake256_inc_squeeze(mu, CRHBYTES, &state); + + /* Matrix-vector multiplication; compute Az - c2^dt1 */ + poly_challenge(&cp, c); /* uses only the first SEEDBYTES bytes of c */ +@@ -281,11 +283,12 @@ int crypto_sign_verify(const uint8_t *sig, + polyveck_pack_w1(buf, &w1); + + /* Call random oracle and verify challenge */ +- shake256_init(&state); +- shake256_absorb(&state, mu, CRHBYTES); +- shake256_absorb(&state, buf, K*POLYW1_PACKEDBYTES); +- shake256_finalize(&state); +- shake256_squeeze(c2, CTILDEBYTES, &state); ++ shake256_inc_ctx_reset(&state); ++ shake256_inc_absorb(&state, mu, CRHBYTES); ++ shake256_inc_absorb(&state, buf, K*POLYW1_PACKEDBYTES); ++ shake256_inc_finalize(&state); ++ shake256_inc_squeeze(c2, CTILDEBYTES, &state); ++ shake256_inc_ctx_release(&state); + for(i = 0; i < CTILDEBYTES; ++i) + if(c[i] != c2[i]) + return -1; +diff --git a/ref/symmetric-shake.c b/ref/symmetric-shake.c +index 11ec09c..963f649 100644 +--- a/ref/symmetric-shake.c ++++ b/ref/symmetric-shake.c +@@ -3,26 +3,26 @@ + #include "symmetric.h" + #include "fips202.h" + +-void dilithium_shake128_stream_init(keccak_state *state, const uint8_t seed[SEEDBYTES], uint16_t nonce) ++void dilithium_shake128_stream_init(shake128incctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce) + { + uint8_t t[2]; + t[0] = nonce; + t[1] = nonce >> 8; + +- shake128_init(state); +- shake128_absorb(state, seed, SEEDBYTES); +- shake128_absorb(state, t, 2); +- shake128_finalize(state); ++ shake128_inc_init(state); ++ shake128_inc_absorb(state, seed, SEEDBYTES); ++ shake128_inc_absorb(state, t, 2); ++ shake128_inc_finalize(state); + } + +-void dilithium_shake256_stream_init(keccak_state *state, const uint8_t seed[CRHBYTES], uint16_t nonce) ++void dilithium_shake256_stream_init(shake256incctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce) + { + uint8_t t[2]; + t[0] = nonce; + t[1] = nonce >> 8; + +- shake256_init(state); +- shake256_absorb(state, seed, CRHBYTES); 
+- shake256_absorb(state, t, 2); +- shake256_finalize(state); ++ shake256_inc_init(state); ++ shake256_inc_absorb(state, seed, CRHBYTES); ++ shake256_inc_absorb(state, t, 2); ++ shake256_inc_finalize(state); + } +diff --git a/ref/symmetric.h b/ref/symmetric.h +index cba12d1..211de3b 100644 +--- a/ref/symmetric.h ++++ b/ref/symmetric.h +@@ -6,16 +6,16 @@ + + #include "fips202.h" + +-typedef keccak_state stream128_state; +-typedef keccak_state stream256_state; ++typedef shake128incctx stream128_state; ++typedef shake256incctx stream256_state; + + #define dilithium_shake128_stream_init DILITHIUM_NAMESPACE(dilithium_shake128_stream_init) +-void dilithium_shake128_stream_init(keccak_state *state, ++void dilithium_shake128_stream_init(shake128incctx *state, + const uint8_t seed[SEEDBYTES], + uint16_t nonce); + + #define dilithium_shake256_stream_init DILITHIUM_NAMESPACE(dilithium_shake256_stream_init) +-void dilithium_shake256_stream_init(keccak_state *state, ++void dilithium_shake256_stream_init(shake256incctx *state, + const uint8_t seed[CRHBYTES], + uint16_t nonce); + +@@ -26,9 +26,11 @@ void dilithium_shake256_stream_init(keccak_state *state, + dilithium_shake128_stream_init(STATE, SEED, NONCE) + #define stream128_squeezeblocks(OUT, OUTBLOCKS, STATE) \ + shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) ++#define stream128_release(STATE) shake128_inc_ctx_release(STATE) + #define stream256_init(STATE, SEED, NONCE) \ + dilithium_shake256_stream_init(STATE, SEED, NONCE) + #define stream256_squeezeblocks(OUT, OUTBLOCKS, STATE) \ + shake256_squeezeblocks(OUT, OUTBLOCKS, STATE) ++#define stream256_release(STATE) shake256_inc_ctx_release(STATE) + + #endif diff --git a/scripts/copy_from_upstream/patches/pqcrystals-ml_kem_ipd.patch b/scripts/copy_from_upstream/patches/pqcrystals-ml_kem_ipd.patch new file mode 100644 index 0000000000..ba138bf3cd --- /dev/null +++ b/scripts/copy_from_upstream/patches/pqcrystals-ml_kem_ipd.patch @@ -0,0 +1,448 @@ +diff --git a/Kyber1024_META.yml 
b/ML-KEM-1024-ipd_META.yml +index baa5ca3..ffafcf0 100644 +--- a/Kyber1024_META.yml ++++ b/ML-KEM-1024-ipd_META.yml +@@ -1,4 +1,4 @@ +-name: Kyber1024 ++name: ML-KEM-1024-ipd + type: kem + claimed-nist-level: 5 + claimed-security: IND-CCA2 +@@ -6,8 +6,8 @@ length-public-key: 1568 + length-ciphertext: 1568 + length-secret-key: 3168 + length-shared-secret: 32 +-nistkat-sha256: 5afcf2a568ad32d49b55105b032af1850f03f3888ff9e2a72f4059c58e968f60 +-testvectors-sha256: ff1a854b9b6761a70c65ccae85246fe0596a949e72eae0866a8a2a2d4ea54b10 ++nistkat-sha256: 03d6494b74c45d010e61b0328c1ab318c4df3b7f9dbd04d0e35b3468848584b7 ++testvectors-sha256: 85ab251d6e749e6b27507a8a6ec473ba2e8419c1aef87d0cd5ec9903c1bb92df + principal-submitters: + - Peter Schwabe + auxiliary-submitters: +@@ -22,22 +22,20 @@ auxiliary-submitters: + - Damien Stehlé + implementations: + - name: ref +- version: https://github.com/pq-crystals/kyber/commit/28413dfbf523fdde181246451c2bd77199c0f7ff ++ version: https://github.com/pq-crystals/kyber/tree/standard + folder_name: ref + compile_opts: -DKYBER_K=4 +- signature_keypair: pqcrystals_kyber1024_ref_keypair +- signature_enc: pqcrystals_kyber1024_ref_enc +- signature_dec: pqcrystals_kyber1024_ref_dec +- sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c reduce.c ntt.c cbd.c verify.c kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h ntt.h cbd.h verify.h symmetric.h fips202.h symmetric-shake.c +- common_dep: common_ref ++ signature_keypair: pqcrystals_ml_kem_1024_ipd_ref_keypair ++ signature_enc: pqcrystals_ml_kem_1024_ipd_ref_enc ++ signature_dec: pqcrystals_ml_kem_1024_ipd_ref_dec ++ sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c reduce.c ntt.c cbd.c verify.c kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h ntt.h cbd.h verify.h symmetric.h symmetric-shake.c + - name: avx2 +- version: https://github.com/pq-crystals/kyber/commit/28413dfbf523fdde181246451c2bd77199c0f7ff ++ version: https://github.com/pq-crystals/kyber/tree/standard + compile_opts: 
-DKYBER_K=4 +- signature_keypair: pqcrystals_kyber1024_avx2_keypair +- signature_enc: pqcrystals_kyber1024_avx2_enc +- signature_dec: pqcrystals_kyber1024_avx2_dec +- sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c fq.S shuffle.S ntt.S invntt.S basemul.S consts.c rejsample.c cbd.c verify.c align.h kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h fq.inc shuffle.inc ntt.h consts.h rejsample.h cbd.h verify.h symmetric.h fips202.h fips202x4.h symmetric-shake.c +- common_dep: common_avx2 common_keccak4x_avx2 ++ signature_keypair: pqcrystals_ml_kem_1024_ipd_avx2_keypair ++ signature_enc: pqcrystals_ml_kem_1024_ipd_avx2_enc ++ signature_dec: pqcrystals_ml_kem_1024_ipd_avx2_dec ++ sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c fq.S shuffle.S ntt.S invntt.S basemul.S consts.c rejsample.c cbd.c verify.c align.h kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h fq.inc shuffle.inc ntt.h consts.h rejsample.h cbd.h verify.h symmetric.h symmetric-shake.c + supported_platforms: + - architecture: x86_64 + operating_systems: +diff --git a/Kyber512_META.yml b/ML-KEM-512-ipd_META.yml +index b251701..d20f0b1 100644 +--- a/Kyber512_META.yml ++++ b/ML-KEM-512-ipd_META.yml +@@ -1,4 +1,4 @@ +-name: Kyber512 ++name: ML-KEM-512-ipd + type: kem + claimed-nist-level: 1 + claimed-security: IND-CCA2 +@@ -6,8 +6,8 @@ length-public-key: 800 + length-ciphertext: 768 + length-secret-key: 1632 + length-shared-secret: 32 +-nistkat-sha256: bb0481d3325d828817900b709d23917cefbc10026fc857f098979451f67bb0ca +-testvectors-sha256: 6730bb552c22d9d2176ffb5568e48eb30952cf1f065073ec5f9724f6a3c6ea85 ++nistkat-sha256: 76aae1fa3f8367522700b22da635a5bc4ced4298edb0eb9947aa3ba60d62676f ++testvectors-sha256: e1ac6fb45e2511f4170a3527c0c50dcd61336f47113df7a299a61ef8394bd669 + principal-submitters: + - Peter Schwabe + auxiliary-submitters: +@@ -22,22 +22,20 @@ auxiliary-submitters: + - Damien Stehlé + implementations: + - name: ref +- version: 
https://github.com/pq-crystals/kyber/commit/74cad307858b61e434490c75f812cb9b9ef7279b ++ version: https://github.com/pq-crystals/kyber/tree/standard + folder_name: ref + compile_opts: -DKYBER_K=2 +- signature_keypair: pqcrystals_kyber512_ref_keypair +- signature_enc: pqcrystals_kyber512_ref_enc +- signature_dec: pqcrystals_kyber512_ref_dec +- sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c reduce.c ntt.c cbd.c verify.c kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h ntt.h cbd.h verify.h symmetric.h fips202.h symmetric-shake.c +- common_dep: common_ref ++ signature_keypair: pqcrystals_ml_kem_512_ipd_ref_keypair ++ signature_enc: pqcrystals_ml_kem_512_ipd_ref_enc ++ signature_dec: pqcrystals_ml_kem_512_ipd_ref_dec ++ sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c reduce.c ntt.c cbd.c verify.c kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h ntt.h cbd.h verify.h symmetric.h symmetric-shake.c + - name: avx2 +- version: https://github.com/pq-crystals/kyber/commit/36414d64fc1890ed58d1ca8b1e0cab23635d1ac2 ++ version: https://github.com/pq-crystals/kyber/tree/standard + compile_opts: -DKYBER_K=2 +- signature_keypair: pqcrystals_kyber512_avx2_keypair +- signature_enc: pqcrystals_kyber512_avx2_enc +- signature_dec: pqcrystals_kyber512_avx2_dec +- sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c fq.S shuffle.S ntt.S invntt.S basemul.S consts.c rejsample.c cbd.c verify.c align.h kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h fq.inc shuffle.inc ntt.h consts.h rejsample.h cbd.h verify.h symmetric.h fips202.h fips202x4.h symmetric-shake.c +- common_dep: common_avx2 common_keccak4x_avx2 ++ signature_keypair: pqcrystals_ml_kem_512_ipd_avx2_keypair ++ signature_enc: pqcrystals_ml_kem_512_ipd_avx2_enc ++ signature_dec: pqcrystals_ml_kem_512_ipd_avx2_dec ++ sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c fq.S shuffle.S ntt.S invntt.S basemul.S consts.c rejsample.c cbd.c verify.c align.h kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h 
fq.inc shuffle.inc ntt.h consts.h rejsample.h cbd.h verify.h symmetric.h symmetric-shake.c + supported_platforms: + - architecture: x86_64 + operating_systems: +diff --git a/Kyber768_META.yml b/ML-KEM-768-ipd_META.yml +index 7a0cc3d..e768cd5 100644 +--- a/Kyber768_META.yml ++++ b/ML-KEM-768-ipd_META.yml +@@ -1,4 +1,4 @@ +-name: Kyber768 ++name: ML-KEM-768-ipd + type: kem + claimed-nist-level: 3 + claimed-security: IND-CCA2 +@@ -6,8 +6,8 @@ length-public-key: 1184 + length-ciphertext: 1088 + length-secret-key: 2400 + length-shared-secret: 32 +-nistkat-sha256: 89e82a5bf2d4ddb2c6444e10409e6d9ca65dafbca67d1a0db2c9b54920a29172 +-testvectors-sha256: 667c8ca2ca93729c0df6ff24588460bad1bbdbfb64ece0fe8563852a7ff348c6 ++nistkat-sha256: c7e76b4b30c786b5b70c152a446e7832c1cb42b3816ec048dbeaf7041211b310 ++testvectors-sha256: 2586721a714c439f6fef26e29ee1c4c67c6207186f810617f278e6ce3e67ea0d + principal-submitters: + - Peter Schwabe + auxiliary-submitters: +@@ -22,22 +22,20 @@ auxiliary-submitters: + - Damien Stehlé + implementations: + - name: ref +- version: https://github.com/pq-crystals/kyber/commit/28413dfbf523fdde181246451c2bd77199c0f7ff ++ version: https://github.com/pq-crystals/kyber/tree/standard + folder_name: ref + compile_opts: -DKYBER_K=3 +- signature_keypair: pqcrystals_kyber768_ref_keypair +- signature_enc: pqcrystals_kyber768_ref_enc +- signature_dec: pqcrystals_kyber768_ref_dec +- sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c reduce.c ntt.c cbd.c verify.c kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h ntt.h cbd.h verify.h symmetric.h fips202.h symmetric-shake.c +- common_dep: common_ref ++ signature_keypair: pqcrystals_ml_kem_768_ipd_ref_keypair ++ signature_enc: pqcrystals_ml_kem_768_ipd_ref_enc ++ signature_dec: pqcrystals_ml_kem_768_ipd_ref_dec ++ sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c reduce.c ntt.c cbd.c verify.c kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h ntt.h cbd.h verify.h symmetric.h symmetric-shake.c + - name: avx2 
+- version: https://github.com/pq-crystals/kyber/commit/28413dfbf523fdde181246451c2bd77199c0f7ff ++ version: https://github.com/pq-crystals/kyber/tree/standard + compile_opts: -DKYBER_K=3 +- signature_keypair: pqcrystals_kyber768_avx2_keypair +- signature_enc: pqcrystals_kyber768_avx2_enc +- signature_dec: pqcrystals_kyber768_avx2_dec +- sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c fq.S shuffle.S ntt.S invntt.S basemul.S consts.c rejsample.c cbd.c verify.c align.h kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h fq.inc shuffle.inc ntt.h consts.h rejsample.h cbd.h verify.h symmetric.h fips202.h fips202x4.h symmetric-shake.c +- common_dep: common_avx2 common_keccak4x_avx2 ++ signature_keypair: pqcrystals_ml_kem_768_ipd_avx2_keypair ++ signature_enc: pqcrystals_ml_kem_768_ipd_avx2_enc ++ signature_dec: pqcrystals_ml_kem_768_ipd_avx2_dec ++ sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c fq.S shuffle.S ntt.S invntt.S basemul.S consts.c rejsample.c cbd.c verify.c align.h kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h fq.inc shuffle.inc ntt.h consts.h rejsample.h cbd.h verify.h symmetric.h symmetric-shake.c + supported_platforms: + - architecture: x86_64 + operating_systems: +diff --git a/avx2/indcpa.c b/avx2/indcpa.c +index 4f3b782..572ce49 100644 +--- a/avx2/indcpa.c ++++ b/avx2/indcpa.c +@@ -175,7 +175,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) + unsigned int ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4]; + __m256i f; +- keccakx4_state state; ++ shake128x4incctx state; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); +@@ -204,6 +204,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) + buf[3].coeffs[33] = 1; + } + ++ shake128x4_inc_init(&state); + shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, 
buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); + +@@ -225,6 +226,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) + poly_nttunpack(&a[0].vec[1]); + poly_nttunpack(&a[1].vec[0]); + poly_nttunpack(&a[1].vec[1]); ++ shake128x4_inc_ctx_release(&state); + } + #elif KYBER_K == 3 + void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) +@@ -232,8 +234,8 @@ void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) + unsigned int ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4]; + __m256i f; +- keccakx4_state state; +- keccak_state state1x; ++ shake128x4incctx state; ++ shake128incctx state1x; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); +@@ -262,6 +264,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) + buf[3].coeffs[33] = 1; + } + ++ shake128x4_inc_init(&state); + shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); + +@@ -327,6 +330,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) + ctr2 += rej_uniform(a[2].vec[0].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_uniform(a[2].vec[1].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } ++ shake128x4_inc_ctx_release(&state); + + poly_nttunpack(&a[1].vec[1]); + poly_nttunpack(&a[1].vec[2]); +@@ -337,6 +341,8 @@ void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) + _mm256_store_si256(buf[0].vec, f); + buf[0].coeffs[32] = 2; + buf[0].coeffs[33] = 2; ++ ++ shake128_inc_init(&state1x); + shake128_absorb_once(&state1x, buf[0].coeffs, 34); + shake128_squeezeblocks(buf[0].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state1x); + ctr0 = rej_uniform_avx(a[2].vec[2].coeffs, buf[0].coeffs); +@@ -344,6 +350,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[32], 
int transposed) + shake128_squeezeblocks(buf[0].coeffs, 1, &state1x); + ctr0 += rej_uniform(a[2].vec[2].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); + } ++ shake128_inc_ctx_release(&state1x); + + poly_nttunpack(&a[2].vec[2]); + } +@@ -353,7 +360,8 @@ void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) + unsigned int i, ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4]; + __m256i f; +- keccakx4_state state; ++ shake128x4incctx state; ++ shake128x4_inc_init(&state); + + for(i=0;i<4;i++) { + f = _mm256_loadu_si256((__m256i *)seed); +@@ -405,6 +413,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) + poly_nttunpack(&a[i].vec[2]); + poly_nttunpack(&a[i].vec[3]); + } ++ shake128x4_inc_ctx_release(&state); + } + #endif + +diff --git a/avx2/params.h b/avx2/params.h +index bc70ebf..fdc688e 100644 +--- a/avx2/params.h ++++ b/avx2/params.h +@@ -12,19 +12,19 @@ + #ifdef KYBER_90S + #define KYBER_NAMESPACE(s) pqcrystals_kyber512_90s_avx2_##s + #else +-#define KYBER_NAMESPACE(s) pqcrystals_kyber512_avx2_##s ++#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_512_ipd_avx2_##s + #endif + #elif (KYBER_K == 3) + #ifdef KYBER_90S + #define KYBER_NAMESPACE(s) pqcrystals_kyber768_90s_avx2_##s + #else +-#define KYBER_NAMESPACE(s) pqcrystals_kyber768_avx2_##s ++#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_768_ipd_avx2_##s + #endif + #elif (KYBER_K == 4) + #ifdef KYBER_90S + #define KYBER_NAMESPACE(s) pqcrystals_kyber1024_90s_avx2_##s + #else +-#define KYBER_NAMESPACE(s) pqcrystals_kyber1024_avx2_##s ++#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_1024_ipd_avx2_##s + #endif + #else + #error "KYBER_K must be in {2,3,4}" +diff --git a/avx2/poly.c b/avx2/poly.c +index ab148a2..96bad86 100644 +--- a/avx2/poly.c ++++ b/avx2/poly.c +@@ -2,6 +2,7 @@ + #include + #include + #include "align.h" ++#include "fips202x4.h" + #include "params.h" + #include "poly.h" + #include "ntt.h" +@@ -412,7 +413,7 @@ void 
poly_getnoise_eta1_4x(poly *r0, + { + ALIGNED_UINT8(NOISE_NBLOCKS*SHAKE256_RATE) buf[4]; + __m256i f; +- keccakx4_state state; ++ shake256x4incctx state; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); +@@ -425,8 +426,10 @@ void poly_getnoise_eta1_4x(poly *r0, + buf[2].coeffs[32] = nonce2; + buf[3].coeffs[32] = nonce3; + ++ shake256x4_inc_init(&state); + shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 33); + shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, NOISE_NBLOCKS, &state); ++ shake256x4_inc_ctx_release(&state); + + poly_cbd_eta1(r0, buf[0].vec); + poly_cbd_eta1(r1, buf[1].vec); +@@ -447,7 +450,7 @@ void poly_getnoise_eta1122_4x(poly *r0, + { + ALIGNED_UINT8(NOISE_NBLOCKS*SHAKE256_RATE) buf[4]; + __m256i f; +- keccakx4_state state; ++ shake256x4incctx state; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); +@@ -460,8 +463,10 @@ void poly_getnoise_eta1122_4x(poly *r0, + buf[2].coeffs[32] = nonce2; + buf[3].coeffs[32] = nonce3; + ++ shake256x4_inc_init(&state); + shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 33); + shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, NOISE_NBLOCKS, &state); ++ shake256x4_inc_ctx_release(&state); + + poly_cbd_eta1(r0, buf[0].vec); + poly_cbd_eta1(r1, buf[1].vec); +diff --git a/avx2/symmetric.h b/avx2/symmetric.h +index 627b891..e4941f7 100644 +--- a/avx2/symmetric.h ++++ b/avx2/symmetric.h +@@ -8,10 +8,10 @@ + #include "fips202.h" + #include "fips202x4.h" + +-typedef keccak_state xof_state; ++typedef shake128incctx xof_state; + + #define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb) +-void kyber_shake128_absorb(keccak_state *s, ++void kyber_shake128_absorb(shake128incctx *s, + const uint8_t seed[KYBER_SYMBYTES], + uint8_t x, + uint8_t y); +diff --git a/ref/indcpa.c b/ref/indcpa.c +index 
5d74518..4a8b4c8 100644 +--- a/ref/indcpa.c ++++ b/ref/indcpa.c +@@ -164,6 +164,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed) + unsigned int buflen, off; + uint8_t buf[GEN_MATRIX_NBLOCKS*XOF_BLOCKBYTES+2]; + xof_state state; ++ xof_init(&state, seed); + + for(i=0;i {% for scheme in schemes -%} -#ifdef OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }} +#if defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}) {%- if 'alias_scheme' in scheme %} || defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}){%- endif %} #define OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_length_public_key {{ scheme['metadata']['length-public-key'] }} #define OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_length_secret_key {{ scheme['metadata']['length-secret-key'] }} #define OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_length_ciphertext {{ scheme['metadata']['length-ciphertext'] }} @@ -15,6 +15,16 @@ OQS_KEM *OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_new(void); OQS_API OQS_STATUS OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_keypair(uint8_t *public_key, uint8_t *secret_key); OQS_API OQS_STATUS OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key); OQS_API OQS_STATUS OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key); +{% if 'alias_scheme' in scheme %} +#define OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_length_public_key OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_length_public_key +#define OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_length_secret_key OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_length_secret_key +#define OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_length_ciphertext OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_length_ciphertext +#define OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_length_shared_secret OQS_KEM_{{ family }}_{{ scheme['scheme'] 
}}_length_shared_secret +OQS_KEM *OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_new(void); +#define OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_keypair OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_keypair +#define OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_encaps OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_encaps +#define OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_decaps OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_decaps +{% endif -%} #endif {% endfor -%} diff --git a/scripts/copy_from_upstream/src/kem/family/kem_scheme.c b/scripts/copy_from_upstream/src/kem/family/kem_scheme.c index 027a88e76f..058d829e8b 100644 --- a/scripts/copy_from_upstream/src/kem/family/kem_scheme.c +++ b/scripts/copy_from_upstream/src/kem/family/kem_scheme.c @@ -31,6 +31,34 @@ OQS_KEM *OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_new(void) { return kem; } +{%- if 'alias_scheme' in scheme %} + +/** Alias */ +OQS_KEM *OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_new(void) { + + OQS_KEM *kem = malloc(sizeof(OQS_KEM)); + if (kem == NULL) { + return NULL; + } + kem->method_name = OQS_KEM_alg_{{ family }}_{{ scheme['alias_scheme'] }}; + kem->alg_version = "{{ scheme['metadata']['implementations'][0]['version'] }}"; + + kem->claimed_nist_level = {{ scheme['metadata']['claimed-nist-level'] }}; + kem->ind_cca = {{ scheme['metadata']['ind_cca'] }}; + + kem->length_public_key = OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_length_public_key; + kem->length_secret_key = OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_length_secret_key; + kem->length_ciphertext = OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_length_ciphertext; + kem->length_shared_secret = OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_length_shared_secret; + + kem->keypair = OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_keypair; + kem->encaps = OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_encaps; + kem->decaps = OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_decaps; + + return kem; +} 
+{%- endif -%} + {%- for impl in scheme['metadata']['implementations'] if impl['name'] == scheme['default_implementation'] %} {%- if impl['signature_keypair'] %} diff --git a/scripts/copy_from_upstream/src/kem/kem.c/alg_identifier.fragment b/scripts/copy_from_upstream/src/kem/kem.c/alg_identifier.fragment index b11c404684..c6729cb653 100644 --- a/scripts/copy_from_upstream/src/kem/kem.c/alg_identifier.fragment +++ b/scripts/copy_from_upstream/src/kem/kem.c/alg_identifier.fragment @@ -1,3 +1,7 @@ {% for family in instructions['kems'] %}{% for scheme in family['schemes'] %} - OQS_KEM_alg_{{ family['name'] }}_{{ scheme['scheme'] }},{% endfor %}{% endfor %} + OQS_KEM_alg_{{ family['name'] }}_{{ scheme['scheme'] }}, +{%- if 'alias_scheme' in scheme %} + OQS_KEM_alg_{{ family['name'] }}_{{ scheme['alias_scheme'] }}, +{%- endif -%} +{% endfor %}{% endfor %} \ No newline at end of file diff --git a/scripts/copy_from_upstream/src/kem/kem.c/enabled_case.fragment b/scripts/copy_from_upstream/src/kem/kem.c/enabled_case.fragment index da1ff1d885..ba58364ee5 100644 --- a/scripts/copy_from_upstream/src/kem/kem.c/enabled_case.fragment +++ b/scripts/copy_from_upstream/src/kem/kem.c/enabled_case.fragment @@ -4,5 +4,14 @@ return 1; #else return 0; -#endif{% endfor %}{% endfor %} +#endif +{% if 'alias_scheme' in scheme %} + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_{{ family['name'] }}_{{ scheme['alias_scheme'] }})) { +#ifdef OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['alias_scheme'] }} + return 1; +#else + return 0; +#endif +{% endif -%} +{% endfor %}{% endfor %} diff --git a/scripts/copy_from_upstream/src/kem/kem.c/new_case.fragment b/scripts/copy_from_upstream/src/kem/kem.c/new_case.fragment index af0b40b04d..63db6d636a 100644 --- a/scripts/copy_from_upstream/src/kem/kem.c/new_case.fragment +++ b/scripts/copy_from_upstream/src/kem/kem.c/new_case.fragment @@ -4,5 +4,14 @@ return OQS_KEM_{{ family['name'] }}_{{ scheme['scheme'] }}_new(); #else return NULL; -#endif{% 
endfor %}{% endfor %} +#endif +{% if 'alias_scheme' in scheme %} + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_{{ family['name'] }}_{{ scheme['alias_scheme'] }})) { +#ifdef OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['alias_scheme'] }} + return OQS_KEM_{{ family['name'] }}_{{ scheme['alias_scheme'] }}_new(); +#else + return NULL; +#endif +{% endif -%} +{% endfor %}{% endfor %} diff --git a/scripts/copy_from_upstream/src/kem/kem.h/alg_identifier.fragment b/scripts/copy_from_upstream/src/kem/kem.h/alg_identifier.fragment index 635a7d6622..e3166e7ccb 100644 --- a/scripts/copy_from_upstream/src/kem/kem.h/alg_identifier.fragment +++ b/scripts/copy_from_upstream/src/kem/kem.h/alg_identifier.fragment @@ -1,4 +1,9 @@ {% for family in instructions['kems'] %}{% for scheme in family['schemes'] %} /** Algorithm identifier for {{ scheme['pretty_name_full'] }} KEM. */ -#define OQS_KEM_alg_{{ family['name'] }}_{{ scheme['scheme'] }} "{{ scheme['pretty_name_full'] }}"{% endfor %}{% endfor %} +#define OQS_KEM_alg_{{ family['name'] }}_{{ scheme['scheme'] }} "{{ scheme['pretty_name_full'] }}" +{%- if 'alias_scheme' in scheme %} +/** Algorithm identifier for {{ scheme['alias_pretty_name_full'] }} KEM. 
*/ +#define OQS_KEM_alg_{{ family['name'] }}_{{ scheme['alias_scheme'] }} "{{ scheme['alias_pretty_name_full'] }}" +{%- endif -%} +{% endfor %}{% endfor %} diff --git a/scripts/copy_from_upstream/src/kem/kem.h/algs_length.fragment b/scripts/copy_from_upstream/src/kem/kem.h/algs_length.fragment index e86a2a0faf..79e87d08b7 100644 --- a/scripts/copy_from_upstream/src/kem/kem.h/algs_length.fragment +++ b/scripts/copy_from_upstream/src/kem/kem.h/algs_length.fragment @@ -1,4 +1,5 @@ {% set unary %}{% for family in instructions['kems'] %}{% for scheme in family['schemes'] %}1{% endfor %}{% endfor %}{% endset %} +{% set unary_alias %}{% for family in instructions['kems'] %}{% for scheme in family['schemes'] if 'alias_scheme' in scheme %}2{% endfor %}{% endfor %}{% endset %} /** Number of algorithm identifiers above. */ -#define OQS_KEM_algs_length {{ unary|length + non_upstream_kems }} +#define OQS_KEM_algs_length {{ unary|length + unary_alias|length + non_upstream_kems }} diff --git a/scripts/copy_from_upstream/src/oqsconfig.h.cmake/add_alg_enable_defines.fragment b/scripts/copy_from_upstream/src/oqsconfig.h.cmake/add_alg_enable_defines.fragment index 82d5d9b8df..2bc517ac22 100644 --- a/scripts/copy_from_upstream/src/oqsconfig.h.cmake/add_alg_enable_defines.fragment +++ b/scripts/copy_from_upstream/src/oqsconfig.h.cmake/add_alg_enable_defines.fragment @@ -3,6 +3,9 @@ #cmakedefine OQS_ENABLE_KEM_{{ family['name']|upper }} 1 {%- for scheme in family['schemes'] %} #cmakedefine OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['scheme'] }} 1 +{%- if 'alias_scheme' in scheme %} +#cmakedefine OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['alias_scheme'] }} 1 +{%- endif -%} {%- for impl in scheme['metadata']['implementations'] if impl['name'] != family['default_implementation'] %} #cmakedefine OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['scheme'] }}_{{ impl['name'] }} 1 {%- endfor -%} @@ -14,6 +17,9 @@ #cmakedefine OQS_ENABLE_SIG_{{ family['name']|upper }} 1 {%- for scheme in 
family['schemes'] %} #cmakedefine OQS_ENABLE_SIG_{{ family['name'] }}_{{ scheme['scheme'] }} 1 +{%- if 'alias_scheme' in scheme %} +#cmakedefine OQS_ENABLE_SIG_{{ family['name'] }}_{{ scheme['alias_scheme'] }} 1 +{%- endif -%} {%- for impl in scheme['metadata']['implementations'] if impl['name'] != family['default_implementation'] %} #cmakedefine OQS_ENABLE_SIG_{{ family['name'] }}_{{ scheme['scheme'] }}_{{ impl['name'] }} 1 {%- endfor -%} diff --git a/scripts/copy_from_upstream/src/sig/family/sig_family.h b/scripts/copy_from_upstream/src/sig/family/sig_family.h index 2af20417ca..b17d621635 100644 --- a/scripts/copy_from_upstream/src/sig/family/sig_family.h +++ b/scripts/copy_from_upstream/src/sig/family/sig_family.h @@ -6,7 +6,7 @@ #include {% for scheme in schemes -%} -#ifdef OQS_ENABLE_SIG_{{ family }}_{{ scheme['scheme'] }} +#if defined(OQS_ENABLE_SIG_{{ family }}_{{ scheme['scheme'] }}) {%- if 'alias_scheme' in scheme %} || defined(OQS_ENABLE_SIG_{{ family }}_{{ scheme['alias_scheme'] }}){%- endif %} #define OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_length_public_key {{ scheme['metadata']['length-public-key'] }} #define OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_length_secret_key {{ scheme['metadata']['length-secret-key'] }} #define OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_length_signature {{ scheme['metadata']['length-signature'] }} @@ -15,6 +15,16 @@ OQS_SIG *OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_new(void); OQS_API OQS_STATUS OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_keypair(uint8_t *public_key, uint8_t *secret_key); OQS_API OQS_STATUS OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_sign(uint8_t *signature, size_t *signature_len, const uint8_t *message, size_t message_len, const uint8_t *secret_key); OQS_API OQS_STATUS OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key); +{% if 'alias_scheme' in scheme %} +#define OQS_SIG_{{ family 
}}_{{ scheme['alias_scheme'] }}_length_public_key OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_length_public_key +#define OQS_SIG_{{ family }}_{{ scheme['alias_scheme'] }}_length_secret_key OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_length_secret_key +#define OQS_SIG_{{ family }}_{{ scheme['alias_scheme'] }}_length_signature OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_length_signature +/* The alias has its own constructor; its keypair/sign/verify names map to the primary scheme. */ +OQS_SIG *OQS_SIG_{{ family }}_{{ scheme['alias_scheme'] }}_new(void); +#define OQS_SIG_{{ family }}_{{ scheme['alias_scheme'] }}_keypair OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_keypair +#define OQS_SIG_{{ family }}_{{ scheme['alias_scheme'] }}_sign OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_sign +#define OQS_SIG_{{ family }}_{{ scheme['alias_scheme'] }}_verify OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_verify +{% endif -%} #endif {% endfor -%} diff --git a/scripts/copy_from_upstream/src/sig/family/sig_scheme.c b/scripts/copy_from_upstream/src/sig/family/sig_scheme.c index 5b8927a83c..928ef3d65f 100644 --- a/scripts/copy_from_upstream/src/sig/family/sig_scheme.c +++ b/scripts/copy_from_upstream/src/sig/family/sig_scheme.c @@ -30,6 +30,33 @@ OQS_SIG *OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_new(void) { return sig; } +{%- if 'alias_scheme' in scheme %} + +/** Alias */ +OQS_SIG *OQS_SIG_{{ family }}_{{ scheme['alias_scheme'] }}_new(void) { + + OQS_SIG *sig = malloc(sizeof(OQS_SIG)); + if (sig == NULL) { + return NULL; + } + sig->method_name = OQS_SIG_alg_{{ family }}_{{ scheme['alias_scheme'] }}; + sig->alg_version = "{{ scheme['metadata']['implementations'][0]['version'] }}"; + + sig->claimed_nist_level = {{ scheme['metadata']['claimed-nist-level'] }}; + sig->euf_cma = {{ scheme['metadata']['euf_cma'] }}; + + sig->length_public_key = OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_length_public_key; + sig->length_secret_key = 
OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_length_secret_key; + sig->length_signature = OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_length_signature; + + sig->keypair = OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_keypair; + sig->sign = OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_sign; + sig->verify = OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_verify; + + return sig; +} +{%- endif -%} + {%- for impl in scheme['metadata']['implementations'] if impl['name'] == scheme['default_implementation'] %} {%- if impl['signature_keypair'] %} diff --git a/scripts/copy_from_upstream/src/sig/sig.c/alg_identifier.fragment b/scripts/copy_from_upstream/src/sig/sig.c/alg_identifier.fragment index 3b4e54ad29..87d9c6fa5f 100644 --- a/scripts/copy_from_upstream/src/sig/sig.c/alg_identifier.fragment +++ b/scripts/copy_from_upstream/src/sig/sig.c/alg_identifier.fragment @@ -1,3 +1,6 @@ {% for family in instructions['sigs'] %}{% for scheme in family['schemes'] %} - OQS_SIG_alg_{{ family['name'] }}_{{ scheme['scheme'] }},{% endfor %}{% endfor %} - \ No newline at end of file + OQS_SIG_alg_{{ family['name'] }}_{{ scheme['scheme'] }}, +{%- if 'alias_scheme' in scheme %} + OQS_SIG_alg_{{ family['name'] }}_{{ scheme['alias_scheme'] }}, +{%- endif -%} +{% endfor %}{% endfor %} diff --git a/scripts/copy_from_upstream/src/sig/sig.c/enabled_case.fragment b/scripts/copy_from_upstream/src/sig/sig.c/enabled_case.fragment index c85d08adff..8ff702c3f9 100644 --- a/scripts/copy_from_upstream/src/sig/sig.c/enabled_case.fragment +++ b/scripts/copy_from_upstream/src/sig/sig.c/enabled_case.fragment @@ -4,5 +4,13 @@ return 1; #else return 0; -#endif{% endfor %}{% endfor %} - +#endif +{% if 'alias_scheme' in scheme %} + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_{{ family['name'] }}_{{ scheme['alias_scheme'] }})) { +#ifdef OQS_ENABLE_SIG_{{ family['name'] }}_{{ scheme['alias_scheme'] }} + return 1; +#else + return 0; +#endif +{% endif -%} +{% endfor %}{% endfor %} \ No newline at end of file diff 
--git a/scripts/copy_from_upstream/src/sig/sig.c/new_case.fragment b/scripts/copy_from_upstream/src/sig/sig.c/new_case.fragment index e874f8e14f..2600f9b35d 100644 --- a/scripts/copy_from_upstream/src/sig/sig.c/new_case.fragment +++ b/scripts/copy_from_upstream/src/sig/sig.c/new_case.fragment @@ -4,5 +4,13 @@ return OQS_SIG_{{ family['name'] }}_{{ scheme['scheme'] }}_new(); #else return NULL; -#endif{% endfor %}{% endfor %} - +#endif +{% if 'alias_scheme' in scheme %} + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_{{ family['name'] }}_{{ scheme['alias_scheme'] }})) { +#ifdef OQS_ENABLE_SIG_{{ family['name'] }}_{{ scheme['alias_scheme'] }} + return OQS_SIG_{{ family['name'] }}_{{ scheme['alias_scheme'] }}_new(); +#else + return NULL; +#endif +{% endif -%} +{% endfor %}{% endfor %} diff --git a/scripts/copy_from_upstream/src/sig/sig.h/alg_identifier.fragment b/scripts/copy_from_upstream/src/sig/sig.h/alg_identifier.fragment index dc940087b3..9de830f9ab 100644 --- a/scripts/copy_from_upstream/src/sig/sig.h/alg_identifier.fragment +++ b/scripts/copy_from_upstream/src/sig/sig.h/alg_identifier.fragment @@ -1,4 +1,9 @@ {% for family in instructions['sigs'] %}{% for scheme in family['schemes'] %} /** Algorithm identifier for {{ scheme['pretty_name_full'] }} */ -#define OQS_SIG_alg_{{ family['name'] }}_{{ scheme['scheme'] }} "{{ scheme['pretty_name_full'] }}"{% endfor %}{% endfor %} +#define OQS_SIG_alg_{{ family['name'] }}_{{ scheme['scheme'] }} "{{ scheme['pretty_name_full'] }}" +{%- if 'alias_scheme' in scheme %} +/** Algorithm identifier for {{ scheme['alias_pretty_name_full'] }} SIG. 
*/ +#define OQS_SIG_alg_{{ family['name'] }}_{{ scheme['alias_scheme'] }} "{{ scheme['alias_pretty_name_full'] }}" +{%- endif -%} +{% endfor %}{% endfor %} diff --git a/scripts/copy_from_upstream/src/sig/sig.h/algs_length.fragment b/scripts/copy_from_upstream/src/sig/sig.h/algs_length.fragment index ea35d7ab55..0ac7133145 100644 --- a/scripts/copy_from_upstream/src/sig/sig.h/algs_length.fragment +++ b/scripts/copy_from_upstream/src/sig/sig.h/algs_length.fragment @@ -1,4 +1,5 @@ {% set unary %}{% for family in instructions['sigs'] %}{% for scheme in family['schemes'] %}1{% endfor %}{% endfor %}{% endset %} +{% set unary_alias %}{% for family in instructions['sigs'] %}{% for scheme in family['schemes'] if 'alias_scheme' in scheme %}2{% endfor %}{% endfor %}{% endset %} /** Number of algorithm identifiers above. */ -#define OQS_SIG_algs_length {{ unary|length }} +#define OQS_SIG_algs_length {{ unary|length + unary_alias|length }} diff --git a/scripts/copy_from_upstream/tests/kat_sig.c/combine_message_signature.fragment b/scripts/copy_from_upstream/tests/kat_sig.c/combine_message_signature.fragment index c76d13512a..25609f254e 100644 --- a/scripts/copy_from_upstream/tests/kat_sig.c/combine_message_signature.fragment +++ b/scripts/copy_from_upstream/tests/kat_sig.c/combine_message_signature.fragment @@ -1,5 +1,5 @@ {% for family in instructions['sigs'] %}{% for scheme in family['schemes'] %} - } else if (0 == strcmp(sig->method_name, "{{ scheme['pretty_name_full'] }}")) { + } else if (0 == strcmp(sig->method_name, "{{ scheme['pretty_name_full'] }}"){%- if 'alias_scheme' in scheme %} || 0 == strcmp(sig->method_name, "{{ scheme['alias_pretty_name_full'] }}"){%- endif -%}) { {%- if scheme['signed_msg_order'] == 'sig_then_msg' %} // signed_msg = signature || msg *signed_msg_len = signature_len + msg_len; diff --git a/scripts/update_docs_from_yaml.py b/scripts/update_docs_from_yaml.py index 96a4b1887c..ef152d376a 100644 --- a/scripts/update_docs_from_yaml.py +++ 
b/scripts/update_docs_from_yaml.py @@ -62,6 +62,7 @@ def do_it(liboqs_root): out_md.write('\n## Parameter set summary\n\n') table = [['Parameter set', + 'Parameter set alias', 'Security model', 'Claimed NIST Level', 'Public key size (bytes)', @@ -70,6 +71,7 @@ def do_it(liboqs_root): 'Shared secret size (bytes)']] for parameter_set in kem_yaml['parameter-sets']: table.append([parameter_set['name'], + parameter_set['alias'] if 'alias' in parameter_set else "NA", parameter_set['claimed-security'], parameter_set['claimed-nist-level'], parameter_set['length-public-key'], @@ -186,6 +188,7 @@ def do_it(liboqs_root): out_md.write('\n## Parameter set summary\n\n') table = [['Parameter set', + 'Parameter set alias', 'Security model', 'Claimed NIST Level', 'Public key size (bytes)', @@ -193,6 +196,7 @@ def do_it(liboqs_root): 'Signature size (bytes)']] for parameter_set in sig_yaml['parameter-sets']: table.append([parameter_set['name'].replace('_', '\_'), + parameter_set['alias'] if 'alias' in parameter_set else "NA", parameter_set['claimed-security'], parameter_set['claimed-nist-level'], parameter_set['length-public-key'], @@ -291,13 +295,21 @@ def do_it(liboqs_root): parameter_sets = kem_yaml['parameter-sets'] if any(impl['large-stack-usage'] for impl in parameter_sets[0]['implementations']): readme.write('- **{}**: {}†'.format(kem_yaml['name'], parameter_sets[0]['name'])) + if 'alias' in parameter_sets[0]: + readme.write(' (alias: {})'.format(parameter_sets[0]['alias'])) else: readme.write('- **{}**: {}'.format(kem_yaml['name'], parameter_sets[0]['name'])) + if 'alias' in parameter_sets[0]: + readme.write(' (alias: {})'.format(parameter_sets[0]['alias'])) for parameter_set in parameter_sets[1:]: if any(impl['large-stack-usage'] for impl in parameter_set['implementations']): readme.write(', {}†'.format(parameter_set['name'])) + if 'alias' in parameter_set: + readme.write(' (alias: {})'.format(parameter_set['alias'])) else: readme.write(', {}'.format(parameter_set['name'])) 
+ if 'alias' in parameter_set: + readme.write(' (alias: {})'.format(parameter_set['alias'])) readme.write('\n') readme.write(postamble) @@ -318,13 +330,21 @@ def do_it(liboqs_root): parameter_sets = sig_yaml['parameter-sets'] if any(impl['large-stack-usage'] for impl in parameter_sets[0]['implementations']): readme.write('- **{}**: {}†'.format(sig_yaml['name'], parameter_sets[0]['name'].replace('_','\_'))) + if 'alias' in parameter_sets[0]: + readme.write(' (alias: {})'.format(parameter_sets[0]['alias']).replace('_','\_')) else: readme.write('- **{}**: {}'.format(sig_yaml['name'], parameter_sets[0]['name'].replace('_','\_'))) + if 'alias' in parameter_sets[0]: + readme.write(' (alias: {})'.format(parameter_sets[0]['alias']).replace('_','\_')) for parameter_set in parameter_sets[1:]: if any(impl['large-stack-usage'] for impl in parameter_set['implementations']): readme.write(', {}†'.format(parameter_set['name'].replace('_', '\_'))) + if 'alias' in parameter_set: + readme.write(' (alias: {})'.format(parameter_set['alias']).replace('_','\_')) else: readme.write(', {}'.format(parameter_set['name'].replace('_', '\_'))) + if 'alias' in parameter_set: + readme.write(' (alias: {})'.format(parameter_set['alias']).replace('_','\_')) readme.write('\n') sphincs_yml = sig_yamls[-1] diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ca5adf4070..1f9ed06e5c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -34,10 +34,18 @@ if(OQS_ENABLE_KEM_KYBER) add_subdirectory(kem/kyber) set(KEM_OBJS ${KEM_OBJS} ${KYBER_OBJS}) endif() +if(OQS_ENABLE_KEM_ML_KEM) + add_subdirectory(kem/ml_kem) + set(KEM_OBJS ${KEM_OBJS} ${ML_KEM_OBJS}) +endif() if(OQS_ENABLE_SIG_DILITHIUM) add_subdirectory(sig/dilithium) set(SIG_OBJS ${SIG_OBJS} ${DILITHIUM_OBJS}) endif() +if(OQS_ENABLE_SIG_ML_DSA) + add_subdirectory(sig/ml_dsa) + set(SIG_OBJS ${SIG_OBJS} ${ML_DSA_OBJS}) +endif() if(OQS_ENABLE_SIG_FALCON) add_subdirectory(sig/falcon) set(SIG_OBJS ${SIG_OBJS} ${FALCON_OBJS}) diff --git 
a/src/kem/classic_mceliece/kem_classic_mceliece.h b/src/kem/classic_mceliece/kem_classic_mceliece.h index 766b751c59..2bbd969820 100644 --- a/src/kem/classic_mceliece/kem_classic_mceliece.h +++ b/src/kem/classic_mceliece/kem_classic_mceliece.h @@ -5,7 +5,7 @@ #include -#ifdef OQS_ENABLE_KEM_classic_mceliece_348864 +#if defined(OQS_ENABLE_KEM_classic_mceliece_348864) #define OQS_KEM_classic_mceliece_348864_length_public_key 261120 #define OQS_KEM_classic_mceliece_348864_length_secret_key 6492 #define OQS_KEM_classic_mceliece_348864_length_ciphertext 96 @@ -16,7 +16,7 @@ OQS_API OQS_STATUS OQS_KEM_classic_mceliece_348864_encaps(uint8_t *ciphertext, u OQS_API OQS_STATUS OQS_KEM_classic_mceliece_348864_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key); #endif -#ifdef OQS_ENABLE_KEM_classic_mceliece_348864f +#if defined(OQS_ENABLE_KEM_classic_mceliece_348864f) #define OQS_KEM_classic_mceliece_348864f_length_public_key 261120 #define OQS_KEM_classic_mceliece_348864f_length_secret_key 6492 #define OQS_KEM_classic_mceliece_348864f_length_ciphertext 96 @@ -27,7 +27,7 @@ OQS_API OQS_STATUS OQS_KEM_classic_mceliece_348864f_encaps(uint8_t *ciphertext, OQS_API OQS_STATUS OQS_KEM_classic_mceliece_348864f_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key); #endif -#ifdef OQS_ENABLE_KEM_classic_mceliece_460896 +#if defined(OQS_ENABLE_KEM_classic_mceliece_460896) #define OQS_KEM_classic_mceliece_460896_length_public_key 524160 #define OQS_KEM_classic_mceliece_460896_length_secret_key 13608 #define OQS_KEM_classic_mceliece_460896_length_ciphertext 156 @@ -38,7 +38,7 @@ OQS_API OQS_STATUS OQS_KEM_classic_mceliece_460896_encaps(uint8_t *ciphertext, u OQS_API OQS_STATUS OQS_KEM_classic_mceliece_460896_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key); #endif -#ifdef OQS_ENABLE_KEM_classic_mceliece_460896f +#if defined(OQS_ENABLE_KEM_classic_mceliece_460896f) #define 
OQS_KEM_classic_mceliece_460896f_length_public_key 524160 #define OQS_KEM_classic_mceliece_460896f_length_secret_key 13608 #define OQS_KEM_classic_mceliece_460896f_length_ciphertext 156 @@ -49,7 +49,7 @@ OQS_API OQS_STATUS OQS_KEM_classic_mceliece_460896f_encaps(uint8_t *ciphertext, OQS_API OQS_STATUS OQS_KEM_classic_mceliece_460896f_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key); #endif -#ifdef OQS_ENABLE_KEM_classic_mceliece_6688128 +#if defined(OQS_ENABLE_KEM_classic_mceliece_6688128) #define OQS_KEM_classic_mceliece_6688128_length_public_key 1044992 #define OQS_KEM_classic_mceliece_6688128_length_secret_key 13932 #define OQS_KEM_classic_mceliece_6688128_length_ciphertext 208 @@ -60,7 +60,7 @@ OQS_API OQS_STATUS OQS_KEM_classic_mceliece_6688128_encaps(uint8_t *ciphertext, OQS_API OQS_STATUS OQS_KEM_classic_mceliece_6688128_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key); #endif -#ifdef OQS_ENABLE_KEM_classic_mceliece_6688128f +#if defined(OQS_ENABLE_KEM_classic_mceliece_6688128f) #define OQS_KEM_classic_mceliece_6688128f_length_public_key 1044992 #define OQS_KEM_classic_mceliece_6688128f_length_secret_key 13932 #define OQS_KEM_classic_mceliece_6688128f_length_ciphertext 208 @@ -71,7 +71,7 @@ OQS_API OQS_STATUS OQS_KEM_classic_mceliece_6688128f_encaps(uint8_t *ciphertext, OQS_API OQS_STATUS OQS_KEM_classic_mceliece_6688128f_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key); #endif -#ifdef OQS_ENABLE_KEM_classic_mceliece_6960119 +#if defined(OQS_ENABLE_KEM_classic_mceliece_6960119) #define OQS_KEM_classic_mceliece_6960119_length_public_key 1047319 #define OQS_KEM_classic_mceliece_6960119_length_secret_key 13948 #define OQS_KEM_classic_mceliece_6960119_length_ciphertext 194 @@ -82,7 +82,7 @@ OQS_API OQS_STATUS OQS_KEM_classic_mceliece_6960119_encaps(uint8_t *ciphertext, OQS_API OQS_STATUS OQS_KEM_classic_mceliece_6960119_decaps(uint8_t *shared_secret, const 
uint8_t *ciphertext, const uint8_t *secret_key); #endif -#ifdef OQS_ENABLE_KEM_classic_mceliece_6960119f +#if defined(OQS_ENABLE_KEM_classic_mceliece_6960119f) #define OQS_KEM_classic_mceliece_6960119f_length_public_key 1047319 #define OQS_KEM_classic_mceliece_6960119f_length_secret_key 13948 #define OQS_KEM_classic_mceliece_6960119f_length_ciphertext 194 @@ -93,7 +93,7 @@ OQS_API OQS_STATUS OQS_KEM_classic_mceliece_6960119f_encaps(uint8_t *ciphertext, OQS_API OQS_STATUS OQS_KEM_classic_mceliece_6960119f_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key); #endif -#ifdef OQS_ENABLE_KEM_classic_mceliece_8192128 +#if defined(OQS_ENABLE_KEM_classic_mceliece_8192128) #define OQS_KEM_classic_mceliece_8192128_length_public_key 1357824 #define OQS_KEM_classic_mceliece_8192128_length_secret_key 14120 #define OQS_KEM_classic_mceliece_8192128_length_ciphertext 208 @@ -104,7 +104,7 @@ OQS_API OQS_STATUS OQS_KEM_classic_mceliece_8192128_encaps(uint8_t *ciphertext, OQS_API OQS_STATUS OQS_KEM_classic_mceliece_8192128_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key); #endif -#ifdef OQS_ENABLE_KEM_classic_mceliece_8192128f +#if defined(OQS_ENABLE_KEM_classic_mceliece_8192128f) #define OQS_KEM_classic_mceliece_8192128f_length_public_key 1357824 #define OQS_KEM_classic_mceliece_8192128f_length_secret_key 14120 #define OQS_KEM_classic_mceliece_8192128f_length_ciphertext 208 diff --git a/src/kem/hqc/kem_hqc.h b/src/kem/hqc/kem_hqc.h index 1df06e1c11..b1f022374d 100644 --- a/src/kem/hqc/kem_hqc.h +++ b/src/kem/hqc/kem_hqc.h @@ -5,7 +5,7 @@ #include -#ifdef OQS_ENABLE_KEM_hqc_128 +#if defined(OQS_ENABLE_KEM_hqc_128) #define OQS_KEM_hqc_128_length_public_key 2249 #define OQS_KEM_hqc_128_length_secret_key 2305 #define OQS_KEM_hqc_128_length_ciphertext 4433 @@ -16,7 +16,7 @@ OQS_API OQS_STATUS OQS_KEM_hqc_128_encaps(uint8_t *ciphertext, uint8_t *shared_s OQS_API OQS_STATUS OQS_KEM_hqc_128_decaps(uint8_t *shared_secret, 
const uint8_t *ciphertext, const uint8_t *secret_key); #endif -#ifdef OQS_ENABLE_KEM_hqc_192 +#if defined(OQS_ENABLE_KEM_hqc_192) #define OQS_KEM_hqc_192_length_public_key 4522 #define OQS_KEM_hqc_192_length_secret_key 4586 #define OQS_KEM_hqc_192_length_ciphertext 8978 @@ -27,7 +27,7 @@ OQS_API OQS_STATUS OQS_KEM_hqc_192_encaps(uint8_t *ciphertext, uint8_t *shared_s OQS_API OQS_STATUS OQS_KEM_hqc_192_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key); #endif -#ifdef OQS_ENABLE_KEM_hqc_256 +#if defined(OQS_ENABLE_KEM_hqc_256) #define OQS_KEM_hqc_256_length_public_key 7245 #define OQS_KEM_hqc_256_length_secret_key 7317 #define OQS_KEM_hqc_256_length_ciphertext 14421 diff --git a/src/kem/kem.c b/src/kem/kem.c index 01448af121..0a340e4f38 100644 --- a/src/kem/kem.c +++ b/src/kem/kem.c @@ -34,6 +34,12 @@ OQS_API const char *OQS_KEM_alg_identifier(size_t i) { OQS_KEM_alg_kyber_512, OQS_KEM_alg_kyber_768, OQS_KEM_alg_kyber_1024, + OQS_KEM_alg_ml_kem_512_ipd, + OQS_KEM_alg_ml_kem_512, + OQS_KEM_alg_ml_kem_768_ipd, + OQS_KEM_alg_ml_kem_768, + OQS_KEM_alg_ml_kem_1024_ipd, + OQS_KEM_alg_ml_kem_1024, ///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_ALG_IDENTIFIER_END OQS_KEM_alg_ntruprime_sntrup761, OQS_KEM_alg_frodokem_640_aes, @@ -82,96 +88,154 @@ OQS_API int OQS_KEM_alg_is_enabled(const char *method_name) { #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_348864f)) { #ifdef OQS_ENABLE_KEM_classic_mceliece_348864f return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_460896)) { #ifdef OQS_ENABLE_KEM_classic_mceliece_460896 return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_460896f)) { #ifdef OQS_ENABLE_KEM_classic_mceliece_460896f return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_6688128)) { #ifdef OQS_ENABLE_KEM_classic_mceliece_6688128 return 1; #else 
return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_6688128f)) { #ifdef OQS_ENABLE_KEM_classic_mceliece_6688128f return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_6960119)) { #ifdef OQS_ENABLE_KEM_classic_mceliece_6960119 return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_6960119f)) { #ifdef OQS_ENABLE_KEM_classic_mceliece_6960119f return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_8192128)) { #ifdef OQS_ENABLE_KEM_classic_mceliece_8192128 return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_8192128f)) { #ifdef OQS_ENABLE_KEM_classic_mceliece_8192128f return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_hqc_128)) { #ifdef OQS_ENABLE_KEM_hqc_128 return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_hqc_192)) { #ifdef OQS_ENABLE_KEM_hqc_192 return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_hqc_256)) { #ifdef OQS_ENABLE_KEM_hqc_256 return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_kyber_512)) { #ifdef OQS_ENABLE_KEM_kyber_512 return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_kyber_768)) { #ifdef OQS_ENABLE_KEM_kyber_768 return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_kyber_1024)) { #ifdef OQS_ENABLE_KEM_kyber_1024 return 1; #else return 0; #endif + + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_512_ipd)) { +#ifdef OQS_ENABLE_KEM_ml_kem_512_ipd + return 1; +#else + return 0; +#endif + + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_512)) { +#ifdef OQS_ENABLE_KEM_ml_kem_512 + return 1; +#else + return 0; +#endif + + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_768_ipd)) { 
+#ifdef OQS_ENABLE_KEM_ml_kem_768_ipd + return 1; +#else + return 0; +#endif + + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_768)) { +#ifdef OQS_ENABLE_KEM_ml_kem_768 + return 1; +#else + return 0; +#endif + + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_1024_ipd)) { +#ifdef OQS_ENABLE_KEM_ml_kem_1024_ipd + return 1; +#else + return 0; +#endif + + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_1024)) { +#ifdef OQS_ENABLE_KEM_ml_kem_1024 + return 1; +#else + return 0; +#endif + ///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_ENABLED_CASE_END } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ntruprime_sntrup761)) { #ifdef OQS_ENABLE_KEM_ntruprime_sntrup761 @@ -250,96 +314,154 @@ OQS_API OQS_KEM *OQS_KEM_new(const char *method_name) { #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_348864f)) { #ifdef OQS_ENABLE_KEM_classic_mceliece_348864f return OQS_KEM_classic_mceliece_348864f_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_460896)) { #ifdef OQS_ENABLE_KEM_classic_mceliece_460896 return OQS_KEM_classic_mceliece_460896_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_460896f)) { #ifdef OQS_ENABLE_KEM_classic_mceliece_460896f return OQS_KEM_classic_mceliece_460896f_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_6688128)) { #ifdef OQS_ENABLE_KEM_classic_mceliece_6688128 return OQS_KEM_classic_mceliece_6688128_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_6688128f)) { #ifdef OQS_ENABLE_KEM_classic_mceliece_6688128f return OQS_KEM_classic_mceliece_6688128f_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_6960119)) { #ifdef OQS_ENABLE_KEM_classic_mceliece_6960119 return OQS_KEM_classic_mceliece_6960119_new(); #else 
return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_6960119f)) { #ifdef OQS_ENABLE_KEM_classic_mceliece_6960119f return OQS_KEM_classic_mceliece_6960119f_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_8192128)) { #ifdef OQS_ENABLE_KEM_classic_mceliece_8192128 return OQS_KEM_classic_mceliece_8192128_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_8192128f)) { #ifdef OQS_ENABLE_KEM_classic_mceliece_8192128f return OQS_KEM_classic_mceliece_8192128f_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_hqc_128)) { #ifdef OQS_ENABLE_KEM_hqc_128 return OQS_KEM_hqc_128_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_hqc_192)) { #ifdef OQS_ENABLE_KEM_hqc_192 return OQS_KEM_hqc_192_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_hqc_256)) { #ifdef OQS_ENABLE_KEM_hqc_256 return OQS_KEM_hqc_256_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_kyber_512)) { #ifdef OQS_ENABLE_KEM_kyber_512 return OQS_KEM_kyber_512_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_kyber_768)) { #ifdef OQS_ENABLE_KEM_kyber_768 return OQS_KEM_kyber_768_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_kyber_1024)) { #ifdef OQS_ENABLE_KEM_kyber_1024 return OQS_KEM_kyber_1024_new(); #else return NULL; #endif + + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_512_ipd)) { +#ifdef OQS_ENABLE_KEM_ml_kem_512_ipd + return OQS_KEM_ml_kem_512_ipd_new(); +#else + return NULL; +#endif + + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_512)) { +#ifdef OQS_ENABLE_KEM_ml_kem_512 + return OQS_KEM_ml_kem_512_new(); +#else + return NULL; +#endif + + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_768_ipd)) { 
+#ifdef OQS_ENABLE_KEM_ml_kem_768_ipd + return OQS_KEM_ml_kem_768_ipd_new(); +#else + return NULL; +#endif + + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_768)) { +#ifdef OQS_ENABLE_KEM_ml_kem_768 + return OQS_KEM_ml_kem_768_new(); +#else + return NULL; +#endif + + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_1024_ipd)) { +#ifdef OQS_ENABLE_KEM_ml_kem_1024_ipd + return OQS_KEM_ml_kem_1024_ipd_new(); +#else + return NULL; +#endif + + } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_1024)) { +#ifdef OQS_ENABLE_KEM_ml_kem_1024 + return OQS_KEM_ml_kem_1024_new(); +#else + return NULL; +#endif + ///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_NEW_CASE_END } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ntruprime_sntrup761)) { #ifdef OQS_ENABLE_KEM_ntruprime_sntrup761 diff --git a/src/kem/kem.h b/src/kem/kem.h index e2ea8d9d67..0e579c0477 100644 --- a/src/kem/kem.h +++ b/src/kem/kem.h @@ -70,6 +70,18 @@ extern "C" { #define OQS_KEM_alg_kyber_768 "Kyber768" /** Algorithm identifier for Kyber1024 KEM. */ #define OQS_KEM_alg_kyber_1024 "Kyber1024" +/** Algorithm identifier for ML-KEM-512-ipd KEM. */ +#define OQS_KEM_alg_ml_kem_512_ipd "ML-KEM-512-ipd" +/** Algorithm identifier for ML-KEM-512 KEM. */ +#define OQS_KEM_alg_ml_kem_512 "ML-KEM-512" +/** Algorithm identifier for ML-KEM-768-ipd KEM. */ +#define OQS_KEM_alg_ml_kem_768_ipd "ML-KEM-768-ipd" +/** Algorithm identifier for ML-KEM-768 KEM. */ +#define OQS_KEM_alg_ml_kem_768 "ML-KEM-768" +/** Algorithm identifier for ML-KEM-1024-ipd KEM. */ +#define OQS_KEM_alg_ml_kem_1024_ipd "ML-KEM-1024-ipd" +/** Algorithm identifier for ML-KEM-1024 KEM. */ +#define OQS_KEM_alg_ml_kem_1024 "ML-KEM-1024" ///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_ALG_IDENTIFIER_END /** Algorithm identifier for sntrup761 KEM. 
*/ #define OQS_KEM_alg_ntruprime_sntrup761 "sntrup761" @@ -87,8 +99,9 @@ extern "C" { #define OQS_KEM_alg_frodokem_1344_shake "FrodoKEM-1344-SHAKE" // EDIT-WHEN-ADDING-KEM ///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_ALGS_LENGTH_START + /** Number of algorithm identifiers above. */ -#define OQS_KEM_algs_length 26 +#define OQS_KEM_algs_length 32 ///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_ALGS_LENGTH_END /** @@ -269,6 +282,9 @@ OQS_API void OQS_KEM_free(OQS_KEM *kem); #ifdef OQS_ENABLE_KEM_KYBER #include #endif /* OQS_ENABLE_KEM_KYBER */ +#ifdef OQS_ENABLE_KEM_ML_KEM +#include +#endif /* OQS_ENABLE_KEM_ML_KEM */ ///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_INCLUDE_END #ifdef OQS_ENABLE_KEM_NTRUPRIME #include diff --git a/src/kem/kyber/kem_kyber.h b/src/kem/kyber/kem_kyber.h index 5d24808488..cb475aff27 100644 --- a/src/kem/kyber/kem_kyber.h +++ b/src/kem/kyber/kem_kyber.h @@ -5,7 +5,7 @@ #include -#ifdef OQS_ENABLE_KEM_kyber_512 +#if defined(OQS_ENABLE_KEM_kyber_512) #define OQS_KEM_kyber_512_length_public_key 800 #define OQS_KEM_kyber_512_length_secret_key 1632 #define OQS_KEM_kyber_512_length_ciphertext 768 @@ -16,7 +16,7 @@ OQS_API OQS_STATUS OQS_KEM_kyber_512_encaps(uint8_t *ciphertext, uint8_t *shared OQS_API OQS_STATUS OQS_KEM_kyber_512_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key); #endif -#ifdef OQS_ENABLE_KEM_kyber_768 +#if defined(OQS_ENABLE_KEM_kyber_768) #define OQS_KEM_kyber_768_length_public_key 1184 #define OQS_KEM_kyber_768_length_secret_key 2400 #define OQS_KEM_kyber_768_length_ciphertext 1088 @@ -27,7 +27,7 @@ OQS_API OQS_STATUS OQS_KEM_kyber_768_encaps(uint8_t *ciphertext, uint8_t *shared OQS_API OQS_STATUS OQS_KEM_kyber_768_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key); #endif -#ifdef OQS_ENABLE_KEM_kyber_1024 +#if defined(OQS_ENABLE_KEM_kyber_1024) #define OQS_KEM_kyber_1024_length_public_key 1568 #define OQS_KEM_kyber_1024_length_secret_key 3168 #define 
OQS_KEM_kyber_1024_length_ciphertext 1568 diff --git a/src/kem/ml_kem/CMakeLists.txt b/src/kem/ml_kem/CMakeLists.txt new file mode 100644 index 0000000000..a5890ab9ce --- /dev/null +++ b/src/kem/ml_kem/CMakeLists.txt @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: MIT + +# This file was generated by +# scripts/copy_from_upstream/copy_from_upstream.py + +set(_ML_KEM_OBJS "") + +if(OQS_ENABLE_KEM_ml_kem_512_ipd) + add_library(ml_kem_512_ipd_ref OBJECT kem_ml_kem_512_ipd.c pqcrystals-kyber-standard_ml-kem-512-ipd_ref/cbd.c pqcrystals-kyber-standard_ml-kem-512-ipd_ref/indcpa.c pqcrystals-kyber-standard_ml-kem-512-ipd_ref/kem.c pqcrystals-kyber-standard_ml-kem-512-ipd_ref/ntt.c pqcrystals-kyber-standard_ml-kem-512-ipd_ref/poly.c pqcrystals-kyber-standard_ml-kem-512-ipd_ref/polyvec.c pqcrystals-kyber-standard_ml-kem-512-ipd_ref/reduce.c pqcrystals-kyber-standard_ml-kem-512-ipd_ref/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-512-ipd_ref/verify.c) + target_compile_options(ml_kem_512_ipd_ref PUBLIC -DKYBER_K=2) + target_include_directories(ml_kem_512_ipd_ref PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-kyber-standard_ml-kem-512-ipd_ref) + target_include_directories(ml_kem_512_ipd_ref PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) + target_compile_options(ml_kem_512_ipd_ref PUBLIC -DKYBER_K=2) + set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $) +endif() + +if(OQS_ENABLE_KEM_ml_kem_512_ipd_avx2) + add_library(ml_kem_512_ipd_avx2 OBJECT pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/basemul.S pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/cbd.c pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/consts.c pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/fq.S pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/indcpa.c pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/invntt.S pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/kem.c pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/ntt.S pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/poly.c pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/polyvec.c 
pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/rejsample.c pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/shuffle.S pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/verify.c) + target_include_directories(ml_kem_512_ipd_avx2 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2) + target_include_directories(ml_kem_512_ipd_avx2 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) + target_compile_options(ml_kem_512_ipd_avx2 PRIVATE -mavx2 -mbmi2 -mpopcnt ) + target_compile_options(ml_kem_512_ipd_avx2 PUBLIC -DKYBER_K=2) + set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $) +endif() + +if(OQS_ENABLE_KEM_ml_kem_768_ipd) + add_library(ml_kem_768_ipd_ref OBJECT kem_ml_kem_768_ipd.c pqcrystals-kyber-standard_ml-kem-768-ipd_ref/cbd.c pqcrystals-kyber-standard_ml-kem-768-ipd_ref/indcpa.c pqcrystals-kyber-standard_ml-kem-768-ipd_ref/kem.c pqcrystals-kyber-standard_ml-kem-768-ipd_ref/ntt.c pqcrystals-kyber-standard_ml-kem-768-ipd_ref/poly.c pqcrystals-kyber-standard_ml-kem-768-ipd_ref/polyvec.c pqcrystals-kyber-standard_ml-kem-768-ipd_ref/reduce.c pqcrystals-kyber-standard_ml-kem-768-ipd_ref/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-768-ipd_ref/verify.c) + target_compile_options(ml_kem_768_ipd_ref PUBLIC -DKYBER_K=3) + target_include_directories(ml_kem_768_ipd_ref PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-kyber-standard_ml-kem-768-ipd_ref) + target_include_directories(ml_kem_768_ipd_ref PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) + target_compile_options(ml_kem_768_ipd_ref PUBLIC -DKYBER_K=3) + set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $) +endif() + +if(OQS_ENABLE_KEM_ml_kem_768_ipd_avx2) + add_library(ml_kem_768_ipd_avx2 OBJECT pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/basemul.S pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/cbd.c pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/consts.c pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/fq.S 
pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/indcpa.c pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/invntt.S pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/kem.c pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/ntt.S pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/poly.c pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/polyvec.c pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/rejsample.c pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/shuffle.S pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/verify.c) + target_include_directories(ml_kem_768_ipd_avx2 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2) + target_include_directories(ml_kem_768_ipd_avx2 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) + target_compile_options(ml_kem_768_ipd_avx2 PRIVATE -mavx2 -mbmi2 -mpopcnt ) + target_compile_options(ml_kem_768_ipd_avx2 PUBLIC -DKYBER_K=3) + set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $) +endif() + +if(OQS_ENABLE_KEM_ml_kem_1024_ipd) + add_library(ml_kem_1024_ipd_ref OBJECT kem_ml_kem_1024_ipd.c pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/cbd.c pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/indcpa.c pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/kem.c pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/ntt.c pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/poly.c pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/polyvec.c pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/reduce.c pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/verify.c) + target_compile_options(ml_kem_1024_ipd_ref PUBLIC -DKYBER_K=4) + target_include_directories(ml_kem_1024_ipd_ref PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref) + target_include_directories(ml_kem_1024_ipd_ref PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) + target_compile_options(ml_kem_1024_ipd_ref PUBLIC -DKYBER_K=4) + set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $) +endif() + 
+if(OQS_ENABLE_KEM_ml_kem_1024_ipd_avx2) + add_library(ml_kem_1024_ipd_avx2 OBJECT pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/basemul.S pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/cbd.c pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/consts.c pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/fq.S pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/indcpa.c pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/invntt.S pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/kem.c pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/ntt.S pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/poly.c pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/polyvec.c pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/rejsample.c pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/shuffle.S pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/verify.c) + target_include_directories(ml_kem_1024_ipd_avx2 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2) + target_include_directories(ml_kem_1024_ipd_avx2 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) + target_compile_options(ml_kem_1024_ipd_avx2 PRIVATE -mavx2 -mbmi2 -mpopcnt ) + target_compile_options(ml_kem_1024_ipd_avx2 PUBLIC -DKYBER_K=4) + set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $) +endif() + +set(ML_KEM_OBJS ${_ML_KEM_OBJS} PARENT_SCOPE) diff --git a/src/kem/ml_kem/kem_ml_kem.h b/src/kem/ml_kem/kem_ml_kem.h new file mode 100644 index 0000000000..b3e3d99cfb --- /dev/null +++ b/src/kem/ml_kem/kem_ml_kem.h @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: MIT + +#ifndef OQS_KEM_ML_KEM_H +#define OQS_KEM_ML_KEM_H + +#include + +#if defined(OQS_ENABLE_KEM_ml_kem_512_ipd) || defined(OQS_ENABLE_KEM_ml_kem_512) +#define OQS_KEM_ml_kem_512_ipd_length_public_key 800 +#define OQS_KEM_ml_kem_512_ipd_length_secret_key 1632 +#define OQS_KEM_ml_kem_512_ipd_length_ciphertext 768 +#define OQS_KEM_ml_kem_512_ipd_length_shared_secret 32 +OQS_KEM *OQS_KEM_ml_kem_512_ipd_new(void); +OQS_API OQS_STATUS 
OQS_KEM_ml_kem_512_ipd_keypair(uint8_t *public_key, uint8_t *secret_key); +OQS_API OQS_STATUS OQS_KEM_ml_kem_512_ipd_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key); +OQS_API OQS_STATUS OQS_KEM_ml_kem_512_ipd_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key); + +#define OQS_KEM_ml_kem_512_length_public_key OQS_KEM_ml_kem_512_ipd_length_public_key +#define OQS_KEM_ml_kem_512_length_secret_key OQS_KEM_ml_kem_512_ipd_length_secret_key +#define OQS_KEM_ml_kem_512_length_ciphertext OQS_KEM_ml_kem_512_ipd_length_ciphertext +#define OQS_KEM_ml_kem_512_length_shared_secret OQS_KEM_ml_kem_512_ipd_length_shared_secret +OQS_KEM *OQS_KEM_ml_kem_512_new(void); +#define OQS_KEM_ml_kem_512_keypair OQS_KEM_ml_kem_512_ipd_keypair +#define OQS_KEM_ml_kem_512_encaps OQS_KEM_ml_kem_512_ipd_encaps +#define OQS_KEM_ml_kem_512_decaps OQS_KEM_ml_kem_512_ipd_decaps +#endif + +#if defined(OQS_ENABLE_KEM_ml_kem_768_ipd) || defined(OQS_ENABLE_KEM_ml_kem_768) +#define OQS_KEM_ml_kem_768_ipd_length_public_key 1184 +#define OQS_KEM_ml_kem_768_ipd_length_secret_key 2400 +#define OQS_KEM_ml_kem_768_ipd_length_ciphertext 1088 +#define OQS_KEM_ml_kem_768_ipd_length_shared_secret 32 +OQS_KEM *OQS_KEM_ml_kem_768_ipd_new(void); +OQS_API OQS_STATUS OQS_KEM_ml_kem_768_ipd_keypair(uint8_t *public_key, uint8_t *secret_key); +OQS_API OQS_STATUS OQS_KEM_ml_kem_768_ipd_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key); +OQS_API OQS_STATUS OQS_KEM_ml_kem_768_ipd_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key); + +#define OQS_KEM_ml_kem_768_length_public_key OQS_KEM_ml_kem_768_ipd_length_public_key +#define OQS_KEM_ml_kem_768_length_secret_key OQS_KEM_ml_kem_768_ipd_length_secret_key +#define OQS_KEM_ml_kem_768_length_ciphertext OQS_KEM_ml_kem_768_ipd_length_ciphertext +#define OQS_KEM_ml_kem_768_length_shared_secret OQS_KEM_ml_kem_768_ipd_length_shared_secret +OQS_KEM 
*OQS_KEM_ml_kem_768_new(void); +#define OQS_KEM_ml_kem_768_keypair OQS_KEM_ml_kem_768_ipd_keypair +#define OQS_KEM_ml_kem_768_encaps OQS_KEM_ml_kem_768_ipd_encaps +#define OQS_KEM_ml_kem_768_decaps OQS_KEM_ml_kem_768_ipd_decaps +#endif + +#if defined(OQS_ENABLE_KEM_ml_kem_1024_ipd) || defined(OQS_ENABLE_KEM_ml_kem_1024) +#define OQS_KEM_ml_kem_1024_ipd_length_public_key 1568 +#define OQS_KEM_ml_kem_1024_ipd_length_secret_key 3168 +#define OQS_KEM_ml_kem_1024_ipd_length_ciphertext 1568 +#define OQS_KEM_ml_kem_1024_ipd_length_shared_secret 32 +OQS_KEM *OQS_KEM_ml_kem_1024_ipd_new(void); +OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_ipd_keypair(uint8_t *public_key, uint8_t *secret_key); +OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_ipd_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key); +OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_ipd_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key); + +#define OQS_KEM_ml_kem_1024_length_public_key OQS_KEM_ml_kem_1024_ipd_length_public_key +#define OQS_KEM_ml_kem_1024_length_secret_key OQS_KEM_ml_kem_1024_ipd_length_secret_key +#define OQS_KEM_ml_kem_1024_length_ciphertext OQS_KEM_ml_kem_1024_ipd_length_ciphertext +#define OQS_KEM_ml_kem_1024_length_shared_secret OQS_KEM_ml_kem_1024_ipd_length_shared_secret +OQS_KEM *OQS_KEM_ml_kem_1024_new(void); +#define OQS_KEM_ml_kem_1024_keypair OQS_KEM_ml_kem_1024_ipd_keypair +#define OQS_KEM_ml_kem_1024_encaps OQS_KEM_ml_kem_1024_ipd_encaps +#define OQS_KEM_ml_kem_1024_decaps OQS_KEM_ml_kem_1024_ipd_decaps +#endif + +#endif + diff --git a/src/kem/ml_kem/kem_ml_kem_1024_ipd.c b/src/kem/ml_kem/kem_ml_kem_1024_ipd.c new file mode 100644 index 0000000000..182b3b32e9 --- /dev/null +++ b/src/kem/ml_kem/kem_ml_kem_1024_ipd.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: MIT + +#include + +#include + +#if defined(OQS_ENABLE_KEM_ml_kem_1024_ipd) + +OQS_KEM *OQS_KEM_ml_kem_1024_ipd_new(void) { + + OQS_KEM *kem = malloc(sizeof(OQS_KEM)); + if (kem == NULL) { 
+ return NULL; + } + kem->method_name = OQS_KEM_alg_ml_kem_1024_ipd; + kem->alg_version = "https://github.com/pq-crystals/kyber/tree/standard"; + + kem->claimed_nist_level = 5; + kem->ind_cca = true; + + kem->length_public_key = OQS_KEM_ml_kem_1024_ipd_length_public_key; + kem->length_secret_key = OQS_KEM_ml_kem_1024_ipd_length_secret_key; + kem->length_ciphertext = OQS_KEM_ml_kem_1024_ipd_length_ciphertext; + kem->length_shared_secret = OQS_KEM_ml_kem_1024_ipd_length_shared_secret; + + kem->keypair = OQS_KEM_ml_kem_1024_ipd_keypair; + kem->encaps = OQS_KEM_ml_kem_1024_ipd_encaps; + kem->decaps = OQS_KEM_ml_kem_1024_ipd_decaps; + + return kem; +} + +/** Alias */ +OQS_KEM *OQS_KEM_ml_kem_1024_new(void) { + + OQS_KEM *kem = malloc(sizeof(OQS_KEM)); + if (kem == NULL) { + return NULL; + } + kem->method_name = OQS_KEM_alg_ml_kem_1024; + kem->alg_version = "https://github.com/pq-crystals/kyber/tree/standard"; + + kem->claimed_nist_level = 5; + kem->ind_cca = true; + + kem->length_public_key = OQS_KEM_ml_kem_1024_length_public_key; + kem->length_secret_key = OQS_KEM_ml_kem_1024_length_secret_key; + kem->length_ciphertext = OQS_KEM_ml_kem_1024_length_ciphertext; + kem->length_shared_secret = OQS_KEM_ml_kem_1024_length_shared_secret; + + kem->keypair = OQS_KEM_ml_kem_1024_keypair; + kem->encaps = OQS_KEM_ml_kem_1024_encaps; + kem->decaps = OQS_KEM_ml_kem_1024_decaps; + + return kem; +} + +extern int pqcrystals_ml_kem_1024_ipd_ref_keypair(uint8_t *pk, uint8_t *sk); +extern int pqcrystals_ml_kem_1024_ipd_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +extern int pqcrystals_ml_kem_1024_ipd_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#if defined(OQS_ENABLE_KEM_ml_kem_1024_ipd_avx2) +extern int pqcrystals_ml_kem_1024_ipd_avx2_keypair(uint8_t *pk, uint8_t *sk); +extern int pqcrystals_ml_kem_1024_ipd_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +extern int pqcrystals_ml_kem_1024_ipd_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); 
+#endif + +OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_ipd_keypair(uint8_t *public_key, uint8_t *secret_key) { +#if defined(OQS_ENABLE_KEM_ml_kem_1024_ipd_avx2) +#if defined(OQS_DIST_BUILD) + if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { +#endif /* OQS_DIST_BUILD */ + return (OQS_STATUS) pqcrystals_ml_kem_1024_ipd_avx2_keypair(public_key, secret_key); +#if defined(OQS_DIST_BUILD) + } else { + return (OQS_STATUS) pqcrystals_ml_kem_1024_ipd_ref_keypair(public_key, secret_key); + } +#endif /* OQS_DIST_BUILD */ +#else + return (OQS_STATUS) pqcrystals_ml_kem_1024_ipd_ref_keypair(public_key, secret_key); +#endif +} + +OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_ipd_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) { +#if defined(OQS_ENABLE_KEM_ml_kem_1024_ipd_avx2) +#if defined(OQS_DIST_BUILD) + if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { +#endif /* OQS_DIST_BUILD */ + return (OQS_STATUS) pqcrystals_ml_kem_1024_ipd_avx2_enc(ciphertext, shared_secret, public_key); +#if defined(OQS_DIST_BUILD) + } else { + return (OQS_STATUS) pqcrystals_ml_kem_1024_ipd_ref_enc(ciphertext, shared_secret, public_key); + } +#endif /* OQS_DIST_BUILD */ +#else + return (OQS_STATUS) pqcrystals_ml_kem_1024_ipd_ref_enc(ciphertext, shared_secret, public_key); +#endif +} + +OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_ipd_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key) { +#if defined(OQS_ENABLE_KEM_ml_kem_1024_ipd_avx2) +#if defined(OQS_DIST_BUILD) + if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { +#endif /* OQS_DIST_BUILD */ + return (OQS_STATUS) pqcrystals_ml_kem_1024_ipd_avx2_dec(shared_secret, ciphertext, secret_key); +#if defined(OQS_DIST_BUILD) + } else { + return (OQS_STATUS) 
pqcrystals_ml_kem_1024_ipd_ref_dec(shared_secret, ciphertext, secret_key); + } +#endif /* OQS_DIST_BUILD */ +#else + return (OQS_STATUS) pqcrystals_ml_kem_1024_ipd_ref_dec(shared_secret, ciphertext, secret_key); +#endif +} + +#endif diff --git a/src/kem/ml_kem/kem_ml_kem_512_ipd.c b/src/kem/ml_kem/kem_ml_kem_512_ipd.c new file mode 100644 index 0000000000..ea228dd869 --- /dev/null +++ b/src/kem/ml_kem/kem_ml_kem_512_ipd.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: MIT + +#include + +#include + +#if defined(OQS_ENABLE_KEM_ml_kem_512_ipd) + +OQS_KEM *OQS_KEM_ml_kem_512_ipd_new(void) { + + OQS_KEM *kem = malloc(sizeof(OQS_KEM)); + if (kem == NULL) { + return NULL; + } + kem->method_name = OQS_KEM_alg_ml_kem_512_ipd; + kem->alg_version = "https://github.com/pq-crystals/kyber/tree/standard"; + + kem->claimed_nist_level = 1; + kem->ind_cca = true; + + kem->length_public_key = OQS_KEM_ml_kem_512_ipd_length_public_key; + kem->length_secret_key = OQS_KEM_ml_kem_512_ipd_length_secret_key; + kem->length_ciphertext = OQS_KEM_ml_kem_512_ipd_length_ciphertext; + kem->length_shared_secret = OQS_KEM_ml_kem_512_ipd_length_shared_secret; + + kem->keypair = OQS_KEM_ml_kem_512_ipd_keypair; + kem->encaps = OQS_KEM_ml_kem_512_ipd_encaps; + kem->decaps = OQS_KEM_ml_kem_512_ipd_decaps; + + return kem; +} + +/** Alias */ +OQS_KEM *OQS_KEM_ml_kem_512_new(void) { + + OQS_KEM *kem = malloc(sizeof(OQS_KEM)); + if (kem == NULL) { + return NULL; + } + kem->method_name = OQS_KEM_alg_ml_kem_512; + kem->alg_version = "https://github.com/pq-crystals/kyber/tree/standard"; + + kem->claimed_nist_level = 1; + kem->ind_cca = true; + + kem->length_public_key = OQS_KEM_ml_kem_512_length_public_key; + kem->length_secret_key = OQS_KEM_ml_kem_512_length_secret_key; + kem->length_ciphertext = OQS_KEM_ml_kem_512_length_ciphertext; + kem->length_shared_secret = OQS_KEM_ml_kem_512_length_shared_secret; + + kem->keypair = OQS_KEM_ml_kem_512_keypair; + kem->encaps = OQS_KEM_ml_kem_512_encaps; + kem->decaps = 
OQS_KEM_ml_kem_512_decaps; + + return kem; +} + +extern int pqcrystals_ml_kem_512_ipd_ref_keypair(uint8_t *pk, uint8_t *sk); +extern int pqcrystals_ml_kem_512_ipd_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +extern int pqcrystals_ml_kem_512_ipd_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#if defined(OQS_ENABLE_KEM_ml_kem_512_ipd_avx2) +extern int pqcrystals_ml_kem_512_ipd_avx2_keypair(uint8_t *pk, uint8_t *sk); +extern int pqcrystals_ml_kem_512_ipd_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +extern int pqcrystals_ml_kem_512_ipd_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); +#endif + +OQS_API OQS_STATUS OQS_KEM_ml_kem_512_ipd_keypair(uint8_t *public_key, uint8_t *secret_key) { +#if defined(OQS_ENABLE_KEM_ml_kem_512_ipd_avx2) +#if defined(OQS_DIST_BUILD) + if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { +#endif /* OQS_DIST_BUILD */ + return (OQS_STATUS) pqcrystals_ml_kem_512_ipd_avx2_keypair(public_key, secret_key); +#if defined(OQS_DIST_BUILD) + } else { + return (OQS_STATUS) pqcrystals_ml_kem_512_ipd_ref_keypair(public_key, secret_key); + } +#endif /* OQS_DIST_BUILD */ +#else + return (OQS_STATUS) pqcrystals_ml_kem_512_ipd_ref_keypair(public_key, secret_key); +#endif +} + +OQS_API OQS_STATUS OQS_KEM_ml_kem_512_ipd_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) { +#if defined(OQS_ENABLE_KEM_ml_kem_512_ipd_avx2) +#if defined(OQS_DIST_BUILD) + if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { +#endif /* OQS_DIST_BUILD */ + return (OQS_STATUS) pqcrystals_ml_kem_512_ipd_avx2_enc(ciphertext, shared_secret, public_key); +#if defined(OQS_DIST_BUILD) + } else { + return (OQS_STATUS) pqcrystals_ml_kem_512_ipd_ref_enc(ciphertext, shared_secret, public_key); + } +#endif /* OQS_DIST_BUILD */ +#else + return (OQS_STATUS) 
pqcrystals_ml_kem_512_ipd_ref_enc(ciphertext, shared_secret, public_key); +#endif +} + +OQS_API OQS_STATUS OQS_KEM_ml_kem_512_ipd_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key) { +#if defined(OQS_ENABLE_KEM_ml_kem_512_ipd_avx2) +#if defined(OQS_DIST_BUILD) + if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { +#endif /* OQS_DIST_BUILD */ + return (OQS_STATUS) pqcrystals_ml_kem_512_ipd_avx2_dec(shared_secret, ciphertext, secret_key); +#if defined(OQS_DIST_BUILD) + } else { + return (OQS_STATUS) pqcrystals_ml_kem_512_ipd_ref_dec(shared_secret, ciphertext, secret_key); + } +#endif /* OQS_DIST_BUILD */ +#else + return (OQS_STATUS) pqcrystals_ml_kem_512_ipd_ref_dec(shared_secret, ciphertext, secret_key); +#endif +} + +#endif diff --git a/src/kem/ml_kem/kem_ml_kem_768_ipd.c b/src/kem/ml_kem/kem_ml_kem_768_ipd.c new file mode 100644 index 0000000000..281f505fa7 --- /dev/null +++ b/src/kem/ml_kem/kem_ml_kem_768_ipd.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: MIT + +#include + +#include + +#if defined(OQS_ENABLE_KEM_ml_kem_768_ipd) + +OQS_KEM *OQS_KEM_ml_kem_768_ipd_new(void) { + + OQS_KEM *kem = malloc(sizeof(OQS_KEM)); + if (kem == NULL) { + return NULL; + } + kem->method_name = OQS_KEM_alg_ml_kem_768_ipd; + kem->alg_version = "https://github.com/pq-crystals/kyber/tree/standard"; + + kem->claimed_nist_level = 3; + kem->ind_cca = true; + + kem->length_public_key = OQS_KEM_ml_kem_768_ipd_length_public_key; + kem->length_secret_key = OQS_KEM_ml_kem_768_ipd_length_secret_key; + kem->length_ciphertext = OQS_KEM_ml_kem_768_ipd_length_ciphertext; + kem->length_shared_secret = OQS_KEM_ml_kem_768_ipd_length_shared_secret; + + kem->keypair = OQS_KEM_ml_kem_768_ipd_keypair; + kem->encaps = OQS_KEM_ml_kem_768_ipd_encaps; + kem->decaps = OQS_KEM_ml_kem_768_ipd_decaps; + + return kem; +} + +/** Alias */ +OQS_KEM *OQS_KEM_ml_kem_768_new(void) { + + OQS_KEM *kem = 
malloc(sizeof(OQS_KEM)); + if (kem == NULL) { + return NULL; + } + kem->method_name = OQS_KEM_alg_ml_kem_768; + kem->alg_version = "https://github.com/pq-crystals/kyber/tree/standard"; + + kem->claimed_nist_level = 3; + kem->ind_cca = true; + + kem->length_public_key = OQS_KEM_ml_kem_768_length_public_key; + kem->length_secret_key = OQS_KEM_ml_kem_768_length_secret_key; + kem->length_ciphertext = OQS_KEM_ml_kem_768_length_ciphertext; + kem->length_shared_secret = OQS_KEM_ml_kem_768_length_shared_secret; + + kem->keypair = OQS_KEM_ml_kem_768_keypair; + kem->encaps = OQS_KEM_ml_kem_768_encaps; + kem->decaps = OQS_KEM_ml_kem_768_decaps; + + return kem; +} + +extern int pqcrystals_ml_kem_768_ipd_ref_keypair(uint8_t *pk, uint8_t *sk); +extern int pqcrystals_ml_kem_768_ipd_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +extern int pqcrystals_ml_kem_768_ipd_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#if defined(OQS_ENABLE_KEM_ml_kem_768_ipd_avx2) +extern int pqcrystals_ml_kem_768_ipd_avx2_keypair(uint8_t *pk, uint8_t *sk); +extern int pqcrystals_ml_kem_768_ipd_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +extern int pqcrystals_ml_kem_768_ipd_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); +#endif + +OQS_API OQS_STATUS OQS_KEM_ml_kem_768_ipd_keypair(uint8_t *public_key, uint8_t *secret_key) { +#if defined(OQS_ENABLE_KEM_ml_kem_768_ipd_avx2) +#if defined(OQS_DIST_BUILD) + if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { +#endif /* OQS_DIST_BUILD */ + return (OQS_STATUS) pqcrystals_ml_kem_768_ipd_avx2_keypair(public_key, secret_key); +#if defined(OQS_DIST_BUILD) + } else { + return (OQS_STATUS) pqcrystals_ml_kem_768_ipd_ref_keypair(public_key, secret_key); + } +#endif /* OQS_DIST_BUILD */ +#else + return (OQS_STATUS) pqcrystals_ml_kem_768_ipd_ref_keypair(public_key, secret_key); +#endif +} + +OQS_API OQS_STATUS 
OQS_KEM_ml_kem_768_ipd_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) { +#if defined(OQS_ENABLE_KEM_ml_kem_768_ipd_avx2) +#if defined(OQS_DIST_BUILD) + if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { +#endif /* OQS_DIST_BUILD */ + return (OQS_STATUS) pqcrystals_ml_kem_768_ipd_avx2_enc(ciphertext, shared_secret, public_key); +#if defined(OQS_DIST_BUILD) + } else { + return (OQS_STATUS) pqcrystals_ml_kem_768_ipd_ref_enc(ciphertext, shared_secret, public_key); + } +#endif /* OQS_DIST_BUILD */ +#else + return (OQS_STATUS) pqcrystals_ml_kem_768_ipd_ref_enc(ciphertext, shared_secret, public_key); +#endif +} + +OQS_API OQS_STATUS OQS_KEM_ml_kem_768_ipd_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key) { +#if defined(OQS_ENABLE_KEM_ml_kem_768_ipd_avx2) +#if defined(OQS_DIST_BUILD) + if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { +#endif /* OQS_DIST_BUILD */ + return (OQS_STATUS) pqcrystals_ml_kem_768_ipd_avx2_dec(shared_secret, ciphertext, secret_key); +#if defined(OQS_DIST_BUILD) + } else { + return (OQS_STATUS) pqcrystals_ml_kem_768_ipd_ref_dec(shared_secret, ciphertext, secret_key); + } +#endif /* OQS_DIST_BUILD */ +#else + return (OQS_STATUS) pqcrystals_ml_kem_768_ipd_ref_dec(shared_secret, ciphertext, secret_key); +#endif +} + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/LICENSE b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/LICENSE new file mode 100644 index 0000000000..7922ab8007 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/LICENSE @@ -0,0 +1,6 @@ +Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/); +or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html). 
+ +For Keccak and AES we are using public-domain +code from sources and by authors listed in +comments on top of the respective files. diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/align.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/align.h new file mode 100644 index 0000000000..3463866f37 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/align.h @@ -0,0 +1,19 @@ +#ifndef ALIGN_H +#define ALIGN_H + +#include +#include + +#define ALIGNED_UINT8(N) \ + union { \ + uint8_t coeffs[N]; \ + __m256i vec[(N+31)/32]; \ + } + +#define ALIGNED_INT16(N) \ + union { \ + int16_t coeffs[N]; \ + __m256i vec[(N+15)/16]; \ + } + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/api.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/api.h new file mode 100644 index 0000000000..a154e80f1d --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/api.h @@ -0,0 +1,66 @@ +#ifndef API_H +#define API_H + +#include + +#define pqcrystals_kyber512_SECRETKEYBYTES 1632 +#define pqcrystals_kyber512_PUBLICKEYBYTES 800 +#define pqcrystals_kyber512_CIPHERTEXTBYTES 768 +#define pqcrystals_kyber512_KEYPAIRCOINBYTES 64 +#define pqcrystals_kyber512_ENCCOINBYTES 32 +#define pqcrystals_kyber512_BYTES 32 + +#define pqcrystals_kyber512_avx2_SECRETKEYBYTES pqcrystals_kyber512_SECRETKEYBYTES +#define pqcrystals_kyber512_avx2_PUBLICKEYBYTES pqcrystals_kyber512_PUBLICKEYBYTES +#define pqcrystals_kyber512_avx2_CIPHERTEXTBYTES pqcrystals_kyber512_CIPHERTEXTBYTES +#define pqcrystals_kyber512_avx2_KEYPAIRCOINBYTES pqcrystals_kyber512_KEYPAIRCOINBYTES +#define pqcrystals_kyber512_avx2_ENCCOINBYTES pqcrystals_kyber512_ENCCOINBYTES +#define pqcrystals_kyber512_avx2_BYTES pqcrystals_kyber512_BYTES + +int pqcrystals_kyber512_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); +int pqcrystals_kyber512_avx2_keypair(uint8_t *pk, uint8_t *sk); +int 
pqcrystals_kyber512_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); +int pqcrystals_kyber512_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int pqcrystals_kyber512_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#define pqcrystals_kyber768_SECRETKEYBYTES 2400 +#define pqcrystals_kyber768_PUBLICKEYBYTES 1184 +#define pqcrystals_kyber768_CIPHERTEXTBYTES 1088 +#define pqcrystals_kyber768_KEYPAIRCOINBYTES 64 +#define pqcrystals_kyber768_ENCCOINBYTES 32 +#define pqcrystals_kyber768_BYTES 32 + +#define pqcrystals_kyber768_avx2_SECRETKEYBYTES pqcrystals_kyber768_SECRETKEYBYTES +#define pqcrystals_kyber768_avx2_PUBLICKEYBYTES pqcrystals_kyber768_PUBLICKEYBYTES +#define pqcrystals_kyber768_avx2_CIPHERTEXTBYTES pqcrystals_kyber768_CIPHERTEXTBYTES +#define pqcrystals_kyber768_avx2_KEYPAIRCOINBYTES pqcrystals_kyber768_KEYPAIRCOINBYTES +#define pqcrystals_kyber768_avx2_ENCCOINBYTES pqcrystals_kyber768_ENCCOINBYTES +#define pqcrystals_kyber768_avx2_BYTES pqcrystals_kyber768_BYTES + +int pqcrystals_kyber768_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); +int pqcrystals_kyber768_avx2_keypair(uint8_t *pk, uint8_t *sk); +int pqcrystals_kyber768_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); +int pqcrystals_kyber768_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int pqcrystals_kyber768_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#define pqcrystals_kyber1024_SECRETKEYBYTES 3168 +#define pqcrystals_kyber1024_PUBLICKEYBYTES 1568 +#define pqcrystals_kyber1024_CIPHERTEXTBYTES 1568 +#define pqcrystals_kyber1024_KEYPAIRCOINBYTES 64 +#define pqcrystals_kyber1024_ENCCOINBYTES 32 +#define pqcrystals_kyber1024_BYTES 32 + +#define pqcrystals_kyber1024_avx2_SECRETKEYBYTES pqcrystals_kyber1024_SECRETKEYBYTES +#define pqcrystals_kyber1024_avx2_PUBLICKEYBYTES pqcrystals_kyber1024_PUBLICKEYBYTES +#define pqcrystals_kyber1024_avx2_CIPHERTEXTBYTES 
pqcrystals_kyber1024_CIPHERTEXTBYTES +#define pqcrystals_kyber1024_avx2_KEYPAIRCOINBYTES pqcrystals_kyber1024_KEYPAIRCOINBYTES +#define pqcrystals_kyber1024_avx2_ENCCOINBYTES pqcrystals_kyber1024_ENCCOINBYTES +#define pqcrystals_kyber1024_avx2_BYTES pqcrystals_kyber1024_BYTES + +int pqcrystals_kyber1024_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); +int pqcrystals_kyber1024_avx2_keypair(uint8_t *pk, uint8_t *sk); +int pqcrystals_kyber1024_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); +int pqcrystals_kyber1024_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int pqcrystals_kyber1024_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/basemul.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/basemul.S new file mode 100644 index 0000000000..36990639b2 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/basemul.S @@ -0,0 +1,105 @@ +#include "consts.h" + +.macro schoolbook off +vmovdqa _16XQINV*2(%rcx),%ymm0 +vmovdqa (64*\off+ 0)*2(%rsi),%ymm1 # a0 +vmovdqa (64*\off+16)*2(%rsi),%ymm2 # b0 +vmovdqa (64*\off+32)*2(%rsi),%ymm3 # a1 +vmovdqa (64*\off+48)*2(%rsi),%ymm4 # b1 + +vpmullw %ymm0,%ymm1,%ymm9 # a0.lo +vpmullw %ymm0,%ymm2,%ymm10 # b0.lo +vpmullw %ymm0,%ymm3,%ymm11 # a1.lo +vpmullw %ymm0,%ymm4,%ymm12 # b1.lo + +vmovdqa (64*\off+ 0)*2(%rdx),%ymm5 # c0 +vmovdqa (64*\off+16)*2(%rdx),%ymm6 # d0 + +vpmulhw %ymm5,%ymm1,%ymm13 # a0c0.hi +vpmulhw %ymm6,%ymm1,%ymm1 # a0d0.hi +vpmulhw %ymm5,%ymm2,%ymm14 # b0c0.hi +vpmulhw %ymm6,%ymm2,%ymm2 # b0d0.hi + +vmovdqa (64*\off+32)*2(%rdx),%ymm7 # c1 +vmovdqa (64*\off+48)*2(%rdx),%ymm8 # d1 + +vpmulhw %ymm7,%ymm3,%ymm15 # a1c1.hi +vpmulhw %ymm8,%ymm3,%ymm3 # a1d1.hi +vpmulhw %ymm7,%ymm4,%ymm0 # b1c1.hi +vpmulhw %ymm8,%ymm4,%ymm4 # b1d1.hi + +vmovdqa %ymm13,(%rsp) + +vpmullw %ymm5,%ymm9,%ymm13 # a0c0.lo +vpmullw %ymm6,%ymm9,%ymm9 # 
a0d0.lo +vpmullw %ymm5,%ymm10,%ymm5 # b0c0.lo +vpmullw %ymm6,%ymm10,%ymm10 # b0d0.lo + +vpmullw %ymm7,%ymm11,%ymm6 # a1c1.lo +vpmullw %ymm8,%ymm11,%ymm11 # a1d1.lo +vpmullw %ymm7,%ymm12,%ymm7 # b1c1.lo +vpmullw %ymm8,%ymm12,%ymm12 # b1d1.lo + +vmovdqa _16XQ*2(%rcx),%ymm8 +vpmulhw %ymm8,%ymm13,%ymm13 +vpmulhw %ymm8,%ymm9,%ymm9 +vpmulhw %ymm8,%ymm5,%ymm5 +vpmulhw %ymm8,%ymm10,%ymm10 +vpmulhw %ymm8,%ymm6,%ymm6 +vpmulhw %ymm8,%ymm11,%ymm11 +vpmulhw %ymm8,%ymm7,%ymm7 +vpmulhw %ymm8,%ymm12,%ymm12 + +vpsubw (%rsp),%ymm13,%ymm13 # -a0c0 +vpsubw %ymm9,%ymm1,%ymm9 # a0d0 +vpsubw %ymm5,%ymm14,%ymm5 # b0c0 +vpsubw %ymm10,%ymm2,%ymm10 # b0d0 + +vpsubw %ymm6,%ymm15,%ymm6 # a1c1 +vpsubw %ymm11,%ymm3,%ymm11 # a1d1 +vpsubw %ymm7,%ymm0,%ymm7 # b1c1 +vpsubw %ymm12,%ymm4,%ymm12 # b1d1 + +vmovdqa (%r9),%ymm0 +vmovdqa 32(%r9),%ymm1 +vpmullw %ymm0,%ymm10,%ymm2 +vpmullw %ymm0,%ymm12,%ymm3 +vpmulhw %ymm1,%ymm10,%ymm10 +vpmulhw %ymm1,%ymm12,%ymm12 +vpmulhw %ymm8,%ymm2,%ymm2 +vpmulhw %ymm8,%ymm3,%ymm3 +vpsubw %ymm2,%ymm10,%ymm10 # rb0d0 +vpsubw %ymm3,%ymm12,%ymm12 # rb1d1 + +vpaddw %ymm5,%ymm9,%ymm9 +vpaddw %ymm7,%ymm11,%ymm11 +vpsubw %ymm13,%ymm10,%ymm13 +vpsubw %ymm12,%ymm6,%ymm6 + +vmovdqa %ymm13,(64*\off+ 0)*2(%rdi) +vmovdqa %ymm9,(64*\off+16)*2(%rdi) +vmovdqa %ymm6,(64*\off+32)*2(%rdi) +vmovdqa %ymm11,(64*\off+48)*2(%rdi) +.endm + +.text +.global cdecl(basemul_avx) +cdecl(basemul_avx): +mov %rsp,%r8 +and $-32,%rsp +sub $32,%rsp + +lea (_ZETAS_EXP+176)*2(%rcx),%r9 +schoolbook 0 + +add $32*2,%r9 +schoolbook 1 + +add $192*2,%r9 +schoolbook 2 + +add $32*2,%r9 +schoolbook 3 + +mov %r8,%rsp +ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/cbd.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/cbd.c new file mode 100644 index 0000000000..dad473c79e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/cbd.c @@ -0,0 +1,144 @@ +#include +#include +#include "params.h" +#include "cbd.h" + 
+/************************************************* +* Name: cbd2 +* +* Description: Given an array of uniformly random bytes, compute +* polynomial with coefficients distributed according to +* a centered binomial distribution with parameter eta=2 +* +* Arguments: - poly *r: pointer to output polynomial; written with aligned 32-byte stores +* - const __m256i *buf: pointer to aligned input byte array; KYBER_N/64 vectors are read with aligned loads, each yielding 64 coefficients +**************************************************/ +static void cbd2(poly * restrict r, const __m256i buf[2*KYBER_N/128]) +{ + unsigned int i; + __m256i f0, f1, f2, f3; + const __m256i mask55 = _mm256_set1_epi32(0x55555555); + const __m256i mask33 = _mm256_set1_epi32(0x33333333); + const __m256i mask03 = _mm256_set1_epi32(0x03030303); + const __m256i mask0F = _mm256_set1_epi32(0x0F0F0F0F); + + for(i = 0; i < KYBER_N/64; i++) { + f0 = _mm256_load_si256(&buf[i]); + + f1 = _mm256_srli_epi16(f0, 1); + f0 = _mm256_and_si256(mask55, f0); + f1 = _mm256_and_si256(mask55, f1); + f0 = _mm256_add_epi8(f0, f1); + + f1 = _mm256_srli_epi16(f0, 2); + f0 = _mm256_and_si256(mask33, f0); + f1 = _mm256_and_si256(mask33, f1); + f0 = _mm256_add_epi8(f0, mask33); + f0 = _mm256_sub_epi8(f0, f1); + + f1 = _mm256_srli_epi16(f0, 4); + f0 = _mm256_and_si256(mask0F, f0); + f1 = _mm256_and_si256(mask0F, f1); + f0 = _mm256_sub_epi8(f0, mask03); + f1 = _mm256_sub_epi8(f1, mask03); + + f2 = _mm256_unpacklo_epi8(f0, f1); + f3 = _mm256_unpackhi_epi8(f0, f1); + + f0 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(f2)); + f1 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(f2,1)); + f2 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(f3)); + f3 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(f3,1)); + + _mm256_store_si256(&r->vec[4*i+0], f0); + _mm256_store_si256(&r->vec[4*i+1], f2); + _mm256_store_si256(&r->vec[4*i+2], f1); + _mm256_store_si256(&r->vec[4*i+3], f3); + } +} + +#if KYBER_ETA1 == 3 +/************************************************* +* Name: cbd3 +* +* Description: Given an array of uniformly random bytes, compute +* polynomial 
with coefficients distributed according to +* a centered binomial distribution with parameter eta=3 +* This function is only needed for Kyber-512 (it is compiled only when KYBER_ETA1 == 3) +* +* Arguments: - poly *r: pointer to output polynomial; written with aligned 32-byte stores +* - const uint8_t *buf: pointer to input byte array of 3*KYBER_N/4+8 bytes; no alignment is required (unaligned 32-byte loads at a 24-byte stride, 32 coefficients per load), and the final load ends exactly at the end of the array +**************************************************/ +static void cbd3(poly * restrict r, const uint8_t buf[3*KYBER_N/4+8]) +{ + unsigned int i; + __m256i f0, f1, f2, f3; + const __m256i mask249 = _mm256_set1_epi32(0x249249); + const __m256i mask6DB = _mm256_set1_epi32(0x6DB6DB); + const __m256i mask07 = _mm256_set1_epi32(7); + const __m256i mask70 = _mm256_set1_epi32(7 << 16); + const __m256i mask3 = _mm256_set1_epi16(3); + const __m256i shufbidx = _mm256_set_epi8(-1,15,14,13,-1,12,11,10,-1, 9, 8, 7,-1, 6, 5, 4, + -1,11,10, 9,-1, 8, 7, 6,-1, 5, 4, 3,-1, 2, 1, 0); + + for(i = 0; i < KYBER_N/32; i++) { + f0 = _mm256_loadu_si256((__m256i *)&buf[24*i]); + f0 = _mm256_permute4x64_epi64(f0,0x94); + f0 = _mm256_shuffle_epi8(f0,shufbidx); + + f1 = _mm256_srli_epi32(f0,1); + f2 = _mm256_srli_epi32(f0,2); + f0 = _mm256_and_si256(mask249,f0); + f1 = _mm256_and_si256(mask249,f1); + f2 = _mm256_and_si256(mask249,f2); + f0 = _mm256_add_epi32(f0,f1); + f0 = _mm256_add_epi32(f0,f2); + + f1 = _mm256_srli_epi32(f0,3); + f0 = _mm256_add_epi32(f0,mask6DB); + f0 = _mm256_sub_epi32(f0,f1); + + f1 = _mm256_slli_epi32(f0,10); + f2 = _mm256_srli_epi32(f0,12); + f3 = _mm256_srli_epi32(f0, 2); + f0 = _mm256_and_si256(f0,mask07); + f1 = _mm256_and_si256(f1,mask70); + f2 = _mm256_and_si256(f2,mask07); + f3 = _mm256_and_si256(f3,mask70); + f0 = _mm256_add_epi16(f0,f1); + f1 = _mm256_add_epi16(f2,f3); + f0 = _mm256_sub_epi16(f0,mask3); + f1 = _mm256_sub_epi16(f1,mask3); + + f2 = _mm256_unpacklo_epi32(f0,f1); + f3 = _mm256_unpackhi_epi32(f0,f1); + + f0 = _mm256_permute2x128_si256(f2,f3,0x20); + f1 = _mm256_permute2x128_si256(f2,f3,0x31); + + _mm256_store_si256(&r->vec[2*i+0], f0); + _mm256_store_si256(&r->vec[2*i+1], f1); + } +} +#endif 
+ +/* buf 32 bytes longer for cbd3 */ +void poly_cbd_eta1(poly *r, const __m256i buf[KYBER_ETA1*KYBER_N/128+1]) +{ +#if KYBER_ETA1 == 2 + cbd2(r, buf); +#elif KYBER_ETA1 == 3 + cbd3(r, (uint8_t *)buf); +#else +#error "This implementation requires eta1 in {2,3}" +#endif +} + +void poly_cbd_eta2(poly *r, const __m256i buf[KYBER_ETA2*KYBER_N/128]) +{ +#if KYBER_ETA2 == 2 + cbd2(r, buf); +#else +#error "This implementation requires eta2 = 2" +#endif +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/cbd.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/cbd.h new file mode 100644 index 0000000000..05788e06b4 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/cbd.h @@ -0,0 +1,15 @@ +#ifndef CBD_H +#define CBD_H + +#include +#include +#include "params.h" +#include "poly.h" + +#define poly_cbd_eta1 KYBER_NAMESPACE(poly_cbd_eta1) +void poly_cbd_eta1(poly *r, const __m256i buf[KYBER_ETA1*KYBER_N/128+1]); + +#define poly_cbd_eta2 KYBER_NAMESPACE(poly_cbd_eta2) +void poly_cbd_eta2(poly *r, const __m256i buf[KYBER_ETA2*KYBER_N/128]); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/consts.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/consts.c new file mode 100644 index 0000000000..84e596893d --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/consts.c @@ -0,0 +1,121 @@ +#include "align.h" +#include "params.h" +#include "consts.h" + +#define Q KYBER_Q +#define MONT -1044 // 2^16 mod q +#define QINV -3327 // q^-1 mod 2^16 +#define V 20159 // floor(2^26/q + 0.5) +#define FHI 1441 // mont^2/128 +#define FLO -10079 // qinv*FHI +#define MONTSQHI 1353 // mont^2 +#define MONTSQLO 20553 // qinv*MONTSQHI +#define MASK 4095 +#define SHIFT 32 + +const qdata_t qdata = {{ +#define _16XQ 0 + Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, + +#define _16XQINV 16 + QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, + QINV, QINV, QINV, QINV, 
QINV, QINV, QINV, QINV, + +#define _16XV 32 + V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, + +#define _16XFLO 48 + FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO, + FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO, + +#define _16XFHI 64 + FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI, + FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI, + +#define _16XMONTSQLO 80 + MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, + MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, + MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, + MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, + +#define _16XMONTSQHI 96 + MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, + MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, + MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, + MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, + +#define _16XMASK 112 + MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK, + MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK, + +#define _REVIDXB 128 + 3854, 3340, 2826, 2312, 1798, 1284, 770, 256, + 3854, 3340, 2826, 2312, 1798, 1284, 770, 256, + +#define _REVIDXD 144 + 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0, 0, + +#define _ZETAS_EXP 160 + 31498, 31498, 31498, 31498, -758, -758, -758, -758, + 5237, 5237, 5237, 5237, 1397, 1397, 1397, 1397, + 14745, 14745, 14745, 14745, 14745, 14745, 14745, 14745, + 14745, 14745, 14745, 14745, 14745, 14745, 14745, 14745, + -359, -359, -359, -359, -359, -359, -359, -359, + -359, -359, -359, -359, -359, -359, -359, -359, + 13525, 13525, 13525, 13525, 13525, 13525, 13525, 13525, + -12402, -12402, -12402, -12402, -12402, -12402, -12402, -12402, + 1493, 1493, 1493, 1493, 1493, 1493, 1493, 1493, + 1422, 1422, 1422, 1422, 1422, 1422, 1422, 1422, + -20907, -20907, -20907, -20907, 27758, 27758, 27758, 27758, + -3799, -3799, -3799, -3799, -15690, -15690, -15690, -15690, + -171, -171, -171, -171, 622, 622, 622, 622, + 1577, 1577, 1577, 1577, 182, 182, 182, 182, + -5827, -5827, 17363, 17363, -26360, -26360, -29057, -29057, + 5571, 5571, -1102, -1102, 21438, 21438, -26242, -26242, + 573, 573, -1325, -1325, 264, 264, 383, 383, + -829, -829, 1458, 1458, -1602, 
-1602, -130, -130, + -5689, -6516, 1496, 30967, -23565, 20179, 20710, 25080, + -12796, 26616, 16064, -12442, 9134, -650, -25986, 27837, + 1223, 652, -552, 1015, -1293, 1491, -282, -1544, + 516, -8, -320, -666, -1618, -1162, 126, 1469, + -335, -11477, -32227, 20494, -27738, 945, -14883, 6182, + 32010, 10631, 29175, -28762, -18486, 17560, -14430, -5276, + -1103, 555, -1251, 1550, 422, 177, -291, 1574, + -246, 1159, -777, -602, -1590, -872, 418, -156, + 11182, 13387, -14233, -21655, 13131, -4587, 23092, 5493, + -32502, 30317, -18741, 12639, 20100, 18525, 19529, -12619, + 430, 843, 871, 105, 587, -235, -460, 1653, + 778, -147, 1483, 1119, 644, 349, 329, -75, + 787, 787, 787, 787, 787, 787, 787, 787, + 787, 787, 787, 787, 787, 787, 787, 787, + -1517, -1517, -1517, -1517, -1517, -1517, -1517, -1517, + -1517, -1517, -1517, -1517, -1517, -1517, -1517, -1517, + 28191, 28191, 28191, 28191, 28191, 28191, 28191, 28191, + -16694, -16694, -16694, -16694, -16694, -16694, -16694, -16694, + 287, 287, 287, 287, 287, 287, 287, 287, + 202, 202, 202, 202, 202, 202, 202, 202, + 10690, 10690, 10690, 10690, 1358, 1358, 1358, 1358, + -11202, -11202, -11202, -11202, 31164, 31164, 31164, 31164, + 962, 962, 962, 962, -1202, -1202, -1202, -1202, + -1474, -1474, -1474, -1474, 1468, 1468, 1468, 1468, + -28073, -28073, 24313, 24313, -10532, -10532, 8800, 8800, + 18426, 18426, 8859, 8859, 26675, 26675, -16163, -16163, + -681, -681, 1017, 1017, 732, 732, 608, 608, + -1542, -1542, 411, 411, -205, -205, -1571, -1571, + 19883, -28250, -15887, -8898, -28309, 9075, -30199, 18249, + 13426, 14017, -29156, -12757, 16832, 4311, -24155, -17915, + -853, -90, -271, 830, 107, -1421, -247, -951, + -398, 961, -1508, -725, 448, -1065, 677, -1275, + -31183, 25435, -7382, 24391, -20927, 10946, 24214, 16989, + 10335, -7934, -22502, 10906, 31636, 28644, 23998, -17422, + 817, 603, 1322, -1465, -1215, 1218, -874, -1187, + -1185, -1278, -1510, -870, -108, 996, 958, 1522, + 20297, 2146, 15355, -32384, -6280, -14903, 
-11044, 14469, + -21498, -20198, 23210, -17442, -23860, -20257, 7756, 23132, + 1097, 610, -1285, 384, -136, -1335, 220, -1659, + -1530, 794, -854, 478, -308, 991, -1460, 1628, + +#define _16XSHIFT 624 + SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, + SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT +}}; diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/consts.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/consts.h new file mode 100644 index 0000000000..f95899cd8e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/consts.h @@ -0,0 +1,43 @@ +#ifndef CONSTS_H +#define CONSTS_H + +#include "params.h" + +#define _16XQ 0 +#define _16XQINV 16 +#define _16XV 32 +#define _16XFLO 48 +#define _16XFHI 64 +#define _16XMONTSQLO 80 +#define _16XMONTSQHI 96 +#define _16XMASK 112 +#define _REVIDXB 128 +#define _REVIDXD 144 +#define _ZETAS_EXP 160 +#define _16XSHIFT 624 + +/* The C ABI on MacOS exports all symbols with a leading + * underscore. This means that any symbols we refer to from + * C files (functions) can't be found, and all symbols we + * refer to from ASM also can't be found. 
+ * + * This define helps us get around this + */ +#ifdef __ASSEMBLER__ +#if defined(__WIN32__) || defined(__APPLE__) +#define decorate(s) _##s +#define cdecl2(s) decorate(s) +#define cdecl(s) cdecl2(KYBER_NAMESPACE(##s)) +#else +#define cdecl(s) KYBER_NAMESPACE(##s) +#endif +#endif + +#ifndef __ASSEMBLER__ +#include "align.h" +typedef ALIGNED_INT16(640) qdata_t; +#define qdata KYBER_NAMESPACE(qdata) +extern const qdata_t qdata; +#endif + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/fq.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/fq.S new file mode 100644 index 0000000000..3bb1ebd3d8 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/fq.S @@ -0,0 +1,88 @@ +#include "consts.h" +.include "fq.inc" + +.text +reduce128_avx: +#load +vmovdqa (%rdi),%ymm2 +vmovdqa 32(%rdi),%ymm3 +vmovdqa 64(%rdi),%ymm4 +vmovdqa 96(%rdi),%ymm5 +vmovdqa 128(%rdi),%ymm6 +vmovdqa 160(%rdi),%ymm7 +vmovdqa 192(%rdi),%ymm8 +vmovdqa 224(%rdi),%ymm9 + +red16 2 +red16 3 +red16 4 +red16 5 +red16 6 +red16 7 +red16 8 +red16 9 + +#store +vmovdqa %ymm2,(%rdi) +vmovdqa %ymm3,32(%rdi) +vmovdqa %ymm4,64(%rdi) +vmovdqa %ymm5,96(%rdi) +vmovdqa %ymm6,128(%rdi) +vmovdqa %ymm7,160(%rdi) +vmovdqa %ymm8,192(%rdi) +vmovdqa %ymm9,224(%rdi) + +ret + +.global cdecl(reduce_avx) +cdecl(reduce_avx): +#consts +vmovdqa _16XQ*2(%rsi),%ymm0 +vmovdqa _16XV*2(%rsi),%ymm1 +call reduce128_avx +add $256,%rdi +call reduce128_avx +ret + +tomont128_avx: +#load +vmovdqa (%rdi),%ymm3 +vmovdqa 32(%rdi),%ymm4 +vmovdqa 64(%rdi),%ymm5 +vmovdqa 96(%rdi),%ymm6 +vmovdqa 128(%rdi),%ymm7 +vmovdqa 160(%rdi),%ymm8 +vmovdqa 192(%rdi),%ymm9 +vmovdqa 224(%rdi),%ymm10 + +fqmulprecomp 1,2,3,11 +fqmulprecomp 1,2,4,12 +fqmulprecomp 1,2,5,13 +fqmulprecomp 1,2,6,14 +fqmulprecomp 1,2,7,15 +fqmulprecomp 1,2,8,11 +fqmulprecomp 1,2,9,12 +fqmulprecomp 1,2,10,13 + +#store +vmovdqa %ymm3,(%rdi) +vmovdqa %ymm4,32(%rdi) +vmovdqa %ymm5,64(%rdi) +vmovdqa %ymm6,96(%rdi) 
+vmovdqa %ymm7,128(%rdi) +vmovdqa %ymm8,160(%rdi) +vmovdqa %ymm9,192(%rdi) +vmovdqa %ymm10,224(%rdi) + +ret + +.global cdecl(tomont_avx) +cdecl(tomont_avx): +#consts +vmovdqa _16XQ*2(%rsi),%ymm0 +vmovdqa _16XMONTSQLO*2(%rsi),%ymm1 +vmovdqa _16XMONTSQHI*2(%rsi),%ymm2 +call tomont128_avx +add $256,%rdi +call tomont128_avx +ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/fq.inc b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/fq.inc new file mode 100644 index 0000000000..4b7afc3118 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/fq.inc @@ -0,0 +1,30 @@ +.macro red16 r,rs=0,x=12 +vpmulhw %ymm1,%ymm\r,%ymm\x +.if \rs +vpmulhrsw %ymm\rs,%ymm\x,%ymm\x +.else +vpsraw $10,%ymm\x,%ymm\x +.endif +vpmullw %ymm0,%ymm\x,%ymm\x +vpsubw %ymm\x,%ymm\r,%ymm\r +.endm + +.macro csubq r,x=12 +vpsubw %ymm0,%ymm\r,%ymm\r +vpsraw $15,%ymm\r,%ymm\x +vpand %ymm0,%ymm\x,%ymm\x +vpaddw %ymm\x,%ymm\r,%ymm\r +.endm + +.macro caddq r,x=12 +vpsraw $15,%ymm\r,%ymm\x +vpand %ymm0,%ymm\x,%ymm\x +vpaddw %ymm\x,%ymm\r,%ymm\r +.endm + +.macro fqmulprecomp al,ah,b,x=12 +vpmullw %ymm\al,%ymm\b,%ymm\x +vpmulhw %ymm\ah,%ymm\b,%ymm\b +vpmulhw %ymm0,%ymm\x,%ymm\x +vpsubw %ymm\x,%ymm\b,%ymm\b +.endm diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/indcpa.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/indcpa.c new file mode 100644 index 0000000000..572ce49007 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/indcpa.c @@ -0,0 +1,566 @@ +#include +#include +#include +#include +#include "align.h" +#include "params.h" +#include "indcpa.h" +#include "polyvec.h" +#include "poly.h" +#include "ntt.h" +#include "cbd.h" +#include "rejsample.h" +#include "symmetric.h" +#include "randombytes.h" + +/************************************************* +* Name: pack_pk +* +* Description: Serialize the public key as concatenation of the +* serialized vector of polynomials 
pk and the +* public seed used to generate the matrix A. +* The polynomial coefficients in pk are assumed to +* lie in the invertal [0,q], i.e. pk must be reduced +* by polyvec_reduce(). +* +* Arguments: uint8_t *r: pointer to the output serialized public key +* polyvec *pk: pointer to the input public-key polyvec +* const uint8_t *seed: pointer to the input public seed +**************************************************/ +static void pack_pk(uint8_t r[KYBER_INDCPA_PUBLICKEYBYTES], + polyvec *pk, + const uint8_t seed[KYBER_SYMBYTES]) +{ + polyvec_tobytes(r, pk); + memcpy(r+KYBER_POLYVECBYTES, seed, KYBER_SYMBYTES); +} + +/************************************************* +* Name: unpack_pk +* +* Description: De-serialize public key from a byte array; +* approximate inverse of pack_pk +* +* Arguments: - polyvec *pk: pointer to output public-key polynomial vector +* - uint8_t *seed: pointer to output seed to generate matrix A +* - const uint8_t *packedpk: pointer to input serialized public key +**************************************************/ +static void unpack_pk(polyvec *pk, + uint8_t seed[KYBER_SYMBYTES], + const uint8_t packedpk[KYBER_INDCPA_PUBLICKEYBYTES]) +{ + polyvec_frombytes(pk, packedpk); + memcpy(seed, packedpk+KYBER_POLYVECBYTES, KYBER_SYMBYTES); +} + +/************************************************* +* Name: pack_sk +* +* Description: Serialize the secret key. +* The polynomial coefficients in sk are assumed to +* lie in the invertal [0,q], i.e. sk must be reduced +* by polyvec_reduce(). 
+* +* Arguments: - uint8_t *r: pointer to output serialized secret key +* - polyvec *sk: pointer to input vector of polynomials (secret key) +**************************************************/ +static void pack_sk(uint8_t r[KYBER_INDCPA_SECRETKEYBYTES], polyvec *sk) +{ + polyvec_tobytes(r, sk); +} + +/************************************************* +* Name: unpack_sk +* +* Description: De-serialize the secret key; inverse of pack_sk +* +* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret key) +* - const uint8_t *packedsk: pointer to input serialized secret key +**************************************************/ +static void unpack_sk(polyvec *sk, const uint8_t packedsk[KYBER_INDCPA_SECRETKEYBYTES]) +{ + polyvec_frombytes(sk, packedsk); +} + +/************************************************* +* Name: pack_ciphertext +* +* Description: Serialize the ciphertext as concatenation of the +* compressed and serialized vector of polynomials b +* and the compressed and serialized polynomial v. +* The polynomial coefficients in b and v are assumed to +* lie in the interval [0,q], i.e. b and v must be reduced +* by polyvec_reduce() and poly_reduce(), respectively. 
+* +* Arguments: uint8_t *r: pointer to the output serialized ciphertext +* polyvec *b: pointer to the input vector of polynomials b +* poly *v: pointer to the input polynomial v +**************************************************/ +static void pack_ciphertext(uint8_t r[KYBER_INDCPA_BYTES], polyvec *b, poly *v) +{ + polyvec_compress(r, b); + poly_compress(r+KYBER_POLYVECCOMPRESSEDBYTES, v); +} + +/************************************************* +* Name: unpack_ciphertext +* +* Description: De-serialize and decompress ciphertext from a byte array; +* approximate inverse of pack_ciphertext +* +* Arguments: - polyvec *b: pointer to the output vector of polynomials b +* - poly *v: pointer to the output polynomial v +* - const uint8_t *c: pointer to the input serialized ciphertext +**************************************************/ +static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t c[KYBER_INDCPA_BYTES]) +{ + polyvec_decompress(b, c); + poly_decompress(v, c+KYBER_POLYVECCOMPRESSEDBYTES); +} + +/************************************************* +* Name: rej_uniform +* +* Description: Run rejection sampling on uniform random bytes to generate +* uniform random integers mod q +* +* Arguments: - int16_t *r: pointer to output array +* - unsigned int len: requested number of 16-bit integers (uniform mod q) +* - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes) +* - unsigned int buflen: length of input buffer in bytes +* +* Returns number of sampled 16-bit integers (at most len) +**************************************************/ +static unsigned int rej_uniform(int16_t *r, + unsigned int len, + const uint8_t *buf, + unsigned int buflen) +{ + unsigned int ctr, pos; + uint16_t val0, val1; + + ctr = pos = 0; + while(ctr < len && pos <= buflen - 3) { // buflen is always at least 3 + val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; + val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)) & 0xFFF; + pos += 3; + + 
if(val0 < KYBER_Q) + r[ctr++] = val0; + if(ctr < len && val1 < KYBER_Q) + r[ctr++] = val1; + } + + return ctr; +} + +#define gen_a(A,B) gen_matrix(A,B,0) +#define gen_at(A,B) gen_matrix(A,B,1) + +/************************************************* +* Name: gen_matrix +* +* Description: Deterministically generate matrix A (or the transpose of A) +* from a seed. Entries of the matrix are polynomials that look +* uniformly random. Performs rejection sampling on output of +* an XOF +* +* Arguments: - polyvec *a: pointer to output matrix A +* - const uint8_t *seed: pointer to input seed +* - int transposed: boolean deciding whether A or A^T is generated +**************************************************/ +#if KYBER_K == 2 +void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) +{ + unsigned int ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4]; + __m256i f; + shake128x4incctx state; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + if(transposed) { + buf[0].coeffs[32] = 0; + buf[0].coeffs[33] = 0; + buf[1].coeffs[32] = 0; + buf[1].coeffs[33] = 1; + buf[2].coeffs[32] = 1; + buf[2].coeffs[33] = 0; + buf[3].coeffs[32] = 1; + buf[3].coeffs[33] = 1; + } + else { + buf[0].coeffs[32] = 0; + buf[0].coeffs[33] = 0; + buf[1].coeffs[32] = 1; + buf[1].coeffs[33] = 0; + buf[2].coeffs[32] = 0; + buf[2].coeffs[33] = 1; + buf[3].coeffs[32] = 1; + buf[3].coeffs[33] = 1; + } + + shake128x4_inc_init(&state); + shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); + + ctr0 = rej_uniform_avx(a[0].vec[0].coeffs, buf[0].coeffs); + ctr1 = rej_uniform_avx(a[0].vec[1].coeffs, buf[1].coeffs); + ctr2 = rej_uniform_avx(a[1].vec[0].coeffs, 
buf[2].coeffs); + ctr3 = rej_uniform_avx(a[1].vec[1].coeffs, buf[3].coeffs); + + while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_uniform(a[0].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); + ctr1 += rej_uniform(a[0].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); + ctr2 += rej_uniform(a[1].vec[0].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_uniform(a[1].vec[1].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } + + poly_nttunpack(&a[0].vec[0]); + poly_nttunpack(&a[0].vec[1]); + poly_nttunpack(&a[1].vec[0]); + poly_nttunpack(&a[1].vec[1]); + shake128x4_inc_ctx_release(&state); +} +#elif KYBER_K == 3 +void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) +{ + unsigned int ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4]; + __m256i f; + shake128x4incctx state; + shake128incctx state1x; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + if(transposed) { + buf[0].coeffs[32] = 0; + buf[0].coeffs[33] = 0; + buf[1].coeffs[32] = 0; + buf[1].coeffs[33] = 1; + buf[2].coeffs[32] = 0; + buf[2].coeffs[33] = 2; + buf[3].coeffs[32] = 1; + buf[3].coeffs[33] = 0; + } + else { + buf[0].coeffs[32] = 0; + buf[0].coeffs[33] = 0; + buf[1].coeffs[32] = 1; + buf[1].coeffs[33] = 0; + buf[2].coeffs[32] = 2; + buf[2].coeffs[33] = 0; + buf[3].coeffs[32] = 0; + buf[3].coeffs[33] = 1; + } + + shake128x4_inc_init(&state); + shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); + + ctr0 = 
rej_uniform_avx(a[0].vec[0].coeffs, buf[0].coeffs); + ctr1 = rej_uniform_avx(a[0].vec[1].coeffs, buf[1].coeffs); + ctr2 = rej_uniform_avx(a[0].vec[2].coeffs, buf[2].coeffs); + ctr3 = rej_uniform_avx(a[1].vec[0].coeffs, buf[3].coeffs); + + while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_uniform(a[0].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); + ctr1 += rej_uniform(a[0].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); + ctr2 += rej_uniform(a[0].vec[2].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_uniform(a[1].vec[0].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } + + poly_nttunpack(&a[0].vec[0]); + poly_nttunpack(&a[0].vec[1]); + poly_nttunpack(&a[0].vec[2]); + poly_nttunpack(&a[1].vec[0]); + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + if(transposed) { + buf[0].coeffs[32] = 1; + buf[0].coeffs[33] = 1; + buf[1].coeffs[32] = 1; + buf[1].coeffs[33] = 2; + buf[2].coeffs[32] = 2; + buf[2].coeffs[33] = 0; + buf[3].coeffs[32] = 2; + buf[3].coeffs[33] = 1; + } + else { + buf[0].coeffs[32] = 1; + buf[0].coeffs[33] = 1; + buf[1].coeffs[32] = 2; + buf[1].coeffs[33] = 1; + buf[2].coeffs[32] = 0; + buf[2].coeffs[33] = 2; + buf[3].coeffs[32] = 1; + buf[3].coeffs[33] = 2; + } + + shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); + + ctr0 = rej_uniform_avx(a[1].vec[1].coeffs, buf[0].coeffs); + ctr1 = rej_uniform_avx(a[1].vec[2].coeffs, buf[1].coeffs); + ctr2 = rej_uniform_avx(a[2].vec[0].coeffs, buf[2].coeffs); + ctr3 = 
rej_uniform_avx(a[2].vec[1].coeffs, buf[3].coeffs); + + while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_uniform(a[1].vec[1].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); + ctr1 += rej_uniform(a[1].vec[2].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); + ctr2 += rej_uniform(a[2].vec[0].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_uniform(a[2].vec[1].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } + shake128x4_inc_ctx_release(&state); + + poly_nttunpack(&a[1].vec[1]); + poly_nttunpack(&a[1].vec[2]); + poly_nttunpack(&a[2].vec[0]); + poly_nttunpack(&a[2].vec[1]); + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + buf[0].coeffs[32] = 2; + buf[0].coeffs[33] = 2; + + shake128_inc_init(&state1x); + shake128_absorb_once(&state1x, buf[0].coeffs, 34); + shake128_squeezeblocks(buf[0].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state1x); + ctr0 = rej_uniform_avx(a[2].vec[2].coeffs, buf[0].coeffs); + while(ctr0 < KYBER_N) { + shake128_squeezeblocks(buf[0].coeffs, 1, &state1x); + ctr0 += rej_uniform(a[2].vec[2].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); + } + shake128_inc_ctx_release(&state1x); + + poly_nttunpack(&a[2].vec[2]); +} +#elif KYBER_K == 4 +void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) +{ + unsigned int i, ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4]; + __m256i f; + shake128x4incctx state; + shake128x4_inc_init(&state); + + for(i=0;i<4;i++) { + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + if(transposed) { + buf[0].coeffs[32] = i; + buf[0].coeffs[33] = 0; + buf[1].coeffs[32] = i; + buf[1].coeffs[33] = 
1; + buf[2].coeffs[32] = i; + buf[2].coeffs[33] = 2; + buf[3].coeffs[32] = i; + buf[3].coeffs[33] = 3; + } + else { + buf[0].coeffs[32] = 0; + buf[0].coeffs[33] = i; + buf[1].coeffs[32] = 1; + buf[1].coeffs[33] = i; + buf[2].coeffs[32] = 2; + buf[2].coeffs[33] = i; + buf[3].coeffs[32] = 3; + buf[3].coeffs[33] = i; + } + + shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); + + ctr0 = rej_uniform_avx(a[i].vec[0].coeffs, buf[0].coeffs); + ctr1 = rej_uniform_avx(a[i].vec[1].coeffs, buf[1].coeffs); + ctr2 = rej_uniform_avx(a[i].vec[2].coeffs, buf[2].coeffs); + ctr3 = rej_uniform_avx(a[i].vec[3].coeffs, buf[3].coeffs); + + while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_uniform(a[i].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); + ctr1 += rej_uniform(a[i].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); + ctr2 += rej_uniform(a[i].vec[2].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_uniform(a[i].vec[3].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } + + poly_nttunpack(&a[i].vec[0]); + poly_nttunpack(&a[i].vec[1]); + poly_nttunpack(&a[i].vec[2]); + poly_nttunpack(&a[i].vec[3]); + } + shake128x4_inc_ctx_release(&state); +} +#endif + +/************************************************* +* Name: indcpa_keypair_derand +* +* Description: Generates public and private key for the CPA-secure +* public-key encryption scheme underlying Kyber +* +* Arguments: - uint8_t *pk: pointer to output public key +* (of length KYBER_INDCPA_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key +* (of length KYBER_INDCPA_SECRETKEYBYTES bytes) +* - const uint8_t *coins: pointer to input 
randomness +* (of length KYBER_SYMBYTES bytes) +**************************************************/ +void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], + uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], + const uint8_t coins[KYBER_SYMBYTES]) +{ + unsigned int i; + uint8_t buf[2*KYBER_SYMBYTES]; + const uint8_t *publicseed = buf; + const uint8_t *noiseseed = buf + KYBER_SYMBYTES; + polyvec a[KYBER_K], e, pkpv, skpv; + + hash_g(buf, coins, KYBER_SYMBYTES); + + gen_a(a, publicseed); + +#if KYBER_K == 2 + poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, e.vec+0, e.vec+1, noiseseed, 0, 1, 2, 3); +#elif KYBER_K == 3 + poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, skpv.vec+2, e.vec+0, noiseseed, 0, 1, 2, 3); + poly_getnoise_eta1_4x(e.vec+1, e.vec+2, pkpv.vec+0, pkpv.vec+1, noiseseed, 4, 5, 6, 7); +#elif KYBER_K == 4 + poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, skpv.vec+2, skpv.vec+3, noiseseed, 0, 1, 2, 3); + poly_getnoise_eta1_4x(e.vec+0, e.vec+1, e.vec+2, e.vec+3, noiseseed, 4, 5, 6, 7); +#endif + + polyvec_ntt(&skpv); + polyvec_reduce(&skpv); + polyvec_ntt(&e); + + // matrix-vector multiplication + for(i=0;i +#include "params.h" +#include "polyvec.h" + +#define gen_matrix KYBER_NAMESPACE(gen_matrix) +void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed); + +#define indcpa_keypair_derand KYBER_NAMESPACE(indcpa_keypair_derand) +void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], + uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], + const uint8_t coins[KYBER_SYMBYTES]); + +#define indcpa_enc KYBER_NAMESPACE(indcpa_enc) +void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], + const uint8_t m[KYBER_INDCPA_MSGBYTES], + const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], + const uint8_t coins[KYBER_SYMBYTES]); + +#define indcpa_dec KYBER_NAMESPACE(indcpa_dec) +void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES], + const uint8_t c[KYBER_INDCPA_BYTES], + const uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES]); + +#endif diff --git 
/*
 * butterfly: one inverse-NTT butterfly step on four register pairs.
 *
 * Arguments are ymm register NUMBERS (they are pasted after "%ymm"):
 *   rl0..rl3  "low" inputs; on exit rl_i = rl_i + rh_i
 *   rh0..rh3  "high" inputs; on exit rh_i holds the reduced product of
 *             (rh_i - rl_i) with the twiddle, see the final vpsubw group
 *   zl0, zh0  twiddle registers applied to pairs 0 and 1 (default ymm2/ymm3)
 *   zl1, zh1  twiddle registers applied to pairs 2 and 3 (default ymm2/ymm3)
 *
 * Uses ymm12..ymm15 as scratch (overwritten). Reads ymm0, which the entry
 * point loads from _16XQ before expanding this macro.
 * NOTE(review): the mullo / mulhi / mulhi-by-ymm0 / subtract sequence has the
 * shape of a signed Montgomery multiplication with zl = precomputed
 * zeta*q^-1 and zh = zeta -- confirm against the constants table.
 * The adds/subs of different pairs are interleaved with the multiplies;
 * keep the instruction order as is.
 */
.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,zl0=2,zl1=2,zh0=3,zh1=3
/* differences rh_i - rl_i go to scratch ymm12..15, sums overwrite rl_i */
vpsubw %ymm\rl0,%ymm\rh0,%ymm12
vpaddw %ymm\rh0,%ymm\rl0,%ymm\rl0
vpsubw %ymm\rl1,%ymm\rh1,%ymm13

/* rh_i = low 16 bits of (difference * zl) */
vpmullw %ymm\zl0,%ymm12,%ymm\rh0
vpaddw %ymm\rh1,%ymm\rl1,%ymm\rl1
vpsubw %ymm\rl2,%ymm\rh2,%ymm14

vpmullw %ymm\zl0,%ymm13,%ymm\rh1
vpaddw %ymm\rh2,%ymm\rl2,%ymm\rl2
vpsubw %ymm\rl3,%ymm\rh3,%ymm15

vpmullw %ymm\zl1,%ymm14,%ymm\rh2
vpaddw %ymm\rh3,%ymm\rl3,%ymm\rl3
vpmullw %ymm\zl1,%ymm15,%ymm\rh3

/* scratch = high 16 bits of (difference * zh) */
vpmulhw %ymm\zh0,%ymm12,%ymm12
vpmulhw %ymm\zh0,%ymm13,%ymm13

vpmulhw %ymm\zh1,%ymm14,%ymm14
vpmulhw %ymm\zh1,%ymm15,%ymm15

/* rh_i = high 16 bits of (rh_i * ymm0) */
vpmulhw %ymm0,%ymm\rh0,%ymm\rh0

vpmulhw %ymm0,%ymm\rh1,%ymm\rh1

vpmulhw %ymm0,%ymm\rh2,%ymm\rh2
vpmulhw %ymm0,%ymm\rh3,%ymm\rh3

#

#

/* final result: rh_i = scratch - rh_i */
vpsubw %ymm\rh0,%ymm12,%ymm\rh0

vpsubw %ymm\rh1,%ymm13,%ymm\rh1

vpsubw %ymm\rh2,%ymm14,%ymm\rh2
vpsubw %ymm\rh3,%ymm15,%ymm\rh3
.endm
$0x4E,(_ZETAS_EXP+(1-\off)*224+224)*2(%rsi),%ymm2 +vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+192)*2(%rsi),%ymm3 +vmovdqa _REVIDXB*2(%rsi),%ymm12 +vpshufb %ymm12,%ymm15,%ymm15 +vpshufb %ymm12,%ymm1,%ymm1 +vpshufb %ymm12,%ymm2,%ymm2 +vpshufb %ymm12,%ymm3,%ymm3 + +butterfly 4,5,8,9,6,7,10,11,15,1,2,3 + +/* level 1 */ +vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+144)*2(%rsi),%ymm2 +vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+160)*2(%rsi),%ymm3 +vmovdqa _REVIDXB*2(%rsi),%ymm1 +vpshufb %ymm1,%ymm2,%ymm2 +vpshufb %ymm1,%ymm3,%ymm3 + +butterfly 4,5,6,7,8,9,10,11,2,2,3,3 + +shuffle1 4,5,3,5 +shuffle1 6,7,4,7 +shuffle1 8,9,6,9 +shuffle1 10,11,8,11 + +/* level 2 */ +vmovdqa _REVIDXD*2(%rsi),%ymm12 +vpermd (_ZETAS_EXP+(1-\off)*224+112)*2(%rsi),%ymm12,%ymm2 +vpermd (_ZETAS_EXP+(1-\off)*224+128)*2(%rsi),%ymm12,%ymm10 + +butterfly 3,4,6,8,5,7,9,11,2,2,10,10 + +vmovdqa _16XV*2(%rsi),%ymm1 +red16 3 + +shuffle2 3,4,10,4 +shuffle2 6,8,3,8 +shuffle2 5,7,6,7 +shuffle2 9,11,5,11 + +/* level 3 */ +vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+80)*2(%rsi),%ymm2 +vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+96)*2(%rsi),%ymm9 + +butterfly 10,3,6,5,4,8,7,11,2,2,9,9 + +shuffle4 10,3,9,3 +shuffle4 6,5,10,5 +shuffle4 4,8,6,8 +shuffle4 7,11,4,11 + +/* level 4 */ +vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+48)*2(%rsi),%ymm2 +vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+64)*2(%rsi),%ymm7 + +butterfly 9,10,6,4,3,5,8,11,2,2,7,7 + +red16 9 + +shuffle8 9,10,7,10 +shuffle8 6,4,9,4 +shuffle8 3,5,6,5 +shuffle8 8,11,3,11 + +/* level 5 */ +vmovdqa (_ZETAS_EXP+(1-\off)*224+16)*2(%rsi),%ymm2 +vmovdqa (_ZETAS_EXP+(1-\off)*224+32)*2(%rsi),%ymm8 + +butterfly 7,9,6,3,10,4,5,11,2,2,8,8 + +vmovdqa %ymm7,(128*\off+ 0)*2(%rdi) +vmovdqa %ymm9,(128*\off+ 16)*2(%rdi) +vmovdqa %ymm6,(128*\off+ 32)*2(%rdi) +vmovdqa %ymm3,(128*\off+ 48)*2(%rdi) +vmovdqa %ymm10,(128*\off+ 64)*2(%rdi) +vmovdqa %ymm4,(128*\off+ 80)*2(%rdi) +vmovdqa %ymm5,(128*\off+ 96)*2(%rdi) +vmovdqa %ymm11,(128*\off+112)*2(%rdi) +.endm + +.macro intt_level6 off +/* level 6 */ +vmovdqa (64*\off+ 
0)*2(%rdi),%ymm4 +vmovdqa (64*\off+128)*2(%rdi),%ymm8 +vmovdqa (64*\off+ 16)*2(%rdi),%ymm5 +vmovdqa (64*\off+144)*2(%rdi),%ymm9 +vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm2 + +vmovdqa (64*\off+ 32)*2(%rdi),%ymm6 +vmovdqa (64*\off+160)*2(%rdi),%ymm10 +vmovdqa (64*\off+ 48)*2(%rdi),%ymm7 +vmovdqa (64*\off+176)*2(%rdi),%ymm11 +vpbroadcastq (_ZETAS_EXP+4)*2(%rsi),%ymm3 + +butterfly 4,5,6,7,8,9,10,11 + +.if \off == 0 +red16 4 +.endif + +vmovdqa %ymm4,(64*\off+ 0)*2(%rdi) +vmovdqa %ymm5,(64*\off+ 16)*2(%rdi) +vmovdqa %ymm6,(64*\off+ 32)*2(%rdi) +vmovdqa %ymm7,(64*\off+ 48)*2(%rdi) +vmovdqa %ymm8,(64*\off+128)*2(%rdi) +vmovdqa %ymm9,(64*\off+144)*2(%rdi) +vmovdqa %ymm10,(64*\off+160)*2(%rdi) +vmovdqa %ymm11,(64*\off+176)*2(%rdi) +.endm + +.text +.global cdecl(invntt_avx) +cdecl(invntt_avx): +vmovdqa _16XQ*2(%rsi),%ymm0 + +intt_levels0t5 0 +intt_levels0t5 1 + +intt_level6 0 +intt_level6 1 +ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/kem.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/kem.c new file mode 100644 index 0000000000..63abc1029c --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/kem.c @@ -0,0 +1,169 @@ +#include +#include +#include +#include "params.h" +#include "kem.h" +#include "indcpa.h" +#include "verify.h" +#include "symmetric.h" +#include "randombytes.h" +/************************************************* +* Name: crypto_kem_keypair_derand +* +* Description: Generates public and private key +* for CCA-secure Kyber key encapsulation mechanism +* +* Arguments: - uint8_t *pk: pointer to output public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key +* (an already allocated array of KYBER_SECRETKEYBYTES bytes) +* - uint8_t *coins: pointer to input randomness +* (an already allocated array filled with 2*KYBER_SYMBYTES random bytes) +** +* Returns 0 (success) +**************************************************/ +int 
crypto_kem_keypair_derand(uint8_t *pk, + uint8_t *sk, + const uint8_t *coins) +{ + indcpa_keypair_derand(pk, sk, coins); + memcpy(sk+KYBER_INDCPA_SECRETKEYBYTES, pk, KYBER_PUBLICKEYBYTES); + hash_h(sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); + /* Value z for pseudo-random output on reject */ + memcpy(sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, coins+KYBER_SYMBYTES, KYBER_SYMBYTES); + return 0; +} + +/************************************************* +* Name: crypto_kem_keypair +* +* Description: Generates public and private key +* for CCA-secure Kyber key encapsulation mechanism +* +* Arguments: - uint8_t *pk: pointer to output public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key +* (an already allocated array of KYBER_SECRETKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int crypto_kem_keypair(uint8_t *pk, + uint8_t *sk) +{ + uint8_t coins[2*KYBER_SYMBYTES]; + randombytes(coins, 2*KYBER_SYMBYTES); + crypto_kem_keypair_derand(pk, sk, coins); + return 0; +} + +/************************************************* +* Name: crypto_kem_enc_derand +* +* Description: Generates cipher text and shared +* secret for given public key +* +* Arguments: - uint8_t *ct: pointer to output cipher text +* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) +* - uint8_t *ss: pointer to output shared secret +* (an already allocated array of KYBER_SSBYTES bytes) +* - const uint8_t *pk: pointer to input public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* - const uint8_t *coins: pointer to input randomness +* (an already allocated array filled with KYBER_SYMBYTES random bytes) +** +* Returns 0 (success) +**************************************************/ +int crypto_kem_enc_derand(uint8_t *ct, + uint8_t *ss, + const uint8_t *pk, + const uint8_t *coins) +{ + uint8_t buf[2*KYBER_SYMBYTES]; + /* Will contain key, coins */ + 
uint8_t kr[2*KYBER_SYMBYTES]; + + memcpy(buf, coins, KYBER_SYMBYTES); + + /* Multitarget countermeasure for coins + contributory KEM */ + hash_h(buf+KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); + hash_g(kr, buf, 2*KYBER_SYMBYTES); + + /* coins are in kr+KYBER_SYMBYTES */ + indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES); + + memcpy(ss,kr,KYBER_SYMBYTES); + return 0; +} + +/************************************************* +* Name: crypto_kem_enc +* +* Description: Generates cipher text and shared +* secret for given public key +* +* Arguments: - uint8_t *ct: pointer to output cipher text +* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) +* - uint8_t *ss: pointer to output shared secret +* (an already allocated array of KYBER_SSBYTES bytes) +* - const uint8_t *pk: pointer to input public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int crypto_kem_enc(uint8_t *ct, + uint8_t *ss, + const uint8_t *pk) +{ + uint8_t coins[KYBER_SYMBYTES]; + randombytes(coins, KYBER_SYMBYTES); + crypto_kem_enc_derand(ct, ss, pk, coins); + return 0; +} + +/************************************************* +* Name: crypto_kem_dec +* +* Description: Generates shared secret for given +* cipher text and private key +* +* Arguments: - uint8_t *ss: pointer to output shared secret +* (an already allocated array of KYBER_SSBYTES bytes) +* - const uint8_t *ct: pointer to input cipher text +* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) +* - const uint8_t *sk: pointer to input private key +* (an already allocated array of KYBER_SECRETKEYBYTES bytes) +* +* Returns 0. +* +* On failure, ss will contain a pseudo-random value. 
+**************************************************/ +int crypto_kem_dec(uint8_t *ss, + const uint8_t *ct, + const uint8_t *sk) +{ + int fail; + uint8_t buf[2*KYBER_SYMBYTES]; + /* Will contain key, coins */ + uint8_t kr[2*KYBER_SYMBYTES]; + uint8_t cmp[KYBER_CIPHERTEXTBYTES+KYBER_SYMBYTES]; + const uint8_t *pk = sk+KYBER_INDCPA_SECRETKEYBYTES; + + indcpa_dec(buf, ct, sk); + + /* Multitarget countermeasure for coins + contributory KEM */ + memcpy(buf+KYBER_SYMBYTES, sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, KYBER_SYMBYTES); + hash_g(kr, buf, 2*KYBER_SYMBYTES); + + /* coins are in kr+KYBER_SYMBYTES */ + indcpa_enc(cmp, buf, pk, kr+KYBER_SYMBYTES); + + fail = verify(ct, cmp, KYBER_CIPHERTEXTBYTES); + + /* Compute rejection key */ + rkprf(ss,sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES,ct); + + /* Copy true key to return buffer if fail is false */ + cmov(ss,kr,KYBER_SYMBYTES,!fail); + + return 0; +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/kem.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/kem.h new file mode 100644 index 0000000000..234f11966b --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/kem.h @@ -0,0 +1,35 @@ +#ifndef KEM_H +#define KEM_H + +#include +#include "params.h" + +#define CRYPTO_SECRETKEYBYTES KYBER_SECRETKEYBYTES +#define CRYPTO_PUBLICKEYBYTES KYBER_PUBLICKEYBYTES +#define CRYPTO_CIPHERTEXTBYTES KYBER_CIPHERTEXTBYTES +#define CRYPTO_BYTES KYBER_SSBYTES + +#if (KYBER_K == 2) +#define CRYPTO_ALGNAME "Kyber512" +#elif (KYBER_K == 3) +#define CRYPTO_ALGNAME "Kyber768" +#elif (KYBER_K == 4) +#define CRYPTO_ALGNAME "Kyber1024" +#endif + +#define crypto_kem_keypair_derand KYBER_NAMESPACE(keypair_derand) +int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); + +#define crypto_kem_keypair KYBER_NAMESPACE(keypair) +int crypto_kem_keypair(uint8_t *pk, uint8_t *sk); + +#define crypto_kem_enc_derand KYBER_NAMESPACE(enc_derand) +int crypto_kem_enc_derand(uint8_t 
*ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); + +#define crypto_kem_enc KYBER_NAMESPACE(enc) +int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); + +#define crypto_kem_dec KYBER_NAMESPACE(dec) +int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/ntt.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/ntt.S new file mode 100644 index 0000000000..0ce7b41297 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/ntt.S @@ -0,0 +1,189 @@ +#include "consts.h" +.include "shuffle.inc" + +.macro mul rh0,rh1,rh2,rh3,zl0=15,zl1=15,zh0=2,zh1=2 +vpmullw %ymm\zl0,%ymm\rh0,%ymm12 +vpmullw %ymm\zl0,%ymm\rh1,%ymm13 + +vpmullw %ymm\zl1,%ymm\rh2,%ymm14 +vpmullw %ymm\zl1,%ymm\rh3,%ymm15 + +vpmulhw %ymm\zh0,%ymm\rh0,%ymm\rh0 +vpmulhw %ymm\zh0,%ymm\rh1,%ymm\rh1 + +vpmulhw %ymm\zh1,%ymm\rh2,%ymm\rh2 +vpmulhw %ymm\zh1,%ymm\rh3,%ymm\rh3 +.endm + +.macro reduce +vpmulhw %ymm0,%ymm12,%ymm12 +vpmulhw %ymm0,%ymm13,%ymm13 + +vpmulhw %ymm0,%ymm14,%ymm14 +vpmulhw %ymm0,%ymm15,%ymm15 +.endm + +.macro update rln,rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 +vpaddw %ymm\rh0,%ymm\rl0,%ymm\rln +vpsubw %ymm\rh0,%ymm\rl0,%ymm\rh0 +vpaddw %ymm\rh1,%ymm\rl1,%ymm\rl0 + +vpsubw %ymm\rh1,%ymm\rl1,%ymm\rh1 +vpaddw %ymm\rh2,%ymm\rl2,%ymm\rl1 +vpsubw %ymm\rh2,%ymm\rl2,%ymm\rh2 + +vpaddw %ymm\rh3,%ymm\rl3,%ymm\rl2 +vpsubw %ymm\rh3,%ymm\rl3,%ymm\rh3 + +vpsubw %ymm12,%ymm\rln,%ymm\rln +vpaddw %ymm12,%ymm\rh0,%ymm\rh0 +vpsubw %ymm13,%ymm\rl0,%ymm\rl0 + +vpaddw %ymm13,%ymm\rh1,%ymm\rh1 +vpsubw %ymm14,%ymm\rl1,%ymm\rl1 +vpaddw %ymm14,%ymm\rh2,%ymm\rh2 + +vpsubw %ymm15,%ymm\rl2,%ymm\rl2 +vpaddw %ymm15,%ymm\rh3,%ymm\rh3 +.endm + +.macro level0 off +vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm15 +vmovdqa (64*\off+128)*2(%rdi),%ymm8 +vmovdqa (64*\off+144)*2(%rdi),%ymm9 +vmovdqa (64*\off+160)*2(%rdi),%ymm10 +vmovdqa (64*\off+176)*2(%rdi),%ymm11 +vpbroadcastq 
(_ZETAS_EXP+4)*2(%rsi),%ymm2 + +mul 8,9,10,11 + +vmovdqa (64*\off+ 0)*2(%rdi),%ymm4 +vmovdqa (64*\off+ 16)*2(%rdi),%ymm5 +vmovdqa (64*\off+ 32)*2(%rdi),%ymm6 +vmovdqa (64*\off+ 48)*2(%rdi),%ymm7 + +reduce +update 3,4,5,6,7,8,9,10,11 + +vmovdqa %ymm3,(64*\off+ 0)*2(%rdi) +vmovdqa %ymm4,(64*\off+ 16)*2(%rdi) +vmovdqa %ymm5,(64*\off+ 32)*2(%rdi) +vmovdqa %ymm6,(64*\off+ 48)*2(%rdi) +vmovdqa %ymm8,(64*\off+128)*2(%rdi) +vmovdqa %ymm9,(64*\off+144)*2(%rdi) +vmovdqa %ymm10,(64*\off+160)*2(%rdi) +vmovdqa %ymm11,(64*\off+176)*2(%rdi) +.endm + +.macro levels1t6 off +/* level 1 */ +vmovdqa (_ZETAS_EXP+224*\off+16)*2(%rsi),%ymm15 +vmovdqa (128*\off+ 64)*2(%rdi),%ymm8 +vmovdqa (128*\off+ 80)*2(%rdi),%ymm9 +vmovdqa (128*\off+ 96)*2(%rdi),%ymm10 +vmovdqa (128*\off+112)*2(%rdi),%ymm11 +vmovdqa (_ZETAS_EXP+224*\off+32)*2(%rsi),%ymm2 + +mul 8,9,10,11 + +vmovdqa (128*\off+ 0)*2(%rdi),%ymm4 +vmovdqa (128*\off+ 16)*2(%rdi),%ymm5 +vmovdqa (128*\off+ 32)*2(%rdi),%ymm6 +vmovdqa (128*\off+ 48)*2(%rdi),%ymm7 + +reduce +update 3,4,5,6,7,8,9,10,11 + +/* level 2 */ +shuffle8 5,10,7,10 +shuffle8 6,11,5,11 + +vmovdqa (_ZETAS_EXP+224*\off+48)*2(%rsi),%ymm15 +vmovdqa (_ZETAS_EXP+224*\off+64)*2(%rsi),%ymm2 + +mul 7,10,5,11 + +shuffle8 3,8,6,8 +shuffle8 4,9,3,9 + +reduce +update 4,6,8,3,9,7,10,5,11 + +/* level 3 */ +shuffle4 8,5,9,5 +shuffle4 3,11,8,11 + +vmovdqa (_ZETAS_EXP+224*\off+80)*2(%rsi),%ymm15 +vmovdqa (_ZETAS_EXP+224*\off+96)*2(%rsi),%ymm2 + +mul 9,5,8,11 + +shuffle4 4,7,3,7 +shuffle4 6,10,4,10 + +reduce +update 6,3,7,4,10,9,5,8,11 + +/* level 4 */ +shuffle2 7,8,10,8 +shuffle2 4,11,7,11 + +vmovdqa (_ZETAS_EXP+224*\off+112)*2(%rsi),%ymm15 +vmovdqa (_ZETAS_EXP+224*\off+128)*2(%rsi),%ymm2 + +mul 10,8,7,11 + +shuffle2 6,9,4,9 +shuffle2 3,5,6,5 + +reduce +update 3,4,9,6,5,10,8,7,11 + +/* level 5 */ +shuffle1 9,7,5,7 +shuffle1 6,11,9,11 + +vmovdqa (_ZETAS_EXP+224*\off+144)*2(%rsi),%ymm15 +vmovdqa (_ZETAS_EXP+224*\off+160)*2(%rsi),%ymm2 + +mul 5,7,9,11 + +shuffle1 3,10,6,10 +shuffle1 4,8,3,8 + 
+reduce +update 4,6,10,3,8,5,7,9,11 + +/* level 6 */ +vmovdqa (_ZETAS_EXP+224*\off+176)*2(%rsi),%ymm14 +vmovdqa (_ZETAS_EXP+224*\off+208)*2(%rsi),%ymm15 +vmovdqa (_ZETAS_EXP+224*\off+192)*2(%rsi),%ymm8 +vmovdqa (_ZETAS_EXP+224*\off+224)*2(%rsi),%ymm2 + +mul 10,3,9,11,14,15,8,2 + +reduce +update 8,4,6,5,7,10,3,9,11 + +vmovdqa %ymm8,(128*\off+ 0)*2(%rdi) +vmovdqa %ymm4,(128*\off+ 16)*2(%rdi) +vmovdqa %ymm10,(128*\off+ 32)*2(%rdi) +vmovdqa %ymm3,(128*\off+ 48)*2(%rdi) +vmovdqa %ymm6,(128*\off+ 64)*2(%rdi) +vmovdqa %ymm5,(128*\off+ 80)*2(%rdi) +vmovdqa %ymm9,(128*\off+ 96)*2(%rdi) +vmovdqa %ymm11,(128*\off+112)*2(%rdi) +.endm + +.text +.global cdecl(ntt_avx) +cdecl(ntt_avx): +vmovdqa _16XQ*2(%rsi),%ymm0 + +level0 0 +level0 1 + +levels1t6 0 +levels1t6 1 + +ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/ntt.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/ntt.h new file mode 100644 index 0000000000..a4f48e343b --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/ntt.h @@ -0,0 +1,28 @@ +#ifndef NTT_H +#define NTT_H + +#include +#include + +#define ntt_avx KYBER_NAMESPACE(ntt_avx) +void ntt_avx(__m256i *r, const __m256i *qdata); +#define invntt_avx KYBER_NAMESPACE(invntt_avx) +void invntt_avx(__m256i *r, const __m256i *qdata); + +#define nttpack_avx KYBER_NAMESPACE(nttpack_avx) +void nttpack_avx(__m256i *r, const __m256i *qdata); +#define nttunpack_avx KYBER_NAMESPACE(nttunpack_avx) +void nttunpack_avx(__m256i *r, const __m256i *qdata); + +#define basemul_avx KYBER_NAMESPACE(basemul_avx) +void basemul_avx(__m256i *r, + const __m256i *a, + const __m256i *b, + const __m256i *qdata); + +#define ntttobytes_avx KYBER_NAMESPACE(ntttobytes_avx) +void ntttobytes_avx(uint8_t *r, const __m256i *a, const __m256i *qdata); +#define nttfrombytes_avx KYBER_NAMESPACE(nttfrombytes_avx) +void nttfrombytes_avx(__m256i *r, const uint8_t *a, const __m256i *qdata); + +#endif diff --git 
a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/params.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/params.h new file mode 100644 index 0000000000..fdc688ea2b --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/params.h @@ -0,0 +1,68 @@ +#ifndef PARAMS_H +#define PARAMS_H + +#ifndef KYBER_K +#define KYBER_K 3 /* Change this for different security strengths */ +#endif + +//#define KYBER_90S /* Uncomment this if you want the 90S variant */ + +/* Don't change parameters below this line */ +#if (KYBER_K == 2) +#ifdef KYBER_90S +#define KYBER_NAMESPACE(s) pqcrystals_kyber512_90s_avx2_##s +#else +#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_512_ipd_avx2_##s +#endif +#elif (KYBER_K == 3) +#ifdef KYBER_90S +#define KYBER_NAMESPACE(s) pqcrystals_kyber768_90s_avx2_##s +#else +#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_768_ipd_avx2_##s +#endif +#elif (KYBER_K == 4) +#ifdef KYBER_90S +#define KYBER_NAMESPACE(s) pqcrystals_kyber1024_90s_avx2_##s +#else +#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_1024_ipd_avx2_##s +#endif +#else +#error "KYBER_K must be in {2,3,4}" +#endif + +#define KYBER_N 256 +#define KYBER_Q 3329 + +#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ +#define KYBER_SSBYTES 32 /* size in bytes of shared key */ + +#define KYBER_POLYBYTES 384 +#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) + +#if KYBER_K == 2 +#define KYBER_ETA1 3 +#define KYBER_POLYCOMPRESSEDBYTES 128 +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) +#elif KYBER_K == 3 +#define KYBER_ETA1 2 +#define KYBER_POLYCOMPRESSEDBYTES 128 +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) +#elif KYBER_K == 4 +#define KYBER_ETA1 2 +#define KYBER_POLYCOMPRESSEDBYTES 160 +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352) +#endif + +#define KYBER_ETA2 2 + +#define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES) +#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) +#define 
KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES) +#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) + +#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) +/* 2*KYBER_SYMBYTES bytes of additional space: 32 to save H(pk) and 32 for the rejection value z */ +#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) +#define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES) + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/poly.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/poly.c new file mode 100644 index 0000000000..681fd6d23e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/poly.c @@ -0,0 +1,519 @@ +#include +#include +#include +#include "align.h" +#include "fips202x4.h" +#include "params.h" +#include "poly.h" +#include "ntt.h" +#include "consts.h" +#include "reduce.h" +#include "cbd.h" +#include "symmetric.h" + +/************************************************* +* Name: poly_compress +* +* Description: Compression and subsequent serialization of a polynomial. +* The coefficients of the input polynomial are assumed to +* lie in the interval [0,q], i.e. the polynomial must be reduced +* by poly_reduce().
+* +* Arguments: - uint8_t *r: pointer to output byte array +* (of length KYBER_POLYCOMPRESSEDBYTES) +* - const poly *a: pointer to input polynomial +**************************************************/ +#if (KYBER_POLYCOMPRESSEDBYTES == 128) +void poly_compress(uint8_t r[128], const poly * restrict a) +{ + unsigned int i; + __m256i f0, f1, f2, f3; + const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); + const __m256i shift1 = _mm256_set1_epi16(1 << 9); + const __m256i mask = _mm256_set1_epi16(15); + const __m256i shift2 = _mm256_set1_epi16((16 << 8) + 1); + const __m256i permdidx = _mm256_set_epi32(7,3,6,2,5,1,4,0); + + for(i=0;ivec[4*i+0]); + f1 = _mm256_load_si256(&a->vec[4*i+1]); + f2 = _mm256_load_si256(&a->vec[4*i+2]); + f3 = _mm256_load_si256(&a->vec[4*i+3]); + f0 = _mm256_mulhi_epi16(f0,v); + f1 = _mm256_mulhi_epi16(f1,v); + f2 = _mm256_mulhi_epi16(f2,v); + f3 = _mm256_mulhi_epi16(f3,v); + f0 = _mm256_mulhrs_epi16(f0,shift1); + f1 = _mm256_mulhrs_epi16(f1,shift1); + f2 = _mm256_mulhrs_epi16(f2,shift1); + f3 = _mm256_mulhrs_epi16(f3,shift1); + f0 = _mm256_and_si256(f0,mask); + f1 = _mm256_and_si256(f1,mask); + f2 = _mm256_and_si256(f2,mask); + f3 = _mm256_and_si256(f3,mask); + f0 = _mm256_packus_epi16(f0,f1); + f2 = _mm256_packus_epi16(f2,f3); + f0 = _mm256_maddubs_epi16(f0,shift2); + f2 = _mm256_maddubs_epi16(f2,shift2); + f0 = _mm256_packus_epi16(f0,f2); + f0 = _mm256_permutevar8x32_epi32(f0,permdidx); + _mm256_storeu_si256((__m256i *)&r[32*i],f0); + } +} + +void poly_decompress(poly * restrict r, const uint8_t a[128]) +{ + unsigned int i; + __m128i t; + __m256i f; + const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]); + const __m256i shufbidx = _mm256_set_epi8(7,7,7,7,6,6,6,6,5,5,5,5,4,4,4,4, + 3,3,3,3,2,2,2,2,1,1,1,1,0,0,0,0); + const __m256i mask = _mm256_set1_epi32(0x00F0000F); + const __m256i shift = _mm256_set1_epi32((128 << 16) + 2048); + + for(i=0;ivec[i],f); + } +} + +#elif (KYBER_POLYCOMPRESSEDBYTES == 160) +void poly_compress(uint8_t 
r[160], const poly * restrict a) +{ + unsigned int i; + __m256i f0, f1; + __m128i t0, t1; + const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); + const __m256i shift1 = _mm256_set1_epi16(1 << 10); + const __m256i mask = _mm256_set1_epi16(31); + const __m256i shift2 = _mm256_set1_epi16((32 << 8) + 1); + const __m256i shift3 = _mm256_set1_epi32((1024 << 16) + 1); + const __m256i sllvdidx = _mm256_set1_epi64x(12); + const __m256i shufbidx = _mm256_set_epi8( 8,-1,-1,-1,-1,-1, 4, 3, 2, 1, 0,-1,12,11,10, 9, + -1,12,11,10, 9, 8,-1,-1,-1,-1,-1 ,4, 3, 2, 1, 0); + + for(i=0;ivec[2*i+0]); + f1 = _mm256_load_si256(&a->vec[2*i+1]); + f0 = _mm256_mulhi_epi16(f0,v); + f1 = _mm256_mulhi_epi16(f1,v); + f0 = _mm256_mulhrs_epi16(f0,shift1); + f1 = _mm256_mulhrs_epi16(f1,shift1); + f0 = _mm256_and_si256(f0,mask); + f1 = _mm256_and_si256(f1,mask); + f0 = _mm256_packus_epi16(f0,f1); + f0 = _mm256_maddubs_epi16(f0,shift2); // a0 a1 a2 a3 b0 b1 b2 b3 a4 a5 a6 a7 b4 b5 b6 b7 + f0 = _mm256_madd_epi16(f0,shift3); // a0 a1 b0 b1 a2 a3 b2 b3 + f0 = _mm256_sllv_epi32(f0,sllvdidx); + f0 = _mm256_srlv_epi64(f0,sllvdidx); + f0 = _mm256_shuffle_epi8(f0,shufbidx); + t0 = _mm256_castsi256_si128(f0); + t1 = _mm256_extracti128_si256(f0,1); + t0 = _mm_blendv_epi8(t0,t1,_mm256_castsi256_si128(shufbidx)); + _mm_storeu_si128((__m128i *)&r[20*i+ 0],t0); + memcpy(&r[20*i+16],&t1,4); + } +} + +void poly_decompress(poly * restrict r, const uint8_t a[160]) +{ + unsigned int i; + __m128i t; + __m256i f; + int16_t ti; + const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]); + const __m256i shufbidx = _mm256_set_epi8(9,9,9,8,8,8,8,7,7,6,6,6,6,5,5,5, + 4,4,4,3,3,3,3,2,2,1,1,1,1,0,0,0); + const __m256i mask = _mm256_set_epi16(248,1984,62,496,3968,124,992,31, + 248,1984,62,496,3968,124,992,31); + const __m256i shift = _mm256_set_epi16(128,16,512,64,8,256,32,1024, + 128,16,512,64,8,256,32,1024); + + for(i=0;ivec[i],f); + } +} + +#endif + +/************************************************* +* Name: 
poly_tobytes +* +* Description: Serialization of a polynomial in NTT representation. +* The coefficients of the input polynomial are assumed to +* lie in the interval [0,q], i.e. the polynomial must be reduced +* by poly_reduce(). The coefficients are ordered as output by +* poly_ntt(); the serialized output coefficients are in bitreversed +* order. +* +* Arguments: - uint8_t *r: pointer to output byte array +* (needs space for KYBER_POLYBYTES bytes) +* - const poly *a: pointer to input polynomial +**************************************************/ +void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a) +{ + ntttobytes_avx(r, a->vec, qdata.vec); +} + +/************************************************* +* Name: poly_frombytes +* +* Description: De-serialization of a polynomial; +* inverse of poly_tobytes +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: pointer to input byte array +* (of KYBER_POLYBYTES bytes) +**************************************************/ +void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]) +{ + nttfrombytes_avx(r->vec, a, qdata.vec); +} + +/************************************************* +* Name: poly_frommsg +* +* Description: Convert 32-byte message to polynomial +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *msg: pointer to input message +**************************************************/ +void poly_frommsg(poly * restrict r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]) +{ +#if (KYBER_INDCPA_MSGBYTES != 32) +#error "KYBER_INDCPA_MSGBYTES must be equal to 32!" 
+#endif + __m256i f, g0, g1, g2, g3, h0, h1, h2, h3; + const __m256i shift = _mm256_broadcastsi128_si256(_mm_set_epi32(0,1,2,3)); + const __m256i idx = _mm256_broadcastsi128_si256(_mm_set_epi8(15,14,11,10,7,6,3,2,13,12,9,8,5,4,1,0)); + const __m256i hqs = _mm256_set1_epi16((KYBER_Q+1)/2); + +#define FROMMSG64(i) \ + g3 = _mm256_shuffle_epi32(f,0x55*i); \ + g3 = _mm256_sllv_epi32(g3,shift); \ + g3 = _mm256_shuffle_epi8(g3,idx); \ + g0 = _mm256_slli_epi16(g3,12); \ + g1 = _mm256_slli_epi16(g3,8); \ + g2 = _mm256_slli_epi16(g3,4); \ + g0 = _mm256_srai_epi16(g0,15); \ + g1 = _mm256_srai_epi16(g1,15); \ + g2 = _mm256_srai_epi16(g2,15); \ + g3 = _mm256_srai_epi16(g3,15); \ + g0 = _mm256_and_si256(g0,hqs); /* 19 18 17 16 3 2 1 0 */ \ + g1 = _mm256_and_si256(g1,hqs); /* 23 22 21 20 7 6 5 4 */ \ + g2 = _mm256_and_si256(g2,hqs); /* 27 26 25 24 11 10 9 8 */ \ + g3 = _mm256_and_si256(g3,hqs); /* 31 30 29 28 15 14 13 12 */ \ + h0 = _mm256_unpacklo_epi64(g0,g1); \ + h2 = _mm256_unpackhi_epi64(g0,g1); \ + h1 = _mm256_unpacklo_epi64(g2,g3); \ + h3 = _mm256_unpackhi_epi64(g2,g3); \ + g0 = _mm256_permute2x128_si256(h0,h1,0x20); \ + g2 = _mm256_permute2x128_si256(h0,h1,0x31); \ + g1 = _mm256_permute2x128_si256(h2,h3,0x20); \ + g3 = _mm256_permute2x128_si256(h2,h3,0x31); \ + _mm256_store_si256(&r->vec[0+2*i+0],g0); \ + _mm256_store_si256(&r->vec[0+2*i+1],g1); \ + _mm256_store_si256(&r->vec[8+2*i+0],g2); \ + _mm256_store_si256(&r->vec[8+2*i+1],g3) + + f = _mm256_loadu_si256((__m256i *)msg); + FROMMSG64(0); + FROMMSG64(1); + FROMMSG64(2); + FROMMSG64(3); +} + +/************************************************* +* Name: poly_tomsg +* +* Description: Convert polynomial to 32-byte message. +* The coefficients of the input polynomial are assumed to +* lie in the interval [0,q], i.e. the polynomial must be reduced +* by poly_reduce(). 
+* +* Arguments: - uint8_t *msg: pointer to output message +* - poly *a: pointer to input polynomial +**************************************************/ +void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly * restrict a) +{ + unsigned int i; + uint32_t small; + __m256i f0, f1, g0, g1; + const __m256i hq = _mm256_set1_epi16((KYBER_Q - 1)/2); + const __m256i hhq = _mm256_set1_epi16((KYBER_Q - 1)/4); + + for(i=0;ivec[2*i+0]); + f1 = _mm256_load_si256(&a->vec[2*i+1]); + f0 = _mm256_sub_epi16(hq, f0); + f1 = _mm256_sub_epi16(hq, f1); + g0 = _mm256_srai_epi16(f0, 15); + g1 = _mm256_srai_epi16(f1, 15); + f0 = _mm256_xor_si256(f0, g0); + f1 = _mm256_xor_si256(f1, g1); + f0 = _mm256_sub_epi16(f0, hhq); + f1 = _mm256_sub_epi16(f1, hhq); + f0 = _mm256_packs_epi16(f0, f1); + f0 = _mm256_permute4x64_epi64(f0, 0xD8); + small = _mm256_movemask_epi8(f0); + memcpy(&msg[4*i], &small, 4); + } +} + +/************************************************* +* Name: poly_getnoise_eta1 +* +* Description: Sample a polynomial deterministically from a seed and a nonce, +* with output polynomial close to centered binomial distribution +* with parameter KYBER_ETA1 +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *seed: pointer to input seed +* (of length KYBER_SYMBYTES bytes) +* - uint8_t nonce: one-byte input nonce +**************************************************/ +void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) +{ + ALIGNED_UINT8(KYBER_ETA1*KYBER_N/4+32) buf; // +32 bytes as required by poly_cbd_eta1 + prf(buf.coeffs, KYBER_ETA1*KYBER_N/4, seed, nonce); + poly_cbd_eta1(r, buf.vec); +} + +/************************************************* +* Name: poly_getnoise_eta2 +* +* Description: Sample a polynomial deterministically from a seed and a nonce, +* with output polynomial close to centered binomial distribution +* with parameter KYBER_ETA2 +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *seed: 
pointer to input seed +* (of length KYBER_SYMBYTES bytes) +* - uint8_t nonce: one-byte input nonce +**************************************************/ +void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) +{ + ALIGNED_UINT8(KYBER_ETA2*KYBER_N/4) buf; + prf(buf.coeffs, KYBER_ETA2*KYBER_N/4, seed, nonce); + poly_cbd_eta2(r, buf.vec); +} + +#ifndef KYBER_90S +#define NOISE_NBLOCKS ((KYBER_ETA1*KYBER_N/4+SHAKE256_RATE-1)/SHAKE256_RATE) +void poly_getnoise_eta1_4x(poly *r0, + poly *r1, + poly *r2, + poly *r3, + const uint8_t seed[32], + uint8_t nonce0, + uint8_t nonce1, + uint8_t nonce2, + uint8_t nonce3) +{ + ALIGNED_UINT8(NOISE_NBLOCKS*SHAKE256_RATE) buf[4]; + __m256i f; + shake256x4incctx state; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + buf[0].coeffs[32] = nonce0; + buf[1].coeffs[32] = nonce1; + buf[2].coeffs[32] = nonce2; + buf[3].coeffs[32] = nonce3; + + shake256x4_inc_init(&state); + shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 33); + shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, NOISE_NBLOCKS, &state); + shake256x4_inc_ctx_release(&state); + + poly_cbd_eta1(r0, buf[0].vec); + poly_cbd_eta1(r1, buf[1].vec); + poly_cbd_eta1(r2, buf[2].vec); + poly_cbd_eta1(r3, buf[3].vec); +} + +#if KYBER_K == 2 +void poly_getnoise_eta1122_4x(poly *r0, + poly *r1, + poly *r2, + poly *r3, + const uint8_t seed[32], + uint8_t nonce0, + uint8_t nonce1, + uint8_t nonce2, + uint8_t nonce3) +{ + ALIGNED_UINT8(NOISE_NBLOCKS*SHAKE256_RATE) buf[4]; + __m256i f; + shake256x4incctx state; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + buf[0].coeffs[32] = nonce0; + 
buf[1].coeffs[32] = nonce1; + buf[2].coeffs[32] = nonce2; + buf[3].coeffs[32] = nonce3; + + shake256x4_inc_init(&state); + shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 33); + shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, NOISE_NBLOCKS, &state); + shake256x4_inc_ctx_release(&state); + + poly_cbd_eta1(r0, buf[0].vec); + poly_cbd_eta1(r1, buf[1].vec); + poly_cbd_eta2(r2, buf[2].vec); + poly_cbd_eta2(r3, buf[3].vec); +} +#endif +#endif + +/************************************************* +* Name: poly_ntt +* +* Description: Computes negacyclic number-theoretic transform (NTT) of +* a polynomial in place. +* Input coefficients assumed to be in normal order, +* output coefficients are in special order that is natural +* for the vectorization. Input coefficients are assumed to be +* bounded by q in absolute value, output coefficients are bounded +* by 16118 in absolute value. +* +* Arguments: - poly *r: pointer to in/output polynomial +**************************************************/ +void poly_ntt(poly *r) +{ + ntt_avx(r->vec, qdata.vec); +} + +/************************************************* +* Name: poly_invntt_tomont +* +* Description: Computes inverse of negacyclic number-theoretic transform (NTT) +* of a polynomial in place; +* Input coefficients assumed to be in special order from vectorized +* forward ntt, output in normal order. Input coefficients can be +* arbitrary 16-bit integers, output coefficients are bounded by 14870 +* in absolute value. +* +* Arguments: - poly *r: pointer to in/output polynomial +**************************************************/ +void poly_invntt_tomont(poly *r) +{ + invntt_avx(r->vec, qdata.vec); +} + +void poly_nttunpack(poly *r) +{ + nttunpack_avx(r->vec, qdata.vec); +} + +/************************************************* +* Name: poly_basemul_montgomery +* +* Description: Multiplication of two polynomials in NTT domain. 
+* One of the input polynomials needs to have coefficients +* bounded by q, the other polynomial can have arbitrary +* coefficients. Output coefficients are bounded by 6656. +* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_basemul_montgomery(poly *r, const poly *a, const poly *b) +{ + basemul_avx(r->vec, a->vec, b->vec, qdata.vec); +} + +/************************************************* +* Name: poly_tomont +* +* Description: Inplace conversion of all coefficients of a polynomial +* from normal domain to Montgomery domain +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void poly_tomont(poly *r) +{ + tomont_avx(r->vec, qdata.vec); +} + +/************************************************* +* Name: poly_reduce +* +* Description: Applies Barrett reduction to all coefficients of a polynomial +* for details of the Barrett reduction see comments in reduce.c +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void poly_reduce(poly *r) +{ + reduce_avx(r->vec, qdata.vec); +} + +/************************************************* +* Name: poly_add +* +* Description: Add two polynomials. No modular reduction +* is performed. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_add(poly *r, const poly *a, const poly *b) +{ + unsigned int i; + __m256i f0, f1; + + for(i=0;ivec[i]); + f1 = _mm256_load_si256(&b->vec[i]); + f0 = _mm256_add_epi16(f0, f1); + _mm256_store_si256(&r->vec[i], f0); + } +} + +/************************************************* +* Name: poly_sub +* +* Description: Subtract two polynomials. No modular reduction +* is performed. +* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_sub(poly *r, const poly *a, const poly *b) +{ + unsigned int i; + __m256i f0, f1; + + for(i=0;ivec[i]); + f1 = _mm256_load_si256(&b->vec[i]); + f0 = _mm256_sub_epi16(f0, f1); + _mm256_store_si256(&r->vec[i], f0); + } +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/poly.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/poly.h new file mode 100644 index 0000000000..6a9cf71c70 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/poly.h @@ -0,0 +1,77 @@ +#ifndef POLY_H +#define POLY_H + +#include +#include "align.h" +#include "params.h" + +typedef ALIGNED_INT16(KYBER_N) poly; + +#define poly_compress KYBER_NAMESPACE(poly_compress) +void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a); +#define poly_decompress KYBER_NAMESPACE(poly_decompress) +void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]); + +#define poly_tobytes KYBER_NAMESPACE(poly_tobytes) +void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a); +#define poly_frombytes KYBER_NAMESPACE(poly_frombytes) +void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]); + 
+#define poly_frommsg KYBER_NAMESPACE(poly_frommsg) +void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]); +#define poly_tomsg KYBER_NAMESPACE(poly_tomsg) +void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r); + +#define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1) +void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); + +#define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2) +void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); + +#ifndef KYBER_90S +#define poly_getnoise_eta1_4x KYBER_NAMESPACE(poly_getnoise_eta2_4x) +void poly_getnoise_eta1_4x(poly *r0, + poly *r1, + poly *r2, + poly *r3, + const uint8_t seed[32], + uint8_t nonce0, + uint8_t nonce1, + uint8_t nonce2, + uint8_t nonce3); + +#if KYBER_K == 2 +#define poly_getnoise_eta1122_4x KYBER_NAMESPACE(poly_getnoise_eta1122_4x) +void poly_getnoise_eta1122_4x(poly *r0, + poly *r1, + poly *r2, + poly *r3, + const uint8_t seed[32], + uint8_t nonce0, + uint8_t nonce1, + uint8_t nonce2, + uint8_t nonce3); +#endif +#endif + + +#define poly_ntt KYBER_NAMESPACE(poly_ntt) +void poly_ntt(poly *r); +#define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont) +void poly_invntt_tomont(poly *r); +#define poly_nttunpack KYBER_NAMESPACE(poly_nttunpack) +void poly_nttunpack(poly *r); +#define poly_basemul_montgomery KYBER_NAMESPACE(poly_basemul_montgomery) +void poly_basemul_montgomery(poly *r, const poly *a, const poly *b); +#define poly_tomont KYBER_NAMESPACE(poly_tomont) +void poly_tomont(poly *r); + +#define poly_reduce KYBER_NAMESPACE(poly_reduce) +void poly_reduce(poly *r); + +#define poly_add KYBER_NAMESPACE(poly_add) +void poly_add(poly *r, const poly *a, const poly *b); +#define poly_sub KYBER_NAMESPACE(poly_sub) +void poly_sub(poly *r, const poly *a, const poly *b); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/polyvec.c 
b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/polyvec.c new file mode 100644 index 0000000000..a0174b7b3f --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/polyvec.c @@ -0,0 +1,307 @@ +#include +#include +#include +#include "params.h" +#include "polyvec.h" +#include "poly.h" +#include "ntt.h" +#include "consts.h" + +#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) +static void poly_compress10(uint8_t r[320], const poly * restrict a) +{ + unsigned int i; + __m256i f0, f1, f2; + __m128i t0, t1; + const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); + const __m256i v8 = _mm256_slli_epi16(v,3); + const __m256i off = _mm256_set1_epi16(15); + const __m256i shift1 = _mm256_set1_epi16(1 << 12); + const __m256i mask = _mm256_set1_epi16(1023); + const __m256i shift2 = _mm256_set1_epi64x((1024LL << 48) + (1LL << 32) + (1024 << 16) + 1); + const __m256i sllvdidx = _mm256_set1_epi64x(12); + const __m256i shufbidx = _mm256_set_epi8( 8, 4, 3, 2, 1, 0,-1,-1,-1,-1,-1,-1,12,11,10, 9, + -1,-1,-1,-1,-1,-1,12,11,10, 9, 8, 4, 3, 2, 1, 0); + + for(i=0;ivec[i]); + f1 = _mm256_mullo_epi16(f0,v8); + f2 = _mm256_add_epi16(f0,off); + f0 = _mm256_slli_epi16(f0,3); + f0 = _mm256_mulhi_epi16(f0,v); + f2 = _mm256_sub_epi16(f1,f2); + f1 = _mm256_andnot_si256(f1,f2); + f1 = _mm256_srli_epi16(f1,15); + f0 = _mm256_sub_epi16(f0,f1); + f0 = _mm256_mulhrs_epi16(f0,shift1); + f0 = _mm256_and_si256(f0,mask); + f0 = _mm256_madd_epi16(f0,shift2); + f0 = _mm256_sllv_epi32(f0,sllvdidx); + f0 = _mm256_srli_epi64(f0,12); + f0 = _mm256_shuffle_epi8(f0,shufbidx); + t0 = _mm256_castsi256_si128(f0); + t1 = _mm256_extracti128_si256(f0,1); + t0 = _mm_blend_epi16(t0,t1,0xE0); + _mm_storeu_si128((__m128i *)&r[20*i+ 0],t0); + memcpy(&r[20*i+16],&t1,4); + } +} + +static void poly_decompress10(poly * restrict r, const uint8_t a[320+12]) +{ + unsigned int i; + __m256i f; + const __m256i q = _mm256_set1_epi32((KYBER_Q << 16) + 4*KYBER_Q); + const __m256i shufbidx 
= _mm256_set_epi8(11,10,10, 9, 9, 8, 8, 7, + 6, 5, 5, 4, 4, 3, 3, 2, + 9, 8, 8, 7, 7, 6, 6, 5, + 4, 3, 3, 2, 2, 1, 1, 0); + const __m256i sllvdidx = _mm256_set1_epi64x(4); + const __m256i mask = _mm256_set1_epi32((32736 << 16) + 8184); + + for(i=0;ivec[i],f); + } +} + +#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) +static void poly_compress11(uint8_t r[352+2], const poly * restrict a) +{ + unsigned int i; + __m256i f0, f1, f2; + __m128i t0, t1; + const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); + const __m256i v8 = _mm256_slli_epi16(v,3); + const __m256i off = _mm256_set1_epi16(36); + const __m256i shift1 = _mm256_set1_epi16(1 << 13); + const __m256i mask = _mm256_set1_epi16(2047); + const __m256i shift2 = _mm256_set1_epi64x((2048LL << 48) + (1LL << 32) + (2048 << 16) + 1); + const __m256i sllvdidx = _mm256_set1_epi64x(10); + const __m256i srlvqidx = _mm256_set_epi64x(30,10,30,10); + const __m256i shufbidx = _mm256_set_epi8( 4, 3, 2, 1, 0, 0,-1,-1,-1,-1,10, 9, 8, 7, 6, 5, + -1,-1,-1,-1,-1,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + + for(i=0;ivec[i]); + f1 = _mm256_mullo_epi16(f0,v8); + f2 = _mm256_add_epi16(f0,off); + f0 = _mm256_slli_epi16(f0,3); + f0 = _mm256_mulhi_epi16(f0,v); + f2 = _mm256_sub_epi16(f1,f2); + f1 = _mm256_andnot_si256(f1,f2); + f1 = _mm256_srli_epi16(f1,15); + f0 = _mm256_sub_epi16(f0,f1); + f0 = _mm256_mulhrs_epi16(f0,shift1); + f0 = _mm256_and_si256(f0,mask); + f0 = _mm256_madd_epi16(f0,shift2); + f0 = _mm256_sllv_epi32(f0,sllvdidx); + f1 = _mm256_bsrli_epi128(f0,8); + f0 = _mm256_srlv_epi64(f0,srlvqidx); + f1 = _mm256_slli_epi64(f1,34); + f0 = _mm256_add_epi64(f0,f1); + f0 = _mm256_shuffle_epi8(f0,shufbidx); + t0 = _mm256_castsi256_si128(f0); + t1 = _mm256_extracti128_si256(f0,1); + t0 = _mm_blendv_epi8(t0,t1,_mm256_castsi256_si128(shufbidx)); + _mm_storeu_si128((__m128i *)&r[22*i+ 0],t0); + _mm_storel_epi64((__m128i *)&r[22*i+16],t1); + } +} + +static void poly_decompress11(poly * restrict r, const uint8_t a[352+10]) +{ + 
unsigned int i; + __m256i f; + const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]); + const __m256i shufbidx = _mm256_set_epi8(13,12,12,11,10, 9, 9, 8, + 8, 7, 6, 5, 5, 4, 4, 3, + 10, 9, 9, 8, 7, 6, 6, 5, + 5, 4, 3, 2, 2, 1, 1, 0); + const __m256i srlvdidx = _mm256_set_epi32(0,0,1,0,0,0,1,0); + const __m256i srlvqidx = _mm256_set_epi64x(2,0,2,0); + const __m256i shift = _mm256_set_epi16(4,32,1,8,32,1,4,32,4,32,1,8,32,1,4,32); + const __m256i mask = _mm256_set1_epi16(32752); + + for(i=0;ivec[i],f); + } +} + +#endif + +/************************************************* +* Name: polyvec_compress +* +* Description: Compress and serialize vector of polynomials +* +* Arguments: - uint8_t *r: pointer to output byte array +* (needs space for KYBER_POLYVECCOMPRESSEDBYTES) +* - polyvec *a: pointer to input vector of polynomials +**************************************************/ +void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES+2], const polyvec *a) +{ + unsigned int i; + +#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) + for(i=0;ivec[i]); +#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) + for(i=0;ivec[i]); +#endif +} + +/************************************************* +* Name: polyvec_decompress +* +* Description: De-serialize and decompress vector of polynomials; +* approximate inverse of polyvec_compress +* +* Arguments: - polyvec *r: pointer to output vector of polynomials +* - const uint8_t *a: pointer to input byte array +* (of length KYBER_POLYVECCOMPRESSEDBYTES) +**************************************************/ +void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES+12]) +{ + unsigned int i; + +#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) + for(i=0;ivec[i],&a[320*i]); +#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) + for(i=0;ivec[i],&a[352*i]); +#endif +} + +/************************************************* +* Name: polyvec_tobytes +* +* Description: Serialize vector of polynomials +* 
+* Arguments: - uint8_t *r: pointer to output byte array +* (needs space for KYBER_POLYVECBYTES) +* - polyvec *a: pointer to input vector of polynomials +**************************************************/ +void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_frombytes +* +* Description: De-serialize vector of polynomials; +* inverse of polyvec_tobytes +* +* Arguments: - polyvec *r: pointer to output vector of polynomials +* - const uint8_t *a: pointer to input byte array +* (of length KYBER_POLYVECBYTES) +**************************************************/ +void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]) +{ + unsigned int i; + for(i=0;ivec[i], a+i*KYBER_POLYBYTES); +} + +/************************************************* +* Name: polyvec_ntt +* +* Description: Apply forward NTT to all elements of a vector of polynomials +* +* Arguments: - polyvec *r: pointer to in/output vector of polynomials +**************************************************/ +void polyvec_ntt(polyvec *r) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_invntt_tomont +* +* Description: Apply inverse NTT to all elements of a vector of polynomials +* and multiply by Montgomery factor 2^16 +* +* Arguments: - polyvec *r: pointer to in/output vector of polynomials +**************************************************/ +void polyvec_invntt_tomont(polyvec *r) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_basemul_acc_montgomery +* +* Description: Multiply elements in a and b in NTT domain, accumulate into r, +* and multiply by 2^-16. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const polyvec *a: pointer to first input vector of polynomials +* - const polyvec *b: pointer to second input vector of polynomials +**************************************************/ +void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) +{ + unsigned int i; + poly tmp; + + poly_basemul_montgomery(r,&a->vec[0],&b->vec[0]); + for(i=1;ivec[i],&b->vec[i]); + poly_add(r,r,&tmp); + } +} + +/************************************************* +* Name: polyvec_reduce +* +* Description: Applies Barrett reduction to each coefficient +* of each element of a vector of polynomials; +* for details of the Barrett reduction see comments in reduce.c +* +* Arguments: - polyvec *r: pointer to input/output polynomial +**************************************************/ +void polyvec_reduce(polyvec *r) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_add +* +* Description: Add vectors of polynomials +* +* Arguments: - polyvec *r: pointer to output vector of polynomials +* - const polyvec *a: pointer to first input vector of polynomials +* - const polyvec *b: pointer to second input vector of polynomials +**************************************************/ +void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) +{ + unsigned int i; + for(i=0;ivec[i], &a->vec[i], &b->vec[i]); +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/polyvec.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/polyvec.h new file mode 100644 index 0000000000..2ce23c31ff --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/polyvec.h @@ -0,0 +1,36 @@ +#ifndef POLYVEC_H +#define POLYVEC_H + +#include +#include "params.h" +#include "poly.h" + +typedef struct{ + poly vec[KYBER_K]; +} polyvec; + +#define polyvec_compress KYBER_NAMESPACE(polyvec_compress) +void polyvec_compress(uint8_t 
r[KYBER_POLYVECCOMPRESSEDBYTES+2], const polyvec *a); +#define polyvec_decompress KYBER_NAMESPACE(polyvec_decompress) +void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES+12]); + +#define polyvec_tobytes KYBER_NAMESPACE(polyvec_tobytes) +void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a); +#define polyvec_frombytes KYBER_NAMESPACE(polyvec_frombytes) +void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]); + +#define polyvec_ntt KYBER_NAMESPACE(polyvec_ntt) +void polyvec_ntt(polyvec *r); +#define polyvec_invntt_tomont KYBER_NAMESPACE(polyvec_invntt_tomont) +void polyvec_invntt_tomont(polyvec *r); + +#define polyvec_basemul_acc_montgomery KYBER_NAMESPACE(polyvec_basemul_acc_montgomery) +void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b); + +#define polyvec_reduce KYBER_NAMESPACE(polyvec_reduce) +void polyvec_reduce(polyvec *r); + +#define polyvec_add KYBER_NAMESPACE(polyvec_add) +void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/reduce.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/reduce.h new file mode 100644 index 0000000000..5368185b5f --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/reduce.h @@ -0,0 +1,12 @@ +#ifndef REDUCE_H +#define REDUCE_H + +#include "params.h" +#include + +#define reduce_avx KYBER_NAMESPACE(reduce_avx) +void reduce_avx(__m256i *r, const __m256i *qdata); +#define tomont_avx KYBER_NAMESPACE(tomont_avx) +void tomont_avx(__m256i *r, const __m256i *qdata); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/rejsample.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/rejsample.c new file mode 100644 index 0000000000..9060a44cb9 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/rejsample.c @@ -0,0 +1,398 @@ +#include 
+#include +#include +#include "params.h" +#include "consts.h" +#include "rejsample.h" + +//#define BMI + +#ifndef BMI +static const uint8_t idx[256][8] = { + {-1, -1, -1, -1, -1, -1, -1, -1}, + { 0, -1, -1, -1, -1, -1, -1, -1}, + { 2, -1, -1, -1, -1, -1, -1, -1}, + { 0, 2, -1, -1, -1, -1, -1, -1}, + { 4, -1, -1, -1, -1, -1, -1, -1}, + { 0, 4, -1, -1, -1, -1, -1, -1}, + { 2, 4, -1, -1, -1, -1, -1, -1}, + { 0, 2, 4, -1, -1, -1, -1, -1}, + { 6, -1, -1, -1, -1, -1, -1, -1}, + { 0, 6, -1, -1, -1, -1, -1, -1}, + { 2, 6, -1, -1, -1, -1, -1, -1}, + { 0, 2, 6, -1, -1, -1, -1, -1}, + { 4, 6, -1, -1, -1, -1, -1, -1}, + { 0, 4, 6, -1, -1, -1, -1, -1}, + { 2, 4, 6, -1, -1, -1, -1, -1}, + { 0, 2, 4, 6, -1, -1, -1, -1}, + { 8, -1, -1, -1, -1, -1, -1, -1}, + { 0, 8, -1, -1, -1, -1, -1, -1}, + { 2, 8, -1, -1, -1, -1, -1, -1}, + { 0, 2, 8, -1, -1, -1, -1, -1}, + { 4, 8, -1, -1, -1, -1, -1, -1}, + { 0, 4, 8, -1, -1, -1, -1, -1}, + { 2, 4, 8, -1, -1, -1, -1, -1}, + { 0, 2, 4, 8, -1, -1, -1, -1}, + { 6, 8, -1, -1, -1, -1, -1, -1}, + { 0, 6, 8, -1, -1, -1, -1, -1}, + { 2, 6, 8, -1, -1, -1, -1, -1}, + { 0, 2, 6, 8, -1, -1, -1, -1}, + { 4, 6, 8, -1, -1, -1, -1, -1}, + { 0, 4, 6, 8, -1, -1, -1, -1}, + { 2, 4, 6, 8, -1, -1, -1, -1}, + { 0, 2, 4, 6, 8, -1, -1, -1}, + {10, -1, -1, -1, -1, -1, -1, -1}, + { 0, 10, -1, -1, -1, -1, -1, -1}, + { 2, 10, -1, -1, -1, -1, -1, -1}, + { 0, 2, 10, -1, -1, -1, -1, -1}, + { 4, 10, -1, -1, -1, -1, -1, -1}, + { 0, 4, 10, -1, -1, -1, -1, -1}, + { 2, 4, 10, -1, -1, -1, -1, -1}, + { 0, 2, 4, 10, -1, -1, -1, -1}, + { 6, 10, -1, -1, -1, -1, -1, -1}, + { 0, 6, 10, -1, -1, -1, -1, -1}, + { 2, 6, 10, -1, -1, -1, -1, -1}, + { 0, 2, 6, 10, -1, -1, -1, -1}, + { 4, 6, 10, -1, -1, -1, -1, -1}, + { 0, 4, 6, 10, -1, -1, -1, -1}, + { 2, 4, 6, 10, -1, -1, -1, -1}, + { 0, 2, 4, 6, 10, -1, -1, -1}, + { 8, 10, -1, -1, -1, -1, -1, -1}, + { 0, 8, 10, -1, -1, -1, -1, -1}, + { 2, 8, 10, -1, -1, -1, -1, -1}, + { 0, 2, 8, 10, -1, -1, -1, -1}, + { 4, 8, 10, -1, -1, -1, -1, -1}, + { 0, 
4, 8, 10, -1, -1, -1, -1}, + { 2, 4, 8, 10, -1, -1, -1, -1}, + { 0, 2, 4, 8, 10, -1, -1, -1}, + { 6, 8, 10, -1, -1, -1, -1, -1}, + { 0, 6, 8, 10, -1, -1, -1, -1}, + { 2, 6, 8, 10, -1, -1, -1, -1}, + { 0, 2, 6, 8, 10, -1, -1, -1}, + { 4, 6, 8, 10, -1, -1, -1, -1}, + { 0, 4, 6, 8, 10, -1, -1, -1}, + { 2, 4, 6, 8, 10, -1, -1, -1}, + { 0, 2, 4, 6, 8, 10, -1, -1}, + {12, -1, -1, -1, -1, -1, -1, -1}, + { 0, 12, -1, -1, -1, -1, -1, -1}, + { 2, 12, -1, -1, -1, -1, -1, -1}, + { 0, 2, 12, -1, -1, -1, -1, -1}, + { 4, 12, -1, -1, -1, -1, -1, -1}, + { 0, 4, 12, -1, -1, -1, -1, -1}, + { 2, 4, 12, -1, -1, -1, -1, -1}, + { 0, 2, 4, 12, -1, -1, -1, -1}, + { 6, 12, -1, -1, -1, -1, -1, -1}, + { 0, 6, 12, -1, -1, -1, -1, -1}, + { 2, 6, 12, -1, -1, -1, -1, -1}, + { 0, 2, 6, 12, -1, -1, -1, -1}, + { 4, 6, 12, -1, -1, -1, -1, -1}, + { 0, 4, 6, 12, -1, -1, -1, -1}, + { 2, 4, 6, 12, -1, -1, -1, -1}, + { 0, 2, 4, 6, 12, -1, -1, -1}, + { 8, 12, -1, -1, -1, -1, -1, -1}, + { 0, 8, 12, -1, -1, -1, -1, -1}, + { 2, 8, 12, -1, -1, -1, -1, -1}, + { 0, 2, 8, 12, -1, -1, -1, -1}, + { 4, 8, 12, -1, -1, -1, -1, -1}, + { 0, 4, 8, 12, -1, -1, -1, -1}, + { 2, 4, 8, 12, -1, -1, -1, -1}, + { 0, 2, 4, 8, 12, -1, -1, -1}, + { 6, 8, 12, -1, -1, -1, -1, -1}, + { 0, 6, 8, 12, -1, -1, -1, -1}, + { 2, 6, 8, 12, -1, -1, -1, -1}, + { 0, 2, 6, 8, 12, -1, -1, -1}, + { 4, 6, 8, 12, -1, -1, -1, -1}, + { 0, 4, 6, 8, 12, -1, -1, -1}, + { 2, 4, 6, 8, 12, -1, -1, -1}, + { 0, 2, 4, 6, 8, 12, -1, -1}, + {10, 12, -1, -1, -1, -1, -1, -1}, + { 0, 10, 12, -1, -1, -1, -1, -1}, + { 2, 10, 12, -1, -1, -1, -1, -1}, + { 0, 2, 10, 12, -1, -1, -1, -1}, + { 4, 10, 12, -1, -1, -1, -1, -1}, + { 0, 4, 10, 12, -1, -1, -1, -1}, + { 2, 4, 10, 12, -1, -1, -1, -1}, + { 0, 2, 4, 10, 12, -1, -1, -1}, + { 6, 10, 12, -1, -1, -1, -1, -1}, + { 0, 6, 10, 12, -1, -1, -1, -1}, + { 2, 6, 10, 12, -1, -1, -1, -1}, + { 0, 2, 6, 10, 12, -1, -1, -1}, + { 4, 6, 10, 12, -1, -1, -1, -1}, + { 0, 4, 6, 10, 12, -1, -1, -1}, + { 2, 4, 6, 10, 12, -1, -1, -1}, + { 0, 
2, 4, 6, 10, 12, -1, -1}, + { 8, 10, 12, -1, -1, -1, -1, -1}, + { 0, 8, 10, 12, -1, -1, -1, -1}, + { 2, 8, 10, 12, -1, -1, -1, -1}, + { 0, 2, 8, 10, 12, -1, -1, -1}, + { 4, 8, 10, 12, -1, -1, -1, -1}, + { 0, 4, 8, 10, 12, -1, -1, -1}, + { 2, 4, 8, 10, 12, -1, -1, -1}, + { 0, 2, 4, 8, 10, 12, -1, -1}, + { 6, 8, 10, 12, -1, -1, -1, -1}, + { 0, 6, 8, 10, 12, -1, -1, -1}, + { 2, 6, 8, 10, 12, -1, -1, -1}, + { 0, 2, 6, 8, 10, 12, -1, -1}, + { 4, 6, 8, 10, 12, -1, -1, -1}, + { 0, 4, 6, 8, 10, 12, -1, -1}, + { 2, 4, 6, 8, 10, 12, -1, -1}, + { 0, 2, 4, 6, 8, 10, 12, -1}, + {14, -1, -1, -1, -1, -1, -1, -1}, + { 0, 14, -1, -1, -1, -1, -1, -1}, + { 2, 14, -1, -1, -1, -1, -1, -1}, + { 0, 2, 14, -1, -1, -1, -1, -1}, + { 4, 14, -1, -1, -1, -1, -1, -1}, + { 0, 4, 14, -1, -1, -1, -1, -1}, + { 2, 4, 14, -1, -1, -1, -1, -1}, + { 0, 2, 4, 14, -1, -1, -1, -1}, + { 6, 14, -1, -1, -1, -1, -1, -1}, + { 0, 6, 14, -1, -1, -1, -1, -1}, + { 2, 6, 14, -1, -1, -1, -1, -1}, + { 0, 2, 6, 14, -1, -1, -1, -1}, + { 4, 6, 14, -1, -1, -1, -1, -1}, + { 0, 4, 6, 14, -1, -1, -1, -1}, + { 2, 4, 6, 14, -1, -1, -1, -1}, + { 0, 2, 4, 6, 14, -1, -1, -1}, + { 8, 14, -1, -1, -1, -1, -1, -1}, + { 0, 8, 14, -1, -1, -1, -1, -1}, + { 2, 8, 14, -1, -1, -1, -1, -1}, + { 0, 2, 8, 14, -1, -1, -1, -1}, + { 4, 8, 14, -1, -1, -1, -1, -1}, + { 0, 4, 8, 14, -1, -1, -1, -1}, + { 2, 4, 8, 14, -1, -1, -1, -1}, + { 0, 2, 4, 8, 14, -1, -1, -1}, + { 6, 8, 14, -1, -1, -1, -1, -1}, + { 0, 6, 8, 14, -1, -1, -1, -1}, + { 2, 6, 8, 14, -1, -1, -1, -1}, + { 0, 2, 6, 8, 14, -1, -1, -1}, + { 4, 6, 8, 14, -1, -1, -1, -1}, + { 0, 4, 6, 8, 14, -1, -1, -1}, + { 2, 4, 6, 8, 14, -1, -1, -1}, + { 0, 2, 4, 6, 8, 14, -1, -1}, + {10, 14, -1, -1, -1, -1, -1, -1}, + { 0, 10, 14, -1, -1, -1, -1, -1}, + { 2, 10, 14, -1, -1, -1, -1, -1}, + { 0, 2, 10, 14, -1, -1, -1, -1}, + { 4, 10, 14, -1, -1, -1, -1, -1}, + { 0, 4, 10, 14, -1, -1, -1, -1}, + { 2, 4, 10, 14, -1, -1, -1, -1}, + { 0, 2, 4, 10, 14, -1, -1, -1}, + { 6, 10, 14, -1, -1, -1, -1, -1}, + { 0, 
6, 10, 14, -1, -1, -1, -1}, + { 2, 6, 10, 14, -1, -1, -1, -1}, + { 0, 2, 6, 10, 14, -1, -1, -1}, + { 4, 6, 10, 14, -1, -1, -1, -1}, + { 0, 4, 6, 10, 14, -1, -1, -1}, + { 2, 4, 6, 10, 14, -1, -1, -1}, + { 0, 2, 4, 6, 10, 14, -1, -1}, + { 8, 10, 14, -1, -1, -1, -1, -1}, + { 0, 8, 10, 14, -1, -1, -1, -1}, + { 2, 8, 10, 14, -1, -1, -1, -1}, + { 0, 2, 8, 10, 14, -1, -1, -1}, + { 4, 8, 10, 14, -1, -1, -1, -1}, + { 0, 4, 8, 10, 14, -1, -1, -1}, + { 2, 4, 8, 10, 14, -1, -1, -1}, + { 0, 2, 4, 8, 10, 14, -1, -1}, + { 6, 8, 10, 14, -1, -1, -1, -1}, + { 0, 6, 8, 10, 14, -1, -1, -1}, + { 2, 6, 8, 10, 14, -1, -1, -1}, + { 0, 2, 6, 8, 10, 14, -1, -1}, + { 4, 6, 8, 10, 14, -1, -1, -1}, + { 0, 4, 6, 8, 10, 14, -1, -1}, + { 2, 4, 6, 8, 10, 14, -1, -1}, + { 0, 2, 4, 6, 8, 10, 14, -1}, + {12, 14, -1, -1, -1, -1, -1, -1}, + { 0, 12, 14, -1, -1, -1, -1, -1}, + { 2, 12, 14, -1, -1, -1, -1, -1}, + { 0, 2, 12, 14, -1, -1, -1, -1}, + { 4, 12, 14, -1, -1, -1, -1, -1}, + { 0, 4, 12, 14, -1, -1, -1, -1}, + { 2, 4, 12, 14, -1, -1, -1, -1}, + { 0, 2, 4, 12, 14, -1, -1, -1}, + { 6, 12, 14, -1, -1, -1, -1, -1}, + { 0, 6, 12, 14, -1, -1, -1, -1}, + { 2, 6, 12, 14, -1, -1, -1, -1}, + { 0, 2, 6, 12, 14, -1, -1, -1}, + { 4, 6, 12, 14, -1, -1, -1, -1}, + { 0, 4, 6, 12, 14, -1, -1, -1}, + { 2, 4, 6, 12, 14, -1, -1, -1}, + { 0, 2, 4, 6, 12, 14, -1, -1}, + { 8, 12, 14, -1, -1, -1, -1, -1}, + { 0, 8, 12, 14, -1, -1, -1, -1}, + { 2, 8, 12, 14, -1, -1, -1, -1}, + { 0, 2, 8, 12, 14, -1, -1, -1}, + { 4, 8, 12, 14, -1, -1, -1, -1}, + { 0, 4, 8, 12, 14, -1, -1, -1}, + { 2, 4, 8, 12, 14, -1, -1, -1}, + { 0, 2, 4, 8, 12, 14, -1, -1}, + { 6, 8, 12, 14, -1, -1, -1, -1}, + { 0, 6, 8, 12, 14, -1, -1, -1}, + { 2, 6, 8, 12, 14, -1, -1, -1}, + { 0, 2, 6, 8, 12, 14, -1, -1}, + { 4, 6, 8, 12, 14, -1, -1, -1}, + { 0, 4, 6, 8, 12, 14, -1, -1}, + { 2, 4, 6, 8, 12, 14, -1, -1}, + { 0, 2, 4, 6, 8, 12, 14, -1}, + {10, 12, 14, -1, -1, -1, -1, -1}, + { 0, 10, 12, 14, -1, -1, -1, -1}, + { 2, 10, 12, 14, -1, -1, -1, -1}, + { 0, 2, 
10, 12, 14, -1, -1, -1}, + { 4, 10, 12, 14, -1, -1, -1, -1}, + { 0, 4, 10, 12, 14, -1, -1, -1}, + { 2, 4, 10, 12, 14, -1, -1, -1}, + { 0, 2, 4, 10, 12, 14, -1, -1}, + { 6, 10, 12, 14, -1, -1, -1, -1}, + { 0, 6, 10, 12, 14, -1, -1, -1}, + { 2, 6, 10, 12, 14, -1, -1, -1}, + { 0, 2, 6, 10, 12, 14, -1, -1}, + { 4, 6, 10, 12, 14, -1, -1, -1}, + { 0, 4, 6, 10, 12, 14, -1, -1}, + { 2, 4, 6, 10, 12, 14, -1, -1}, + { 0, 2, 4, 6, 10, 12, 14, -1}, + { 8, 10, 12, 14, -1, -1, -1, -1}, + { 0, 8, 10, 12, 14, -1, -1, -1}, + { 2, 8, 10, 12, 14, -1, -1, -1}, + { 0, 2, 8, 10, 12, 14, -1, -1}, + { 4, 8, 10, 12, 14, -1, -1, -1}, + { 0, 4, 8, 10, 12, 14, -1, -1}, + { 2, 4, 8, 10, 12, 14, -1, -1}, + { 0, 2, 4, 8, 10, 12, 14, -1}, + { 6, 8, 10, 12, 14, -1, -1, -1}, + { 0, 6, 8, 10, 12, 14, -1, -1}, + { 2, 6, 8, 10, 12, 14, -1, -1}, + { 0, 2, 6, 8, 10, 12, 14, -1}, + { 4, 6, 8, 10, 12, 14, -1, -1}, + { 0, 4, 6, 8, 10, 12, 14, -1}, + { 2, 4, 6, 8, 10, 12, 14, -1}, + { 0, 2, 4, 6, 8, 10, 12, 14} +}; +#endif + +#define _mm256_cmpge_epu16(a, b) _mm256_cmpeq_epi16(_mm256_max_epu16(a, b), a) +#define _mm_cmpge_epu16(a, b) _mm_cmpeq_epi16(_mm_max_epu16(a, b), a) + +unsigned int rej_uniform_avx(int16_t * restrict r, const uint8_t *buf) +{ + unsigned int ctr, pos; + uint16_t val0, val1; + uint32_t good; +#ifdef BMI + uint64_t idx0, idx1, idx2, idx3; +#endif + const __m256i bound = _mm256_load_si256(&qdata.vec[_16XQ/16]); + const __m256i ones = _mm256_set1_epi8(1); + const __m256i mask = _mm256_set1_epi16(0xFFF); + const __m256i idx8 = _mm256_set_epi8(15,14,14,13,12,11,11,10, + 9, 8, 8, 7, 6, 5, 5, 4, + 11,10,10, 9, 8, 7, 7, 6, + 5, 4, 4, 3, 2, 1, 1, 0); + __m256i f0, f1, g0, g1, g2, g3; + __m128i f, t, pilo, pihi; + + ctr = pos = 0; + while(ctr <= KYBER_N - 32 && pos <= REJ_UNIFORM_AVX_BUFLEN - 56) { + f0 = _mm256_loadu_si256((__m256i *)&buf[pos]); + f1 = _mm256_loadu_si256((__m256i *)&buf[pos+24]); + f0 = _mm256_permute4x64_epi64(f0, 0x94); + f1 = _mm256_permute4x64_epi64(f1, 0x94); + f0 = 
_mm256_shuffle_epi8(f0, idx8); + f1 = _mm256_shuffle_epi8(f1, idx8); + g0 = _mm256_srli_epi16(f0, 4); + g1 = _mm256_srli_epi16(f1, 4); + f0 = _mm256_blend_epi16(f0, g0, 0xAA); + f1 = _mm256_blend_epi16(f1, g1, 0xAA); + f0 = _mm256_and_si256(f0, mask); + f1 = _mm256_and_si256(f1, mask); + pos += 48; + + g0 = _mm256_cmpgt_epi16(bound, f0); + g1 = _mm256_cmpgt_epi16(bound, f1); + + g0 = _mm256_packs_epi16(g0, g1); + good = _mm256_movemask_epi8(g0); + +#ifdef BMI + idx0 = _pdep_u64(good >> 0, 0x0101010101010101); + idx1 = _pdep_u64(good >> 8, 0x0101010101010101); + idx2 = _pdep_u64(good >> 16, 0x0101010101010101); + idx3 = _pdep_u64(good >> 24, 0x0101010101010101); + idx0 = (idx0 << 8) - idx0; + idx0 = _pext_u64(0x0E0C0A0806040200, idx0); + idx1 = (idx1 << 8) - idx1; + idx1 = _pext_u64(0x0E0C0A0806040200, idx1); + idx2 = (idx2 << 8) - idx2; + idx2 = _pext_u64(0x0E0C0A0806040200, idx2); + idx3 = (idx3 << 8) - idx3; + idx3 = _pext_u64(0x0E0C0A0806040200, idx3); + + g0 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx0)); + g1 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx1)); + g0 = _mm256_inserti128_si256(g0, _mm_cvtsi64_si128(idx2), 1); + g1 = _mm256_inserti128_si256(g1, _mm_cvtsi64_si128(idx3), 1); +#else + g0 = _mm256_castsi128_si256(_mm_loadl_epi64((__m128i *)&idx[(good >> 0) & 0xFF])); + g1 = _mm256_castsi128_si256(_mm_loadl_epi64((__m128i *)&idx[(good >> 8) & 0xFF])); + g0 = _mm256_inserti128_si256(g0, _mm_loadl_epi64((__m128i *)&idx[(good >> 16) & 0xFF]), 1); + g1 = _mm256_inserti128_si256(g1, _mm_loadl_epi64((__m128i *)&idx[(good >> 24) & 0xFF]), 1); +#endif + + g2 = _mm256_add_epi8(g0, ones); + g3 = _mm256_add_epi8(g1, ones); + g0 = _mm256_unpacklo_epi8(g0, g2); + g1 = _mm256_unpacklo_epi8(g1, g3); + + f0 = _mm256_shuffle_epi8(f0, g0); + f1 = _mm256_shuffle_epi8(f1, g1); + + _mm_storeu_si128((__m128i *)&r[ctr], _mm256_castsi256_si128(f0)); + ctr += _mm_popcnt_u32((good >> 0) & 0xFF); + _mm_storeu_si128((__m128i *)&r[ctr], _mm256_extracti128_si256(f0, 1)); + ctr += 
_mm_popcnt_u32((good >> 16) & 0xFF); + _mm_storeu_si128((__m128i *)&r[ctr], _mm256_castsi256_si128(f1)); + ctr += _mm_popcnt_u32((good >> 8) & 0xFF); + _mm_storeu_si128((__m128i *)&r[ctr], _mm256_extracti128_si256(f1, 1)); + ctr += _mm_popcnt_u32((good >> 24) & 0xFF); + } + + while(ctr <= KYBER_N - 8 && pos <= REJ_UNIFORM_AVX_BUFLEN - 16) { + f = _mm_loadu_si128((__m128i *)&buf[pos]); + f = _mm_shuffle_epi8(f, _mm256_castsi256_si128(idx8)); + t = _mm_srli_epi16(f, 4); + f = _mm_blend_epi16(f, t, 0xAA); + f = _mm_and_si128(f, _mm256_castsi256_si128(mask)); + pos += 12; + + t = _mm_cmpgt_epi16(_mm256_castsi256_si128(bound), f); + good = _mm_movemask_epi8(t); + +#ifdef BMI + good &= 0x5555; + idx0 = _pdep_u64(good, 0x1111111111111111); + idx0 = (idx0 << 8) - idx0; + idx0 = _pext_u64(0x0E0C0A0806040200, idx0); + pilo = _mm_cvtsi64_si128(idx0); +#else + good = _pext_u32(good, 0x5555); + pilo = _mm_loadl_epi64((__m128i *)&idx[good]); +#endif + + pihi = _mm_add_epi8(pilo, _mm256_castsi256_si128(ones)); + pilo = _mm_unpacklo_epi8(pilo, pihi); + f = _mm_shuffle_epi8(f, pilo); + _mm_storeu_si128((__m128i *)&r[ctr], f); + ctr += _mm_popcnt_u32(good); + } + + while(ctr < KYBER_N && pos <= REJ_UNIFORM_AVX_BUFLEN - 3) { + val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; + val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)); + pos += 3; + + if(val0 < KYBER_Q) + r[ctr++] = val0; + if(val1 < KYBER_Q && ctr < KYBER_N) + r[ctr++] = val1; + } + + return ctr; +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/rejsample.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/rejsample.h new file mode 100644 index 0000000000..3be5e2192e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/rejsample.h @@ -0,0 +1,14 @@ +#ifndef REJSAMPLE_H +#define REJSAMPLE_H + +#include +#include "params.h" +#include "symmetric.h" + +#define REJ_UNIFORM_AVX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + 
XOF_BLOCKBYTES)/XOF_BLOCKBYTES) +#define REJ_UNIFORM_AVX_BUFLEN (REJ_UNIFORM_AVX_NBLOCKS*XOF_BLOCKBYTES) + +#define rej_uniform_avx KYBER_NAMESPACE(rej_uniform_avx) +unsigned int rej_uniform_avx(int16_t *r, const uint8_t *buf); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/shuffle.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/shuffle.S new file mode 100644 index 0000000000..18325ebec0 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/shuffle.S @@ -0,0 +1,255 @@ +#include "consts.h" +.include "fq.inc" +.include "shuffle.inc" + +/* +nttpack_avx: +#load +vmovdqa (%rdi),%ymm4 +vmovdqa 32(%rdi),%ymm5 +vmovdqa 64(%rdi),%ymm6 +vmovdqa 96(%rdi),%ymm7 +vmovdqa 128(%rdi),%ymm8 +vmovdqa 160(%rdi),%ymm9 +vmovdqa 192(%rdi),%ymm10 +vmovdqa 224(%rdi),%ymm11 + +shuffle1 4,5,3,5 +shuffle1 6,7,4,7 +shuffle1 8,9,6,9 +shuffle1 10,11,8,11 + +shuffle2 3,4,10,4 +shuffle2 6,8,3,8 +shuffle2 5,7,6,7 +shuffle2 9,11,5,11 + +shuffle4 10,3,9,3 +shuffle4 6,5,10,5 +shuffle4 4,8,6,8 +shuffle4 7,11,4,11 + +shuffle8 9,10,7,10 +shuffle8 6,4,9,4 +shuffle8 3,5,6,5 +shuffle8 8,11,3,11 + +#store +vmovdqa %ymm7,(%rdi) +vmovdqa %ymm9,32(%rdi) +vmovdqa %ymm6,64(%rdi) +vmovdqa %ymm3,96(%rdi) +vmovdqa %ymm10,128(%rdi) +vmovdqa %ymm4,160(%rdi) +vmovdqa %ymm5,192(%rdi) +vmovdqa %ymm11,224(%rdi) + +ret +*/ + +.text +nttunpack128_avx: +#load +vmovdqa (%rdi),%ymm4 +vmovdqa 32(%rdi),%ymm5 +vmovdqa 64(%rdi),%ymm6 +vmovdqa 96(%rdi),%ymm7 +vmovdqa 128(%rdi),%ymm8 +vmovdqa 160(%rdi),%ymm9 +vmovdqa 192(%rdi),%ymm10 +vmovdqa 224(%rdi),%ymm11 + +shuffle8 4,8,3,8 +shuffle8 5,9,4,9 +shuffle8 6,10,5,10 +shuffle8 7,11,6,11 + +shuffle4 3,5,7,5 +shuffle4 8,10,3,10 +shuffle4 4,6,8,6 +shuffle4 9,11,4,11 + +shuffle2 7,8,9,8 +shuffle2 5,6,7,6 +shuffle2 3,4,5,4 +shuffle2 10,11,3,11 + +shuffle1 9,5,10,5 +shuffle1 8,4,9,4 +shuffle1 7,3,8,3 +shuffle1 6,11,7,11 + +#store +vmovdqa %ymm10,(%rdi) +vmovdqa %ymm5,32(%rdi) +vmovdqa %ymm9,64(%rdi) 
+vmovdqa %ymm4,96(%rdi) +vmovdqa %ymm8,128(%rdi) +vmovdqa %ymm3,160(%rdi) +vmovdqa %ymm7,192(%rdi) +vmovdqa %ymm11,224(%rdi) + +ret + +.global cdecl(nttunpack_avx) +cdecl(nttunpack_avx): +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +ret + +ntttobytes128_avx: +#load +vmovdqa (%rsi),%ymm5 +vmovdqa 32(%rsi),%ymm6 +vmovdqa 64(%rsi),%ymm7 +vmovdqa 96(%rsi),%ymm8 +vmovdqa 128(%rsi),%ymm9 +vmovdqa 160(%rsi),%ymm10 +vmovdqa 192(%rsi),%ymm11 +vmovdqa 224(%rsi),%ymm12 + +#csubq +csubq 5,13 +csubq 6,13 +csubq 7,13 +csubq 8,13 +csubq 9,13 +csubq 10,13 +csubq 11,13 +csubq 12,13 + +#bitpack +vpsllw $12,%ymm6,%ymm4 +vpor %ymm4,%ymm5,%ymm4 + +vpsrlw $4,%ymm6,%ymm5 +vpsllw $8,%ymm7,%ymm6 +vpor %ymm5,%ymm6,%ymm5 + +vpsrlw $8,%ymm7,%ymm6 +vpsllw $4,%ymm8,%ymm7 +vpor %ymm6,%ymm7,%ymm6 + +vpsllw $12,%ymm10,%ymm7 +vpor %ymm7,%ymm9,%ymm7 + +vpsrlw $4,%ymm10,%ymm8 +vpsllw $8,%ymm11,%ymm9 +vpor %ymm8,%ymm9,%ymm8 + +vpsrlw $8,%ymm11,%ymm9 +vpsllw $4,%ymm12,%ymm10 +vpor %ymm9,%ymm10,%ymm9 + +shuffle1 4,5,3,5 +shuffle1 6,7,4,7 +shuffle1 8,9,6,9 + +shuffle2 3,4,8,4 +shuffle2 6,5,3,5 +shuffle2 7,9,6,9 + +shuffle4 8,3,7,3 +shuffle4 6,4,8,4 +shuffle4 5,9,6,9 + +shuffle8 7,8,5,8 +shuffle8 6,3,7,3 +shuffle8 4,9,6,9 + +#store +vmovdqu %ymm5,(%rdi) +vmovdqu %ymm7,32(%rdi) +vmovdqu %ymm6,64(%rdi) +vmovdqu %ymm8,96(%rdi) +vmovdqu %ymm3,128(%rdi) +vmovdqu %ymm9,160(%rdi) + +ret + +.global cdecl(ntttobytes_avx) +cdecl(ntttobytes_avx): +#consts +vmovdqa _16XQ*2(%rdx),%ymm0 +call ntttobytes128_avx +add $256,%rsi +add $192,%rdi +call ntttobytes128_avx +ret + +nttfrombytes128_avx: +#load +vmovdqu (%rsi),%ymm4 +vmovdqu 32(%rsi),%ymm5 +vmovdqu 64(%rsi),%ymm6 +vmovdqu 96(%rsi),%ymm7 +vmovdqu 128(%rsi),%ymm8 +vmovdqu 160(%rsi),%ymm9 + +shuffle8 4,7,3,7 +shuffle8 5,8,4,8 +shuffle8 6,9,5,9 + +shuffle4 3,8,6,8 +shuffle4 7,5,3,5 +shuffle4 4,9,7,9 + +shuffle2 6,5,4,5 +shuffle2 8,7,6,7 +shuffle2 3,9,8,9 + +shuffle1 4,7,10,7 +shuffle1 5,8,4,8 +shuffle1 6,9,5,9 + +#bitunpack +vpsrlw $12,%ymm10,%ymm11 
+vpsllw $4,%ymm7,%ymm12 +vpor %ymm11,%ymm12,%ymm11 +vpand %ymm0,%ymm10,%ymm10 +vpand %ymm0,%ymm11,%ymm11 + +vpsrlw $8,%ymm7,%ymm12 +vpsllw $8,%ymm4,%ymm13 +vpor %ymm12,%ymm13,%ymm12 +vpand %ymm0,%ymm12,%ymm12 + +vpsrlw $4,%ymm4,%ymm13 +vpand %ymm0,%ymm13,%ymm13 + +vpsrlw $12,%ymm8,%ymm14 +vpsllw $4,%ymm5,%ymm15 +vpor %ymm14,%ymm15,%ymm14 +vpand %ymm0,%ymm8,%ymm8 +vpand %ymm0,%ymm14,%ymm14 + +vpsrlw $8,%ymm5,%ymm15 +vpsllw $8,%ymm9,%ymm1 +vpor %ymm15,%ymm1,%ymm15 +vpand %ymm0,%ymm15,%ymm15 + +vpsrlw $4,%ymm9,%ymm1 +vpand %ymm0,%ymm1,%ymm1 + +#store +vmovdqa %ymm10,(%rdi) +vmovdqa %ymm11,32(%rdi) +vmovdqa %ymm12,64(%rdi) +vmovdqa %ymm13,96(%rdi) +vmovdqa %ymm8,128(%rdi) +vmovdqa %ymm14,160(%rdi) +vmovdqa %ymm15,192(%rdi) +vmovdqa %ymm1,224(%rdi) + +ret + +.global cdecl(nttfrombytes_avx) +cdecl(nttfrombytes_avx): +#consts +vmovdqa _16XMASK*2(%rdx),%ymm0 +call nttfrombytes128_avx +add $256,%rdi +add $192,%rsi +call nttfrombytes128_avx +ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/shuffle.inc b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/shuffle.inc new file mode 100644 index 0000000000..73e9ffe03c --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/shuffle.inc @@ -0,0 +1,25 @@ +.macro shuffle8 r0,r1,r2,r3 +vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2 +vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle4 r0,r1,r2,r3 +vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2 +vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle2 r0,r1,r2,r3 +#vpsllq $32,%ymm\r1,%ymm\r2 +vmovsldup %ymm\r1,%ymm\r2 +vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 +vpsrlq $32,%ymm\r0,%ymm\r0 +#vmovshdup %ymm\r0,%ymm\r0 +vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle1 r0,r1,r2,r3 +vpslld $16,%ymm\r1,%ymm\r2 +vpblendw $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 +vpsrld $16,%ymm\r0,%ymm\r0 +vpblendw $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 +.endm diff --git 
a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/symmetric-shake.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/symmetric-shake.c new file mode 100644 index 0000000000..20f451882e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/symmetric-shake.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include "params.h" +#include "symmetric.h" +#include "fips202.h" + +/************************************************* +* Name: kyber_shake128_absorb +* +* Description: Absorb step of the SHAKE128 specialized for the Kyber context. +* +* Arguments: - shake128incctx *state: pointer to (uninitialized) output Keccak state +* - const uint8_t *seed: pointer to KYBER_SYMBYTES input to be absorbed into state +* - uint8_t x: additional byte of input +* - uint8_t y: additional byte of input +**************************************************/ +void kyber_shake128_absorb(shake128incctx *state, + const uint8_t seed[KYBER_SYMBYTES], + uint8_t x, + uint8_t y) +{ + uint8_t extseed[KYBER_SYMBYTES+2]; + + memcpy(extseed, seed, KYBER_SYMBYTES); + extseed[KYBER_SYMBYTES+0] = x; + extseed[KYBER_SYMBYTES+1] = y; + + shake128_absorb_once(state, extseed, sizeof(extseed)); +} + +/************************************************* +* Name: kyber_shake256_prf +* +* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input +* and then generates outlen bytes of SHAKE256 output +* +* Arguments: - uint8_t *out: pointer to output +* - size_t outlen: number of requested output bytes +* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) +* - uint8_t nonce: single-byte nonce (public PRF input) +**************************************************/ +void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce) +{ + uint8_t extkey[KYBER_SYMBYTES+1]; + + memcpy(extkey, key, KYBER_SYMBYTES); + extkey[KYBER_SYMBYTES] = nonce; + + shake256(out, outlen, extkey, 
sizeof(extkey)); +} + +/************************************************* +* Name: kyber_shake256_rkprf +* +* Description: Usage of SHAKE256 as a PRF, absorbs the key followed by the input +* and then generates KYBER_SSBYTES bytes of SHAKE256 output +* +* Arguments: - uint8_t *out: pointer to output (of length KYBER_SSBYTES) +* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) +* - const uint8_t *input: pointer to the input +* (of length KYBER_CIPHERTEXTBYTES) +**************************************************/ +void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]) +{ + shake256incctx s; + + shake256_inc_init(&s); + shake256_inc_absorb(&s, key, KYBER_SYMBYTES); + shake256_inc_absorb(&s, input, KYBER_CIPHERTEXTBYTES); + shake256_inc_finalize(&s); + shake256_inc_squeeze(out, KYBER_SSBYTES, &s); + shake256_inc_ctx_release(&s); +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/symmetric.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/symmetric.h new file mode 100644 index 0000000000..e4941f7a86 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/symmetric.h @@ -0,0 +1,34 @@ +#ifndef SYMMETRIC_H +#define SYMMETRIC_H + +#include +#include +#include "params.h" + +#include "fips202.h" +#include "fips202x4.h" + +typedef shake128incctx xof_state; + +#define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb) +void kyber_shake128_absorb(shake128incctx *s, + const uint8_t seed[KYBER_SYMBYTES], + uint8_t x, + uint8_t y); + +#define kyber_shake256_prf KYBER_NAMESPACE(kyber_shake256_prf) +void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce); + +#define kyber_shake256_rkprf KYBER_NAMESPACE(kyber_shake256_rkprf) +void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]); + +#define 
XOF_BLOCKBYTES SHAKE128_RATE + +#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) +#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES) +#define xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED, X, Y) +#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) +#define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, NONCE) +#define rkprf(OUT, KEY, INPUT) kyber_shake256_rkprf(OUT, KEY, INPUT) + +#endif /* SYMMETRIC_H */ diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/verify.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/verify.c new file mode 100644 index 0000000000..aa8e2850b1 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/verify.c @@ -0,0 +1,73 @@ +#include +#include +#include +#include "verify.h" + +/************************************************* +* Name: verify +* +* Description: Compare two arrays for equality in constant time. +* +* Arguments: const uint8_t *a: pointer to first byte array +* const uint8_t *b: pointer to second byte array +* size_t len: length of the byte arrays +* +* Returns 0 if the byte arrays are equal, 1 otherwise +**************************************************/ +int verify(const uint8_t *a, const uint8_t *b, size_t len) +{ + size_t i; + uint64_t r; + __m256i f, g, h; + + h = _mm256_setzero_si256(); + for(i=0;i> 63; + return r; +} + +/************************************************* +* Name: cmov +* +* Description: Copy len bytes from x to r if b is 1; +* don't modify x if b is 0. Requires b to be in {0,1}; +* assumes two's complement representation of negative integers. +* Runs in constant time. 
+* +* Arguments: uint8_t *r: pointer to output byte array +* const uint8_t *x: pointer to input byte array +* size_t len: Amount of bytes to be copied +* uint8_t b: Condition bit; has to be in {0,1} +**************************************************/ +void cmov(uint8_t * restrict r, const uint8_t *x, size_t len, uint8_t b) +{ + size_t i; + __m256i xvec, rvec, bvec; + + bvec = _mm256_set1_epi64x(-(uint64_t)b); + for(i=0;i +#include +#include "params.h" + +#define verify KYBER_NAMESPACE(verify) +int verify(const uint8_t *a, const uint8_t *b, size_t len); + +#define cmov KYBER_NAMESPACE(cmov) +void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/LICENSE b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/LICENSE new file mode 100644 index 0000000000..7922ab8007 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/LICENSE @@ -0,0 +1,6 @@ +Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/); +or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html). + +For Keccak and AES we are using public-domain +code from sources and by authors listed in +comments on top of the respective files. 
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/api.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/api.h new file mode 100644 index 0000000000..70d40f3f3e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/api.h @@ -0,0 +1,66 @@ +#ifndef API_H +#define API_H + +#include + +#define pqcrystals_kyber512_SECRETKEYBYTES 1632 +#define pqcrystals_kyber512_PUBLICKEYBYTES 800 +#define pqcrystals_kyber512_CIPHERTEXTBYTES 768 +#define pqcrystals_kyber512_KEYPAIRCOINBYTES 64 +#define pqcrystals_kyber512_ENCCOINBYTES 32 +#define pqcrystals_kyber512_BYTES 32 + +#define pqcrystals_kyber512_ref_SECRETKEYBYTES pqcrystals_kyber512_SECRETKEYBYTES +#define pqcrystals_kyber512_ref_PUBLICKEYBYTES pqcrystals_kyber512_PUBLICKEYBYTES +#define pqcrystals_kyber512_ref_CIPHERTEXTBYTES pqcrystals_kyber512_CIPHERTEXTBYTES +#define pqcrystals_kyber512_ref_KEYPAIRCOINBYTES pqcrystals_kyber512_KEYPAIRCOINBYTES +#define pqcrystals_kyber512_ref_ENCCOINBYTES pqcrystals_kyber512_ENCCOINBYTES +#define pqcrystals_kyber512_ref_BYTES pqcrystals_kyber512_BYTES + +int pqcrystals_kyber512_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); +int pqcrystals_kyber512_ref_keypair(uint8_t *pk, uint8_t *sk); +int pqcrystals_kyber512_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); +int pqcrystals_kyber512_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int pqcrystals_kyber512_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#define pqcrystals_kyber768_SECRETKEYBYTES 2400 +#define pqcrystals_kyber768_PUBLICKEYBYTES 1184 +#define pqcrystals_kyber768_CIPHERTEXTBYTES 1088 +#define pqcrystals_kyber768_KEYPAIRCOINBYTES 64 +#define pqcrystals_kyber768_ENCCOINBYTES 32 +#define pqcrystals_kyber768_BYTES 32 + +#define pqcrystals_kyber768_ref_SECRETKEYBYTES pqcrystals_kyber768_SECRETKEYBYTES +#define pqcrystals_kyber768_ref_PUBLICKEYBYTES pqcrystals_kyber768_PUBLICKEYBYTES +#define 
pqcrystals_kyber768_ref_CIPHERTEXTBYTES pqcrystals_kyber768_CIPHERTEXTBYTES +#define pqcrystals_kyber768_ref_KEYPAIRCOINBYTES pqcrystals_kyber768_KEYPAIRCOINBYTES +#define pqcrystals_kyber768_ref_ENCCOINBYTES pqcrystals_kyber768_ENCCOINBYTES +#define pqcrystals_kyber768_ref_BYTES pqcrystals_kyber768_BYTES + +int pqcrystals_kyber768_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); +int pqcrystals_kyber768_ref_keypair(uint8_t *pk, uint8_t *sk); +int pqcrystals_kyber768_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); +int pqcrystals_kyber768_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int pqcrystals_kyber768_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#define pqcrystals_kyber1024_SECRETKEYBYTES 3168 +#define pqcrystals_kyber1024_PUBLICKEYBYTES 1568 +#define pqcrystals_kyber1024_CIPHERTEXTBYTES 1568 +#define pqcrystals_kyber1024_KEYPAIRCOINBYTES 64 +#define pqcrystals_kyber1024_ENCCOINBYTES 32 +#define pqcrystals_kyber1024_BYTES 32 + +#define pqcrystals_kyber1024_ref_SECRETKEYBYTES pqcrystals_kyber1024_SECRETKEYBYTES +#define pqcrystals_kyber1024_ref_PUBLICKEYBYTES pqcrystals_kyber1024_PUBLICKEYBYTES +#define pqcrystals_kyber1024_ref_CIPHERTEXTBYTES pqcrystals_kyber1024_CIPHERTEXTBYTES +#define pqcrystals_kyber1024_ref_KEYPAIRCOINBYTES pqcrystals_kyber1024_KEYPAIRCOINBYTES +#define pqcrystals_kyber1024_ref_ENCCOINBYTES pqcrystals_kyber1024_ENCCOINBYTES +#define pqcrystals_kyber1024_ref_BYTES pqcrystals_kyber1024_BYTES + +int pqcrystals_kyber1024_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); +int pqcrystals_kyber1024_ref_keypair(uint8_t *pk, uint8_t *sk); +int pqcrystals_kyber1024_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); +int pqcrystals_kyber1024_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int pqcrystals_kyber1024_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#endif diff --git 
a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/cbd.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/cbd.c new file mode 100644 index 0000000000..1500ffea56 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/cbd.c @@ -0,0 +1,128 @@ +#include +#include "params.h" +#include "cbd.h" + +/************************************************* +* Name: load32_littleendian +* +* Description: load 4 bytes into a 32-bit integer +* in little-endian order +* +* Arguments: - const uint8_t *x: pointer to input byte array +* +* Returns 32-bit unsigned integer loaded from x +**************************************************/ +static uint32_t load32_littleendian(const uint8_t x[4]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + r |= (uint32_t)x[3] << 24; + return r; +} + +/************************************************* +* Name: load24_littleendian +* +* Description: load 3 bytes into a 32-bit integer +* in little-endian order. 
+* This function is only needed for Kyber-512 +* +* Arguments: - const uint8_t *x: pointer to input byte array +* +* Returns 32-bit unsigned integer loaded from x (most significant byte is zero) +**************************************************/ +#if KYBER_ETA1 == 3 +static uint32_t load24_littleendian(const uint8_t x[3]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + return r; +} +#endif + + +/************************************************* +* Name: cbd2 +* +* Description: Given an array of uniformly random bytes, compute +* polynomial with coefficients distributed according to +* a centered binomial distribution with parameter eta=2 +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *buf: pointer to input byte array +**************************************************/ +static void cbd2(poly *r, const uint8_t buf[2*KYBER_N/4]) +{ + unsigned int i,j; + uint32_t t,d; + int16_t a,b; + + for(i=0;i>1) & 0x55555555; + + for(j=0;j<8;j++) { + a = (d >> (4*j+0)) & 0x3; + b = (d >> (4*j+2)) & 0x3; + r->coeffs[8*i+j] = a - b; + } + } +} + +/************************************************* +* Name: cbd3 +* +* Description: Given an array of uniformly random bytes, compute +* polynomial with coefficients distributed according to +* a centered binomial distribution with parameter eta=3. 
+* This function is only needed for Kyber-512 +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *buf: pointer to input byte array +**************************************************/ +#if KYBER_ETA1 == 3 +static void cbd3(poly *r, const uint8_t buf[3*KYBER_N/4]) +{ + unsigned int i,j; + uint32_t t,d; + int16_t a,b; + + for(i=0;i>1) & 0x00249249; + d += (t>>2) & 0x00249249; + + for(j=0;j<4;j++) { + a = (d >> (6*j+0)) & 0x7; + b = (d >> (6*j+3)) & 0x7; + r->coeffs[4*i+j] = a - b; + } + } +} +#endif + +void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]) +{ +#if KYBER_ETA1 == 2 + cbd2(r, buf); +#elif KYBER_ETA1 == 3 + cbd3(r, buf); +#else +#error "This implementation requires eta1 in {2,3}" +#endif +} + +void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]) +{ +#if KYBER_ETA2 == 2 + cbd2(r, buf); +#else +#error "This implementation requires eta2 = 2" +#endif +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/cbd.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/cbd.h new file mode 100644 index 0000000000..7b677d745d --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/cbd.h @@ -0,0 +1,14 @@ +#ifndef CBD_H +#define CBD_H + +#include +#include "params.h" +#include "poly.h" + +#define poly_cbd_eta1 KYBER_NAMESPACE(poly_cbd_eta1) +void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]); + +#define poly_cbd_eta2 KYBER_NAMESPACE(poly_cbd_eta2) +void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/indcpa.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/indcpa.c new file mode 100644 index 0000000000..4a8b4c894f --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/indcpa.c @@ -0,0 +1,331 @@ +#include +#include +#include +#include "params.h" +#include "indcpa.h" +#include "polyvec.h" +#include 
"poly.h" +#include "ntt.h" +#include "symmetric.h" +#include "randombytes.h" + +/************************************************* +* Name: pack_pk +* +* Description: Serialize the public key as concatenation of the +* serialized vector of polynomials pk +* and the public seed used to generate the matrix A. +* +* Arguments: uint8_t *r: pointer to the output serialized public key +* polyvec *pk: pointer to the input public-key polyvec +* const uint8_t *seed: pointer to the input public seed +**************************************************/ +static void pack_pk(uint8_t r[KYBER_INDCPA_PUBLICKEYBYTES], + polyvec *pk, + const uint8_t seed[KYBER_SYMBYTES]) +{ + polyvec_tobytes(r, pk); + memcpy(r+KYBER_POLYVECBYTES, seed, KYBER_SYMBYTES); +} + +/************************************************* +* Name: unpack_pk +* +* Description: De-serialize public key from a byte array; +* approximate inverse of pack_pk +* +* Arguments: - polyvec *pk: pointer to output public-key polynomial vector +* - uint8_t *seed: pointer to output seed to generate matrix A +* - const uint8_t *packedpk: pointer to input serialized public key +**************************************************/ +static void unpack_pk(polyvec *pk, + uint8_t seed[KYBER_SYMBYTES], + const uint8_t packedpk[KYBER_INDCPA_PUBLICKEYBYTES]) +{ + polyvec_frombytes(pk, packedpk); + memcpy(seed, packedpk+KYBER_POLYVECBYTES, KYBER_SYMBYTES); +} + +/************************************************* +* Name: pack_sk +* +* Description: Serialize the secret key +* +* Arguments: - uint8_t *r: pointer to output serialized secret key +* - polyvec *sk: pointer to input vector of polynomials (secret key) +**************************************************/ +static void pack_sk(uint8_t r[KYBER_INDCPA_SECRETKEYBYTES], polyvec *sk) +{ + polyvec_tobytes(r, sk); +} + +/************************************************* +* Name: unpack_sk +* +* Description: De-serialize the secret key; inverse of pack_sk +* +* Arguments: - polyvec *sk: 
pointer to output vector of polynomials (secret key) +* - const uint8_t *packedsk: pointer to input serialized secret key +**************************************************/ +static void unpack_sk(polyvec *sk, const uint8_t packedsk[KYBER_INDCPA_SECRETKEYBYTES]) +{ + polyvec_frombytes(sk, packedsk); +} + +/************************************************* +* Name: pack_ciphertext +* +* Description: Serialize the ciphertext as concatenation of the +* compressed and serialized vector of polynomials b +* and the compressed and serialized polynomial v +* +* Arguments: uint8_t *r: pointer to the output serialized ciphertext +* poly *pk: pointer to the input vector of polynomials b +* poly *v: pointer to the input polynomial v +**************************************************/ +static void pack_ciphertext(uint8_t r[KYBER_INDCPA_BYTES], polyvec *b, poly *v) +{ + polyvec_compress(r, b); + poly_compress(r+KYBER_POLYVECCOMPRESSEDBYTES, v); +} + +/************************************************* +* Name: unpack_ciphertext +* +* Description: De-serialize and decompress ciphertext from a byte array; +* approximate inverse of pack_ciphertext +* +* Arguments: - polyvec *b: pointer to the output vector of polynomials b +* - poly *v: pointer to the output polynomial v +* - const uint8_t *c: pointer to the input serialized ciphertext +**************************************************/ +static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t c[KYBER_INDCPA_BYTES]) +{ + polyvec_decompress(b, c); + poly_decompress(v, c+KYBER_POLYVECCOMPRESSEDBYTES); +} + +/************************************************* +* Name: rej_uniform +* +* Description: Run rejection sampling on uniform random bytes to generate +* uniform random integers mod q +* +* Arguments: - int16_t *r: pointer to output buffer +* - unsigned int len: requested number of 16-bit integers (uniform mod q) +* - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes) +* - unsigned int 
buflen: length of input buffer in bytes +* +* Returns number of sampled 16-bit integers (at most len) +**************************************************/ +static unsigned int rej_uniform(int16_t *r, + unsigned int len, + const uint8_t *buf, + unsigned int buflen) +{ + unsigned int ctr, pos; + uint16_t val0, val1; + + ctr = pos = 0; + while(ctr < len && pos + 3 <= buflen) { + val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; + val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)) & 0xFFF; + pos += 3; + + if(val0 < KYBER_Q) + r[ctr++] = val0; + if(ctr < len && val1 < KYBER_Q) + r[ctr++] = val1; + } + + return ctr; +} + +#define gen_a(A,B) gen_matrix(A,B,0) +#define gen_at(A,B) gen_matrix(A,B,1) + +/************************************************* +* Name: gen_matrix +* +* Description: Deterministically generate matrix A (or the transpose of A) +* from a seed. Entries of the matrix are polynomials that look +* uniformly random. Performs rejection sampling on output of +* a XOF +* +* Arguments: - polyvec *a: pointer to ouptput matrix A +* - const uint8_t *seed: pointer to input seed +* - int transposed: boolean deciding whether A or A^T is generated +**************************************************/ +#define GEN_MATRIX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + XOF_BLOCKBYTES)/XOF_BLOCKBYTES) +// Not static for benchmarking +void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed) +{ + unsigned int ctr, i, j, k; + unsigned int buflen, off; + uint8_t buf[GEN_MATRIX_NBLOCKS*XOF_BLOCKBYTES+2]; + xof_state state; + xof_init(&state, seed); + + for(i=0;i +#include "params.h" +#include "polyvec.h" + +#define gen_matrix KYBER_NAMESPACE(gen_matrix) +void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed); + +#define indcpa_keypair_derand KYBER_NAMESPACE(indcpa_keypair_derand) +void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], + uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], + const uint8_t 
coins[KYBER_SYMBYTES]); + +#define indcpa_enc KYBER_NAMESPACE(indcpa_enc) +void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], + const uint8_t m[KYBER_INDCPA_MSGBYTES], + const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], + const uint8_t coins[KYBER_SYMBYTES]); + +#define indcpa_dec KYBER_NAMESPACE(indcpa_dec) +void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES], + const uint8_t c[KYBER_INDCPA_BYTES], + const uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES]); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/kem.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/kem.c new file mode 100644 index 0000000000..63abc1029c --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/kem.c @@ -0,0 +1,169 @@ +#include +#include +#include +#include "params.h" +#include "kem.h" +#include "indcpa.h" +#include "verify.h" +#include "symmetric.h" +#include "randombytes.h" +/************************************************* +* Name: crypto_kem_keypair_derand +* +* Description: Generates public and private key +* for CCA-secure Kyber key encapsulation mechanism +* +* Arguments: - uint8_t *pk: pointer to output public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key +* (an already allocated array of KYBER_SECRETKEYBYTES bytes) +* - uint8_t *coins: pointer to input randomness +* (an already allocated array filled with 2*KYBER_SYMBYTES random bytes) +** +* Returns 0 (success) +**************************************************/ +int crypto_kem_keypair_derand(uint8_t *pk, + uint8_t *sk, + const uint8_t *coins) +{ + indcpa_keypair_derand(pk, sk, coins); + memcpy(sk+KYBER_INDCPA_SECRETKEYBYTES, pk, KYBER_PUBLICKEYBYTES); + hash_h(sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); + /* Value z for pseudo-random output on reject */ + memcpy(sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, coins+KYBER_SYMBYTES, KYBER_SYMBYTES); + return 0; +} + 
+/************************************************* +* Name: crypto_kem_keypair +* +* Description: Generates public and private key +* for CCA-secure Kyber key encapsulation mechanism +* +* Arguments: - uint8_t *pk: pointer to output public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key +* (an already allocated array of KYBER_SECRETKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int crypto_kem_keypair(uint8_t *pk, + uint8_t *sk) +{ + uint8_t coins[2*KYBER_SYMBYTES]; + randombytes(coins, 2*KYBER_SYMBYTES); + crypto_kem_keypair_derand(pk, sk, coins); + return 0; +} + +/************************************************* +* Name: crypto_kem_enc_derand +* +* Description: Generates cipher text and shared +* secret for given public key +* +* Arguments: - uint8_t *ct: pointer to output cipher text +* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) +* - uint8_t *ss: pointer to output shared secret +* (an already allocated array of KYBER_SSBYTES bytes) +* - const uint8_t *pk: pointer to input public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* - const uint8_t *coins: pointer to input randomness +* (an already allocated array filled with KYBER_SYMBYTES random bytes) +** +* Returns 0 (success) +**************************************************/ +int crypto_kem_enc_derand(uint8_t *ct, + uint8_t *ss, + const uint8_t *pk, + const uint8_t *coins) +{ + uint8_t buf[2*KYBER_SYMBYTES]; + /* Will contain key, coins */ + uint8_t kr[2*KYBER_SYMBYTES]; + + memcpy(buf, coins, KYBER_SYMBYTES); + + /* Multitarget countermeasure for coins + contributory KEM */ + hash_h(buf+KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); + hash_g(kr, buf, 2*KYBER_SYMBYTES); + + /* coins are in kr+KYBER_SYMBYTES */ + indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES); + + memcpy(ss,kr,KYBER_SYMBYTES); + return 0; +} + +/************************************************* +* 
Name: crypto_kem_enc +* +* Description: Generates cipher text and shared +* secret for given public key +* +* Arguments: - uint8_t *ct: pointer to output cipher text +* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) +* - uint8_t *ss: pointer to output shared secret +* (an already allocated array of KYBER_SSBYTES bytes) +* - const uint8_t *pk: pointer to input public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int crypto_kem_enc(uint8_t *ct, + uint8_t *ss, + const uint8_t *pk) +{ + uint8_t coins[KYBER_SYMBYTES]; + randombytes(coins, KYBER_SYMBYTES); + crypto_kem_enc_derand(ct, ss, pk, coins); + return 0; +} + +/************************************************* +* Name: crypto_kem_dec +* +* Description: Generates shared secret for given +* cipher text and private key +* +* Arguments: - uint8_t *ss: pointer to output shared secret +* (an already allocated array of KYBER_SSBYTES bytes) +* - const uint8_t *ct: pointer to input cipher text +* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) +* - const uint8_t *sk: pointer to input private key +* (an already allocated array of KYBER_SECRETKEYBYTES bytes) +* +* Returns 0. +* +* On failure, ss will contain a pseudo-random value. 
+**************************************************/ +int crypto_kem_dec(uint8_t *ss, + const uint8_t *ct, + const uint8_t *sk) +{ + int fail; + uint8_t buf[2*KYBER_SYMBYTES]; + /* Will contain key, coins */ + uint8_t kr[2*KYBER_SYMBYTES]; + uint8_t cmp[KYBER_CIPHERTEXTBYTES+KYBER_SYMBYTES]; + const uint8_t *pk = sk+KYBER_INDCPA_SECRETKEYBYTES; + + indcpa_dec(buf, ct, sk); + + /* Multitarget countermeasure for coins + contributory KEM */ + memcpy(buf+KYBER_SYMBYTES, sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, KYBER_SYMBYTES); + hash_g(kr, buf, 2*KYBER_SYMBYTES); + + /* coins are in kr+KYBER_SYMBYTES */ + indcpa_enc(cmp, buf, pk, kr+KYBER_SYMBYTES); + + fail = verify(ct, cmp, KYBER_CIPHERTEXTBYTES); + + /* Compute rejection key */ + rkprf(ss,sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES,ct); + + /* Copy true key to return buffer if fail is false */ + cmov(ss,kr,KYBER_SYMBYTES,!fail); + + return 0; +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/kem.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/kem.h new file mode 100644 index 0000000000..234f11966b --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/kem.h @@ -0,0 +1,35 @@ +#ifndef KEM_H +#define KEM_H + +#include +#include "params.h" + +#define CRYPTO_SECRETKEYBYTES KYBER_SECRETKEYBYTES +#define CRYPTO_PUBLICKEYBYTES KYBER_PUBLICKEYBYTES +#define CRYPTO_CIPHERTEXTBYTES KYBER_CIPHERTEXTBYTES +#define CRYPTO_BYTES KYBER_SSBYTES + +#if (KYBER_K == 2) +#define CRYPTO_ALGNAME "Kyber512" +#elif (KYBER_K == 3) +#define CRYPTO_ALGNAME "Kyber768" +#elif (KYBER_K == 4) +#define CRYPTO_ALGNAME "Kyber1024" +#endif + +#define crypto_kem_keypair_derand KYBER_NAMESPACE(keypair_derand) +int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); + +#define crypto_kem_keypair KYBER_NAMESPACE(keypair) +int crypto_kem_keypair(uint8_t *pk, uint8_t *sk); + +#define crypto_kem_enc_derand KYBER_NAMESPACE(enc_derand) +int crypto_kem_enc_derand(uint8_t 
*ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); + +#define crypto_kem_enc KYBER_NAMESPACE(enc) +int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); + +#define crypto_kem_dec KYBER_NAMESPACE(dec) +int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/ntt.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/ntt.c new file mode 100644 index 0000000000..2f2eb10b2f --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/ntt.c @@ -0,0 +1,146 @@ +#include +#include "params.h" +#include "ntt.h" +#include "reduce.h" + +/* Code to generate zetas and zetas_inv used in the number-theoretic transform: + +#define KYBER_ROOT_OF_UNITY 17 + +static const uint8_t tree[128] = { + 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, + 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124, + 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122, + 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126, + 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121, + 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, + 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, + 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127 +}; + +void init_ntt() { + unsigned int i; + int16_t tmp[128]; + + tmp[0] = MONT; + for(i=1;i<128;i++) + tmp[i] = fqmul(tmp[i-1],MONT*KYBER_ROOT_OF_UNITY % KYBER_Q); + + for(i=0;i<128;i++) { + zetas[i] = tmp[tree[i]]; + if(zetas[i] > KYBER_Q/2) + zetas[i] -= KYBER_Q; + if(zetas[i] < -KYBER_Q/2) + zetas[i] += KYBER_Q; + } +} +*/ + +const int16_t zetas[128] = { + -1044, -758, -359, -1517, 1493, 1422, 287, 202, + -171, 622, 1577, 182, 962, -1202, -1474, 1468, + 573, -1325, 264, 383, -829, 1458, -1602, -130, + -681, 1017, 732, 608, -1542, 411, -205, -1571, + 1223, 652, -552, 1015, -1293, 1491, -282, -1544, + 516, -8, 
-320, -666, -1618, -1162, 126, 1469, + -853, -90, -271, 830, 107, -1421, -247, -951, + -398, 961, -1508, -725, 448, -1065, 677, -1275, + -1103, 430, 555, 843, -1251, 871, 1550, 105, + 422, 587, 177, -235, -291, -460, 1574, 1653, + -246, 778, 1159, -147, -777, 1483, -602, 1119, + -1590, 644, -872, 349, 418, 329, -156, -75, + 817, 1097, 603, 610, 1322, -1285, -1465, 384, + -1215, -136, 1218, -1335, -874, 220, -1187, -1659, + -1185, -1530, -1278, 794, -1510, -854, -870, 478, + -108, -308, 996, 991, 958, -1460, 1522, 1628 +}; + +/************************************************* +* Name: fqmul +* +* Description: Multiplication followed by Montgomery reduction +* +* Arguments: - int16_t a: first factor +* - int16_t b: second factor +* +* Returns 16-bit integer congruent to a*b*R^{-1} mod q +**************************************************/ +static int16_t fqmul(int16_t a, int16_t b) { + return montgomery_reduce((int32_t)a*b); +} + +/************************************************* +* Name: ntt +* +* Description: Inplace number-theoretic transform (NTT) in Rq. +* input is in standard order, output is in bitreversed order +* +* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq +**************************************************/ +void ntt(int16_t r[256]) { + unsigned int len, start, j, k; + int16_t t, zeta; + + k = 1; + for(len = 128; len >= 2; len >>= 1) { + for(start = 0; start < 256; start = j + len) { + zeta = zetas[k++]; + for(j = start; j < start + len; j++) { + t = fqmul(zeta, r[j + len]); + r[j + len] = r[j] - t; + r[j] = r[j] + t; + } + } + } +} + +/************************************************* +* Name: invntt_tomont +* +* Description: Inplace inverse number-theoretic transform in Rq and +* multiplication by Montgomery factor 2^16. 
+* Input is in bitreversed order, output is in standard order +* +* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq +**************************************************/ +void invntt(int16_t r[256]) { + unsigned int start, len, j, k; + int16_t t, zeta; + const int16_t f = 1441; // mont^2/128 + + k = 127; + for(len = 2; len <= 128; len <<= 1) { + for(start = 0; start < 256; start = j + len) { + zeta = zetas[k--]; + for(j = start; j < start + len; j++) { + t = r[j]; + r[j] = barrett_reduce(t + r[j + len]); + r[j + len] = r[j + len] - t; + r[j + len] = fqmul(zeta, r[j + len]); + } + } + } + + for(j = 0; j < 256; j++) + r[j] = fqmul(r[j], f); +} + +/************************************************* +* Name: basemul +* +* Description: Multiplication of polynomials in Zq[X]/(X^2-zeta) +* used for multiplication of elements in Rq in NTT domain +* +* Arguments: - int16_t r[2]: pointer to the output polynomial +* - const int16_t a[2]: pointer to the first factor +* - const int16_t b[2]: pointer to the second factor +* - int16_t zeta: integer defining the reduction polynomial +**************************************************/ +void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta) +{ + r[0] = fqmul(a[1], b[1]); + r[0] = fqmul(r[0], zeta); + r[0] += fqmul(a[0], b[0]); + r[1] = fqmul(a[0], b[1]); + r[1] += fqmul(a[1], b[0]); +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/ntt.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/ntt.h new file mode 100644 index 0000000000..227ea74f08 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/ntt.h @@ -0,0 +1,19 @@ +#ifndef NTT_H +#define NTT_H + +#include +#include "params.h" + +#define zetas KYBER_NAMESPACE(zetas) +extern const int16_t zetas[128]; + +#define ntt KYBER_NAMESPACE(ntt) +void ntt(int16_t poly[256]); + +#define invntt KYBER_NAMESPACE(invntt) +void invntt(int16_t poly[256]); + +#define basemul 
KYBER_NAMESPACE(basemul) +void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/params.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/params.h new file mode 100644 index 0000000000..36b2b987f3 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/params.h @@ -0,0 +1,55 @@ +#ifndef PARAMS_H +#define PARAMS_H + +#ifndef KYBER_K +#define KYBER_K 3 /* Change this for different security strengths */ +#endif + + +/* Don't change parameters below this line */ +#if (KYBER_K == 2) +#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_512_ipd_ref_##s +#elif (KYBER_K == 3) +#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_768_ipd_ref_##s +#elif (KYBER_K == 4) +#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_1024_ipd_ref_##s +#else +#error "KYBER_K must be in {2,3,4}" +#endif + +#define KYBER_N 256 +#define KYBER_Q 3329 + +#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ +#define KYBER_SSBYTES 32 /* size in bytes of shared key */ + +#define KYBER_POLYBYTES 384 +#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) + +#if KYBER_K == 2 +#define KYBER_ETA1 3 +#define KYBER_POLYCOMPRESSEDBYTES 128 +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) +#elif KYBER_K == 3 +#define KYBER_ETA1 2 +#define KYBER_POLYCOMPRESSEDBYTES 128 +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) +#elif KYBER_K == 4 +#define KYBER_ETA1 2 +#define KYBER_POLYCOMPRESSEDBYTES 160 +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352) +#endif + +#define KYBER_ETA2 2 + +#define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES) +#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) +#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES) +#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) + +#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) +/* 32 bytes of additional space to save H(pk) */ 
+#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) +#define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES) + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/poly.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/poly.c new file mode 100644 index 0000000000..0fe5a20f63 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/poly.c @@ -0,0 +1,360 @@ +#include +#include "params.h" +#include "poly.h" +#include "ntt.h" +#include "reduce.h" +#include "cbd.h" +#include "symmetric.h" + +/************************************************* +* Name: poly_compress +* +* Description: Compression and subsequent serialization of a polynomial +* +* Arguments: - uint8_t *r: pointer to output byte array +* (of length KYBER_POLYCOMPRESSEDBYTES) +* - const poly *a: pointer to input polynomial +**************************************************/ +void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) +{ + unsigned int i,j; + int32_t u; + uint32_t d0; + uint8_t t[8]; + +#if (KYBER_POLYCOMPRESSEDBYTES == 128) + + for(i=0;icoeffs[8*i+j]; + u += (u >> 15) & KYBER_Q; +/* t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */ + d0 = u << 4; + d0 += 1665; + d0 *= 80635; + d0 >>= 28; + t[j] = d0 & 0xf; + } + + r[0] = t[0] | (t[1] << 4); + r[1] = t[2] | (t[3] << 4); + r[2] = t[4] | (t[5] << 4); + r[3] = t[6] | (t[7] << 4); + r += 4; + } +#elif (KYBER_POLYCOMPRESSEDBYTES == 160) + for(i=0;icoeffs[8*i+j]; + u += (u >> 15) & KYBER_Q; +/* t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */ + d0 = u << 5; + d0 += 1664; + d0 *= 40318; + d0 >>= 27; + t[j] = d0 & 0x1f; + } + + r[0] = (t[0] >> 0) | (t[1] << 5); + r[1] = (t[1] >> 3) | (t[2] << 2) | (t[3] << 7); + r[2] = (t[3] >> 1) | (t[4] << 4); + r[3] = (t[4] >> 4) | (t[5] << 1) | (t[6] << 6); + r[4] = (t[6] >> 2) | (t[7] << 3); + r += 5; + } +#else +#error "KYBER_POLYCOMPRESSEDBYTES needs to be in 
{128, 160}" +#endif +} + +/************************************************* +* Name: poly_decompress +* +* Description: De-serialization and subsequent decompression of a polynomial; +* approximate inverse of poly_compress +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: pointer to input byte array +* (of length KYBER_POLYCOMPRESSEDBYTES bytes) +**************************************************/ +void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]) +{ + unsigned int i; + +#if (KYBER_POLYCOMPRESSEDBYTES == 128) + for(i=0;icoeffs[2*i+0] = (((uint16_t)(a[0] & 15)*KYBER_Q) + 8) >> 4; + r->coeffs[2*i+1] = (((uint16_t)(a[0] >> 4)*KYBER_Q) + 8) >> 4; + a += 1; + } +#elif (KYBER_POLYCOMPRESSEDBYTES == 160) + unsigned int j; + uint8_t t[8]; + for(i=0;i> 0); + t[1] = (a[0] >> 5) | (a[1] << 3); + t[2] = (a[1] >> 2); + t[3] = (a[1] >> 7) | (a[2] << 1); + t[4] = (a[2] >> 4) | (a[3] << 4); + t[5] = (a[3] >> 1); + t[6] = (a[3] >> 6) | (a[4] << 2); + t[7] = (a[4] >> 3); + a += 5; + + for(j=0;j<8;j++) + r->coeffs[8*i+j] = ((uint32_t)(t[j] & 31)*KYBER_Q + 16) >> 5; + } +#else +#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}" +#endif +} + +/************************************************* +* Name: poly_tobytes +* +* Description: Serialization of a polynomial +* +* Arguments: - uint8_t *r: pointer to output byte array +* (needs space for KYBER_POLYBYTES bytes) +* - const poly *a: pointer to input polynomial +**************************************************/ +void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a) +{ + unsigned int i; + uint16_t t0, t1; + + for(i=0;icoeffs[2*i]; + t0 += ((int16_t)t0 >> 15) & KYBER_Q; + t1 = a->coeffs[2*i+1]; + t1 += ((int16_t)t1 >> 15) & KYBER_Q; + r[3*i+0] = (t0 >> 0); + r[3*i+1] = (t0 >> 8) | (t1 << 4); + r[3*i+2] = (t1 >> 4); + } +} + +/************************************************* +* Name: poly_frombytes +* +* Description: De-serialization of a polynomial; +* inverse of 
poly_tobytes +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: pointer to input byte array +* (of KYBER_POLYBYTES bytes) +**************************************************/ +void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]) +{ + unsigned int i; + for(i=0;icoeffs[2*i] = ((a[3*i+0] >> 0) | ((uint16_t)a[3*i+1] << 8)) & 0xFFF; + r->coeffs[2*i+1] = ((a[3*i+1] >> 4) | ((uint16_t)a[3*i+2] << 4)) & 0xFFF; + } +} + +/************************************************* +* Name: poly_frommsg +* +* Description: Convert 32-byte message to polynomial +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *msg: pointer to input message +**************************************************/ +void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]) +{ + unsigned int i,j; + int16_t mask; + +#if (KYBER_INDCPA_MSGBYTES != KYBER_N/8) +#error "KYBER_INDCPA_MSGBYTES must be equal to KYBER_N/8 bytes!" +#endif + + for(i=0;i> j)&1); + r->coeffs[8*i+j] = mask & ((KYBER_Q+1)/2); + } + } +} + +/************************************************* +* Name: poly_tomsg +* +* Description: Convert polynomial to 32-byte message +* +* Arguments: - uint8_t *msg: pointer to output message +* - const poly *a: pointer to input polynomial +**************************************************/ +void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *a) +{ + unsigned int i,j; + uint32_t t; + + for(i=0;icoeffs[8*i+j]; + // t += ((int16_t)t >> 15) & KYBER_Q; + // t = (((t << 1) + KYBER_Q/2)/KYBER_Q) & 1; + t <<= 1; + t += 1665; + t *= 80635; + t >>= 28; + t &= 1; + msg[i] |= t << j; + } + } +} + +/************************************************* +* Name: poly_getnoise_eta1 +* +* Description: Sample a polynomial deterministically from a seed and a nonce, +* with output polynomial close to centered binomial distribution +* with parameter KYBER_ETA1 +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *seed: 
pointer to input seed +* (of length KYBER_SYMBYTES bytes) +* - uint8_t nonce: one-byte input nonce +**************************************************/ +void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) +{ + uint8_t buf[KYBER_ETA1*KYBER_N/4]; + prf(buf, sizeof(buf), seed, nonce); + poly_cbd_eta1(r, buf); +} + +/************************************************* +* Name: poly_getnoise_eta2 +* +* Description: Sample a polynomial deterministically from a seed and a nonce, +* with output polynomial close to centered binomial distribution +* with parameter KYBER_ETA2 +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *seed: pointer to input seed +* (of length KYBER_SYMBYTES bytes) +* - uint8_t nonce: one-byte input nonce +**************************************************/ +void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) +{ + uint8_t buf[KYBER_ETA2*KYBER_N/4]; + prf(buf, sizeof(buf), seed, nonce); + poly_cbd_eta2(r, buf); +} + + +/************************************************* +* Name: poly_ntt +* +* Description: Computes negacyclic number-theoretic transform (NTT) of +* a polynomial in place; +* inputs assumed to be in normal order, output in bitreversed order +* +* Arguments: - uint16_t *r: pointer to in/output polynomial +**************************************************/ +void poly_ntt(poly *r) +{ + ntt(r->coeffs); + poly_reduce(r); +} + +/************************************************* +* Name: poly_invntt_tomont +* +* Description: Computes inverse of negacyclic number-theoretic transform (NTT) +* of a polynomial in place; +* inputs assumed to be in bitreversed order, output in normal order +* +* Arguments: - uint16_t *a: pointer to in/output polynomial +**************************************************/ +void poly_invntt_tomont(poly *r) +{ + invntt(r->coeffs); +} + +/************************************************* +* Name: poly_basemul_montgomery +* +* 
Description: Multiplication of two polynomials in NTT domain +* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_basemul_montgomery(poly *r, const poly *a, const poly *b) +{ + unsigned int i; + for(i=0;icoeffs[4*i], &a->coeffs[4*i], &b->coeffs[4*i], zetas[64+i]); + basemul(&r->coeffs[4*i+2], &a->coeffs[4*i+2], &b->coeffs[4*i+2], -zetas[64+i]); + } +} + +/************************************************* +* Name: poly_tomont +* +* Description: Inplace conversion of all coefficients of a polynomial +* from normal domain to Montgomery domain +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void poly_tomont(poly *r) +{ + unsigned int i; + const int16_t f = (1ULL << 32) % KYBER_Q; + for(i=0;icoeffs[i] = montgomery_reduce((int32_t)r->coeffs[i]*f); +} + +/************************************************* +* Name: poly_reduce +* +* Description: Applies Barrett reduction to all coefficients of a polynomial +* for details of the Barrett reduction see comments in reduce.c +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void poly_reduce(poly *r) +{ + unsigned int i; + for(i=0;icoeffs[i] = barrett_reduce(r->coeffs[i]); +} + +/************************************************* +* Name: poly_add +* +* Description: Add two polynomials; no modular reduction is performed +* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_add(poly *r, const poly *a, const poly *b) +{ + unsigned int i; + for(i=0;icoeffs[i] = a->coeffs[i] + b->coeffs[i]; +} + +/************************************************* +* Name: 
poly_sub +* +* Description: Subtract two polynomials; no modular reduction is performed +* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_sub(poly *r, const poly *a, const poly *b) +{ + unsigned int i; + for(i=0;icoeffs[i] = a->coeffs[i] - b->coeffs[i]; +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/poly.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/poly.h new file mode 100644 index 0000000000..9a99c7cdad --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/poly.h @@ -0,0 +1,53 @@ +#ifndef POLY_H +#define POLY_H + +#include +#include "params.h" + +/* + * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial + * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1] + */ +typedef struct{ + int16_t coeffs[KYBER_N]; +} poly; + +#define poly_compress KYBER_NAMESPACE(poly_compress) +void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a); +#define poly_decompress KYBER_NAMESPACE(poly_decompress) +void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]); + +#define poly_tobytes KYBER_NAMESPACE(poly_tobytes) +void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a); +#define poly_frombytes KYBER_NAMESPACE(poly_frombytes) +void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]); + +#define poly_frommsg KYBER_NAMESPACE(poly_frommsg) +void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]); +#define poly_tomsg KYBER_NAMESPACE(poly_tomsg) +void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r); + +#define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1) +void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); + +#define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2) +void poly_getnoise_eta2(poly 
*r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); + +#define poly_ntt KYBER_NAMESPACE(poly_ntt) +void poly_ntt(poly *r); +#define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont) +void poly_invntt_tomont(poly *r); +#define poly_basemul_montgomery KYBER_NAMESPACE(poly_basemul_montgomery) +void poly_basemul_montgomery(poly *r, const poly *a, const poly *b); +#define poly_tomont KYBER_NAMESPACE(poly_tomont) +void poly_tomont(poly *r); + +#define poly_reduce KYBER_NAMESPACE(poly_reduce) +void poly_reduce(poly *r); + +#define poly_add KYBER_NAMESPACE(poly_add) +void poly_add(poly *r, const poly *a, const poly *b); +#define poly_sub KYBER_NAMESPACE(poly_sub) +void poly_sub(poly *r, const poly *a, const poly *b); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/polyvec.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/polyvec.c new file mode 100644 index 0000000000..661c71ec32 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/polyvec.c @@ -0,0 +1,247 @@ +#include +#include "params.h" +#include "poly.h" +#include "polyvec.h" + +/************************************************* +* Name: polyvec_compress +* +* Description: Compress and serialize vector of polynomials +* +* Arguments: - uint8_t *r: pointer to output byte array +* (needs space for KYBER_POLYVECCOMPRESSEDBYTES) +* - const polyvec *a: pointer to input vector of polynomials +**************************************************/ +void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) +{ + unsigned int i,j,k; + uint64_t d0; + +#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) + uint16_t t[8]; + for(i=0;ivec[i].coeffs[8*j+k]; + t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; +/* t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */ + d0 = t[k]; + d0 <<= 11; + d0 += 1664; + d0 *= 645084; + d0 >>= 31; + t[k] = d0 & 0x7ff; + + } + + r[ 0] = (t[0] >> 0); + r[ 1] = (t[0] >> 8) | (t[1] << 3); + r[ 
2] = (t[1] >> 5) | (t[2] << 6); + r[ 3] = (t[2] >> 2); + r[ 4] = (t[2] >> 10) | (t[3] << 1); + r[ 5] = (t[3] >> 7) | (t[4] << 4); + r[ 6] = (t[4] >> 4) | (t[5] << 7); + r[ 7] = (t[5] >> 1); + r[ 8] = (t[5] >> 9) | (t[6] << 2); + r[ 9] = (t[6] >> 6) | (t[7] << 5); + r[10] = (t[7] >> 3); + r += 11; + } + } +#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) + uint16_t t[4]; + for(i=0;ivec[i].coeffs[4*j+k]; + t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; +/* t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */ + d0 = t[k]; + d0 <<= 10; + d0 += 1665; + d0 *= 1290167; + d0 >>= 32; + t[k] = d0 & 0x3ff; + } + + r[0] = (t[0] >> 0); + r[1] = (t[0] >> 8) | (t[1] << 2); + r[2] = (t[1] >> 6) | (t[2] << 4); + r[3] = (t[2] >> 4) | (t[3] << 6); + r[4] = (t[3] >> 2); + r += 5; + } + } +#else +#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" +#endif +} + +/************************************************* +* Name: polyvec_decompress +* +* Description: De-serialize and decompress vector of polynomials; +* approximate inverse of polyvec_compress +* +* Arguments: - polyvec *r: pointer to output vector of polynomials +* - const uint8_t *a: pointer to input byte array +* (of length KYBER_POLYVECCOMPRESSEDBYTES) +**************************************************/ +void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]) +{ + unsigned int i,j,k; + +#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) + uint16_t t[8]; + for(i=0;i> 0) | ((uint16_t)a[ 1] << 8); + t[1] = (a[1] >> 3) | ((uint16_t)a[ 2] << 5); + t[2] = (a[2] >> 6) | ((uint16_t)a[ 3] << 2) | ((uint16_t)a[4] << 10); + t[3] = (a[4] >> 1) | ((uint16_t)a[ 5] << 7); + t[4] = (a[5] >> 4) | ((uint16_t)a[ 6] << 4); + t[5] = (a[6] >> 7) | ((uint16_t)a[ 7] << 1) | ((uint16_t)a[8] << 9); + t[6] = (a[8] >> 2) | ((uint16_t)a[ 9] << 6); + t[7] = (a[9] >> 5) | ((uint16_t)a[10] << 3); + a += 11; + + for(k=0;k<8;k++) + r->vec[i].coeffs[8*j+k] = ((uint32_t)(t[k] & 
0x7FF)*KYBER_Q + 1024) >> 11; + } + } +#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) + uint16_t t[4]; + for(i=0;i> 0) | ((uint16_t)a[1] << 8); + t[1] = (a[1] >> 2) | ((uint16_t)a[2] << 6); + t[2] = (a[2] >> 4) | ((uint16_t)a[3] << 4); + t[3] = (a[3] >> 6) | ((uint16_t)a[4] << 2); + a += 5; + + for(k=0;k<4;k++) + r->vec[i].coeffs[4*j+k] = ((uint32_t)(t[k] & 0x3FF)*KYBER_Q + 512) >> 10; + } + } +#else +#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" +#endif +} + +/************************************************* +* Name: polyvec_tobytes +* +* Description: Serialize vector of polynomials +* +* Arguments: - uint8_t *r: pointer to output byte array +* (needs space for KYBER_POLYVECBYTES) +* - const polyvec *a: pointer to input vector of polynomials +**************************************************/ +void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_frombytes +* +* Description: De-serialize vector of polynomials; +* inverse of polyvec_tobytes +* +* Arguments: - polyvec *r: pointer to output vector of polynomials +* - const uint8_t *a: pointer to input byte array +* (of length KYBER_POLYVECBYTES) +**************************************************/ +void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]) +{ + unsigned int i; + for(i=0;ivec[i], a+i*KYBER_POLYBYTES); +} + +/************************************************* +* Name: polyvec_ntt +* +* Description: Apply forward NTT to all elements of a vector of polynomials +* +* Arguments: - polyvec *r: pointer to in/output vector of polynomials +**************************************************/ +void polyvec_ntt(polyvec *r) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_invntt_tomont +* +* Description: Apply inverse NTT to all elements of a vector of polynomials 
+* and multiply by Montgomery factor 2^16 +* +* Arguments: - polyvec *r: pointer to in/output vector of polynomials +**************************************************/ +void polyvec_invntt_tomont(polyvec *r) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_basemul_acc_montgomery +* +* Description: Multiply elements of a and b in NTT domain, accumulate into r, +* and multiply by 2^-16. +* +* Arguments: - poly *r: pointer to output polynomial +* - const polyvec *a: pointer to first input vector of polynomials +* - const polyvec *b: pointer to second input vector of polynomials +**************************************************/ +void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) +{ + unsigned int i; + poly t; + + poly_basemul_montgomery(r, &a->vec[0], &b->vec[0]); + for(i=1;ivec[i], &b->vec[i]); + poly_add(r, r, &t); + } + + poly_reduce(r); +} + +/************************************************* +* Name: polyvec_reduce +* +* Description: Applies Barrett reduction to each coefficient +* of each element of a vector of polynomials; +* for details of the Barrett reduction see comments in reduce.c +* +* Arguments: - polyvec *r: pointer to input/output polynomial +**************************************************/ +void polyvec_reduce(polyvec *r) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_add +* +* Description: Add vectors of polynomials +* +* Arguments: - polyvec *r: pointer to output vector of polynomials +* - const polyvec *a: pointer to first input vector of polynomials +* - const polyvec *b: pointer to second input vector of polynomials +**************************************************/ +void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) +{ + unsigned int i; + for(i=0;ivec[i], &a->vec[i], &b->vec[i]); +} diff --git 
a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/polyvec.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/polyvec.h new file mode 100644 index 0000000000..57b605494e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/polyvec.h @@ -0,0 +1,36 @@ +#ifndef POLYVEC_H +#define POLYVEC_H + +#include +#include "params.h" +#include "poly.h" + +typedef struct{ + poly vec[KYBER_K]; +} polyvec; + +#define polyvec_compress KYBER_NAMESPACE(polyvec_compress) +void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a); +#define polyvec_decompress KYBER_NAMESPACE(polyvec_decompress) +void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]); + +#define polyvec_tobytes KYBER_NAMESPACE(polyvec_tobytes) +void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a); +#define polyvec_frombytes KYBER_NAMESPACE(polyvec_frombytes) +void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]); + +#define polyvec_ntt KYBER_NAMESPACE(polyvec_ntt) +void polyvec_ntt(polyvec *r); +#define polyvec_invntt_tomont KYBER_NAMESPACE(polyvec_invntt_tomont) +void polyvec_invntt_tomont(polyvec *r); + +#define polyvec_basemul_acc_montgomery KYBER_NAMESPACE(polyvec_basemul_acc_montgomery) +void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b); + +#define polyvec_reduce KYBER_NAMESPACE(polyvec_reduce) +void polyvec_reduce(polyvec *r); + +#define polyvec_add KYBER_NAMESPACE(polyvec_add) +void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/reduce.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/reduce.c new file mode 100644 index 0000000000..9d8e7edf83 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/reduce.c @@ -0,0 +1,42 @@ +#include +#include "params.h" +#include "reduce.h" + 
+/************************************************* +* Name: montgomery_reduce +* +* Description: Montgomery reduction; given a 32-bit integer a, computes +* 16-bit integer congruent to a * R^-1 mod q, where R=2^16 +* +* Arguments: - int32_t a: input integer to be reduced; +* has to be in {-q2^15,...,q2^15-1} +* +* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. +**************************************************/ +int16_t montgomery_reduce(int32_t a) +{ + int16_t t; + + t = (int16_t)a*QINV; + t = (a - (int32_t)t*KYBER_Q) >> 16; + return t; +} + +/************************************************* +* Name: barrett_reduce +* +* Description: Barrett reduction; given a 16-bit integer a, computes +* centered representative congruent to a mod q in {-(q-1)/2,...,(q-1)/2} +* +* Arguments: - int16_t a: input integer to be reduced +* +* Returns: integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q. +**************************************************/ +int16_t barrett_reduce(int16_t a) { + int16_t t; + const int16_t v = ((1<<26) + KYBER_Q/2)/KYBER_Q; + + t = ((int32_t)v*a + (1<<25)) >> 26; + t *= KYBER_Q; + return a - t; +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/reduce.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/reduce.h new file mode 100644 index 0000000000..c1bc1e4c7b --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/reduce.h @@ -0,0 +1,16 @@ +#ifndef REDUCE_H +#define REDUCE_H + +#include +#include "params.h" + +#define MONT -1044 // 2^16 mod q +#define QINV -3327 // q^-1 mod 2^16 + +#define montgomery_reduce KYBER_NAMESPACE(montgomery_reduce) +int16_t montgomery_reduce(int32_t a); + +#define barrett_reduce KYBER_NAMESPACE(barrett_reduce) +int16_t barrett_reduce(int16_t a); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/symmetric-shake.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/symmetric-shake.c new file 
mode 100644 index 0000000000..20f451882e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/symmetric-shake.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include "params.h" +#include "symmetric.h" +#include "fips202.h" + +/************************************************* +* Name: kyber_shake128_absorb +* +* Description: Absorb step of the SHAKE128 specialized for the Kyber context. +* +* Arguments: - shake128incctx *state: pointer to (uninitialized) output Keccak state +* - const uint8_t *seed: pointer to KYBER_SYMBYTES input to be absorbed into state +* - uint8_t x: additional byte of input +* - uint8_t y: additional byte of input +**************************************************/ +void kyber_shake128_absorb(shake128incctx *state, + const uint8_t seed[KYBER_SYMBYTES], + uint8_t x, + uint8_t y) +{ + uint8_t extseed[KYBER_SYMBYTES+2]; + + memcpy(extseed, seed, KYBER_SYMBYTES); + extseed[KYBER_SYMBYTES+0] = x; + extseed[KYBER_SYMBYTES+1] = y; + + shake128_absorb_once(state, extseed, sizeof(extseed)); +} + +/************************************************* +* Name: kyber_shake256_prf +* +* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input +* and then generates outlen bytes of SHAKE256 output +* +* Arguments: - uint8_t *out: pointer to output +* - size_t outlen: number of requested output bytes +* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) +* - uint8_t nonce: single-byte nonce (public PRF input) +**************************************************/ +void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce) +{ + uint8_t extkey[KYBER_SYMBYTES+1]; + + memcpy(extkey, key, KYBER_SYMBYTES); + extkey[KYBER_SYMBYTES] = nonce; + + shake256(out, outlen, extkey, sizeof(extkey)); +} + +/************************************************* +* Name: kyber_shake256_rkprf +* +* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input +* 
and then generates KYBER_SSBYTES bytes of SHAKE256 output +* +* Arguments: - uint8_t *out: pointer to output (of length KYBER_SSBYTES) +* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) +* - const uint8_t *input: pointer to the input +* (of length KYBER_CIPHERTEXTBYTES) +**************************************************/ +void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]) +{ + shake256incctx s; + + shake256_inc_init(&s); + shake256_inc_absorb(&s, key, KYBER_SYMBYTES); + shake256_inc_absorb(&s, input, KYBER_CIPHERTEXTBYTES); + shake256_inc_finalize(&s); + shake256_inc_squeeze(out, KYBER_SSBYTES, &s); + shake256_inc_ctx_release(&s); +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/symmetric.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/symmetric.h new file mode 100644 index 0000000000..2acc66f98d --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/symmetric.h @@ -0,0 +1,35 @@ +#ifndef SYMMETRIC_H +#define SYMMETRIC_H + +#include +#include +#include "params.h" + +#include "fips202.h" + +typedef shake128incctx xof_state; + +#define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb) +void kyber_shake128_absorb(shake128incctx *s, + const uint8_t seed[KYBER_SYMBYTES], + uint8_t x, + uint8_t y); + +#define kyber_shake256_prf KYBER_NAMESPACE(kyber_shake256_prf) +void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce); + +#define kyber_shake256_rkprf KYBER_NAMESPACE(kyber_shake256_rkprf) +void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]); + +#define XOF_BLOCKBYTES SHAKE128_RATE + +#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) +#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES) +#define xof_init(STATE, SEED) shake128_inc_init(STATE) +#define 
xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED, X, Y) +#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) +#define xof_release(STATE) shake128_inc_ctx_release(STATE) +#define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, NONCE) +#define rkprf(OUT, KEY, INPUT) kyber_shake256_rkprf(OUT, KEY, INPUT) + +#endif /* SYMMETRIC_H */ diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/verify.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/verify.c new file mode 100644 index 0000000000..ed4a6541f8 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/verify.c @@ -0,0 +1,47 @@ +#include +#include +#include "verify.h" + +/************************************************* +* Name: verify +* +* Description: Compare two arrays for equality in constant time. +* +* Arguments: const uint8_t *a: pointer to first byte array +* const uint8_t *b: pointer to second byte array +* size_t len: length of the byte arrays +* +* Returns 0 if the byte arrays are equal, 1 otherwise +**************************************************/ +int verify(const uint8_t *a, const uint8_t *b, size_t len) +{ + size_t i; + uint8_t r = 0; + + for(i=0;i> 63; +} + +/************************************************* +* Name: cmov +* +* Description: Copy len bytes from x to r if b is 1; +* don't modify r if b is 0. Requires b to be in {0,1}; +* assumes two's complement representation of negative integers. +* Runs in constant time. 
+* +* Arguments: uint8_t *r: pointer to output byte array +* const uint8_t *x: pointer to input byte array +* size_t len: Amount of bytes to be copied +* uint8_t b: Condition bit; has to be in {0,1} +**************************************************/ +void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) +{ + size_t i; + + b = -b; + for(i=0;i +#include +#include "params.h" + +#define verify KYBER_NAMESPACE(verify) +int verify(const uint8_t *a, const uint8_t *b, size_t len); + +#define cmov KYBER_NAMESPACE(cmov) +void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/LICENSE b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/LICENSE new file mode 100644 index 0000000000..7922ab8007 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/LICENSE @@ -0,0 +1,6 @@ +Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/); +or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html). + +For Keccak and AES we are using public-domain +code from sources and by authors listed in +comments on top of the respective files. 
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/align.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/align.h new file mode 100644 index 0000000000..3463866f37 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/align.h @@ -0,0 +1,19 @@ +#ifndef ALIGN_H +#define ALIGN_H + +#include +#include + +#define ALIGNED_UINT8(N) \ + union { \ + uint8_t coeffs[N]; \ + __m256i vec[(N+31)/32]; \ + } + +#define ALIGNED_INT16(N) \ + union { \ + int16_t coeffs[N]; \ + __m256i vec[(N+15)/16]; \ + } + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/api.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/api.h new file mode 100644 index 0000000000..a154e80f1d --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/api.h @@ -0,0 +1,66 @@ +#ifndef API_H +#define API_H + +#include + +#define pqcrystals_kyber512_SECRETKEYBYTES 1632 +#define pqcrystals_kyber512_PUBLICKEYBYTES 800 +#define pqcrystals_kyber512_CIPHERTEXTBYTES 768 +#define pqcrystals_kyber512_KEYPAIRCOINBYTES 64 +#define pqcrystals_kyber512_ENCCOINBYTES 32 +#define pqcrystals_kyber512_BYTES 32 + +#define pqcrystals_kyber512_avx2_SECRETKEYBYTES pqcrystals_kyber512_SECRETKEYBYTES +#define pqcrystals_kyber512_avx2_PUBLICKEYBYTES pqcrystals_kyber512_PUBLICKEYBYTES +#define pqcrystals_kyber512_avx2_CIPHERTEXTBYTES pqcrystals_kyber512_CIPHERTEXTBYTES +#define pqcrystals_kyber512_avx2_KEYPAIRCOINBYTES pqcrystals_kyber512_KEYPAIRCOINBYTES +#define pqcrystals_kyber512_avx2_ENCCOINBYTES pqcrystals_kyber512_ENCCOINBYTES +#define pqcrystals_kyber512_avx2_BYTES pqcrystals_kyber512_BYTES + +int pqcrystals_kyber512_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); +int pqcrystals_kyber512_avx2_keypair(uint8_t *pk, uint8_t *sk); +int pqcrystals_kyber512_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); +int pqcrystals_kyber512_avx2_enc(uint8_t 
*ct, uint8_t *ss, const uint8_t *pk); +int pqcrystals_kyber512_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#define pqcrystals_kyber768_SECRETKEYBYTES 2400 +#define pqcrystals_kyber768_PUBLICKEYBYTES 1184 +#define pqcrystals_kyber768_CIPHERTEXTBYTES 1088 +#define pqcrystals_kyber768_KEYPAIRCOINBYTES 64 +#define pqcrystals_kyber768_ENCCOINBYTES 32 +#define pqcrystals_kyber768_BYTES 32 + +#define pqcrystals_kyber768_avx2_SECRETKEYBYTES pqcrystals_kyber768_SECRETKEYBYTES +#define pqcrystals_kyber768_avx2_PUBLICKEYBYTES pqcrystals_kyber768_PUBLICKEYBYTES +#define pqcrystals_kyber768_avx2_CIPHERTEXTBYTES pqcrystals_kyber768_CIPHERTEXTBYTES +#define pqcrystals_kyber768_avx2_KEYPAIRCOINBYTES pqcrystals_kyber768_KEYPAIRCOINBYTES +#define pqcrystals_kyber768_avx2_ENCCOINBYTES pqcrystals_kyber768_ENCCOINBYTES +#define pqcrystals_kyber768_avx2_BYTES pqcrystals_kyber768_BYTES + +int pqcrystals_kyber768_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); +int pqcrystals_kyber768_avx2_keypair(uint8_t *pk, uint8_t *sk); +int pqcrystals_kyber768_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); +int pqcrystals_kyber768_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int pqcrystals_kyber768_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#define pqcrystals_kyber1024_SECRETKEYBYTES 3168 +#define pqcrystals_kyber1024_PUBLICKEYBYTES 1568 +#define pqcrystals_kyber1024_CIPHERTEXTBYTES 1568 +#define pqcrystals_kyber1024_KEYPAIRCOINBYTES 64 +#define pqcrystals_kyber1024_ENCCOINBYTES 32 +#define pqcrystals_kyber1024_BYTES 32 + +#define pqcrystals_kyber1024_avx2_SECRETKEYBYTES pqcrystals_kyber1024_SECRETKEYBYTES +#define pqcrystals_kyber1024_avx2_PUBLICKEYBYTES pqcrystals_kyber1024_PUBLICKEYBYTES +#define pqcrystals_kyber1024_avx2_CIPHERTEXTBYTES pqcrystals_kyber1024_CIPHERTEXTBYTES +#define pqcrystals_kyber1024_avx2_KEYPAIRCOINBYTES pqcrystals_kyber1024_KEYPAIRCOINBYTES +#define 
pqcrystals_kyber1024_avx2_ENCCOINBYTES pqcrystals_kyber1024_ENCCOINBYTES +#define pqcrystals_kyber1024_avx2_BYTES pqcrystals_kyber1024_BYTES + +int pqcrystals_kyber1024_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); +int pqcrystals_kyber1024_avx2_keypair(uint8_t *pk, uint8_t *sk); +int pqcrystals_kyber1024_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); +int pqcrystals_kyber1024_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int pqcrystals_kyber1024_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/basemul.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/basemul.S new file mode 100644 index 0000000000..36990639b2 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/basemul.S @@ -0,0 +1,105 @@ +#include "consts.h" + +.macro schoolbook off +vmovdqa _16XQINV*2(%rcx),%ymm0 +vmovdqa (64*\off+ 0)*2(%rsi),%ymm1 # a0 +vmovdqa (64*\off+16)*2(%rsi),%ymm2 # b0 +vmovdqa (64*\off+32)*2(%rsi),%ymm3 # a1 +vmovdqa (64*\off+48)*2(%rsi),%ymm4 # b1 + +vpmullw %ymm0,%ymm1,%ymm9 # a0.lo +vpmullw %ymm0,%ymm2,%ymm10 # b0.lo +vpmullw %ymm0,%ymm3,%ymm11 # a1.lo +vpmullw %ymm0,%ymm4,%ymm12 # b1.lo + +vmovdqa (64*\off+ 0)*2(%rdx),%ymm5 # c0 +vmovdqa (64*\off+16)*2(%rdx),%ymm6 # d0 + +vpmulhw %ymm5,%ymm1,%ymm13 # a0c0.hi +vpmulhw %ymm6,%ymm1,%ymm1 # a0d0.hi +vpmulhw %ymm5,%ymm2,%ymm14 # b0c0.hi +vpmulhw %ymm6,%ymm2,%ymm2 # b0d0.hi + +vmovdqa (64*\off+32)*2(%rdx),%ymm7 # c1 +vmovdqa (64*\off+48)*2(%rdx),%ymm8 # d1 + +vpmulhw %ymm7,%ymm3,%ymm15 # a1c1.hi +vpmulhw %ymm8,%ymm3,%ymm3 # a1d1.hi +vpmulhw %ymm7,%ymm4,%ymm0 # b1c1.hi +vpmulhw %ymm8,%ymm4,%ymm4 # b1d1.hi + +vmovdqa %ymm13,(%rsp) + +vpmullw %ymm5,%ymm9,%ymm13 # a0c0.lo +vpmullw %ymm6,%ymm9,%ymm9 # a0d0.lo +vpmullw %ymm5,%ymm10,%ymm5 # b0c0.lo +vpmullw %ymm6,%ymm10,%ymm10 # b0d0.lo + +vpmullw %ymm7,%ymm11,%ymm6 # a1c1.lo +vpmullw 
%ymm8,%ymm11,%ymm11 # a1d1.lo +vpmullw %ymm7,%ymm12,%ymm7 # b1c1.lo +vpmullw %ymm8,%ymm12,%ymm12 # b1d1.lo + +vmovdqa _16XQ*2(%rcx),%ymm8 +vpmulhw %ymm8,%ymm13,%ymm13 +vpmulhw %ymm8,%ymm9,%ymm9 +vpmulhw %ymm8,%ymm5,%ymm5 +vpmulhw %ymm8,%ymm10,%ymm10 +vpmulhw %ymm8,%ymm6,%ymm6 +vpmulhw %ymm8,%ymm11,%ymm11 +vpmulhw %ymm8,%ymm7,%ymm7 +vpmulhw %ymm8,%ymm12,%ymm12 + +vpsubw (%rsp),%ymm13,%ymm13 # -a0c0 +vpsubw %ymm9,%ymm1,%ymm9 # a0d0 +vpsubw %ymm5,%ymm14,%ymm5 # b0c0 +vpsubw %ymm10,%ymm2,%ymm10 # b0d0 + +vpsubw %ymm6,%ymm15,%ymm6 # a1c1 +vpsubw %ymm11,%ymm3,%ymm11 # a1d1 +vpsubw %ymm7,%ymm0,%ymm7 # b1c1 +vpsubw %ymm12,%ymm4,%ymm12 # b1d1 + +vmovdqa (%r9),%ymm0 +vmovdqa 32(%r9),%ymm1 +vpmullw %ymm0,%ymm10,%ymm2 +vpmullw %ymm0,%ymm12,%ymm3 +vpmulhw %ymm1,%ymm10,%ymm10 +vpmulhw %ymm1,%ymm12,%ymm12 +vpmulhw %ymm8,%ymm2,%ymm2 +vpmulhw %ymm8,%ymm3,%ymm3 +vpsubw %ymm2,%ymm10,%ymm10 # rb0d0 +vpsubw %ymm3,%ymm12,%ymm12 # rb1d1 + +vpaddw %ymm5,%ymm9,%ymm9 +vpaddw %ymm7,%ymm11,%ymm11 +vpsubw %ymm13,%ymm10,%ymm13 +vpsubw %ymm12,%ymm6,%ymm6 + +vmovdqa %ymm13,(64*\off+ 0)*2(%rdi) +vmovdqa %ymm9,(64*\off+16)*2(%rdi) +vmovdqa %ymm6,(64*\off+32)*2(%rdi) +vmovdqa %ymm11,(64*\off+48)*2(%rdi) +.endm + +.text +.global cdecl(basemul_avx) +cdecl(basemul_avx): +mov %rsp,%r8 +and $-32,%rsp +sub $32,%rsp + +lea (_ZETAS_EXP+176)*2(%rcx),%r9 +schoolbook 0 + +add $32*2,%r9 +schoolbook 1 + +add $192*2,%r9 +schoolbook 2 + +add $32*2,%r9 +schoolbook 3 + +mov %r8,%rsp +ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/cbd.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/cbd.c new file mode 100644 index 0000000000..dad473c79e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/cbd.c @@ -0,0 +1,144 @@ +#include +#include +#include "params.h" +#include "cbd.h" + +/************************************************* +* Name: cbd2 +* +* Description: Given an array of uniformly random bytes, compute +* polynomial with coefficients 
distributed according to +* a centered binomial distribution with parameter eta=2 +* +* Arguments: - poly *r: pointer to output polynomial +* - const __m256i *buf: pointer to aligned input byte array +**************************************************/ +static void cbd2(poly * restrict r, const __m256i buf[2*KYBER_N/128]) +{ + unsigned int i; + __m256i f0, f1, f2, f3; + const __m256i mask55 = _mm256_set1_epi32(0x55555555); + const __m256i mask33 = _mm256_set1_epi32(0x33333333); + const __m256i mask03 = _mm256_set1_epi32(0x03030303); + const __m256i mask0F = _mm256_set1_epi32(0x0F0F0F0F); + + for(i = 0; i < KYBER_N/64; i++) { + f0 = _mm256_load_si256(&buf[i]); + + f1 = _mm256_srli_epi16(f0, 1); + f0 = _mm256_and_si256(mask55, f0); + f1 = _mm256_and_si256(mask55, f1); + f0 = _mm256_add_epi8(f0, f1); + + f1 = _mm256_srli_epi16(f0, 2); + f0 = _mm256_and_si256(mask33, f0); + f1 = _mm256_and_si256(mask33, f1); + f0 = _mm256_add_epi8(f0, mask33); + f0 = _mm256_sub_epi8(f0, f1); + + f1 = _mm256_srli_epi16(f0, 4); + f0 = _mm256_and_si256(mask0F, f0); + f1 = _mm256_and_si256(mask0F, f1); + f0 = _mm256_sub_epi8(f0, mask03); + f1 = _mm256_sub_epi8(f1, mask03); + + f2 = _mm256_unpacklo_epi8(f0, f1); + f3 = _mm256_unpackhi_epi8(f0, f1); + + f0 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(f2)); + f1 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(f2,1)); + f2 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(f3)); + f3 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(f3,1)); + + _mm256_store_si256(&r->vec[4*i+0], f0); + _mm256_store_si256(&r->vec[4*i+1], f2); + _mm256_store_si256(&r->vec[4*i+2], f1); + _mm256_store_si256(&r->vec[4*i+3], f3); + } +} + +#if KYBER_ETA1 == 3 +/************************************************* +* Name: cbd3 +* +* Description: Given an array of uniformly random bytes, compute +* polynomial with coefficients distributed according to +* a centered binomial distribution with parameter eta=3 +* This function is only needed for Kyber-512 +* +* Arguments: - poly 
*r: pointer to output polynomial +* - const __m256i *buf: pointer to aligned input byte array +**************************************************/ +static void cbd3(poly * restrict r, const uint8_t buf[3*KYBER_N/4+8]) +{ + unsigned int i; + __m256i f0, f1, f2, f3; + const __m256i mask249 = _mm256_set1_epi32(0x249249); + const __m256i mask6DB = _mm256_set1_epi32(0x6DB6DB); + const __m256i mask07 = _mm256_set1_epi32(7); + const __m256i mask70 = _mm256_set1_epi32(7 << 16); + const __m256i mask3 = _mm256_set1_epi16(3); + const __m256i shufbidx = _mm256_set_epi8(-1,15,14,13,-1,12,11,10,-1, 9, 8, 7,-1, 6, 5, 4, + -1,11,10, 9,-1, 8, 7, 6,-1, 5, 4, 3,-1, 2, 1, 0); + + for(i = 0; i < KYBER_N/32; i++) { + f0 = _mm256_loadu_si256((__m256i *)&buf[24*i]); + f0 = _mm256_permute4x64_epi64(f0,0x94); + f0 = _mm256_shuffle_epi8(f0,shufbidx); + + f1 = _mm256_srli_epi32(f0,1); + f2 = _mm256_srli_epi32(f0,2); + f0 = _mm256_and_si256(mask249,f0); + f1 = _mm256_and_si256(mask249,f1); + f2 = _mm256_and_si256(mask249,f2); + f0 = _mm256_add_epi32(f0,f1); + f0 = _mm256_add_epi32(f0,f2); + + f1 = _mm256_srli_epi32(f0,3); + f0 = _mm256_add_epi32(f0,mask6DB); + f0 = _mm256_sub_epi32(f0,f1); + + f1 = _mm256_slli_epi32(f0,10); + f2 = _mm256_srli_epi32(f0,12); + f3 = _mm256_srli_epi32(f0, 2); + f0 = _mm256_and_si256(f0,mask07); + f1 = _mm256_and_si256(f1,mask70); + f2 = _mm256_and_si256(f2,mask07); + f3 = _mm256_and_si256(f3,mask70); + f0 = _mm256_add_epi16(f0,f1); + f1 = _mm256_add_epi16(f2,f3); + f0 = _mm256_sub_epi16(f0,mask3); + f1 = _mm256_sub_epi16(f1,mask3); + + f2 = _mm256_unpacklo_epi32(f0,f1); + f3 = _mm256_unpackhi_epi32(f0,f1); + + f0 = _mm256_permute2x128_si256(f2,f3,0x20); + f1 = _mm256_permute2x128_si256(f2,f3,0x31); + + _mm256_store_si256(&r->vec[2*i+0], f0); + _mm256_store_si256(&r->vec[2*i+1], f1); + } +} +#endif + +/* buf 32 bytes longer for cbd3 */ +void poly_cbd_eta1(poly *r, const __m256i buf[KYBER_ETA1*KYBER_N/128+1]) +{ +#if KYBER_ETA1 == 2 + cbd2(r, buf); +#elif KYBER_ETA1 
== 3 + cbd3(r, (uint8_t *)buf); +#else +#error "This implementation requires eta1 in {2,3}" +#endif +} + +void poly_cbd_eta2(poly *r, const __m256i buf[KYBER_ETA2*KYBER_N/128]) +{ +#if KYBER_ETA2 == 2 + cbd2(r, buf); +#else +#error "This implementation requires eta2 = 2" +#endif +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/cbd.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/cbd.h new file mode 100644 index 0000000000..05788e06b4 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/cbd.h @@ -0,0 +1,15 @@ +#ifndef CBD_H +#define CBD_H + +#include +#include +#include "params.h" +#include "poly.h" + +#define poly_cbd_eta1 KYBER_NAMESPACE(poly_cbd_eta1) +void poly_cbd_eta1(poly *r, const __m256i buf[KYBER_ETA1*KYBER_N/128+1]); + +#define poly_cbd_eta2 KYBER_NAMESPACE(poly_cbd_eta2) +void poly_cbd_eta2(poly *r, const __m256i buf[KYBER_ETA2*KYBER_N/128]); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/consts.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/consts.c new file mode 100644 index 0000000000..84e596893d --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/consts.c @@ -0,0 +1,121 @@ +#include "align.h" +#include "params.h" +#include "consts.h" + +#define Q KYBER_Q +#define MONT -1044 // 2^16 mod q +#define QINV -3327 // q^-1 mod 2^16 +#define V 20159 // floor(2^26/q + 0.5) +#define FHI 1441 // mont^2/128 +#define FLO -10079 // qinv*FHI +#define MONTSQHI 1353 // mont^2 +#define MONTSQLO 20553 // qinv*MONTSQHI +#define MASK 4095 +#define SHIFT 32 + +const qdata_t qdata = {{ +#define _16XQ 0 + Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, + +#define _16XQINV 16 + QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, + QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, + +#define _16XV 32 + V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, + +#define _16XFLO 48 + FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO, + FLO, FLO, FLO, 
FLO, FLO, FLO, FLO, FLO, + +#define _16XFHI 64 + FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI, + FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI, + +#define _16XMONTSQLO 80 + MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, + MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, + MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, + MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, + +#define _16XMONTSQHI 96 + MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, + MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, + MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, + MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, + +#define _16XMASK 112 + MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK, + MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK, + +#define _REVIDXB 128 + 3854, 3340, 2826, 2312, 1798, 1284, 770, 256, + 3854, 3340, 2826, 2312, 1798, 1284, 770, 256, + +#define _REVIDXD 144 + 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0, 0, + +#define _ZETAS_EXP 160 + 31498, 31498, 31498, 31498, -758, -758, -758, -758, + 5237, 5237, 5237, 5237, 1397, 1397, 1397, 1397, + 14745, 14745, 14745, 14745, 14745, 14745, 14745, 14745, + 14745, 14745, 14745, 14745, 14745, 14745, 14745, 14745, + -359, -359, -359, -359, -359, -359, -359, -359, + -359, -359, -359, -359, -359, -359, -359, -359, + 13525, 13525, 13525, 13525, 13525, 13525, 13525, 13525, + -12402, -12402, -12402, -12402, -12402, -12402, -12402, -12402, + 1493, 1493, 1493, 1493, 1493, 1493, 1493, 1493, + 1422, 1422, 1422, 1422, 1422, 1422, 1422, 1422, + -20907, -20907, -20907, -20907, 27758, 27758, 27758, 27758, + -3799, -3799, -3799, -3799, -15690, -15690, -15690, -15690, + -171, -171, -171, -171, 622, 622, 622, 622, + 1577, 1577, 1577, 1577, 182, 182, 182, 182, + -5827, -5827, 17363, 17363, -26360, -26360, -29057, -29057, + 5571, 5571, -1102, -1102, 21438, 21438, -26242, -26242, + 573, 573, -1325, -1325, 264, 264, 383, 383, + -829, -829, 1458, 1458, -1602, -1602, -130, -130, + -5689, -6516, 1496, 30967, -23565, 20179, 20710, 25080, + -12796, 26616, 16064, -12442, 9134, -650, -25986, 27837, + 1223, 652, -552, 1015, -1293, 1491, 
-282, -1544, + 516, -8, -320, -666, -1618, -1162, 126, 1469, + -335, -11477, -32227, 20494, -27738, 945, -14883, 6182, + 32010, 10631, 29175, -28762, -18486, 17560, -14430, -5276, + -1103, 555, -1251, 1550, 422, 177, -291, 1574, + -246, 1159, -777, -602, -1590, -872, 418, -156, + 11182, 13387, -14233, -21655, 13131, -4587, 23092, 5493, + -32502, 30317, -18741, 12639, 20100, 18525, 19529, -12619, + 430, 843, 871, 105, 587, -235, -460, 1653, + 778, -147, 1483, 1119, 644, 349, 329, -75, + 787, 787, 787, 787, 787, 787, 787, 787, + 787, 787, 787, 787, 787, 787, 787, 787, + -1517, -1517, -1517, -1517, -1517, -1517, -1517, -1517, + -1517, -1517, -1517, -1517, -1517, -1517, -1517, -1517, + 28191, 28191, 28191, 28191, 28191, 28191, 28191, 28191, + -16694, -16694, -16694, -16694, -16694, -16694, -16694, -16694, + 287, 287, 287, 287, 287, 287, 287, 287, + 202, 202, 202, 202, 202, 202, 202, 202, + 10690, 10690, 10690, 10690, 1358, 1358, 1358, 1358, + -11202, -11202, -11202, -11202, 31164, 31164, 31164, 31164, + 962, 962, 962, 962, -1202, -1202, -1202, -1202, + -1474, -1474, -1474, -1474, 1468, 1468, 1468, 1468, + -28073, -28073, 24313, 24313, -10532, -10532, 8800, 8800, + 18426, 18426, 8859, 8859, 26675, 26675, -16163, -16163, + -681, -681, 1017, 1017, 732, 732, 608, 608, + -1542, -1542, 411, 411, -205, -205, -1571, -1571, + 19883, -28250, -15887, -8898, -28309, 9075, -30199, 18249, + 13426, 14017, -29156, -12757, 16832, 4311, -24155, -17915, + -853, -90, -271, 830, 107, -1421, -247, -951, + -398, 961, -1508, -725, 448, -1065, 677, -1275, + -31183, 25435, -7382, 24391, -20927, 10946, 24214, 16989, + 10335, -7934, -22502, 10906, 31636, 28644, 23998, -17422, + 817, 603, 1322, -1465, -1215, 1218, -874, -1187, + -1185, -1278, -1510, -870, -108, 996, 958, 1522, + 20297, 2146, 15355, -32384, -6280, -14903, -11044, 14469, + -21498, -20198, 23210, -17442, -23860, -20257, 7756, 23132, + 1097, 610, -1285, 384, -136, -1335, 220, -1659, + -1530, 794, -854, 478, -308, 991, -1460, 1628, + 
+#define _16XSHIFT 624 + SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, + SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT +}}; diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/consts.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/consts.h new file mode 100644 index 0000000000..f95899cd8e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/consts.h @@ -0,0 +1,43 @@ +#ifndef CONSTS_H +#define CONSTS_H + +#include "params.h" + +#define _16XQ 0 +#define _16XQINV 16 +#define _16XV 32 +#define _16XFLO 48 +#define _16XFHI 64 +#define _16XMONTSQLO 80 +#define _16XMONTSQHI 96 +#define _16XMASK 112 +#define _REVIDXB 128 +#define _REVIDXD 144 +#define _ZETAS_EXP 160 +#define _16XSHIFT 624 + +/* The C ABI on MacOS exports all symbols with a leading + * underscore. This means that any symbols we refer to from + * C files (functions) can't be found, and all symbols we + * refer to from ASM also can't be found. + * + * This define helps us get around this + */ +#ifdef __ASSEMBLER__ +#if defined(__WIN32__) || defined(__APPLE__) +#define decorate(s) _##s +#define cdecl2(s) decorate(s) +#define cdecl(s) cdecl2(KYBER_NAMESPACE(##s)) +#else +#define cdecl(s) KYBER_NAMESPACE(##s) +#endif +#endif + +#ifndef __ASSEMBLER__ +#include "align.h" +typedef ALIGNED_INT16(640) qdata_t; +#define qdata KYBER_NAMESPACE(qdata) +extern const qdata_t qdata; +#endif + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/fq.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/fq.S new file mode 100644 index 0000000000..3bb1ebd3d8 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/fq.S @@ -0,0 +1,88 @@ +#include "consts.h" +.include "fq.inc" + +.text +reduce128_avx: +#load +vmovdqa (%rdi),%ymm2 +vmovdqa 32(%rdi),%ymm3 +vmovdqa 64(%rdi),%ymm4 +vmovdqa 96(%rdi),%ymm5 +vmovdqa 128(%rdi),%ymm6 +vmovdqa 160(%rdi),%ymm7 +vmovdqa 192(%rdi),%ymm8 +vmovdqa 
/*
 * tomont128_avx (file-local helper)
 *
 * Processes 128 16-bit words (8 x 32 bytes) at (%rdi) in place: every
 * vector is run through fqmulprecomp (fq.inc) with %ymm1 as the low-part
 * constant and %ymm2 as the high-part constant.
 *
 * Expects: %ymm0, %ymm1, %ymm2 already loaded by the caller.
 *          %rdi 32-byte aligned (vmovdqa is the aligned move).
 * Clobbers: %ymm3-%ymm15 (%ymm3-%ymm10 data, %ymm11-%ymm15 scratch).
 */
tomont128_avx:
#load
vmovdqa (%rdi),%ymm3
vmovdqa 32(%rdi),%ymm4
vmovdqa 64(%rdi),%ymm5
vmovdqa 96(%rdi),%ymm6
vmovdqa 128(%rdi),%ymm7
vmovdqa 160(%rdi),%ymm8
vmovdqa 192(%rdi),%ymm9
vmovdqa 224(%rdi),%ymm10

/* scratch registers rotate through %ymm11-%ymm15 */
fqmulprecomp 1,2,3,11
fqmulprecomp 1,2,4,12
fqmulprecomp 1,2,5,13
fqmulprecomp 1,2,6,14
fqmulprecomp 1,2,7,15
fqmulprecomp 1,2,8,11
fqmulprecomp 1,2,9,12
fqmulprecomp 1,2,10,13

#store
vmovdqa %ymm3,(%rdi)
vmovdqa %ymm4,32(%rdi)
vmovdqa %ymm5,64(%rdi)
vmovdqa %ymm6,96(%rdi)
vmovdqa %ymm7,128(%rdi)
vmovdqa %ymm8,160(%rdi)
vmovdqa %ymm9,192(%rdi)
vmovdqa %ymm10,224(%rdi)

ret

/*
 * tomont_avx (exported; symbol name is decorated by cdecl() from consts.h)
 *
 * Multiplies 256 16-bit coefficients at (%rdi) in place by the constant
 * stored at _16XMONTSQHI (documented in consts.c as mont^2), using the
 * companion low-part constant at _16XMONTSQLO (qinv*MONTSQHI).
 *
 *   %rdi : coefficient array, handled as two 128-word (256-byte) halves;
 *          left pointing at the second half on return
 *   %rsi : base of the constant table; the _16X... offsets are 16-bit word
 *          indices, hence the *2 to obtain byte offsets
 *          NOTE(review): presumably the address of qdata -- confirm
 *          against the C caller.
 */
.global cdecl(tomont_avx)
cdecl(tomont_avx):
#consts
vmovdqa _16XQ*2(%rsi),%ymm0
vmovdqa _16XMONTSQLO*2(%rsi),%ymm1
vmovdqa _16XMONTSQHI*2(%rsi),%ymm2
call tomont128_avx
add $256,%rdi
call tomont128_avx
ret
/*
 * fqmulprecomp al,ah,b,x=12
 *
 * Lane-wise (16 x 16-bit) multiplication of %ymm\b by a precomputed
 * constant pair; the result replaces %ymm\b.
 *
 *   al : register number holding the low-part constant
 *   ah : register number holding the high-part constant
 *   b  : register number of the operand (input and output)
 *   x  : scratch register number (default 12), overwritten
 *
 * %ymm0 is read as the third multiplier. The callers visible in fq.S and
 * invntt.S load _16XQ into %ymm0 and pass constant pairs described in
 * consts.c as (qinv*c, c), which makes this a Montgomery multiplication.
 */
.macro fqmulprecomp al,ah,b,x=12
vpmullw %ymm\al,%ymm\b,%ymm\x /* x = low 16 bits of b*al */
vpmulhw %ymm\ah,%ymm\b,%ymm\b /* b = high 16 bits of b*ah (signed) */
vpmulhw %ymm0,%ymm\x,%ymm\x /* x = high 16 bits of x*ymm0 (signed) */
vpsubw %ymm\x,%ymm\b,%ymm\b /* b = b - x */
.endm
+* +* Arguments: uint8_t *r: pointer to the output serialized public key +* polyvec *pk: pointer to the input public-key polyvec +* const uint8_t *seed: pointer to the input public seed +**************************************************/ +static void pack_pk(uint8_t r[KYBER_INDCPA_PUBLICKEYBYTES], + polyvec *pk, + const uint8_t seed[KYBER_SYMBYTES]) +{ + polyvec_tobytes(r, pk); + memcpy(r+KYBER_POLYVECBYTES, seed, KYBER_SYMBYTES); +} + +/************************************************* +* Name: unpack_pk +* +* Description: De-serialize public key from a byte array; +* approximate inverse of pack_pk +* +* Arguments: - polyvec *pk: pointer to output public-key polynomial vector +* - uint8_t *seed: pointer to output seed to generate matrix A +* - const uint8_t *packedpk: pointer to input serialized public key +**************************************************/ +static void unpack_pk(polyvec *pk, + uint8_t seed[KYBER_SYMBYTES], + const uint8_t packedpk[KYBER_INDCPA_PUBLICKEYBYTES]) +{ + polyvec_frombytes(pk, packedpk); + memcpy(seed, packedpk+KYBER_POLYVECBYTES, KYBER_SYMBYTES); +} + +/************************************************* +* Name: pack_sk +* +* Description: Serialize the secret key. +* The polynomial coefficients in sk are assumed to +* lie in the invertal [0,q], i.e. sk must be reduced +* by polyvec_reduce(). 
+* +* Arguments: - uint8_t *r: pointer to output serialized secret key +* - polyvec *sk: pointer to input vector of polynomials (secret key) +**************************************************/ +static void pack_sk(uint8_t r[KYBER_INDCPA_SECRETKEYBYTES], polyvec *sk) +{ + polyvec_tobytes(r, sk); +} + +/************************************************* +* Name: unpack_sk +* +* Description: De-serialize the secret key; inverse of pack_sk +* +* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret key) +* - const uint8_t *packedsk: pointer to input serialized secret key +**************************************************/ +static void unpack_sk(polyvec *sk, const uint8_t packedsk[KYBER_INDCPA_SECRETKEYBYTES]) +{ + polyvec_frombytes(sk, packedsk); +} + +/************************************************* +* Name: pack_ciphertext +* +* Description: Serialize the ciphertext as concatenation of the +* compressed and serialized vector of polynomials b +* and the compressed and serialized polynomial v. +* The polynomial coefficients in b and v are assumed to +* lie in the invertal [0,q], i.e. b and v must be reduced +* by polyvec_reduce() and poly_reduce(), respectively. 
/*************************************************
* Name:        rej_uniform
*
* Description: Run rejection sampling on uniform random bytes to generate
*              uniform random integers mod q. Every complete group of
*              three input bytes yields two 12-bit candidates; a candidate
*              is stored only if it is smaller than KYBER_Q.
*
* Arguments:   - int16_t *r: pointer to output array
*              - unsigned int len: requested number of 16-bit integers (uniform mod q)
*              - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes)
*              - unsigned int buflen: length of input buffer in bytes; any
*                value is accepted, trailing bytes that do not form a
*                complete 3-byte group are ignored
*
* Returns number of sampled 16-bit integers (at most len)
**************************************************/
static unsigned int rej_uniform(int16_t *r,
                                unsigned int len,
                                const uint8_t *buf,
                                unsigned int buflen)
{
  unsigned int ctr, pos;
  uint16_t val0, val1;

  ctr = pos = 0;
  /* pos never exceeds buflen (it starts at 0 and only advances by 3 when at
   * least 3 bytes remain), so buflen - pos cannot wrap. The former test
   * "pos <= buflen - 3" wrapped around for buflen < 3 and let the loop
   * read past the end of buf. */
  while(ctr < len && buflen - pos >= 3) {
    val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF;
    val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)) & 0xFFF;
    pos += 3;

    /* accepted values are < KYBER_Q, so the narrowing casts are lossless */
    if(val0 < KYBER_Q)
      r[ctr++] = (int16_t)val0;
    if(ctr < len && val1 < KYBER_Q)
      r[ctr++] = (int16_t)val1;
  }

  return ctr;
}
buf[2].coeffs); + ctr3 = rej_uniform_avx(a[1].vec[1].coeffs, buf[3].coeffs); + + while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_uniform(a[0].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); + ctr1 += rej_uniform(a[0].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); + ctr2 += rej_uniform(a[1].vec[0].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_uniform(a[1].vec[1].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } + + poly_nttunpack(&a[0].vec[0]); + poly_nttunpack(&a[0].vec[1]); + poly_nttunpack(&a[1].vec[0]); + poly_nttunpack(&a[1].vec[1]); + shake128x4_inc_ctx_release(&state); +} +#elif KYBER_K == 3 +void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) +{ + unsigned int ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4]; + __m256i f; + shake128x4incctx state; + shake128incctx state1x; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + if(transposed) { + buf[0].coeffs[32] = 0; + buf[0].coeffs[33] = 0; + buf[1].coeffs[32] = 0; + buf[1].coeffs[33] = 1; + buf[2].coeffs[32] = 0; + buf[2].coeffs[33] = 2; + buf[3].coeffs[32] = 1; + buf[3].coeffs[33] = 0; + } + else { + buf[0].coeffs[32] = 0; + buf[0].coeffs[33] = 0; + buf[1].coeffs[32] = 1; + buf[1].coeffs[33] = 0; + buf[2].coeffs[32] = 2; + buf[2].coeffs[33] = 0; + buf[3].coeffs[32] = 0; + buf[3].coeffs[33] = 1; + } + + shake128x4_inc_init(&state); + shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); + + ctr0 = 
rej_uniform_avx(a[0].vec[0].coeffs, buf[0].coeffs); + ctr1 = rej_uniform_avx(a[0].vec[1].coeffs, buf[1].coeffs); + ctr2 = rej_uniform_avx(a[0].vec[2].coeffs, buf[2].coeffs); + ctr3 = rej_uniform_avx(a[1].vec[0].coeffs, buf[3].coeffs); + + while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_uniform(a[0].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); + ctr1 += rej_uniform(a[0].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); + ctr2 += rej_uniform(a[0].vec[2].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_uniform(a[1].vec[0].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } + + poly_nttunpack(&a[0].vec[0]); + poly_nttunpack(&a[0].vec[1]); + poly_nttunpack(&a[0].vec[2]); + poly_nttunpack(&a[1].vec[0]); + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + if(transposed) { + buf[0].coeffs[32] = 1; + buf[0].coeffs[33] = 1; + buf[1].coeffs[32] = 1; + buf[1].coeffs[33] = 2; + buf[2].coeffs[32] = 2; + buf[2].coeffs[33] = 0; + buf[3].coeffs[32] = 2; + buf[3].coeffs[33] = 1; + } + else { + buf[0].coeffs[32] = 1; + buf[0].coeffs[33] = 1; + buf[1].coeffs[32] = 2; + buf[1].coeffs[33] = 1; + buf[2].coeffs[32] = 0; + buf[2].coeffs[33] = 2; + buf[3].coeffs[32] = 1; + buf[3].coeffs[33] = 2; + } + + shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); + + ctr0 = rej_uniform_avx(a[1].vec[1].coeffs, buf[0].coeffs); + ctr1 = rej_uniform_avx(a[1].vec[2].coeffs, buf[1].coeffs); + ctr2 = rej_uniform_avx(a[2].vec[0].coeffs, buf[2].coeffs); + ctr3 = 
rej_uniform_avx(a[2].vec[1].coeffs, buf[3].coeffs); + + while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_uniform(a[1].vec[1].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); + ctr1 += rej_uniform(a[1].vec[2].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); + ctr2 += rej_uniform(a[2].vec[0].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_uniform(a[2].vec[1].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } + shake128x4_inc_ctx_release(&state); + + poly_nttunpack(&a[1].vec[1]); + poly_nttunpack(&a[1].vec[2]); + poly_nttunpack(&a[2].vec[0]); + poly_nttunpack(&a[2].vec[1]); + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + buf[0].coeffs[32] = 2; + buf[0].coeffs[33] = 2; + + shake128_inc_init(&state1x); + shake128_absorb_once(&state1x, buf[0].coeffs, 34); + shake128_squeezeblocks(buf[0].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state1x); + ctr0 = rej_uniform_avx(a[2].vec[2].coeffs, buf[0].coeffs); + while(ctr0 < KYBER_N) { + shake128_squeezeblocks(buf[0].coeffs, 1, &state1x); + ctr0 += rej_uniform(a[2].vec[2].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); + } + shake128_inc_ctx_release(&state1x); + + poly_nttunpack(&a[2].vec[2]); +} +#elif KYBER_K == 4 +void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) +{ + unsigned int i, ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4]; + __m256i f; + shake128x4incctx state; + shake128x4_inc_init(&state); + + for(i=0;i<4;i++) { + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + if(transposed) { + buf[0].coeffs[32] = i; + buf[0].coeffs[33] = 0; + buf[1].coeffs[32] = i; + buf[1].coeffs[33] = 
1; + buf[2].coeffs[32] = i; + buf[2].coeffs[33] = 2; + buf[3].coeffs[32] = i; + buf[3].coeffs[33] = 3; + } + else { + buf[0].coeffs[32] = 0; + buf[0].coeffs[33] = i; + buf[1].coeffs[32] = 1; + buf[1].coeffs[33] = i; + buf[2].coeffs[32] = 2; + buf[2].coeffs[33] = i; + buf[3].coeffs[32] = 3; + buf[3].coeffs[33] = i; + } + + shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); + + ctr0 = rej_uniform_avx(a[i].vec[0].coeffs, buf[0].coeffs); + ctr1 = rej_uniform_avx(a[i].vec[1].coeffs, buf[1].coeffs); + ctr2 = rej_uniform_avx(a[i].vec[2].coeffs, buf[2].coeffs); + ctr3 = rej_uniform_avx(a[i].vec[3].coeffs, buf[3].coeffs); + + while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_uniform(a[i].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); + ctr1 += rej_uniform(a[i].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); + ctr2 += rej_uniform(a[i].vec[2].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_uniform(a[i].vec[3].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } + + poly_nttunpack(&a[i].vec[0]); + poly_nttunpack(&a[i].vec[1]); + poly_nttunpack(&a[i].vec[2]); + poly_nttunpack(&a[i].vec[3]); + } + shake128x4_inc_ctx_release(&state); +} +#endif + +/************************************************* +* Name: indcpa_keypair_derand +* +* Description: Generates public and private key for the CPA-secure +* public-key encryption scheme underlying Kyber +* +* Arguments: - uint8_t *pk: pointer to output public key +* (of length KYBER_INDCPA_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key +* (of length KYBER_INDCPA_SECRETKEYBYTES bytes) +* - const uint8_t *coins: pointer to input 
randomness +* (of length KYBER_SYMBYTES bytes) +**************************************************/ +void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], + uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], + const uint8_t coins[KYBER_SYMBYTES]) +{ + unsigned int i; + uint8_t buf[2*KYBER_SYMBYTES]; + const uint8_t *publicseed = buf; + const uint8_t *noiseseed = buf + KYBER_SYMBYTES; + polyvec a[KYBER_K], e, pkpv, skpv; + + hash_g(buf, coins, KYBER_SYMBYTES); + + gen_a(a, publicseed); + +#if KYBER_K == 2 + poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, e.vec+0, e.vec+1, noiseseed, 0, 1, 2, 3); +#elif KYBER_K == 3 + poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, skpv.vec+2, e.vec+0, noiseseed, 0, 1, 2, 3); + poly_getnoise_eta1_4x(e.vec+1, e.vec+2, pkpv.vec+0, pkpv.vec+1, noiseseed, 4, 5, 6, 7); +#elif KYBER_K == 4 + poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, skpv.vec+2, skpv.vec+3, noiseseed, 0, 1, 2, 3); + poly_getnoise_eta1_4x(e.vec+0, e.vec+1, e.vec+2, e.vec+3, noiseseed, 4, 5, 6, 7); +#endif + + polyvec_ntt(&skpv); + polyvec_reduce(&skpv); + polyvec_ntt(&e); + + // matrix-vector multiplication + for(i=0;i +#include "params.h" +#include "polyvec.h" + +#define gen_matrix KYBER_NAMESPACE(gen_matrix) +void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed); + +#define indcpa_keypair_derand KYBER_NAMESPACE(indcpa_keypair_derand) +void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], + uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], + const uint8_t coins[KYBER_SYMBYTES]); + +#define indcpa_enc KYBER_NAMESPACE(indcpa_enc) +void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], + const uint8_t m[KYBER_INDCPA_MSGBYTES], + const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], + const uint8_t coins[KYBER_SYMBYTES]); + +#define indcpa_dec KYBER_NAMESPACE(indcpa_dec) +void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES], + const uint8_t c[KYBER_INDCPA_BYTES], + const uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES]); + +#endif diff --git 
/*
 * butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,zl0=2,zl1=2,zh0=3,zh1=3
 *
 * Four inverse-NTT butterflies on the register pairs (rl_i, rh_i), each
 * lane-wise on 16 x 16-bit words:
 *
 *     t    = rh_i - rl_i
 *     rl_i = rl_i + rh_i
 *     rh_i = high16(t * zh) - high16(low16(t * zl) * ymm0)
 *
 * Pairs 0 and 1 use the twiddle registers zl0/zh0, pairs 2 and 3 use
 * zl1/zh1. %ymm0 is read as the modulus vector (invntt_avx loads _16XQ
 * into it). %ymm12-%ymm15 hold the four differences t and are clobbered.
 * The operations of the four pairs are interleaved rather than written
 * pair by pair; the instruction order is deliberate and left untouched.
 */
.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,zl0=2,zl1=2,zh0=3,zh1=3
/* differences into ymm12-15, sums into rl*, low products into rh* */
vpsubw %ymm\rl0,%ymm\rh0,%ymm12
vpaddw %ymm\rh0,%ymm\rl0,%ymm\rl0
vpsubw %ymm\rl1,%ymm\rh1,%ymm13

vpmullw %ymm\zl0,%ymm12,%ymm\rh0
vpaddw %ymm\rh1,%ymm\rl1,%ymm\rl1
vpsubw %ymm\rl2,%ymm\rh2,%ymm14

vpmullw %ymm\zl0,%ymm13,%ymm\rh1
vpaddw %ymm\rh2,%ymm\rl2,%ymm\rl2
vpsubw %ymm\rl3,%ymm\rh3,%ymm15

vpmullw %ymm\zl1,%ymm14,%ymm\rh2
vpaddw %ymm\rh3,%ymm\rl3,%ymm\rl3
vpmullw %ymm\zl1,%ymm15,%ymm\rh3

/* high products t*zh replace the differences */
vpmulhw %ymm\zh0,%ymm12,%ymm12
vpmulhw %ymm\zh0,%ymm13,%ymm13

vpmulhw %ymm\zh1,%ymm14,%ymm14
vpmulhw %ymm\zh1,%ymm15,%ymm15

/* correction terms: high16(low product * ymm0) */
vpmulhw %ymm0,%ymm\rh0,%ymm\rh0

vpmulhw %ymm0,%ymm\rh1,%ymm\rh1

vpmulhw %ymm0,%ymm\rh2,%ymm\rh2
vpmulhw %ymm0,%ymm\rh3,%ymm\rh3

#

#

/* rh_i = high product - correction */
vpsubw %ymm\rh0,%ymm12,%ymm\rh0

vpsubw %ymm\rh1,%ymm13,%ymm\rh1

vpsubw %ymm\rh2,%ymm14,%ymm\rh2
vpsubw %ymm\rh3,%ymm15,%ymm\rh3
.endm
$0x4E,(_ZETAS_EXP+(1-\off)*224+224)*2(%rsi),%ymm2 +vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+192)*2(%rsi),%ymm3 +vmovdqa _REVIDXB*2(%rsi),%ymm12 +vpshufb %ymm12,%ymm15,%ymm15 +vpshufb %ymm12,%ymm1,%ymm1 +vpshufb %ymm12,%ymm2,%ymm2 +vpshufb %ymm12,%ymm3,%ymm3 + +butterfly 4,5,8,9,6,7,10,11,15,1,2,3 + +/* level 1 */ +vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+144)*2(%rsi),%ymm2 +vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+160)*2(%rsi),%ymm3 +vmovdqa _REVIDXB*2(%rsi),%ymm1 +vpshufb %ymm1,%ymm2,%ymm2 +vpshufb %ymm1,%ymm3,%ymm3 + +butterfly 4,5,6,7,8,9,10,11,2,2,3,3 + +shuffle1 4,5,3,5 +shuffle1 6,7,4,7 +shuffle1 8,9,6,9 +shuffle1 10,11,8,11 + +/* level 2 */ +vmovdqa _REVIDXD*2(%rsi),%ymm12 +vpermd (_ZETAS_EXP+(1-\off)*224+112)*2(%rsi),%ymm12,%ymm2 +vpermd (_ZETAS_EXP+(1-\off)*224+128)*2(%rsi),%ymm12,%ymm10 + +butterfly 3,4,6,8,5,7,9,11,2,2,10,10 + +vmovdqa _16XV*2(%rsi),%ymm1 +red16 3 + +shuffle2 3,4,10,4 +shuffle2 6,8,3,8 +shuffle2 5,7,6,7 +shuffle2 9,11,5,11 + +/* level 3 */ +vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+80)*2(%rsi),%ymm2 +vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+96)*2(%rsi),%ymm9 + +butterfly 10,3,6,5,4,8,7,11,2,2,9,9 + +shuffle4 10,3,9,3 +shuffle4 6,5,10,5 +shuffle4 4,8,6,8 +shuffle4 7,11,4,11 + +/* level 4 */ +vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+48)*2(%rsi),%ymm2 +vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+64)*2(%rsi),%ymm7 + +butterfly 9,10,6,4,3,5,8,11,2,2,7,7 + +red16 9 + +shuffle8 9,10,7,10 +shuffle8 6,4,9,4 +shuffle8 3,5,6,5 +shuffle8 8,11,3,11 + +/* level 5 */ +vmovdqa (_ZETAS_EXP+(1-\off)*224+16)*2(%rsi),%ymm2 +vmovdqa (_ZETAS_EXP+(1-\off)*224+32)*2(%rsi),%ymm8 + +butterfly 7,9,6,3,10,4,5,11,2,2,8,8 + +vmovdqa %ymm7,(128*\off+ 0)*2(%rdi) +vmovdqa %ymm9,(128*\off+ 16)*2(%rdi) +vmovdqa %ymm6,(128*\off+ 32)*2(%rdi) +vmovdqa %ymm3,(128*\off+ 48)*2(%rdi) +vmovdqa %ymm10,(128*\off+ 64)*2(%rdi) +vmovdqa %ymm4,(128*\off+ 80)*2(%rdi) +vmovdqa %ymm5,(128*\off+ 96)*2(%rdi) +vmovdqa %ymm11,(128*\off+112)*2(%rdi) +.endm + +.macro intt_level6 off +/* level 6 */ +vmovdqa (64*\off+ 
0)*2(%rdi),%ymm4 +vmovdqa (64*\off+128)*2(%rdi),%ymm8 +vmovdqa (64*\off+ 16)*2(%rdi),%ymm5 +vmovdqa (64*\off+144)*2(%rdi),%ymm9 +vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm2 + +vmovdqa (64*\off+ 32)*2(%rdi),%ymm6 +vmovdqa (64*\off+160)*2(%rdi),%ymm10 +vmovdqa (64*\off+ 48)*2(%rdi),%ymm7 +vmovdqa (64*\off+176)*2(%rdi),%ymm11 +vpbroadcastq (_ZETAS_EXP+4)*2(%rsi),%ymm3 + +butterfly 4,5,6,7,8,9,10,11 + +.if \off == 0 +red16 4 +.endif + +vmovdqa %ymm4,(64*\off+ 0)*2(%rdi) +vmovdqa %ymm5,(64*\off+ 16)*2(%rdi) +vmovdqa %ymm6,(64*\off+ 32)*2(%rdi) +vmovdqa %ymm7,(64*\off+ 48)*2(%rdi) +vmovdqa %ymm8,(64*\off+128)*2(%rdi) +vmovdqa %ymm9,(64*\off+144)*2(%rdi) +vmovdqa %ymm10,(64*\off+160)*2(%rdi) +vmovdqa %ymm11,(64*\off+176)*2(%rdi) +.endm + +.text +.global cdecl(invntt_avx) +cdecl(invntt_avx): +vmovdqa _16XQ*2(%rsi),%ymm0 + +intt_levels0t5 0 +intt_levels0t5 1 + +intt_level6 0 +intt_level6 1 +ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/kem.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/kem.c new file mode 100644 index 0000000000..63abc1029c --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/kem.c @@ -0,0 +1,169 @@ +#include +#include +#include +#include "params.h" +#include "kem.h" +#include "indcpa.h" +#include "verify.h" +#include "symmetric.h" +#include "randombytes.h" +/************************************************* +* Name: crypto_kem_keypair_derand +* +* Description: Generates public and private key +* for CCA-secure Kyber key encapsulation mechanism +* +* Arguments: - uint8_t *pk: pointer to output public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key +* (an already allocated array of KYBER_SECRETKEYBYTES bytes) +* - uint8_t *coins: pointer to input randomness +* (an already allocated array filled with 2*KYBER_SYMBYTES random bytes) +** +* Returns 0 (success) +**************************************************/ +int 
crypto_kem_keypair_derand(uint8_t *pk, + uint8_t *sk, + const uint8_t *coins) +{ + indcpa_keypair_derand(pk, sk, coins); + memcpy(sk+KYBER_INDCPA_SECRETKEYBYTES, pk, KYBER_PUBLICKEYBYTES); + hash_h(sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); + /* Value z for pseudo-random output on reject */ + memcpy(sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, coins+KYBER_SYMBYTES, KYBER_SYMBYTES); + return 0; +} + +/************************************************* +* Name: crypto_kem_keypair +* +* Description: Generates public and private key +* for CCA-secure Kyber key encapsulation mechanism +* +* Arguments: - uint8_t *pk: pointer to output public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key +* (an already allocated array of KYBER_SECRETKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int crypto_kem_keypair(uint8_t *pk, + uint8_t *sk) +{ + uint8_t coins[2*KYBER_SYMBYTES]; + randombytes(coins, 2*KYBER_SYMBYTES); + crypto_kem_keypair_derand(pk, sk, coins); + return 0; +} + +/************************************************* +* Name: crypto_kem_enc_derand +* +* Description: Generates cipher text and shared +* secret for given public key +* +* Arguments: - uint8_t *ct: pointer to output cipher text +* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) +* - uint8_t *ss: pointer to output shared secret +* (an already allocated array of KYBER_SSBYTES bytes) +* - const uint8_t *pk: pointer to input public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* - const uint8_t *coins: pointer to input randomness +* (an already allocated array filled with KYBER_SYMBYTES random bytes) +** +* Returns 0 (success) +**************************************************/ +int crypto_kem_enc_derand(uint8_t *ct, + uint8_t *ss, + const uint8_t *pk, + const uint8_t *coins) +{ + uint8_t buf[2*KYBER_SYMBYTES]; + /* Will contain key, coins */ + 
uint8_t kr[2*KYBER_SYMBYTES]; + + memcpy(buf, coins, KYBER_SYMBYTES); + + /* Multitarget countermeasure for coins + contributory KEM */ + hash_h(buf+KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); + hash_g(kr, buf, 2*KYBER_SYMBYTES); + + /* coins are in kr+KYBER_SYMBYTES */ + indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES); + + memcpy(ss,kr,KYBER_SYMBYTES); + return 0; +} + +/************************************************* +* Name: crypto_kem_enc +* +* Description: Generates cipher text and shared +* secret for given public key +* +* Arguments: - uint8_t *ct: pointer to output cipher text +* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) +* - uint8_t *ss: pointer to output shared secret +* (an already allocated array of KYBER_SSBYTES bytes) +* - const uint8_t *pk: pointer to input public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int crypto_kem_enc(uint8_t *ct, + uint8_t *ss, + const uint8_t *pk) +{ + uint8_t coins[KYBER_SYMBYTES]; + randombytes(coins, KYBER_SYMBYTES); + crypto_kem_enc_derand(ct, ss, pk, coins); + return 0; +} + +/************************************************* +* Name: crypto_kem_dec +* +* Description: Generates shared secret for given +* cipher text and private key +* +* Arguments: - uint8_t *ss: pointer to output shared secret +* (an already allocated array of KYBER_SSBYTES bytes) +* - const uint8_t *ct: pointer to input cipher text +* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) +* - const uint8_t *sk: pointer to input private key +* (an already allocated array of KYBER_SECRETKEYBYTES bytes) +* +* Returns 0. +* +* On failure, ss will contain a pseudo-random value. 
+**************************************************/ +int crypto_kem_dec(uint8_t *ss, + const uint8_t *ct, + const uint8_t *sk) +{ + int fail; + uint8_t buf[2*KYBER_SYMBYTES]; + /* Will contain key, coins */ + uint8_t kr[2*KYBER_SYMBYTES]; + uint8_t cmp[KYBER_CIPHERTEXTBYTES+KYBER_SYMBYTES]; + const uint8_t *pk = sk+KYBER_INDCPA_SECRETKEYBYTES; + + indcpa_dec(buf, ct, sk); + + /* Multitarget countermeasure for coins + contributory KEM */ + memcpy(buf+KYBER_SYMBYTES, sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, KYBER_SYMBYTES); + hash_g(kr, buf, 2*KYBER_SYMBYTES); + + /* coins are in kr+KYBER_SYMBYTES */ + indcpa_enc(cmp, buf, pk, kr+KYBER_SYMBYTES); + + fail = verify(ct, cmp, KYBER_CIPHERTEXTBYTES); + + /* Compute rejection key */ + rkprf(ss,sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES,ct); + + /* Copy true key to return buffer if fail is false */ + cmov(ss,kr,KYBER_SYMBYTES,!fail); + + return 0; +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/kem.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/kem.h new file mode 100644 index 0000000000..234f11966b --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/kem.h @@ -0,0 +1,35 @@ +#ifndef KEM_H +#define KEM_H + +#include +#include "params.h" + +#define CRYPTO_SECRETKEYBYTES KYBER_SECRETKEYBYTES +#define CRYPTO_PUBLICKEYBYTES KYBER_PUBLICKEYBYTES +#define CRYPTO_CIPHERTEXTBYTES KYBER_CIPHERTEXTBYTES +#define CRYPTO_BYTES KYBER_SSBYTES + +#if (KYBER_K == 2) +#define CRYPTO_ALGNAME "Kyber512" +#elif (KYBER_K == 3) +#define CRYPTO_ALGNAME "Kyber768" +#elif (KYBER_K == 4) +#define CRYPTO_ALGNAME "Kyber1024" +#endif + +#define crypto_kem_keypair_derand KYBER_NAMESPACE(keypair_derand) +int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); + +#define crypto_kem_keypair KYBER_NAMESPACE(keypair) +int crypto_kem_keypair(uint8_t *pk, uint8_t *sk); + +#define crypto_kem_enc_derand KYBER_NAMESPACE(enc_derand) +int crypto_kem_enc_derand(uint8_t 
*ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); + +#define crypto_kem_enc KYBER_NAMESPACE(enc) +int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); + +#define crypto_kem_dec KYBER_NAMESPACE(dec) +int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/ntt.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/ntt.S new file mode 100644 index 0000000000..0ce7b41297 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/ntt.S @@ -0,0 +1,189 @@ +#include "consts.h" +.include "shuffle.inc" + +.macro mul rh0,rh1,rh2,rh3,zl0=15,zl1=15,zh0=2,zh1=2 +vpmullw %ymm\zl0,%ymm\rh0,%ymm12 +vpmullw %ymm\zl0,%ymm\rh1,%ymm13 + +vpmullw %ymm\zl1,%ymm\rh2,%ymm14 +vpmullw %ymm\zl1,%ymm\rh3,%ymm15 + +vpmulhw %ymm\zh0,%ymm\rh0,%ymm\rh0 +vpmulhw %ymm\zh0,%ymm\rh1,%ymm\rh1 + +vpmulhw %ymm\zh1,%ymm\rh2,%ymm\rh2 +vpmulhw %ymm\zh1,%ymm\rh3,%ymm\rh3 +.endm + +.macro reduce +vpmulhw %ymm0,%ymm12,%ymm12 +vpmulhw %ymm0,%ymm13,%ymm13 + +vpmulhw %ymm0,%ymm14,%ymm14 +vpmulhw %ymm0,%ymm15,%ymm15 +.endm + +.macro update rln,rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 +vpaddw %ymm\rh0,%ymm\rl0,%ymm\rln +vpsubw %ymm\rh0,%ymm\rl0,%ymm\rh0 +vpaddw %ymm\rh1,%ymm\rl1,%ymm\rl0 + +vpsubw %ymm\rh1,%ymm\rl1,%ymm\rh1 +vpaddw %ymm\rh2,%ymm\rl2,%ymm\rl1 +vpsubw %ymm\rh2,%ymm\rl2,%ymm\rh2 + +vpaddw %ymm\rh3,%ymm\rl3,%ymm\rl2 +vpsubw %ymm\rh3,%ymm\rl3,%ymm\rh3 + +vpsubw %ymm12,%ymm\rln,%ymm\rln +vpaddw %ymm12,%ymm\rh0,%ymm\rh0 +vpsubw %ymm13,%ymm\rl0,%ymm\rl0 + +vpaddw %ymm13,%ymm\rh1,%ymm\rh1 +vpsubw %ymm14,%ymm\rl1,%ymm\rl1 +vpaddw %ymm14,%ymm\rh2,%ymm\rh2 + +vpsubw %ymm15,%ymm\rl2,%ymm\rl2 +vpaddw %ymm15,%ymm\rh3,%ymm\rh3 +.endm + +.macro level0 off +vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm15 +vmovdqa (64*\off+128)*2(%rdi),%ymm8 +vmovdqa (64*\off+144)*2(%rdi),%ymm9 +vmovdqa (64*\off+160)*2(%rdi),%ymm10 +vmovdqa (64*\off+176)*2(%rdi),%ymm11 +vpbroadcastq 
(_ZETAS_EXP+4)*2(%rsi),%ymm2 + +mul 8,9,10,11 + +vmovdqa (64*\off+ 0)*2(%rdi),%ymm4 +vmovdqa (64*\off+ 16)*2(%rdi),%ymm5 +vmovdqa (64*\off+ 32)*2(%rdi),%ymm6 +vmovdqa (64*\off+ 48)*2(%rdi),%ymm7 + +reduce +update 3,4,5,6,7,8,9,10,11 + +vmovdqa %ymm3,(64*\off+ 0)*2(%rdi) +vmovdqa %ymm4,(64*\off+ 16)*2(%rdi) +vmovdqa %ymm5,(64*\off+ 32)*2(%rdi) +vmovdqa %ymm6,(64*\off+ 48)*2(%rdi) +vmovdqa %ymm8,(64*\off+128)*2(%rdi) +vmovdqa %ymm9,(64*\off+144)*2(%rdi) +vmovdqa %ymm10,(64*\off+160)*2(%rdi) +vmovdqa %ymm11,(64*\off+176)*2(%rdi) +.endm + +.macro levels1t6 off +/* level 1 */ +vmovdqa (_ZETAS_EXP+224*\off+16)*2(%rsi),%ymm15 +vmovdqa (128*\off+ 64)*2(%rdi),%ymm8 +vmovdqa (128*\off+ 80)*2(%rdi),%ymm9 +vmovdqa (128*\off+ 96)*2(%rdi),%ymm10 +vmovdqa (128*\off+112)*2(%rdi),%ymm11 +vmovdqa (_ZETAS_EXP+224*\off+32)*2(%rsi),%ymm2 + +mul 8,9,10,11 + +vmovdqa (128*\off+ 0)*2(%rdi),%ymm4 +vmovdqa (128*\off+ 16)*2(%rdi),%ymm5 +vmovdqa (128*\off+ 32)*2(%rdi),%ymm6 +vmovdqa (128*\off+ 48)*2(%rdi),%ymm7 + +reduce +update 3,4,5,6,7,8,9,10,11 + +/* level 2 */ +shuffle8 5,10,7,10 +shuffle8 6,11,5,11 + +vmovdqa (_ZETAS_EXP+224*\off+48)*2(%rsi),%ymm15 +vmovdqa (_ZETAS_EXP+224*\off+64)*2(%rsi),%ymm2 + +mul 7,10,5,11 + +shuffle8 3,8,6,8 +shuffle8 4,9,3,9 + +reduce +update 4,6,8,3,9,7,10,5,11 + +/* level 3 */ +shuffle4 8,5,9,5 +shuffle4 3,11,8,11 + +vmovdqa (_ZETAS_EXP+224*\off+80)*2(%rsi),%ymm15 +vmovdqa (_ZETAS_EXP+224*\off+96)*2(%rsi),%ymm2 + +mul 9,5,8,11 + +shuffle4 4,7,3,7 +shuffle4 6,10,4,10 + +reduce +update 6,3,7,4,10,9,5,8,11 + +/* level 4 */ +shuffle2 7,8,10,8 +shuffle2 4,11,7,11 + +vmovdqa (_ZETAS_EXP+224*\off+112)*2(%rsi),%ymm15 +vmovdqa (_ZETAS_EXP+224*\off+128)*2(%rsi),%ymm2 + +mul 10,8,7,11 + +shuffle2 6,9,4,9 +shuffle2 3,5,6,5 + +reduce +update 3,4,9,6,5,10,8,7,11 + +/* level 5 */ +shuffle1 9,7,5,7 +shuffle1 6,11,9,11 + +vmovdqa (_ZETAS_EXP+224*\off+144)*2(%rsi),%ymm15 +vmovdqa (_ZETAS_EXP+224*\off+160)*2(%rsi),%ymm2 + +mul 5,7,9,11 + +shuffle1 3,10,6,10 +shuffle1 4,8,3,8 + 
+reduce +update 4,6,10,3,8,5,7,9,11 + +/* level 6 */ +vmovdqa (_ZETAS_EXP+224*\off+176)*2(%rsi),%ymm14 +vmovdqa (_ZETAS_EXP+224*\off+208)*2(%rsi),%ymm15 +vmovdqa (_ZETAS_EXP+224*\off+192)*2(%rsi),%ymm8 +vmovdqa (_ZETAS_EXP+224*\off+224)*2(%rsi),%ymm2 + +mul 10,3,9,11,14,15,8,2 + +reduce +update 8,4,6,5,7,10,3,9,11 + +vmovdqa %ymm8,(128*\off+ 0)*2(%rdi) +vmovdqa %ymm4,(128*\off+ 16)*2(%rdi) +vmovdqa %ymm10,(128*\off+ 32)*2(%rdi) +vmovdqa %ymm3,(128*\off+ 48)*2(%rdi) +vmovdqa %ymm6,(128*\off+ 64)*2(%rdi) +vmovdqa %ymm5,(128*\off+ 80)*2(%rdi) +vmovdqa %ymm9,(128*\off+ 96)*2(%rdi) +vmovdqa %ymm11,(128*\off+112)*2(%rdi) +.endm + +.text +.global cdecl(ntt_avx) +cdecl(ntt_avx): +vmovdqa _16XQ*2(%rsi),%ymm0 + +level0 0 +level0 1 + +levels1t6 0 +levels1t6 1 + +ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/ntt.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/ntt.h new file mode 100644 index 0000000000..a4f48e343b --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/ntt.h @@ -0,0 +1,28 @@ +#ifndef NTT_H +#define NTT_H + +#include +#include + +#define ntt_avx KYBER_NAMESPACE(ntt_avx) +void ntt_avx(__m256i *r, const __m256i *qdata); +#define invntt_avx KYBER_NAMESPACE(invntt_avx) +void invntt_avx(__m256i *r, const __m256i *qdata); + +#define nttpack_avx KYBER_NAMESPACE(nttpack_avx) +void nttpack_avx(__m256i *r, const __m256i *qdata); +#define nttunpack_avx KYBER_NAMESPACE(nttunpack_avx) +void nttunpack_avx(__m256i *r, const __m256i *qdata); + +#define basemul_avx KYBER_NAMESPACE(basemul_avx) +void basemul_avx(__m256i *r, + const __m256i *a, + const __m256i *b, + const __m256i *qdata); + +#define ntttobytes_avx KYBER_NAMESPACE(ntttobytes_avx) +void ntttobytes_avx(uint8_t *r, const __m256i *a, const __m256i *qdata); +#define nttfrombytes_avx KYBER_NAMESPACE(nttfrombytes_avx) +void nttfrombytes_avx(__m256i *r, const uint8_t *a, const __m256i *qdata); + +#endif diff --git 
a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/params.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/params.h new file mode 100644 index 0000000000..fdc688ea2b --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/params.h @@ -0,0 +1,68 @@ +#ifndef PARAMS_H +#define PARAMS_H + +#ifndef KYBER_K +#define KYBER_K 3 /* Change this for different security strengths */ +#endif + +//#define KYBER_90S /* Uncomment this if you want the 90S variant */ + +/* Don't change parameters below this line */ +#if (KYBER_K == 2) +#ifdef KYBER_90S +#define KYBER_NAMESPACE(s) pqcrystals_kyber512_90s_avx2_##s +#else +#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_512_ipd_avx2_##s +#endif +#elif (KYBER_K == 3) +#ifdef KYBER_90S +#define KYBER_NAMESPACE(s) pqcrystals_kyber768_90s_avx2_##s +#else +#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_768_ipd_avx2_##s +#endif +#elif (KYBER_K == 4) +#ifdef KYBER_90S +#define KYBER_NAMESPACE(s) pqcrystals_kyber1024_90s_avx2_##s +#else +#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_1024_ipd_avx2_##s +#endif +#else +#error "KYBER_K must be in {2,3,4}" +#endif + +#define KYBER_N 256 +#define KYBER_Q 3329 + +#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ +#define KYBER_SSBYTES 32 /* size in bytes of shared key */ + +#define KYBER_POLYBYTES 384 +#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) + +#if KYBER_K == 2 +#define KYBER_ETA1 3 +#define KYBER_POLYCOMPRESSEDBYTES 128 +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) +#elif KYBER_K == 3 +#define KYBER_ETA1 2 +#define KYBER_POLYCOMPRESSEDBYTES 128 +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) +#elif KYBER_K == 4 +#define KYBER_ETA1 2 +#define KYBER_POLYCOMPRESSEDBYTES 160 +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352) +#endif + +#define KYBER_ETA2 2 + +#define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES) +#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) +#define 
KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES) +#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) + +#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) +/* 32 bytes of additional space to save H(pk) */ +#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) +#define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES) + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/poly.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/poly.c new file mode 100644 index 0000000000..681fd6d23e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/poly.c @@ -0,0 +1,519 @@ +#include +#include +#include +#include "align.h" +#include "fips202x4.h" +#include "params.h" +#include "poly.h" +#include "ntt.h" +#include "consts.h" +#include "reduce.h" +#include "cbd.h" +#include "symmetric.h" + +/************************************************* +* Name: poly_compress +* +* Description: Compression and subsequent serialization of a polynomial. +* The coefficients of the input polynomial are assumed to +* lie in the interval [0,q], i.e. the polynomial must be reduced +* by poly_reduce(). 
+* +* Arguments: - uint8_t *r: pointer to output byte array +* (of length KYBER_POLYCOMPRESSEDBYTES) +* - const poly *a: pointer to input polynomial +**************************************************/ +#if (KYBER_POLYCOMPRESSEDBYTES == 128) +void poly_compress(uint8_t r[128], const poly * restrict a) +{ + unsigned int i; + __m256i f0, f1, f2, f3; + const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); + const __m256i shift1 = _mm256_set1_epi16(1 << 9); + const __m256i mask = _mm256_set1_epi16(15); + const __m256i shift2 = _mm256_set1_epi16((16 << 8) + 1); + const __m256i permdidx = _mm256_set_epi32(7,3,6,2,5,1,4,0); + + for(i=0;ivec[4*i+0]); + f1 = _mm256_load_si256(&a->vec[4*i+1]); + f2 = _mm256_load_si256(&a->vec[4*i+2]); + f3 = _mm256_load_si256(&a->vec[4*i+3]); + f0 = _mm256_mulhi_epi16(f0,v); + f1 = _mm256_mulhi_epi16(f1,v); + f2 = _mm256_mulhi_epi16(f2,v); + f3 = _mm256_mulhi_epi16(f3,v); + f0 = _mm256_mulhrs_epi16(f0,shift1); + f1 = _mm256_mulhrs_epi16(f1,shift1); + f2 = _mm256_mulhrs_epi16(f2,shift1); + f3 = _mm256_mulhrs_epi16(f3,shift1); + f0 = _mm256_and_si256(f0,mask); + f1 = _mm256_and_si256(f1,mask); + f2 = _mm256_and_si256(f2,mask); + f3 = _mm256_and_si256(f3,mask); + f0 = _mm256_packus_epi16(f0,f1); + f2 = _mm256_packus_epi16(f2,f3); + f0 = _mm256_maddubs_epi16(f0,shift2); + f2 = _mm256_maddubs_epi16(f2,shift2); + f0 = _mm256_packus_epi16(f0,f2); + f0 = _mm256_permutevar8x32_epi32(f0,permdidx); + _mm256_storeu_si256((__m256i *)&r[32*i],f0); + } +} + +void poly_decompress(poly * restrict r, const uint8_t a[128]) +{ + unsigned int i; + __m128i t; + __m256i f; + const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]); + const __m256i shufbidx = _mm256_set_epi8(7,7,7,7,6,6,6,6,5,5,5,5,4,4,4,4, + 3,3,3,3,2,2,2,2,1,1,1,1,0,0,0,0); + const __m256i mask = _mm256_set1_epi32(0x00F0000F); + const __m256i shift = _mm256_set1_epi32((128 << 16) + 2048); + + for(i=0;ivec[i],f); + } +} + +#elif (KYBER_POLYCOMPRESSEDBYTES == 160) +void poly_compress(uint8_t 
r[160], const poly * restrict a) +{ + unsigned int i; + __m256i f0, f1; + __m128i t0, t1; + const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); + const __m256i shift1 = _mm256_set1_epi16(1 << 10); + const __m256i mask = _mm256_set1_epi16(31); + const __m256i shift2 = _mm256_set1_epi16((32 << 8) + 1); + const __m256i shift3 = _mm256_set1_epi32((1024 << 16) + 1); + const __m256i sllvdidx = _mm256_set1_epi64x(12); + const __m256i shufbidx = _mm256_set_epi8( 8,-1,-1,-1,-1,-1, 4, 3, 2, 1, 0,-1,12,11,10, 9, + -1,12,11,10, 9, 8,-1,-1,-1,-1,-1 ,4, 3, 2, 1, 0); + + for(i=0;ivec[2*i+0]); + f1 = _mm256_load_si256(&a->vec[2*i+1]); + f0 = _mm256_mulhi_epi16(f0,v); + f1 = _mm256_mulhi_epi16(f1,v); + f0 = _mm256_mulhrs_epi16(f0,shift1); + f1 = _mm256_mulhrs_epi16(f1,shift1); + f0 = _mm256_and_si256(f0,mask); + f1 = _mm256_and_si256(f1,mask); + f0 = _mm256_packus_epi16(f0,f1); + f0 = _mm256_maddubs_epi16(f0,shift2); // a0 a1 a2 a3 b0 b1 b2 b3 a4 a5 a6 a7 b4 b5 b6 b7 + f0 = _mm256_madd_epi16(f0,shift3); // a0 a1 b0 b1 a2 a3 b2 b3 + f0 = _mm256_sllv_epi32(f0,sllvdidx); + f0 = _mm256_srlv_epi64(f0,sllvdidx); + f0 = _mm256_shuffle_epi8(f0,shufbidx); + t0 = _mm256_castsi256_si128(f0); + t1 = _mm256_extracti128_si256(f0,1); + t0 = _mm_blendv_epi8(t0,t1,_mm256_castsi256_si128(shufbidx)); + _mm_storeu_si128((__m128i *)&r[20*i+ 0],t0); + memcpy(&r[20*i+16],&t1,4); + } +} + +void poly_decompress(poly * restrict r, const uint8_t a[160]) +{ + unsigned int i; + __m128i t; + __m256i f; + int16_t ti; + const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]); + const __m256i shufbidx = _mm256_set_epi8(9,9,9,8,8,8,8,7,7,6,6,6,6,5,5,5, + 4,4,4,3,3,3,3,2,2,1,1,1,1,0,0,0); + const __m256i mask = _mm256_set_epi16(248,1984,62,496,3968,124,992,31, + 248,1984,62,496,3968,124,992,31); + const __m256i shift = _mm256_set_epi16(128,16,512,64,8,256,32,1024, + 128,16,512,64,8,256,32,1024); + + for(i=0;ivec[i],f); + } +} + +#endif + +/************************************************* +* Name: 
poly_tobytes +* +* Description: Serialization of a polynomial in NTT representation. +* The coefficients of the input polynomial are assumed to +* lie in the interval [0,q], i.e. the polynomial must be reduced +* by poly_reduce(). The coefficients are ordered as output by +* poly_ntt(); the serialized output coefficients are in bitreversed +* order. +* +* Arguments: - uint8_t *r: pointer to output byte array +* (needs space for KYBER_POLYBYTES bytes) +* - poly *a: pointer to input polynomial +**************************************************/ +void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a) +{ + ntttobytes_avx(r, a->vec, qdata.vec); +} + +/************************************************* +* Name: poly_frombytes +* +* Description: De-serialization of a polynomial; +* inverse of poly_tobytes +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: pointer to input byte array +* (of KYBER_POLYBYTES bytes) +**************************************************/ +void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]) +{ + nttfrombytes_avx(r->vec, a, qdata.vec); +} + +/************************************************* +* Name: poly_frommsg +* +* Description: Convert 32-byte message to polynomial +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *msg: pointer to input message +**************************************************/ +void poly_frommsg(poly * restrict r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]) +{ +#if (KYBER_INDCPA_MSGBYTES != 32) +#error "KYBER_INDCPA_MSGBYTES must be equal to 32!" 
+#endif + __m256i f, g0, g1, g2, g3, h0, h1, h2, h3; + const __m256i shift = _mm256_broadcastsi128_si256(_mm_set_epi32(0,1,2,3)); + const __m256i idx = _mm256_broadcastsi128_si256(_mm_set_epi8(15,14,11,10,7,6,3,2,13,12,9,8,5,4,1,0)); + const __m256i hqs = _mm256_set1_epi16((KYBER_Q+1)/2); + +#define FROMMSG64(i) \ + g3 = _mm256_shuffle_epi32(f,0x55*i); \ + g3 = _mm256_sllv_epi32(g3,shift); \ + g3 = _mm256_shuffle_epi8(g3,idx); \ + g0 = _mm256_slli_epi16(g3,12); \ + g1 = _mm256_slli_epi16(g3,8); \ + g2 = _mm256_slli_epi16(g3,4); \ + g0 = _mm256_srai_epi16(g0,15); \ + g1 = _mm256_srai_epi16(g1,15); \ + g2 = _mm256_srai_epi16(g2,15); \ + g3 = _mm256_srai_epi16(g3,15); \ + g0 = _mm256_and_si256(g0,hqs); /* 19 18 17 16 3 2 1 0 */ \ + g1 = _mm256_and_si256(g1,hqs); /* 23 22 21 20 7 6 5 4 */ \ + g2 = _mm256_and_si256(g2,hqs); /* 27 26 25 24 11 10 9 8 */ \ + g3 = _mm256_and_si256(g3,hqs); /* 31 30 29 28 15 14 13 12 */ \ + h0 = _mm256_unpacklo_epi64(g0,g1); \ + h2 = _mm256_unpackhi_epi64(g0,g1); \ + h1 = _mm256_unpacklo_epi64(g2,g3); \ + h3 = _mm256_unpackhi_epi64(g2,g3); \ + g0 = _mm256_permute2x128_si256(h0,h1,0x20); \ + g2 = _mm256_permute2x128_si256(h0,h1,0x31); \ + g1 = _mm256_permute2x128_si256(h2,h3,0x20); \ + g3 = _mm256_permute2x128_si256(h2,h3,0x31); \ + _mm256_store_si256(&r->vec[0+2*i+0],g0); \ + _mm256_store_si256(&r->vec[0+2*i+1],g1); \ + _mm256_store_si256(&r->vec[8+2*i+0],g2); \ + _mm256_store_si256(&r->vec[8+2*i+1],g3) + + f = _mm256_loadu_si256((__m256i *)msg); + FROMMSG64(0); + FROMMSG64(1); + FROMMSG64(2); + FROMMSG64(3); +} + +/************************************************* +* Name: poly_tomsg +* +* Description: Convert polynomial to 32-byte message. +* The coefficients of the input polynomial are assumed to +* lie in the interval [0,q], i.e. the polynomial must be reduced +* by poly_reduce(). 
+* +* Arguments: - uint8_t *msg: pointer to output message +* - poly *a: pointer to input polynomial +**************************************************/ +void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly * restrict a) +{ + unsigned int i; + uint32_t small; + __m256i f0, f1, g0, g1; + const __m256i hq = _mm256_set1_epi16((KYBER_Q - 1)/2); + const __m256i hhq = _mm256_set1_epi16((KYBER_Q - 1)/4); + + for(i=0;ivec[2*i+0]); + f1 = _mm256_load_si256(&a->vec[2*i+1]); + f0 = _mm256_sub_epi16(hq, f0); + f1 = _mm256_sub_epi16(hq, f1); + g0 = _mm256_srai_epi16(f0, 15); + g1 = _mm256_srai_epi16(f1, 15); + f0 = _mm256_xor_si256(f0, g0); + f1 = _mm256_xor_si256(f1, g1); + f0 = _mm256_sub_epi16(f0, hhq); + f1 = _mm256_sub_epi16(f1, hhq); + f0 = _mm256_packs_epi16(f0, f1); + f0 = _mm256_permute4x64_epi64(f0, 0xD8); + small = _mm256_movemask_epi8(f0); + memcpy(&msg[4*i], &small, 4); + } +} + +/************************************************* +* Name: poly_getnoise_eta1 +* +* Description: Sample a polynomial deterministically from a seed and a nonce, +* with output polynomial close to centered binomial distribution +* with parameter KYBER_ETA1 +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *seed: pointer to input seed +* (of length KYBER_SYMBYTES bytes) +* - uint8_t nonce: one-byte input nonce +**************************************************/ +void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) +{ + ALIGNED_UINT8(KYBER_ETA1*KYBER_N/4+32) buf; // +32 bytes as required by poly_cbd_eta1 + prf(buf.coeffs, KYBER_ETA1*KYBER_N/4, seed, nonce); + poly_cbd_eta1(r, buf.vec); +} + +/************************************************* +* Name: poly_getnoise_eta2 +* +* Description: Sample a polynomial deterministically from a seed and a nonce, +* with output polynomial close to centered binomial distribution +* with parameter KYBER_ETA2 +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *seed: 
pointer to input seed +* (of length KYBER_SYMBYTES bytes) +* - uint8_t nonce: one-byte input nonce +**************************************************/ +void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) +{ + ALIGNED_UINT8(KYBER_ETA2*KYBER_N/4) buf; + prf(buf.coeffs, KYBER_ETA2*KYBER_N/4, seed, nonce); + poly_cbd_eta2(r, buf.vec); +} + +#ifndef KYBER_90S +#define NOISE_NBLOCKS ((KYBER_ETA1*KYBER_N/4+SHAKE256_RATE-1)/SHAKE256_RATE) +void poly_getnoise_eta1_4x(poly *r0, + poly *r1, + poly *r2, + poly *r3, + const uint8_t seed[32], + uint8_t nonce0, + uint8_t nonce1, + uint8_t nonce2, + uint8_t nonce3) +{ + ALIGNED_UINT8(NOISE_NBLOCKS*SHAKE256_RATE) buf[4]; + __m256i f; + shake256x4incctx state; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + buf[0].coeffs[32] = nonce0; + buf[1].coeffs[32] = nonce1; + buf[2].coeffs[32] = nonce2; + buf[3].coeffs[32] = nonce3; + + shake256x4_inc_init(&state); + shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 33); + shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, NOISE_NBLOCKS, &state); + shake256x4_inc_ctx_release(&state); + + poly_cbd_eta1(r0, buf[0].vec); + poly_cbd_eta1(r1, buf[1].vec); + poly_cbd_eta1(r2, buf[2].vec); + poly_cbd_eta1(r3, buf[3].vec); +} + +#if KYBER_K == 2 +void poly_getnoise_eta1122_4x(poly *r0, + poly *r1, + poly *r2, + poly *r3, + const uint8_t seed[32], + uint8_t nonce0, + uint8_t nonce1, + uint8_t nonce2, + uint8_t nonce3) +{ + ALIGNED_UINT8(NOISE_NBLOCKS*SHAKE256_RATE) buf[4]; + __m256i f; + shake256x4incctx state; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + buf[0].coeffs[32] = nonce0; + 
buf[1].coeffs[32] = nonce1; + buf[2].coeffs[32] = nonce2; + buf[3].coeffs[32] = nonce3; + + shake256x4_inc_init(&state); + shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 33); + shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, NOISE_NBLOCKS, &state); + shake256x4_inc_ctx_release(&state); + + poly_cbd_eta1(r0, buf[0].vec); + poly_cbd_eta1(r1, buf[1].vec); + poly_cbd_eta2(r2, buf[2].vec); + poly_cbd_eta2(r3, buf[3].vec); +} +#endif +#endif + +/************************************************* +* Name: poly_ntt +* +* Description: Computes negacyclic number-theoretic transform (NTT) of +* a polynomial in place. +* Input coefficients assumed to be in normal order, +* output coefficients are in special order that is natural +* for the vectorization. Input coefficients are assumed to be +* bounded by q in absolute value, output coefficients are bounded +* by 16118 in absolute value. +* +* Arguments: - poly *r: pointer to in/output polynomial +**************************************************/ +void poly_ntt(poly *r) +{ + ntt_avx(r->vec, qdata.vec); +} + +/************************************************* +* Name: poly_invntt_tomont +* +* Description: Computes inverse of negacyclic number-theoretic transform (NTT) +* of a polynomial in place; +* Input coefficients assumed to be in special order from vectorized +* forward ntt, output in normal order. Input coefficients can be +* arbitrary 16-bit integers, output coefficients are bounded by 14870 +* in absolute value. +* +* Arguments: - poly *r: pointer to in/output polynomial +**************************************************/ +void poly_invntt_tomont(poly *r) +{ + invntt_avx(r->vec, qdata.vec); +} + +void poly_nttunpack(poly *r) +{ + nttunpack_avx(r->vec, qdata.vec); +} + +/************************************************* +* Name: poly_basemul_montgomery +* +* Description: Multiplication of two polynomials in NTT domain. 
+* One of the input polynomials needs to have coefficients +* bounded by q, the other polynomial can have arbitrary +* coefficients. Output coefficients are bounded by 6656. +* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_basemul_montgomery(poly *r, const poly *a, const poly *b) +{ + basemul_avx(r->vec, a->vec, b->vec, qdata.vec); +} + +/************************************************* +* Name: poly_tomont +* +* Description: Inplace conversion of all coefficients of a polynomial +* from normal domain to Montgomery domain +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void poly_tomont(poly *r) +{ + tomont_avx(r->vec, qdata.vec); +} + +/************************************************* +* Name: poly_reduce +* +* Description: Applies Barrett reduction to all coefficients of a polynomial +* for details of the Barrett reduction see comments in reduce.c +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void poly_reduce(poly *r) +{ + reduce_avx(r->vec, qdata.vec); +} + +/************************************************* +* Name: poly_add +* +* Description: Add two polynomials. No modular reduction +* is performed. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_add(poly *r, const poly *a, const poly *b) +{ + unsigned int i; + __m256i f0, f1; + + for(i=0;ivec[i]); + f1 = _mm256_load_si256(&b->vec[i]); + f0 = _mm256_add_epi16(f0, f1); + _mm256_store_si256(&r->vec[i], f0); + } +} + +/************************************************* +* Name: poly_sub +* +* Description: Subtract two polynomials. No modular reduction +* is performed. +* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_sub(poly *r, const poly *a, const poly *b) +{ + unsigned int i; + __m256i f0, f1; + + for(i=0;ivec[i]); + f1 = _mm256_load_si256(&b->vec[i]); + f0 = _mm256_sub_epi16(f0, f1); + _mm256_store_si256(&r->vec[i], f0); + } +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/poly.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/poly.h new file mode 100644 index 0000000000..6a9cf71c70 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/poly.h @@ -0,0 +1,77 @@ +#ifndef POLY_H +#define POLY_H + +#include +#include "align.h" +#include "params.h" + +typedef ALIGNED_INT16(KYBER_N) poly; + +#define poly_compress KYBER_NAMESPACE(poly_compress) +void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a); +#define poly_decompress KYBER_NAMESPACE(poly_decompress) +void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]); + +#define poly_tobytes KYBER_NAMESPACE(poly_tobytes) +void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a); +#define poly_frombytes KYBER_NAMESPACE(poly_frombytes) +void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]); + +#define 
poly_frommsg KYBER_NAMESPACE(poly_frommsg) +void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]); +#define poly_tomsg KYBER_NAMESPACE(poly_tomsg) +void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r); + +#define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1) +void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); + +#define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2) +void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); + +#ifndef KYBER_90S +#define poly_getnoise_eta1_4x KYBER_NAMESPACE(poly_getnoise_eta2_4x) +void poly_getnoise_eta1_4x(poly *r0, + poly *r1, + poly *r2, + poly *r3, + const uint8_t seed[32], + uint8_t nonce0, + uint8_t nonce1, + uint8_t nonce2, + uint8_t nonce3); + +#if KYBER_K == 2 +#define poly_getnoise_eta1122_4x KYBER_NAMESPACE(poly_getnoise_eta1122_4x) +void poly_getnoise_eta1122_4x(poly *r0, + poly *r1, + poly *r2, + poly *r3, + const uint8_t seed[32], + uint8_t nonce0, + uint8_t nonce1, + uint8_t nonce2, + uint8_t nonce3); +#endif +#endif + + +#define poly_ntt KYBER_NAMESPACE(poly_ntt) +void poly_ntt(poly *r); +#define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont) +void poly_invntt_tomont(poly *r); +#define poly_nttunpack KYBER_NAMESPACE(poly_nttunpack) +void poly_nttunpack(poly *r); +#define poly_basemul_montgomery KYBER_NAMESPACE(poly_basemul_montgomery) +void poly_basemul_montgomery(poly *r, const poly *a, const poly *b); +#define poly_tomont KYBER_NAMESPACE(poly_tomont) +void poly_tomont(poly *r); + +#define poly_reduce KYBER_NAMESPACE(poly_reduce) +void poly_reduce(poly *r); + +#define poly_add KYBER_NAMESPACE(poly_add) +void poly_add(poly *r, const poly *a, const poly *b); +#define poly_sub KYBER_NAMESPACE(poly_sub) +void poly_sub(poly *r, const poly *a, const poly *b); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/polyvec.c 
b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/polyvec.c new file mode 100644 index 0000000000..a0174b7b3f --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/polyvec.c @@ -0,0 +1,307 @@ +#include +#include +#include +#include "params.h" +#include "polyvec.h" +#include "poly.h" +#include "ntt.h" +#include "consts.h" + +#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) +static void poly_compress10(uint8_t r[320], const poly * restrict a) +{ + unsigned int i; + __m256i f0, f1, f2; + __m128i t0, t1; + const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); + const __m256i v8 = _mm256_slli_epi16(v,3); + const __m256i off = _mm256_set1_epi16(15); + const __m256i shift1 = _mm256_set1_epi16(1 << 12); + const __m256i mask = _mm256_set1_epi16(1023); + const __m256i shift2 = _mm256_set1_epi64x((1024LL << 48) + (1LL << 32) + (1024 << 16) + 1); + const __m256i sllvdidx = _mm256_set1_epi64x(12); + const __m256i shufbidx = _mm256_set_epi8( 8, 4, 3, 2, 1, 0,-1,-1,-1,-1,-1,-1,12,11,10, 9, + -1,-1,-1,-1,-1,-1,12,11,10, 9, 8, 4, 3, 2, 1, 0); + + for(i=0;ivec[i]); + f1 = _mm256_mullo_epi16(f0,v8); + f2 = _mm256_add_epi16(f0,off); + f0 = _mm256_slli_epi16(f0,3); + f0 = _mm256_mulhi_epi16(f0,v); + f2 = _mm256_sub_epi16(f1,f2); + f1 = _mm256_andnot_si256(f1,f2); + f1 = _mm256_srli_epi16(f1,15); + f0 = _mm256_sub_epi16(f0,f1); + f0 = _mm256_mulhrs_epi16(f0,shift1); + f0 = _mm256_and_si256(f0,mask); + f0 = _mm256_madd_epi16(f0,shift2); + f0 = _mm256_sllv_epi32(f0,sllvdidx); + f0 = _mm256_srli_epi64(f0,12); + f0 = _mm256_shuffle_epi8(f0,shufbidx); + t0 = _mm256_castsi256_si128(f0); + t1 = _mm256_extracti128_si256(f0,1); + t0 = _mm_blend_epi16(t0,t1,0xE0); + _mm_storeu_si128((__m128i *)&r[20*i+ 0],t0); + memcpy(&r[20*i+16],&t1,4); + } +} + +static void poly_decompress10(poly * restrict r, const uint8_t a[320+12]) +{ + unsigned int i; + __m256i f; + const __m256i q = _mm256_set1_epi32((KYBER_Q << 16) + 4*KYBER_Q); + const __m256i shufbidx = 
_mm256_set_epi8(11,10,10, 9, 9, 8, 8, 7, + 6, 5, 5, 4, 4, 3, 3, 2, + 9, 8, 8, 7, 7, 6, 6, 5, + 4, 3, 3, 2, 2, 1, 1, 0); + const __m256i sllvdidx = _mm256_set1_epi64x(4); + const __m256i mask = _mm256_set1_epi32((32736 << 16) + 8184); + + for(i=0;ivec[i],f); + } +} + +#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) +static void poly_compress11(uint8_t r[352+2], const poly * restrict a) +{ + unsigned int i; + __m256i f0, f1, f2; + __m128i t0, t1; + const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); + const __m256i v8 = _mm256_slli_epi16(v,3); + const __m256i off = _mm256_set1_epi16(36); + const __m256i shift1 = _mm256_set1_epi16(1 << 13); + const __m256i mask = _mm256_set1_epi16(2047); + const __m256i shift2 = _mm256_set1_epi64x((2048LL << 48) + (1LL << 32) + (2048 << 16) + 1); + const __m256i sllvdidx = _mm256_set1_epi64x(10); + const __m256i srlvqidx = _mm256_set_epi64x(30,10,30,10); + const __m256i shufbidx = _mm256_set_epi8( 4, 3, 2, 1, 0, 0,-1,-1,-1,-1,10, 9, 8, 7, 6, 5, + -1,-1,-1,-1,-1,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + + for(i=0;ivec[i]); + f1 = _mm256_mullo_epi16(f0,v8); + f2 = _mm256_add_epi16(f0,off); + f0 = _mm256_slli_epi16(f0,3); + f0 = _mm256_mulhi_epi16(f0,v); + f2 = _mm256_sub_epi16(f1,f2); + f1 = _mm256_andnot_si256(f1,f2); + f1 = _mm256_srli_epi16(f1,15); + f0 = _mm256_sub_epi16(f0,f1); + f0 = _mm256_mulhrs_epi16(f0,shift1); + f0 = _mm256_and_si256(f0,mask); + f0 = _mm256_madd_epi16(f0,shift2); + f0 = _mm256_sllv_epi32(f0,sllvdidx); + f1 = _mm256_bsrli_epi128(f0,8); + f0 = _mm256_srlv_epi64(f0,srlvqidx); + f1 = _mm256_slli_epi64(f1,34); + f0 = _mm256_add_epi64(f0,f1); + f0 = _mm256_shuffle_epi8(f0,shufbidx); + t0 = _mm256_castsi256_si128(f0); + t1 = _mm256_extracti128_si256(f0,1); + t0 = _mm_blendv_epi8(t0,t1,_mm256_castsi256_si128(shufbidx)); + _mm_storeu_si128((__m128i *)&r[22*i+ 0],t0); + _mm_storel_epi64((__m128i *)&r[22*i+16],t1); + } +} + +static void poly_decompress11(poly * restrict r, const uint8_t a[352+10]) +{ + 
unsigned int i; + __m256i f; + const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]); + const __m256i shufbidx = _mm256_set_epi8(13,12,12,11,10, 9, 9, 8, + 8, 7, 6, 5, 5, 4, 4, 3, + 10, 9, 9, 8, 7, 6, 6, 5, + 5, 4, 3, 2, 2, 1, 1, 0); + const __m256i srlvdidx = _mm256_set_epi32(0,0,1,0,0,0,1,0); + const __m256i srlvqidx = _mm256_set_epi64x(2,0,2,0); + const __m256i shift = _mm256_set_epi16(4,32,1,8,32,1,4,32,4,32,1,8,32,1,4,32); + const __m256i mask = _mm256_set1_epi16(32752); + + for(i=0;ivec[i],f); + } +} + +#endif + +/************************************************* +* Name: polyvec_compress +* +* Description: Compress and serialize vector of polynomials +* +* Arguments: - uint8_t *r: pointer to output byte array +* (needs space for KYBER_POLYVECCOMPRESSEDBYTES) +* - polyvec *a: pointer to input vector of polynomials +**************************************************/ +void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES+2], const polyvec *a) +{ + unsigned int i; + +#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) + for(i=0;ivec[i]); +#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) + for(i=0;ivec[i]); +#endif +} + +/************************************************* +* Name: polyvec_decompress +* +* Description: De-serialize and decompress vector of polynomials; +* approximate inverse of polyvec_compress +* +* Arguments: - polyvec *r: pointer to output vector of polynomials +* - const uint8_t *a: pointer to input byte array +* (of length KYBER_POLYVECCOMPRESSEDBYTES) +**************************************************/ +void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES+12]) +{ + unsigned int i; + +#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) + for(i=0;ivec[i],&a[320*i]); +#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) + for(i=0;ivec[i],&a[352*i]); +#endif +} + +/************************************************* +* Name: polyvec_tobytes +* +* Description: Serialize vector of polynomials +* 
+* Arguments: - uint8_t *r: pointer to output byte array +* (needs space for KYBER_POLYVECBYTES) +* - polyvec *a: pointer to input vector of polynomials +**************************************************/ +void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_frombytes +* +* Description: De-serialize vector of polynomials; +* inverse of polyvec_tobytes +* +* Arguments: - polyvec *r: pointer to output vector of polynomials +* - const uint8_t *a: pointer to input byte array +* (of length KYBER_POLYVECBYTES) +**************************************************/ +void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]) +{ + unsigned int i; + for(i=0;ivec[i], a+i*KYBER_POLYBYTES); +} + +/************************************************* +* Name: polyvec_ntt +* +* Description: Apply forward NTT to all elements of a vector of polynomials +* +* Arguments: - polyvec *r: pointer to in/output vector of polynomials +**************************************************/ +void polyvec_ntt(polyvec *r) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_invntt_tomont +* +* Description: Apply inverse NTT to all elements of a vector of polynomials +* and multiply by Montgomery factor 2^16 +* +* Arguments: - polyvec *r: pointer to in/output vector of polynomials +**************************************************/ +void polyvec_invntt_tomont(polyvec *r) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_basemul_acc_montgomery +* +* Description: Multiply elements in a and b in NTT domain, accumulate into r, +* and multiply by 2^-16. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const polyvec *a: pointer to first input vector of polynomials +* - const polyvec *b: pointer to second input vector of polynomials +**************************************************/ +void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) +{ + unsigned int i; + poly tmp; + + poly_basemul_montgomery(r,&a->vec[0],&b->vec[0]); + for(i=1;ivec[i],&b->vec[i]); + poly_add(r,r,&tmp); + } +} + +/************************************************* +* Name: polyvec_reduce +* +* Description: Applies Barrett reduction to each coefficient +* of each element of a vector of polynomials; +* for details of the Barrett reduction see comments in reduce.c +* +* Arguments: - polyvec *r: pointer to input/output polynomial +**************************************************/ +void polyvec_reduce(polyvec *r) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_add +* +* Description: Add vectors of polynomials +* +* Arguments: - polyvec *r: pointer to output vector of polynomials +* - const polyvec *a: pointer to first input vector of polynomials +* - const polyvec *b: pointer to second input vector of polynomials +**************************************************/ +void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) +{ + unsigned int i; + for(i=0;ivec[i], &a->vec[i], &b->vec[i]); +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/polyvec.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/polyvec.h new file mode 100644 index 0000000000..2ce23c31ff --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/polyvec.h @@ -0,0 +1,36 @@ +#ifndef POLYVEC_H +#define POLYVEC_H + +#include +#include "params.h" +#include "poly.h" + +typedef struct{ + poly vec[KYBER_K]; +} polyvec; + +#define polyvec_compress KYBER_NAMESPACE(polyvec_compress) +void polyvec_compress(uint8_t 
r[KYBER_POLYVECCOMPRESSEDBYTES+2], const polyvec *a); +#define polyvec_decompress KYBER_NAMESPACE(polyvec_decompress) +void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES+12]); + +#define polyvec_tobytes KYBER_NAMESPACE(polyvec_tobytes) +void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a); +#define polyvec_frombytes KYBER_NAMESPACE(polyvec_frombytes) +void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]); + +#define polyvec_ntt KYBER_NAMESPACE(polyvec_ntt) +void polyvec_ntt(polyvec *r); +#define polyvec_invntt_tomont KYBER_NAMESPACE(polyvec_invntt_tomont) +void polyvec_invntt_tomont(polyvec *r); + +#define polyvec_basemul_acc_montgomery KYBER_NAMESPACE(polyvec_basemul_acc_montgomery) +void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b); + +#define polyvec_reduce KYBER_NAMESPACE(polyvec_reduce) +void polyvec_reduce(polyvec *r); + +#define polyvec_add KYBER_NAMESPACE(polyvec_add) +void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/reduce.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/reduce.h new file mode 100644 index 0000000000..5368185b5f --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/reduce.h @@ -0,0 +1,12 @@ +#ifndef REDUCE_H +#define REDUCE_H + +#include "params.h" +#include + +#define reduce_avx KYBER_NAMESPACE(reduce_avx) +void reduce_avx(__m256i *r, const __m256i *qdata); +#define tomont_avx KYBER_NAMESPACE(tomont_avx) +void tomont_avx(__m256i *r, const __m256i *qdata); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/rejsample.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/rejsample.c new file mode 100644 index 0000000000..9060a44cb9 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/rejsample.c @@ -0,0 +1,398 @@ +#include +#include 
+#include +#include "params.h" +#include "consts.h" +#include "rejsample.h" + +//#define BMI + +#ifndef BMI +static const uint8_t idx[256][8] = { + {-1, -1, -1, -1, -1, -1, -1, -1}, + { 0, -1, -1, -1, -1, -1, -1, -1}, + { 2, -1, -1, -1, -1, -1, -1, -1}, + { 0, 2, -1, -1, -1, -1, -1, -1}, + { 4, -1, -1, -1, -1, -1, -1, -1}, + { 0, 4, -1, -1, -1, -1, -1, -1}, + { 2, 4, -1, -1, -1, -1, -1, -1}, + { 0, 2, 4, -1, -1, -1, -1, -1}, + { 6, -1, -1, -1, -1, -1, -1, -1}, + { 0, 6, -1, -1, -1, -1, -1, -1}, + { 2, 6, -1, -1, -1, -1, -1, -1}, + { 0, 2, 6, -1, -1, -1, -1, -1}, + { 4, 6, -1, -1, -1, -1, -1, -1}, + { 0, 4, 6, -1, -1, -1, -1, -1}, + { 2, 4, 6, -1, -1, -1, -1, -1}, + { 0, 2, 4, 6, -1, -1, -1, -1}, + { 8, -1, -1, -1, -1, -1, -1, -1}, + { 0, 8, -1, -1, -1, -1, -1, -1}, + { 2, 8, -1, -1, -1, -1, -1, -1}, + { 0, 2, 8, -1, -1, -1, -1, -1}, + { 4, 8, -1, -1, -1, -1, -1, -1}, + { 0, 4, 8, -1, -1, -1, -1, -1}, + { 2, 4, 8, -1, -1, -1, -1, -1}, + { 0, 2, 4, 8, -1, -1, -1, -1}, + { 6, 8, -1, -1, -1, -1, -1, -1}, + { 0, 6, 8, -1, -1, -1, -1, -1}, + { 2, 6, 8, -1, -1, -1, -1, -1}, + { 0, 2, 6, 8, -1, -1, -1, -1}, + { 4, 6, 8, -1, -1, -1, -1, -1}, + { 0, 4, 6, 8, -1, -1, -1, -1}, + { 2, 4, 6, 8, -1, -1, -1, -1}, + { 0, 2, 4, 6, 8, -1, -1, -1}, + {10, -1, -1, -1, -1, -1, -1, -1}, + { 0, 10, -1, -1, -1, -1, -1, -1}, + { 2, 10, -1, -1, -1, -1, -1, -1}, + { 0, 2, 10, -1, -1, -1, -1, -1}, + { 4, 10, -1, -1, -1, -1, -1, -1}, + { 0, 4, 10, -1, -1, -1, -1, -1}, + { 2, 4, 10, -1, -1, -1, -1, -1}, + { 0, 2, 4, 10, -1, -1, -1, -1}, + { 6, 10, -1, -1, -1, -1, -1, -1}, + { 0, 6, 10, -1, -1, -1, -1, -1}, + { 2, 6, 10, -1, -1, -1, -1, -1}, + { 0, 2, 6, 10, -1, -1, -1, -1}, + { 4, 6, 10, -1, -1, -1, -1, -1}, + { 0, 4, 6, 10, -1, -1, -1, -1}, + { 2, 4, 6, 10, -1, -1, -1, -1}, + { 0, 2, 4, 6, 10, -1, -1, -1}, + { 8, 10, -1, -1, -1, -1, -1, -1}, + { 0, 8, 10, -1, -1, -1, -1, -1}, + { 2, 8, 10, -1, -1, -1, -1, -1}, + { 0, 2, 8, 10, -1, -1, -1, -1}, + { 4, 8, 10, -1, -1, -1, -1, -1}, + { 0, 4, 8, 10, 
-1, -1, -1, -1}, + { 2, 4, 8, 10, -1, -1, -1, -1}, + { 0, 2, 4, 8, 10, -1, -1, -1}, + { 6, 8, 10, -1, -1, -1, -1, -1}, + { 0, 6, 8, 10, -1, -1, -1, -1}, + { 2, 6, 8, 10, -1, -1, -1, -1}, + { 0, 2, 6, 8, 10, -1, -1, -1}, + { 4, 6, 8, 10, -1, -1, -1, -1}, + { 0, 4, 6, 8, 10, -1, -1, -1}, + { 2, 4, 6, 8, 10, -1, -1, -1}, + { 0, 2, 4, 6, 8, 10, -1, -1}, + {12, -1, -1, -1, -1, -1, -1, -1}, + { 0, 12, -1, -1, -1, -1, -1, -1}, + { 2, 12, -1, -1, -1, -1, -1, -1}, + { 0, 2, 12, -1, -1, -1, -1, -1}, + { 4, 12, -1, -1, -1, -1, -1, -1}, + { 0, 4, 12, -1, -1, -1, -1, -1}, + { 2, 4, 12, -1, -1, -1, -1, -1}, + { 0, 2, 4, 12, -1, -1, -1, -1}, + { 6, 12, -1, -1, -1, -1, -1, -1}, + { 0, 6, 12, -1, -1, -1, -1, -1}, + { 2, 6, 12, -1, -1, -1, -1, -1}, + { 0, 2, 6, 12, -1, -1, -1, -1}, + { 4, 6, 12, -1, -1, -1, -1, -1}, + { 0, 4, 6, 12, -1, -1, -1, -1}, + { 2, 4, 6, 12, -1, -1, -1, -1}, + { 0, 2, 4, 6, 12, -1, -1, -1}, + { 8, 12, -1, -1, -1, -1, -1, -1}, + { 0, 8, 12, -1, -1, -1, -1, -1}, + { 2, 8, 12, -1, -1, -1, -1, -1}, + { 0, 2, 8, 12, -1, -1, -1, -1}, + { 4, 8, 12, -1, -1, -1, -1, -1}, + { 0, 4, 8, 12, -1, -1, -1, -1}, + { 2, 4, 8, 12, -1, -1, -1, -1}, + { 0, 2, 4, 8, 12, -1, -1, -1}, + { 6, 8, 12, -1, -1, -1, -1, -1}, + { 0, 6, 8, 12, -1, -1, -1, -1}, + { 2, 6, 8, 12, -1, -1, -1, -1}, + { 0, 2, 6, 8, 12, -1, -1, -1}, + { 4, 6, 8, 12, -1, -1, -1, -1}, + { 0, 4, 6, 8, 12, -1, -1, -1}, + { 2, 4, 6, 8, 12, -1, -1, -1}, + { 0, 2, 4, 6, 8, 12, -1, -1}, + {10, 12, -1, -1, -1, -1, -1, -1}, + { 0, 10, 12, -1, -1, -1, -1, -1}, + { 2, 10, 12, -1, -1, -1, -1, -1}, + { 0, 2, 10, 12, -1, -1, -1, -1}, + { 4, 10, 12, -1, -1, -1, -1, -1}, + { 0, 4, 10, 12, -1, -1, -1, -1}, + { 2, 4, 10, 12, -1, -1, -1, -1}, + { 0, 2, 4, 10, 12, -1, -1, -1}, + { 6, 10, 12, -1, -1, -1, -1, -1}, + { 0, 6, 10, 12, -1, -1, -1, -1}, + { 2, 6, 10, 12, -1, -1, -1, -1}, + { 0, 2, 6, 10, 12, -1, -1, -1}, + { 4, 6, 10, 12, -1, -1, -1, -1}, + { 0, 4, 6, 10, 12, -1, -1, -1}, + { 2, 4, 6, 10, 12, -1, -1, -1}, + { 0, 2, 4, 6, 
10, 12, -1, -1}, + { 8, 10, 12, -1, -1, -1, -1, -1}, + { 0, 8, 10, 12, -1, -1, -1, -1}, + { 2, 8, 10, 12, -1, -1, -1, -1}, + { 0, 2, 8, 10, 12, -1, -1, -1}, + { 4, 8, 10, 12, -1, -1, -1, -1}, + { 0, 4, 8, 10, 12, -1, -1, -1}, + { 2, 4, 8, 10, 12, -1, -1, -1}, + { 0, 2, 4, 8, 10, 12, -1, -1}, + { 6, 8, 10, 12, -1, -1, -1, -1}, + { 0, 6, 8, 10, 12, -1, -1, -1}, + { 2, 6, 8, 10, 12, -1, -1, -1}, + { 0, 2, 6, 8, 10, 12, -1, -1}, + { 4, 6, 8, 10, 12, -1, -1, -1}, + { 0, 4, 6, 8, 10, 12, -1, -1}, + { 2, 4, 6, 8, 10, 12, -1, -1}, + { 0, 2, 4, 6, 8, 10, 12, -1}, + {14, -1, -1, -1, -1, -1, -1, -1}, + { 0, 14, -1, -1, -1, -1, -1, -1}, + { 2, 14, -1, -1, -1, -1, -1, -1}, + { 0, 2, 14, -1, -1, -1, -1, -1}, + { 4, 14, -1, -1, -1, -1, -1, -1}, + { 0, 4, 14, -1, -1, -1, -1, -1}, + { 2, 4, 14, -1, -1, -1, -1, -1}, + { 0, 2, 4, 14, -1, -1, -1, -1}, + { 6, 14, -1, -1, -1, -1, -1, -1}, + { 0, 6, 14, -1, -1, -1, -1, -1}, + { 2, 6, 14, -1, -1, -1, -1, -1}, + { 0, 2, 6, 14, -1, -1, -1, -1}, + { 4, 6, 14, -1, -1, -1, -1, -1}, + { 0, 4, 6, 14, -1, -1, -1, -1}, + { 2, 4, 6, 14, -1, -1, -1, -1}, + { 0, 2, 4, 6, 14, -1, -1, -1}, + { 8, 14, -1, -1, -1, -1, -1, -1}, + { 0, 8, 14, -1, -1, -1, -1, -1}, + { 2, 8, 14, -1, -1, -1, -1, -1}, + { 0, 2, 8, 14, -1, -1, -1, -1}, + { 4, 8, 14, -1, -1, -1, -1, -1}, + { 0, 4, 8, 14, -1, -1, -1, -1}, + { 2, 4, 8, 14, -1, -1, -1, -1}, + { 0, 2, 4, 8, 14, -1, -1, -1}, + { 6, 8, 14, -1, -1, -1, -1, -1}, + { 0, 6, 8, 14, -1, -1, -1, -1}, + { 2, 6, 8, 14, -1, -1, -1, -1}, + { 0, 2, 6, 8, 14, -1, -1, -1}, + { 4, 6, 8, 14, -1, -1, -1, -1}, + { 0, 4, 6, 8, 14, -1, -1, -1}, + { 2, 4, 6, 8, 14, -1, -1, -1}, + { 0, 2, 4, 6, 8, 14, -1, -1}, + {10, 14, -1, -1, -1, -1, -1, -1}, + { 0, 10, 14, -1, -1, -1, -1, -1}, + { 2, 10, 14, -1, -1, -1, -1, -1}, + { 0, 2, 10, 14, -1, -1, -1, -1}, + { 4, 10, 14, -1, -1, -1, -1, -1}, + { 0, 4, 10, 14, -1, -1, -1, -1}, + { 2, 4, 10, 14, -1, -1, -1, -1}, + { 0, 2, 4, 10, 14, -1, -1, -1}, + { 6, 10, 14, -1, -1, -1, -1, -1}, + { 0, 6, 10, 
14, -1, -1, -1, -1}, + { 2, 6, 10, 14, -1, -1, -1, -1}, + { 0, 2, 6, 10, 14, -1, -1, -1}, + { 4, 6, 10, 14, -1, -1, -1, -1}, + { 0, 4, 6, 10, 14, -1, -1, -1}, + { 2, 4, 6, 10, 14, -1, -1, -1}, + { 0, 2, 4, 6, 10, 14, -1, -1}, + { 8, 10, 14, -1, -1, -1, -1, -1}, + { 0, 8, 10, 14, -1, -1, -1, -1}, + { 2, 8, 10, 14, -1, -1, -1, -1}, + { 0, 2, 8, 10, 14, -1, -1, -1}, + { 4, 8, 10, 14, -1, -1, -1, -1}, + { 0, 4, 8, 10, 14, -1, -1, -1}, + { 2, 4, 8, 10, 14, -1, -1, -1}, + { 0, 2, 4, 8, 10, 14, -1, -1}, + { 6, 8, 10, 14, -1, -1, -1, -1}, + { 0, 6, 8, 10, 14, -1, -1, -1}, + { 2, 6, 8, 10, 14, -1, -1, -1}, + { 0, 2, 6, 8, 10, 14, -1, -1}, + { 4, 6, 8, 10, 14, -1, -1, -1}, + { 0, 4, 6, 8, 10, 14, -1, -1}, + { 2, 4, 6, 8, 10, 14, -1, -1}, + { 0, 2, 4, 6, 8, 10, 14, -1}, + {12, 14, -1, -1, -1, -1, -1, -1}, + { 0, 12, 14, -1, -1, -1, -1, -1}, + { 2, 12, 14, -1, -1, -1, -1, -1}, + { 0, 2, 12, 14, -1, -1, -1, -1}, + { 4, 12, 14, -1, -1, -1, -1, -1}, + { 0, 4, 12, 14, -1, -1, -1, -1}, + { 2, 4, 12, 14, -1, -1, -1, -1}, + { 0, 2, 4, 12, 14, -1, -1, -1}, + { 6, 12, 14, -1, -1, -1, -1, -1}, + { 0, 6, 12, 14, -1, -1, -1, -1}, + { 2, 6, 12, 14, -1, -1, -1, -1}, + { 0, 2, 6, 12, 14, -1, -1, -1}, + { 4, 6, 12, 14, -1, -1, -1, -1}, + { 0, 4, 6, 12, 14, -1, -1, -1}, + { 2, 4, 6, 12, 14, -1, -1, -1}, + { 0, 2, 4, 6, 12, 14, -1, -1}, + { 8, 12, 14, -1, -1, -1, -1, -1}, + { 0, 8, 12, 14, -1, -1, -1, -1}, + { 2, 8, 12, 14, -1, -1, -1, -1}, + { 0, 2, 8, 12, 14, -1, -1, -1}, + { 4, 8, 12, 14, -1, -1, -1, -1}, + { 0, 4, 8, 12, 14, -1, -1, -1}, + { 2, 4, 8, 12, 14, -1, -1, -1}, + { 0, 2, 4, 8, 12, 14, -1, -1}, + { 6, 8, 12, 14, -1, -1, -1, -1}, + { 0, 6, 8, 12, 14, -1, -1, -1}, + { 2, 6, 8, 12, 14, -1, -1, -1}, + { 0, 2, 6, 8, 12, 14, -1, -1}, + { 4, 6, 8, 12, 14, -1, -1, -1}, + { 0, 4, 6, 8, 12, 14, -1, -1}, + { 2, 4, 6, 8, 12, 14, -1, -1}, + { 0, 2, 4, 6, 8, 12, 14, -1}, + {10, 12, 14, -1, -1, -1, -1, -1}, + { 0, 10, 12, 14, -1, -1, -1, -1}, + { 2, 10, 12, 14, -1, -1, -1, -1}, + { 0, 2, 10, 12, 
14, -1, -1, -1}, + { 4, 10, 12, 14, -1, -1, -1, -1}, + { 0, 4, 10, 12, 14, -1, -1, -1}, + { 2, 4, 10, 12, 14, -1, -1, -1}, + { 0, 2, 4, 10, 12, 14, -1, -1}, + { 6, 10, 12, 14, -1, -1, -1, -1}, + { 0, 6, 10, 12, 14, -1, -1, -1}, + { 2, 6, 10, 12, 14, -1, -1, -1}, + { 0, 2, 6, 10, 12, 14, -1, -1}, + { 4, 6, 10, 12, 14, -1, -1, -1}, + { 0, 4, 6, 10, 12, 14, -1, -1}, + { 2, 4, 6, 10, 12, 14, -1, -1}, + { 0, 2, 4, 6, 10, 12, 14, -1}, + { 8, 10, 12, 14, -1, -1, -1, -1}, + { 0, 8, 10, 12, 14, -1, -1, -1}, + { 2, 8, 10, 12, 14, -1, -1, -1}, + { 0, 2, 8, 10, 12, 14, -1, -1}, + { 4, 8, 10, 12, 14, -1, -1, -1}, + { 0, 4, 8, 10, 12, 14, -1, -1}, + { 2, 4, 8, 10, 12, 14, -1, -1}, + { 0, 2, 4, 8, 10, 12, 14, -1}, + { 6, 8, 10, 12, 14, -1, -1, -1}, + { 0, 6, 8, 10, 12, 14, -1, -1}, + { 2, 6, 8, 10, 12, 14, -1, -1}, + { 0, 2, 6, 8, 10, 12, 14, -1}, + { 4, 6, 8, 10, 12, 14, -1, -1}, + { 0, 4, 6, 8, 10, 12, 14, -1}, + { 2, 4, 6, 8, 10, 12, 14, -1}, + { 0, 2, 4, 6, 8, 10, 12, 14} +}; +#endif + +#define _mm256_cmpge_epu16(a, b) _mm256_cmpeq_epi16(_mm256_max_epu16(a, b), a) +#define _mm_cmpge_epu16(a, b) _mm_cmpeq_epi16(_mm_max_epu16(a, b), a) + +unsigned int rej_uniform_avx(int16_t * restrict r, const uint8_t *buf) +{ + unsigned int ctr, pos; + uint16_t val0, val1; + uint32_t good; +#ifdef BMI + uint64_t idx0, idx1, idx2, idx3; +#endif + const __m256i bound = _mm256_load_si256(&qdata.vec[_16XQ/16]); + const __m256i ones = _mm256_set1_epi8(1); + const __m256i mask = _mm256_set1_epi16(0xFFF); + const __m256i idx8 = _mm256_set_epi8(15,14,14,13,12,11,11,10, + 9, 8, 8, 7, 6, 5, 5, 4, + 11,10,10, 9, 8, 7, 7, 6, + 5, 4, 4, 3, 2, 1, 1, 0); + __m256i f0, f1, g0, g1, g2, g3; + __m128i f, t, pilo, pihi; + + ctr = pos = 0; + while(ctr <= KYBER_N - 32 && pos <= REJ_UNIFORM_AVX_BUFLEN - 56) { + f0 = _mm256_loadu_si256((__m256i *)&buf[pos]); + f1 = _mm256_loadu_si256((__m256i *)&buf[pos+24]); + f0 = _mm256_permute4x64_epi64(f0, 0x94); + f1 = _mm256_permute4x64_epi64(f1, 0x94); + f0 = 
_mm256_shuffle_epi8(f0, idx8); + f1 = _mm256_shuffle_epi8(f1, idx8); + g0 = _mm256_srli_epi16(f0, 4); + g1 = _mm256_srli_epi16(f1, 4); + f0 = _mm256_blend_epi16(f0, g0, 0xAA); + f1 = _mm256_blend_epi16(f1, g1, 0xAA); + f0 = _mm256_and_si256(f0, mask); + f1 = _mm256_and_si256(f1, mask); + pos += 48; + + g0 = _mm256_cmpgt_epi16(bound, f0); + g1 = _mm256_cmpgt_epi16(bound, f1); + + g0 = _mm256_packs_epi16(g0, g1); + good = _mm256_movemask_epi8(g0); + +#ifdef BMI + idx0 = _pdep_u64(good >> 0, 0x0101010101010101); + idx1 = _pdep_u64(good >> 8, 0x0101010101010101); + idx2 = _pdep_u64(good >> 16, 0x0101010101010101); + idx3 = _pdep_u64(good >> 24, 0x0101010101010101); + idx0 = (idx0 << 8) - idx0; + idx0 = _pext_u64(0x0E0C0A0806040200, idx0); + idx1 = (idx1 << 8) - idx1; + idx1 = _pext_u64(0x0E0C0A0806040200, idx1); + idx2 = (idx2 << 8) - idx2; + idx2 = _pext_u64(0x0E0C0A0806040200, idx2); + idx3 = (idx3 << 8) - idx3; + idx3 = _pext_u64(0x0E0C0A0806040200, idx3); + + g0 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx0)); + g1 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx1)); + g0 = _mm256_inserti128_si256(g0, _mm_cvtsi64_si128(idx2), 1); + g1 = _mm256_inserti128_si256(g1, _mm_cvtsi64_si128(idx3), 1); +#else + g0 = _mm256_castsi128_si256(_mm_loadl_epi64((__m128i *)&idx[(good >> 0) & 0xFF])); + g1 = _mm256_castsi128_si256(_mm_loadl_epi64((__m128i *)&idx[(good >> 8) & 0xFF])); + g0 = _mm256_inserti128_si256(g0, _mm_loadl_epi64((__m128i *)&idx[(good >> 16) & 0xFF]), 1); + g1 = _mm256_inserti128_si256(g1, _mm_loadl_epi64((__m128i *)&idx[(good >> 24) & 0xFF]), 1); +#endif + + g2 = _mm256_add_epi8(g0, ones); + g3 = _mm256_add_epi8(g1, ones); + g0 = _mm256_unpacklo_epi8(g0, g2); + g1 = _mm256_unpacklo_epi8(g1, g3); + + f0 = _mm256_shuffle_epi8(f0, g0); + f1 = _mm256_shuffle_epi8(f1, g1); + + _mm_storeu_si128((__m128i *)&r[ctr], _mm256_castsi256_si128(f0)); + ctr += _mm_popcnt_u32((good >> 0) & 0xFF); + _mm_storeu_si128((__m128i *)&r[ctr], _mm256_extracti128_si256(f0, 1)); + ctr += 
_mm_popcnt_u32((good >> 16) & 0xFF); + _mm_storeu_si128((__m128i *)&r[ctr], _mm256_castsi256_si128(f1)); + ctr += _mm_popcnt_u32((good >> 8) & 0xFF); + _mm_storeu_si128((__m128i *)&r[ctr], _mm256_extracti128_si256(f1, 1)); + ctr += _mm_popcnt_u32((good >> 24) & 0xFF); + } + + while(ctr <= KYBER_N - 8 && pos <= REJ_UNIFORM_AVX_BUFLEN - 16) { + f = _mm_loadu_si128((__m128i *)&buf[pos]); + f = _mm_shuffle_epi8(f, _mm256_castsi256_si128(idx8)); + t = _mm_srli_epi16(f, 4); + f = _mm_blend_epi16(f, t, 0xAA); + f = _mm_and_si128(f, _mm256_castsi256_si128(mask)); + pos += 12; + + t = _mm_cmpgt_epi16(_mm256_castsi256_si128(bound), f); + good = _mm_movemask_epi8(t); + +#ifdef BMI + good &= 0x5555; + idx0 = _pdep_u64(good, 0x1111111111111111); + idx0 = (idx0 << 8) - idx0; + idx0 = _pext_u64(0x0E0C0A0806040200, idx0); + pilo = _mm_cvtsi64_si128(idx0); +#else + good = _pext_u32(good, 0x5555); + pilo = _mm_loadl_epi64((__m128i *)&idx[good]); +#endif + + pihi = _mm_add_epi8(pilo, _mm256_castsi256_si128(ones)); + pilo = _mm_unpacklo_epi8(pilo, pihi); + f = _mm_shuffle_epi8(f, pilo); + _mm_storeu_si128((__m128i *)&r[ctr], f); + ctr += _mm_popcnt_u32(good); + } + + while(ctr < KYBER_N && pos <= REJ_UNIFORM_AVX_BUFLEN - 3) { + val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; + val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)); + pos += 3; + + if(val0 < KYBER_Q) + r[ctr++] = val0; + if(val1 < KYBER_Q && ctr < KYBER_N) + r[ctr++] = val1; + } + + return ctr; +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/rejsample.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/rejsample.h new file mode 100644 index 0000000000..3be5e2192e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/rejsample.h @@ -0,0 +1,14 @@ +#ifndef REJSAMPLE_H +#define REJSAMPLE_H + +#include +#include "params.h" +#include "symmetric.h" + +#define REJ_UNIFORM_AVX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + 
XOF_BLOCKBYTES)/XOF_BLOCKBYTES) +#define REJ_UNIFORM_AVX_BUFLEN (REJ_UNIFORM_AVX_NBLOCKS*XOF_BLOCKBYTES) + +#define rej_uniform_avx KYBER_NAMESPACE(rej_uniform_avx) +unsigned int rej_uniform_avx(int16_t *r, const uint8_t *buf); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/shuffle.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/shuffle.S new file mode 100644 index 0000000000..18325ebec0 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/shuffle.S @@ -0,0 +1,255 @@ +#include "consts.h" +.include "fq.inc" +.include "shuffle.inc" + +/* +nttpack_avx: +#load +vmovdqa (%rdi),%ymm4 +vmovdqa 32(%rdi),%ymm5 +vmovdqa 64(%rdi),%ymm6 +vmovdqa 96(%rdi),%ymm7 +vmovdqa 128(%rdi),%ymm8 +vmovdqa 160(%rdi),%ymm9 +vmovdqa 192(%rdi),%ymm10 +vmovdqa 224(%rdi),%ymm11 + +shuffle1 4,5,3,5 +shuffle1 6,7,4,7 +shuffle1 8,9,6,9 +shuffle1 10,11,8,11 + +shuffle2 3,4,10,4 +shuffle2 6,8,3,8 +shuffle2 5,7,6,7 +shuffle2 9,11,5,11 + +shuffle4 10,3,9,3 +shuffle4 6,5,10,5 +shuffle4 4,8,6,8 +shuffle4 7,11,4,11 + +shuffle8 9,10,7,10 +shuffle8 6,4,9,4 +shuffle8 3,5,6,5 +shuffle8 8,11,3,11 + +#store +vmovdqa %ymm7,(%rdi) +vmovdqa %ymm9,32(%rdi) +vmovdqa %ymm6,64(%rdi) +vmovdqa %ymm3,96(%rdi) +vmovdqa %ymm10,128(%rdi) +vmovdqa %ymm4,160(%rdi) +vmovdqa %ymm5,192(%rdi) +vmovdqa %ymm11,224(%rdi) + +ret +*/ + +.text +nttunpack128_avx: +#load +vmovdqa (%rdi),%ymm4 +vmovdqa 32(%rdi),%ymm5 +vmovdqa 64(%rdi),%ymm6 +vmovdqa 96(%rdi),%ymm7 +vmovdqa 128(%rdi),%ymm8 +vmovdqa 160(%rdi),%ymm9 +vmovdqa 192(%rdi),%ymm10 +vmovdqa 224(%rdi),%ymm11 + +shuffle8 4,8,3,8 +shuffle8 5,9,4,9 +shuffle8 6,10,5,10 +shuffle8 7,11,6,11 + +shuffle4 3,5,7,5 +shuffle4 8,10,3,10 +shuffle4 4,6,8,6 +shuffle4 9,11,4,11 + +shuffle2 7,8,9,8 +shuffle2 5,6,7,6 +shuffle2 3,4,5,4 +shuffle2 10,11,3,11 + +shuffle1 9,5,10,5 +shuffle1 8,4,9,4 +shuffle1 7,3,8,3 +shuffle1 6,11,7,11 + +#store +vmovdqa %ymm10,(%rdi) +vmovdqa %ymm5,32(%rdi) +vmovdqa %ymm9,64(%rdi) +vmovdqa 
%ymm4,96(%rdi) +vmovdqa %ymm8,128(%rdi) +vmovdqa %ymm3,160(%rdi) +vmovdqa %ymm7,192(%rdi) +vmovdqa %ymm11,224(%rdi) + +ret + +.global cdecl(nttunpack_avx) +cdecl(nttunpack_avx): +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +ret + +ntttobytes128_avx: +#load +vmovdqa (%rsi),%ymm5 +vmovdqa 32(%rsi),%ymm6 +vmovdqa 64(%rsi),%ymm7 +vmovdqa 96(%rsi),%ymm8 +vmovdqa 128(%rsi),%ymm9 +vmovdqa 160(%rsi),%ymm10 +vmovdqa 192(%rsi),%ymm11 +vmovdqa 224(%rsi),%ymm12 + +#csubq +csubq 5,13 +csubq 6,13 +csubq 7,13 +csubq 8,13 +csubq 9,13 +csubq 10,13 +csubq 11,13 +csubq 12,13 + +#bitpack +vpsllw $12,%ymm6,%ymm4 +vpor %ymm4,%ymm5,%ymm4 + +vpsrlw $4,%ymm6,%ymm5 +vpsllw $8,%ymm7,%ymm6 +vpor %ymm5,%ymm6,%ymm5 + +vpsrlw $8,%ymm7,%ymm6 +vpsllw $4,%ymm8,%ymm7 +vpor %ymm6,%ymm7,%ymm6 + +vpsllw $12,%ymm10,%ymm7 +vpor %ymm7,%ymm9,%ymm7 + +vpsrlw $4,%ymm10,%ymm8 +vpsllw $8,%ymm11,%ymm9 +vpor %ymm8,%ymm9,%ymm8 + +vpsrlw $8,%ymm11,%ymm9 +vpsllw $4,%ymm12,%ymm10 +vpor %ymm9,%ymm10,%ymm9 + +shuffle1 4,5,3,5 +shuffle1 6,7,4,7 +shuffle1 8,9,6,9 + +shuffle2 3,4,8,4 +shuffle2 6,5,3,5 +shuffle2 7,9,6,9 + +shuffle4 8,3,7,3 +shuffle4 6,4,8,4 +shuffle4 5,9,6,9 + +shuffle8 7,8,5,8 +shuffle8 6,3,7,3 +shuffle8 4,9,6,9 + +#store +vmovdqu %ymm5,(%rdi) +vmovdqu %ymm7,32(%rdi) +vmovdqu %ymm6,64(%rdi) +vmovdqu %ymm8,96(%rdi) +vmovdqu %ymm3,128(%rdi) +vmovdqu %ymm9,160(%rdi) + +ret + +.global cdecl(ntttobytes_avx) +cdecl(ntttobytes_avx): +#consts +vmovdqa _16XQ*2(%rdx),%ymm0 +call ntttobytes128_avx +add $256,%rsi +add $192,%rdi +call ntttobytes128_avx +ret + +nttfrombytes128_avx: +#load +vmovdqu (%rsi),%ymm4 +vmovdqu 32(%rsi),%ymm5 +vmovdqu 64(%rsi),%ymm6 +vmovdqu 96(%rsi),%ymm7 +vmovdqu 128(%rsi),%ymm8 +vmovdqu 160(%rsi),%ymm9 + +shuffle8 4,7,3,7 +shuffle8 5,8,4,8 +shuffle8 6,9,5,9 + +shuffle4 3,8,6,8 +shuffle4 7,5,3,5 +shuffle4 4,9,7,9 + +shuffle2 6,5,4,5 +shuffle2 8,7,6,7 +shuffle2 3,9,8,9 + +shuffle1 4,7,10,7 +shuffle1 5,8,4,8 +shuffle1 6,9,5,9 + +#bitunpack +vpsrlw $12,%ymm10,%ymm11 +vpsllw 
$4,%ymm7,%ymm12 +vpor %ymm11,%ymm12,%ymm11 +vpand %ymm0,%ymm10,%ymm10 +vpand %ymm0,%ymm11,%ymm11 + +vpsrlw $8,%ymm7,%ymm12 +vpsllw $8,%ymm4,%ymm13 +vpor %ymm12,%ymm13,%ymm12 +vpand %ymm0,%ymm12,%ymm12 + +vpsrlw $4,%ymm4,%ymm13 +vpand %ymm0,%ymm13,%ymm13 + +vpsrlw $12,%ymm8,%ymm14 +vpsllw $4,%ymm5,%ymm15 +vpor %ymm14,%ymm15,%ymm14 +vpand %ymm0,%ymm8,%ymm8 +vpand %ymm0,%ymm14,%ymm14 + +vpsrlw $8,%ymm5,%ymm15 +vpsllw $8,%ymm9,%ymm1 +vpor %ymm15,%ymm1,%ymm15 +vpand %ymm0,%ymm15,%ymm15 + +vpsrlw $4,%ymm9,%ymm1 +vpand %ymm0,%ymm1,%ymm1 + +#store +vmovdqa %ymm10,(%rdi) +vmovdqa %ymm11,32(%rdi) +vmovdqa %ymm12,64(%rdi) +vmovdqa %ymm13,96(%rdi) +vmovdqa %ymm8,128(%rdi) +vmovdqa %ymm14,160(%rdi) +vmovdqa %ymm15,192(%rdi) +vmovdqa %ymm1,224(%rdi) + +ret + +.global cdecl(nttfrombytes_avx) +cdecl(nttfrombytes_avx): +#consts +vmovdqa _16XMASK*2(%rdx),%ymm0 +call nttfrombytes128_avx +add $256,%rdi +add $192,%rsi +call nttfrombytes128_avx +ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/shuffle.inc b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/shuffle.inc new file mode 100644 index 0000000000..73e9ffe03c --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/shuffle.inc @@ -0,0 +1,25 @@ +.macro shuffle8 r0,r1,r2,r3 +vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2 +vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle4 r0,r1,r2,r3 +vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2 +vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle2 r0,r1,r2,r3 +#vpsllq $32,%ymm\r1,%ymm\r2 +vmovsldup %ymm\r1,%ymm\r2 +vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 +vpsrlq $32,%ymm\r0,%ymm\r0 +#vmovshdup %ymm\r0,%ymm\r0 +vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle1 r0,r1,r2,r3 +vpslld $16,%ymm\r1,%ymm\r2 +vpblendw $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 +vpsrld $16,%ymm\r0,%ymm\r0 +vpblendw $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 +.endm diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/symmetric-shake.c 
b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/symmetric-shake.c new file mode 100644 index 0000000000..20f451882e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/symmetric-shake.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include "params.h" +#include "symmetric.h" +#include "fips202.h" + +/************************************************* +* Name: kyber_shake128_absorb +* +* Description: Absorb step of the SHAKE128 specialized for the Kyber context. +* +* Arguments: - shake128incctx *state: pointer to (uninitialized) output Keccak state +* - const uint8_t *seed: pointer to KYBER_SYMBYTES input to be absorbed into state +* - uint8_t x: additional byte of input (appended first after seed) +* - uint8_t y: additional byte of input (appended second after seed) +**************************************************/ +void kyber_shake128_absorb(shake128incctx *state, + const uint8_t seed[KYBER_SYMBYTES], + uint8_t x, + uint8_t y) +{ + uint8_t extseed[KYBER_SYMBYTES+2]; + + memcpy(extseed, seed, KYBER_SYMBYTES); + extseed[KYBER_SYMBYTES+0] = x; + extseed[KYBER_SYMBYTES+1] = y; + + shake128_absorb_once(state, extseed, sizeof(extseed)); +} + +/************************************************* +* Name: kyber_shake256_prf +* +* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input +* and then generates outlen bytes of SHAKE256 output +* +* Arguments: - uint8_t *out: pointer to output +* - size_t outlen: number of requested output bytes +* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) +* - uint8_t nonce: single-byte nonce (public PRF input) +**************************************************/ +void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce) +{ + uint8_t extkey[KYBER_SYMBYTES+1]; + + memcpy(extkey, key, KYBER_SYMBYTES); + extkey[KYBER_SYMBYTES] = nonce; + + shake256(out, outlen, extkey, sizeof(extkey)); +} + +/************************************************* +* Name: 
kyber_shake256_rkprf +* +* Description: Usage of SHAKE256 as a PRF, absorbs the key followed by the input +* and then generates KYBER_SSBYTES bytes of SHAKE256 output +* +* Arguments: - uint8_t *out: pointer to output (of length KYBER_SSBYTES) +* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) +* - const uint8_t *input: pointer to the second input to be absorbed +* (of length KYBER_CIPHERTEXTBYTES) +**************************************************/ +void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]) +{ + shake256incctx s; + + shake256_inc_init(&s); + shake256_inc_absorb(&s, key, KYBER_SYMBYTES); + shake256_inc_absorb(&s, input, KYBER_CIPHERTEXTBYTES); + shake256_inc_finalize(&s); + shake256_inc_squeeze(out, KYBER_SSBYTES, &s); + shake256_inc_ctx_release(&s); +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/symmetric.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/symmetric.h new file mode 100644 index 0000000000..e4941f7a86 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/symmetric.h @@ -0,0 +1,34 @@ +#ifndef SYMMETRIC_H +#define SYMMETRIC_H + +#include +#include +#include "params.h" + +#include "fips202.h" +#include "fips202x4.h" + +typedef shake128incctx xof_state; + +#define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb) +void kyber_shake128_absorb(shake128incctx *s, + const uint8_t seed[KYBER_SYMBYTES], + uint8_t x, + uint8_t y); + +#define kyber_shake256_prf KYBER_NAMESPACE(kyber_shake256_prf) +void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce); + +#define kyber_shake256_rkprf KYBER_NAMESPACE(kyber_shake256_rkprf) +void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]); + +#define XOF_BLOCKBYTES SHAKE128_RATE + +#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) 
+#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES) +#define xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED, X, Y) +#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) +#define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, NONCE) +#define rkprf(OUT, KEY, INPUT) kyber_shake256_rkprf(OUT, KEY, INPUT) + +#endif /* SYMMETRIC_H */ diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/verify.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/verify.c new file mode 100644 index 0000000000..aa8e2850b1 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/verify.c @@ -0,0 +1,73 @@ +#include +#include +#include +#include "verify.h" + +/************************************************* +* Name: verify +* +* Description: Compare two arrays for equality in constant time. +* +* Arguments: const uint8_t *a: pointer to first byte array +* const uint8_t *b: pointer to second byte array +* size_t len: length of the byte arrays +* +* Returns 0 if the byte arrays are equal, 1 otherwise +**************************************************/ +int verify(const uint8_t *a, const uint8_t *b, size_t len) +{ + size_t i; + uint64_t r; + __m256i f, g, h; + + h = _mm256_setzero_si256(); + for(i=0;i> 63; + return r; +} + +/************************************************* +* Name: cmov +* +* Description: Copy len bytes from x to r if b is 1; +* don't modify r if b is 0. Requires b to be in {0,1}; +* assumes two's complement representation of negative integers. +* Runs in constant time. 
+* +* Arguments: uint8_t *r: pointer to output byte array +* const uint8_t *x: pointer to input byte array +* size_t len: Amount of bytes to be copied +* uint8_t b: Condition bit; has to be in {0,1} +**************************************************/ +void cmov(uint8_t * restrict r, const uint8_t *x, size_t len, uint8_t b) +{ + size_t i; + __m256i xvec, rvec, bvec; + + bvec = _mm256_set1_epi64x(-(uint64_t)b); + for(i=0;i +#include +#include "params.h" + +#define verify KYBER_NAMESPACE(verify) +int verify(const uint8_t *a, const uint8_t *b, size_t len); + +#define cmov KYBER_NAMESPACE(cmov) +void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/LICENSE b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/LICENSE new file mode 100644 index 0000000000..7922ab8007 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/LICENSE @@ -0,0 +1,6 @@ +Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/); +or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html). + +For Keccak and AES we are using public-domain +code from sources and by authors listed in +comments on top of the respective files. 
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/api.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/api.h new file mode 100644 index 0000000000..70d40f3f3e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/api.h @@ -0,0 +1,66 @@ +#ifndef API_H +#define API_H + +#include + +#define pqcrystals_kyber512_SECRETKEYBYTES 1632 +#define pqcrystals_kyber512_PUBLICKEYBYTES 800 +#define pqcrystals_kyber512_CIPHERTEXTBYTES 768 +#define pqcrystals_kyber512_KEYPAIRCOINBYTES 64 +#define pqcrystals_kyber512_ENCCOINBYTES 32 +#define pqcrystals_kyber512_BYTES 32 + +#define pqcrystals_kyber512_ref_SECRETKEYBYTES pqcrystals_kyber512_SECRETKEYBYTES +#define pqcrystals_kyber512_ref_PUBLICKEYBYTES pqcrystals_kyber512_PUBLICKEYBYTES +#define pqcrystals_kyber512_ref_CIPHERTEXTBYTES pqcrystals_kyber512_CIPHERTEXTBYTES +#define pqcrystals_kyber512_ref_KEYPAIRCOINBYTES pqcrystals_kyber512_KEYPAIRCOINBYTES +#define pqcrystals_kyber512_ref_ENCCOINBYTES pqcrystals_kyber512_ENCCOINBYTES +#define pqcrystals_kyber512_ref_BYTES pqcrystals_kyber512_BYTES + +int pqcrystals_kyber512_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); +int pqcrystals_kyber512_ref_keypair(uint8_t *pk, uint8_t *sk); +int pqcrystals_kyber512_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); +int pqcrystals_kyber512_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int pqcrystals_kyber512_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#define pqcrystals_kyber768_SECRETKEYBYTES 2400 +#define pqcrystals_kyber768_PUBLICKEYBYTES 1184 +#define pqcrystals_kyber768_CIPHERTEXTBYTES 1088 +#define pqcrystals_kyber768_KEYPAIRCOINBYTES 64 +#define pqcrystals_kyber768_ENCCOINBYTES 32 +#define pqcrystals_kyber768_BYTES 32 + +#define pqcrystals_kyber768_ref_SECRETKEYBYTES pqcrystals_kyber768_SECRETKEYBYTES +#define pqcrystals_kyber768_ref_PUBLICKEYBYTES pqcrystals_kyber768_PUBLICKEYBYTES +#define 
pqcrystals_kyber768_ref_CIPHERTEXTBYTES pqcrystals_kyber768_CIPHERTEXTBYTES +#define pqcrystals_kyber768_ref_KEYPAIRCOINBYTES pqcrystals_kyber768_KEYPAIRCOINBYTES +#define pqcrystals_kyber768_ref_ENCCOINBYTES pqcrystals_kyber768_ENCCOINBYTES +#define pqcrystals_kyber768_ref_BYTES pqcrystals_kyber768_BYTES + +int pqcrystals_kyber768_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); +int pqcrystals_kyber768_ref_keypair(uint8_t *pk, uint8_t *sk); +int pqcrystals_kyber768_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); +int pqcrystals_kyber768_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int pqcrystals_kyber768_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#define pqcrystals_kyber1024_SECRETKEYBYTES 3168 +#define pqcrystals_kyber1024_PUBLICKEYBYTES 1568 +#define pqcrystals_kyber1024_CIPHERTEXTBYTES 1568 +#define pqcrystals_kyber1024_KEYPAIRCOINBYTES 64 +#define pqcrystals_kyber1024_ENCCOINBYTES 32 +#define pqcrystals_kyber1024_BYTES 32 + +#define pqcrystals_kyber1024_ref_SECRETKEYBYTES pqcrystals_kyber1024_SECRETKEYBYTES +#define pqcrystals_kyber1024_ref_PUBLICKEYBYTES pqcrystals_kyber1024_PUBLICKEYBYTES +#define pqcrystals_kyber1024_ref_CIPHERTEXTBYTES pqcrystals_kyber1024_CIPHERTEXTBYTES +#define pqcrystals_kyber1024_ref_KEYPAIRCOINBYTES pqcrystals_kyber1024_KEYPAIRCOINBYTES +#define pqcrystals_kyber1024_ref_ENCCOINBYTES pqcrystals_kyber1024_ENCCOINBYTES +#define pqcrystals_kyber1024_ref_BYTES pqcrystals_kyber1024_BYTES + +int pqcrystals_kyber1024_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); +int pqcrystals_kyber1024_ref_keypair(uint8_t *pk, uint8_t *sk); +int pqcrystals_kyber1024_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); +int pqcrystals_kyber1024_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int pqcrystals_kyber1024_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#endif diff --git 
a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/cbd.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/cbd.c new file mode 100644 index 0000000000..1500ffea56 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/cbd.c @@ -0,0 +1,128 @@ +#include +#include "params.h" +#include "cbd.h" + +/************************************************* +* Name: load32_littleendian +* +* Description: load 4 bytes into a 32-bit integer +* in little-endian order +* +* Arguments: - const uint8_t *x: pointer to input byte array +* +* Returns 32-bit unsigned integer loaded from x +**************************************************/ +static uint32_t load32_littleendian(const uint8_t x[4]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + r |= (uint32_t)x[3] << 24; + return r; +} + +/************************************************* +* Name: load24_littleendian +* +* Description: load 3 bytes into a 32-bit integer +* in little-endian order. 
+* This function is only needed for Kyber-512 +* +* Arguments: - const uint8_t *x: pointer to input byte array +* +* Returns 32-bit unsigned integer loaded from x (most significant byte is zero) +**************************************************/ +#if KYBER_ETA1 == 3 +static uint32_t load24_littleendian(const uint8_t x[3]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + return r; +} +#endif + + +/************************************************* +* Name: cbd2 +* +* Description: Given an array of uniformly random bytes, compute +* polynomial with coefficients distributed according to +* a centered binomial distribution with parameter eta=2 +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *buf: pointer to input byte array +**************************************************/ +static void cbd2(poly *r, const uint8_t buf[2*KYBER_N/4]) +{ + unsigned int i,j; + uint32_t t,d; + int16_t a,b; + + for(i=0;i>1) & 0x55555555; + + for(j=0;j<8;j++) { + a = (d >> (4*j+0)) & 0x3; + b = (d >> (4*j+2)) & 0x3; + r->coeffs[8*i+j] = a - b; + } + } +} + +/************************************************* +* Name: cbd3 +* +* Description: Given an array of uniformly random bytes, compute +* polynomial with coefficients distributed according to +* a centered binomial distribution with parameter eta=3. 
+* This function is only needed for Kyber-512 +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *buf: pointer to input byte array +**************************************************/ +#if KYBER_ETA1 == 3 +static void cbd3(poly *r, const uint8_t buf[3*KYBER_N/4]) +{ + unsigned int i,j; + uint32_t t,d; + int16_t a,b; + + for(i=0;i>1) & 0x00249249; + d += (t>>2) & 0x00249249; + + for(j=0;j<4;j++) { + a = (d >> (6*j+0)) & 0x7; + b = (d >> (6*j+3)) & 0x7; + r->coeffs[4*i+j] = a - b; + } + } +} +#endif + +void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]) +{ +#if KYBER_ETA1 == 2 + cbd2(r, buf); +#elif KYBER_ETA1 == 3 + cbd3(r, buf); +#else +#error "This implementation requires eta1 in {2,3}" +#endif +} + +void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]) +{ +#if KYBER_ETA2 == 2 + cbd2(r, buf); +#else +#error "This implementation requires eta2 = 2" +#endif +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/cbd.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/cbd.h new file mode 100644 index 0000000000..7b677d745d --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/cbd.h @@ -0,0 +1,14 @@ +#ifndef CBD_H +#define CBD_H + +#include +#include "params.h" +#include "poly.h" + +#define poly_cbd_eta1 KYBER_NAMESPACE(poly_cbd_eta1) +void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]); + +#define poly_cbd_eta2 KYBER_NAMESPACE(poly_cbd_eta2) +void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/indcpa.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/indcpa.c new file mode 100644 index 0000000000..4a8b4c894f --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/indcpa.c @@ -0,0 +1,331 @@ +#include +#include +#include +#include "params.h" +#include "indcpa.h" +#include "polyvec.h" +#include "poly.h" 
+#include "ntt.h" +#include "symmetric.h" +#include "randombytes.h" + +/************************************************* +* Name: pack_pk +* +* Description: Serialize the public key as concatenation of the +* serialized vector of polynomials pk +* and the public seed used to generate the matrix A. +* +* Arguments: uint8_t *r: pointer to the output serialized public key +* polyvec *pk: pointer to the input public-key polyvec +* const uint8_t *seed: pointer to the input public seed +**************************************************/ +static void pack_pk(uint8_t r[KYBER_INDCPA_PUBLICKEYBYTES], + polyvec *pk, + const uint8_t seed[KYBER_SYMBYTES]) +{ + polyvec_tobytes(r, pk); + memcpy(r+KYBER_POLYVECBYTES, seed, KYBER_SYMBYTES); +} + +/************************************************* +* Name: unpack_pk +* +* Description: De-serialize public key from a byte array; +* approximate inverse of pack_pk +* +* Arguments: - polyvec *pk: pointer to output public-key polynomial vector +* - uint8_t *seed: pointer to output seed to generate matrix A +* - const uint8_t *packedpk: pointer to input serialized public key +**************************************************/ +static void unpack_pk(polyvec *pk, + uint8_t seed[KYBER_SYMBYTES], + const uint8_t packedpk[KYBER_INDCPA_PUBLICKEYBYTES]) +{ + polyvec_frombytes(pk, packedpk); + memcpy(seed, packedpk+KYBER_POLYVECBYTES, KYBER_SYMBYTES); +} + +/************************************************* +* Name: pack_sk +* +* Description: Serialize the secret key +* +* Arguments: - uint8_t *r: pointer to output serialized secret key +* - polyvec *sk: pointer to input vector of polynomials (secret key) +**************************************************/ +static void pack_sk(uint8_t r[KYBER_INDCPA_SECRETKEYBYTES], polyvec *sk) +{ + polyvec_tobytes(r, sk); +} + +/************************************************* +* Name: unpack_sk +* +* Description: De-serialize the secret key; inverse of pack_sk +* +* Arguments: - polyvec *sk: pointer to 
output vector of polynomials (secret key) +* - const uint8_t *packedsk: pointer to input serialized secret key +**************************************************/ +static void unpack_sk(polyvec *sk, const uint8_t packedsk[KYBER_INDCPA_SECRETKEYBYTES]) +{ + polyvec_frombytes(sk, packedsk); +} + +/************************************************* +* Name: pack_ciphertext +* +* Description: Serialize the ciphertext as concatenation of the +* compressed and serialized vector of polynomials b +* and the compressed and serialized polynomial v +* +* Arguments: uint8_t *r: pointer to the output serialized ciphertext +* polyvec *b: pointer to the input vector of polynomials b +* poly *v: pointer to the input polynomial v +**************************************************/ +static void pack_ciphertext(uint8_t r[KYBER_INDCPA_BYTES], polyvec *b, poly *v) +{ + polyvec_compress(r, b); + poly_compress(r+KYBER_POLYVECCOMPRESSEDBYTES, v); +} + +/************************************************* +* Name: unpack_ciphertext +* +* Description: De-serialize and decompress ciphertext from a byte array; +* approximate inverse of pack_ciphertext +* +* Arguments: - polyvec *b: pointer to the output vector of polynomials b +* - poly *v: pointer to the output polynomial v +* - const uint8_t *c: pointer to the input serialized ciphertext +**************************************************/ +static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t c[KYBER_INDCPA_BYTES]) +{ + polyvec_decompress(b, c); + poly_decompress(v, c+KYBER_POLYVECCOMPRESSEDBYTES); +} + +/************************************************* +* Name: rej_uniform +* +* Description: Run rejection sampling on uniform random bytes to generate +* uniform random integers mod q +* +* Arguments: - int16_t *r: pointer to output buffer +* - unsigned int len: requested number of 16-bit integers (uniform mod q) +* - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes) +* - unsigned int buflen: length 
of input buffer in bytes +* +* Returns number of sampled 16-bit integers (at most len) +**************************************************/ +static unsigned int rej_uniform(int16_t *r, + unsigned int len, + const uint8_t *buf, + unsigned int buflen) +{ + unsigned int ctr, pos; + uint16_t val0, val1; + + ctr = pos = 0; + while(ctr < len && pos + 3 <= buflen) { + val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; + val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)) & 0xFFF; + pos += 3; + + if(val0 < KYBER_Q) + r[ctr++] = val0; + if(ctr < len && val1 < KYBER_Q) + r[ctr++] = val1; + } + + return ctr; +} + +#define gen_a(A,B) gen_matrix(A,B,0) +#define gen_at(A,B) gen_matrix(A,B,1) + +/************************************************* +* Name: gen_matrix +* +* Description: Deterministically generate matrix A (or the transpose of A) +* from a seed. Entries of the matrix are polynomials that look +* uniformly random. Performs rejection sampling on output of +* an XOF +* +* Arguments: - polyvec *a: pointer to output matrix A +* - const uint8_t *seed: pointer to input seed +* - int transposed: boolean deciding whether A or A^T is generated +**************************************************/ +#define GEN_MATRIX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + XOF_BLOCKBYTES)/XOF_BLOCKBYTES) +// Not static for benchmarking +void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed) +{ + unsigned int ctr, i, j, k; + unsigned int buflen, off; + uint8_t buf[GEN_MATRIX_NBLOCKS*XOF_BLOCKBYTES+2]; + xof_state state; + xof_init(&state, seed); + + for(i=0;i +#include "params.h" +#include "polyvec.h" + +#define gen_matrix KYBER_NAMESPACE(gen_matrix) +void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed); + +#define indcpa_keypair_derand KYBER_NAMESPACE(indcpa_keypair_derand) +void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], + uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], + const uint8_t 
coins[KYBER_SYMBYTES]); + +#define indcpa_enc KYBER_NAMESPACE(indcpa_enc) +void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], + const uint8_t m[KYBER_INDCPA_MSGBYTES], + const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], + const uint8_t coins[KYBER_SYMBYTES]); + +#define indcpa_dec KYBER_NAMESPACE(indcpa_dec) +void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES], + const uint8_t c[KYBER_INDCPA_BYTES], + const uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES]); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/kem.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/kem.c new file mode 100644 index 0000000000..63abc1029c --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/kem.c @@ -0,0 +1,169 @@ +#include +#include +#include +#include "params.h" +#include "kem.h" +#include "indcpa.h" +#include "verify.h" +#include "symmetric.h" +#include "randombytes.h" +/************************************************* +* Name: crypto_kem_keypair_derand +* +* Description: Generates public and private key +* for CCA-secure Kyber key encapsulation mechanism +* +* Arguments: - uint8_t *pk: pointer to output public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key +* (an already allocated array of KYBER_SECRETKEYBYTES bytes) +* - uint8_t *coins: pointer to input randomness +* (an already allocated array filled with 2*KYBER_SYMBYTES random bytes) +** +* Returns 0 (success) +**************************************************/ +int crypto_kem_keypair_derand(uint8_t *pk, + uint8_t *sk, + const uint8_t *coins) +{ + indcpa_keypair_derand(pk, sk, coins); + memcpy(sk+KYBER_INDCPA_SECRETKEYBYTES, pk, KYBER_PUBLICKEYBYTES); + hash_h(sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); + /* Value z for pseudo-random output on reject */ + memcpy(sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, coins+KYBER_SYMBYTES, KYBER_SYMBYTES); + return 0; +} + 
+/************************************************* +* Name: crypto_kem_keypair +* +* Description: Generates public and private key +* for CCA-secure Kyber key encapsulation mechanism +* +* Arguments: - uint8_t *pk: pointer to output public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key +* (an already allocated array of KYBER_SECRETKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int crypto_kem_keypair(uint8_t *pk, + uint8_t *sk) +{ + uint8_t coins[2*KYBER_SYMBYTES]; + randombytes(coins, 2*KYBER_SYMBYTES); + crypto_kem_keypair_derand(pk, sk, coins); + return 0; +} + +/************************************************* +* Name: crypto_kem_enc_derand +* +* Description: Generates cipher text and shared +* secret for given public key +* +* Arguments: - uint8_t *ct: pointer to output cipher text +* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) +* - uint8_t *ss: pointer to output shared secret +* (an already allocated array of KYBER_SSBYTES bytes) +* - const uint8_t *pk: pointer to input public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* - const uint8_t *coins: pointer to input randomness +* (an already allocated array filled with KYBER_SYMBYTES random bytes) +** +* Returns 0 (success) +**************************************************/ +int crypto_kem_enc_derand(uint8_t *ct, + uint8_t *ss, + const uint8_t *pk, + const uint8_t *coins) +{ + uint8_t buf[2*KYBER_SYMBYTES]; + /* Will contain key, coins */ + uint8_t kr[2*KYBER_SYMBYTES]; + + memcpy(buf, coins, KYBER_SYMBYTES); + + /* Multitarget countermeasure for coins + contributory KEM */ + hash_h(buf+KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); + hash_g(kr, buf, 2*KYBER_SYMBYTES); + + /* coins are in kr+KYBER_SYMBYTES */ + indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES); + + memcpy(ss,kr,KYBER_SYMBYTES); + return 0; +} + +/************************************************* +* 
Name: crypto_kem_enc +* +* Description: Generates cipher text and shared +* secret for given public key +* +* Arguments: - uint8_t *ct: pointer to output cipher text +* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) +* - uint8_t *ss: pointer to output shared secret +* (an already allocated array of KYBER_SSBYTES bytes) +* - const uint8_t *pk: pointer to input public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int crypto_kem_enc(uint8_t *ct, + uint8_t *ss, + const uint8_t *pk) +{ + uint8_t coins[KYBER_SYMBYTES]; + randombytes(coins, KYBER_SYMBYTES); + crypto_kem_enc_derand(ct, ss, pk, coins); + return 0; +} + +/************************************************* +* Name: crypto_kem_dec +* +* Description: Generates shared secret for given +* cipher text and private key +* +* Arguments: - uint8_t *ss: pointer to output shared secret +* (an already allocated array of KYBER_SSBYTES bytes) +* - const uint8_t *ct: pointer to input cipher text +* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) +* - const uint8_t *sk: pointer to input private key +* (an already allocated array of KYBER_SECRETKEYBYTES bytes) +* +* Returns 0. +* +* On failure, ss will contain a pseudo-random value. 
+**************************************************/ +int crypto_kem_dec(uint8_t *ss, + const uint8_t *ct, + const uint8_t *sk) +{ + int fail; + uint8_t buf[2*KYBER_SYMBYTES]; + /* Will contain key, coins */ + uint8_t kr[2*KYBER_SYMBYTES]; + uint8_t cmp[KYBER_CIPHERTEXTBYTES+KYBER_SYMBYTES]; + const uint8_t *pk = sk+KYBER_INDCPA_SECRETKEYBYTES; + + indcpa_dec(buf, ct, sk); + + /* Multitarget countermeasure for coins + contributory KEM */ + memcpy(buf+KYBER_SYMBYTES, sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, KYBER_SYMBYTES); + hash_g(kr, buf, 2*KYBER_SYMBYTES); + + /* coins are in kr+KYBER_SYMBYTES */ + indcpa_enc(cmp, buf, pk, kr+KYBER_SYMBYTES); + + fail = verify(ct, cmp, KYBER_CIPHERTEXTBYTES); + + /* Compute rejection key */ + rkprf(ss,sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES,ct); + + /* Copy true key to return buffer if fail is false */ + cmov(ss,kr,KYBER_SYMBYTES,!fail); + + return 0; +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/kem.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/kem.h new file mode 100644 index 0000000000..234f11966b --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/kem.h @@ -0,0 +1,35 @@ +#ifndef KEM_H +#define KEM_H + +#include +#include "params.h" + +#define CRYPTO_SECRETKEYBYTES KYBER_SECRETKEYBYTES +#define CRYPTO_PUBLICKEYBYTES KYBER_PUBLICKEYBYTES +#define CRYPTO_CIPHERTEXTBYTES KYBER_CIPHERTEXTBYTES +#define CRYPTO_BYTES KYBER_SSBYTES + +#if (KYBER_K == 2) +#define CRYPTO_ALGNAME "Kyber512" +#elif (KYBER_K == 3) +#define CRYPTO_ALGNAME "Kyber768" +#elif (KYBER_K == 4) +#define CRYPTO_ALGNAME "Kyber1024" +#endif + +#define crypto_kem_keypair_derand KYBER_NAMESPACE(keypair_derand) +int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); + +#define crypto_kem_keypair KYBER_NAMESPACE(keypair) +int crypto_kem_keypair(uint8_t *pk, uint8_t *sk); + +#define crypto_kem_enc_derand KYBER_NAMESPACE(enc_derand) +int crypto_kem_enc_derand(uint8_t *ct, 
uint8_t *ss, const uint8_t *pk, const uint8_t *coins); + +#define crypto_kem_enc KYBER_NAMESPACE(enc) +int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); + +#define crypto_kem_dec KYBER_NAMESPACE(dec) +int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/ntt.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/ntt.c new file mode 100644 index 0000000000..2f2eb10b2f --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/ntt.c @@ -0,0 +1,146 @@ +#include +#include "params.h" +#include "ntt.h" +#include "reduce.h" + +/* Code to generate zetas and zetas_inv used in the number-theoretic transform: + +#define KYBER_ROOT_OF_UNITY 17 + +static const uint8_t tree[128] = { + 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, + 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124, + 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122, + 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126, + 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121, + 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, + 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, + 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127 +}; + +void init_ntt() { + unsigned int i; + int16_t tmp[128]; + + tmp[0] = MONT; + for(i=1;i<128;i++) + tmp[i] = fqmul(tmp[i-1],MONT*KYBER_ROOT_OF_UNITY % KYBER_Q); + + for(i=0;i<128;i++) { + zetas[i] = tmp[tree[i]]; + if(zetas[i] > KYBER_Q/2) + zetas[i] -= KYBER_Q; + if(zetas[i] < -KYBER_Q/2) + zetas[i] += KYBER_Q; + } +} +*/ + +const int16_t zetas[128] = { + -1044, -758, -359, -1517, 1493, 1422, 287, 202, + -171, 622, 1577, 182, 962, -1202, -1474, 1468, + 573, -1325, 264, 383, -829, 1458, -1602, -130, + -681, 1017, 732, 608, -1542, 411, -205, -1571, + 1223, 652, -552, 1015, -1293, 1491, -282, -1544, + 516, -8, -320, 
-666, -1618, -1162, 126, 1469, + -853, -90, -271, 830, 107, -1421, -247, -951, + -398, 961, -1508, -725, 448, -1065, 677, -1275, + -1103, 430, 555, 843, -1251, 871, 1550, 105, + 422, 587, 177, -235, -291, -460, 1574, 1653, + -246, 778, 1159, -147, -777, 1483, -602, 1119, + -1590, 644, -872, 349, 418, 329, -156, -75, + 817, 1097, 603, 610, 1322, -1285, -1465, 384, + -1215, -136, 1218, -1335, -874, 220, -1187, -1659, + -1185, -1530, -1278, 794, -1510, -854, -870, 478, + -108, -308, 996, 991, 958, -1460, 1522, 1628 +}; + +/************************************************* +* Name: fqmul +* +* Description: Multiplication followed by Montgomery reduction +* +* Arguments: - int16_t a: first factor +* - int16_t b: second factor +* +* Returns 16-bit integer congruent to a*b*R^{-1} mod q +**************************************************/ +static int16_t fqmul(int16_t a, int16_t b) { + return montgomery_reduce((int32_t)a*b); +} + +/************************************************* +* Name: ntt +* +* Description: Inplace number-theoretic transform (NTT) in Rq. +* input is in standard order, output is in bitreversed order +* +* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq +**************************************************/ +void ntt(int16_t r[256]) { + unsigned int len, start, j, k; + int16_t t, zeta; + + k = 1; + for(len = 128; len >= 2; len >>= 1) { + for(start = 0; start < 256; start = j + len) { + zeta = zetas[k++]; + for(j = start; j < start + len; j++) { + t = fqmul(zeta, r[j + len]); + r[j + len] = r[j] - t; + r[j] = r[j] + t; + } + } + } +} + +/************************************************* +* Name: invntt_tomont +* +* Description: Inplace inverse number-theoretic transform in Rq and +* multiplication by Montgomery factor 2^16. 
+* Input is in bitreversed order, output is in standard order +* +* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq +**************************************************/ +void invntt(int16_t r[256]) { + unsigned int start, len, j, k; + int16_t t, zeta; + const int16_t f = 1441; // mont^2/128 + + k = 127; + for(len = 2; len <= 128; len <<= 1) { + for(start = 0; start < 256; start = j + len) { + zeta = zetas[k--]; + for(j = start; j < start + len; j++) { + t = r[j]; + r[j] = barrett_reduce(t + r[j + len]); + r[j + len] = r[j + len] - t; + r[j + len] = fqmul(zeta, r[j + len]); + } + } + } + + for(j = 0; j < 256; j++) + r[j] = fqmul(r[j], f); +} + +/************************************************* +* Name: basemul +* +* Description: Multiplication of polynomials in Zq[X]/(X^2-zeta) +* used for multiplication of elements in Rq in NTT domain +* +* Arguments: - int16_t r[2]: pointer to the output polynomial +* - const int16_t a[2]: pointer to the first factor +* - const int16_t b[2]: pointer to the second factor +* - int16_t zeta: integer defining the reduction polynomial +**************************************************/ +void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta) +{ + r[0] = fqmul(a[1], b[1]); + r[0] = fqmul(r[0], zeta); + r[0] += fqmul(a[0], b[0]); + r[1] = fqmul(a[0], b[1]); + r[1] += fqmul(a[1], b[0]); +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/ntt.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/ntt.h new file mode 100644 index 0000000000..227ea74f08 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/ntt.h @@ -0,0 +1,19 @@ +#ifndef NTT_H +#define NTT_H + +#include +#include "params.h" + +#define zetas KYBER_NAMESPACE(zetas) +extern const int16_t zetas[128]; + +#define ntt KYBER_NAMESPACE(ntt) +void ntt(int16_t poly[256]); + +#define invntt KYBER_NAMESPACE(invntt) +void invntt(int16_t poly[256]); + +#define basemul 
KYBER_NAMESPACE(basemul) +void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/params.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/params.h new file mode 100644 index 0000000000..36b2b987f3 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/params.h @@ -0,0 +1,55 @@ +#ifndef PARAMS_H +#define PARAMS_H + +#ifndef KYBER_K +#define KYBER_K 3 /* Change this for different security strengths */ +#endif + + +/* Don't change parameters below this line */ +#if (KYBER_K == 2) +#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_512_ipd_ref_##s +#elif (KYBER_K == 3) +#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_768_ipd_ref_##s +#elif (KYBER_K == 4) +#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_1024_ipd_ref_##s +#else +#error "KYBER_K must be in {2,3,4}" +#endif + +#define KYBER_N 256 +#define KYBER_Q 3329 + +#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ +#define KYBER_SSBYTES 32 /* size in bytes of shared key */ + +#define KYBER_POLYBYTES 384 +#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) + +#if KYBER_K == 2 +#define KYBER_ETA1 3 +#define KYBER_POLYCOMPRESSEDBYTES 128 +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) +#elif KYBER_K == 3 +#define KYBER_ETA1 2 +#define KYBER_POLYCOMPRESSEDBYTES 128 +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) +#elif KYBER_K == 4 +#define KYBER_ETA1 2 +#define KYBER_POLYCOMPRESSEDBYTES 160 +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352) +#endif + +#define KYBER_ETA2 2 + +#define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES) +#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) +#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES) +#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) + +#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) +/* 32 bytes of additional space to save H(pk) */ 
+#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) +#define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES) + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/poly.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/poly.c new file mode 100644 index 0000000000..0fe5a20f63 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/poly.c @@ -0,0 +1,360 @@ +#include +#include "params.h" +#include "poly.h" +#include "ntt.h" +#include "reduce.h" +#include "cbd.h" +#include "symmetric.h" + +/************************************************* +* Name: poly_compress +* +* Description: Compression and subsequent serialization of a polynomial +* +* Arguments: - uint8_t *r: pointer to output byte array +* (of length KYBER_POLYCOMPRESSEDBYTES) +* - const poly *a: pointer to input polynomial +**************************************************/ +void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) +{ + unsigned int i,j; + int32_t u; + uint32_t d0; + uint8_t t[8]; + +#if (KYBER_POLYCOMPRESSEDBYTES == 128) + + for(i=0;icoeffs[8*i+j]; + u += (u >> 15) & KYBER_Q; +/* t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */ + d0 = u << 4; + d0 += 1665; + d0 *= 80635; + d0 >>= 28; + t[j] = d0 & 0xf; + } + + r[0] = t[0] | (t[1] << 4); + r[1] = t[2] | (t[3] << 4); + r[2] = t[4] | (t[5] << 4); + r[3] = t[6] | (t[7] << 4); + r += 4; + } +#elif (KYBER_POLYCOMPRESSEDBYTES == 160) + for(i=0;icoeffs[8*i+j]; + u += (u >> 15) & KYBER_Q; +/* t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */ + d0 = u << 5; + d0 += 1664; + d0 *= 40318; + d0 >>= 27; + t[j] = d0 & 0x1f; + } + + r[0] = (t[0] >> 0) | (t[1] << 5); + r[1] = (t[1] >> 3) | (t[2] << 2) | (t[3] << 7); + r[2] = (t[3] >> 1) | (t[4] << 4); + r[3] = (t[4] >> 4) | (t[5] << 1) | (t[6] << 6); + r[4] = (t[6] >> 2) | (t[7] << 3); + r += 5; + } +#else +#error "KYBER_POLYCOMPRESSEDBYTES needs to be in 
{128, 160}" +#endif +} + +/************************************************* +* Name: poly_decompress +* +* Description: De-serialization and subsequent decompression of a polynomial; +* approximate inverse of poly_compress +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: pointer to input byte array +* (of length KYBER_POLYCOMPRESSEDBYTES bytes) +**************************************************/ +void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]) +{ + unsigned int i; + +#if (KYBER_POLYCOMPRESSEDBYTES == 128) + for(i=0;icoeffs[2*i+0] = (((uint16_t)(a[0] & 15)*KYBER_Q) + 8) >> 4; + r->coeffs[2*i+1] = (((uint16_t)(a[0] >> 4)*KYBER_Q) + 8) >> 4; + a += 1; + } +#elif (KYBER_POLYCOMPRESSEDBYTES == 160) + unsigned int j; + uint8_t t[8]; + for(i=0;i> 0); + t[1] = (a[0] >> 5) | (a[1] << 3); + t[2] = (a[1] >> 2); + t[3] = (a[1] >> 7) | (a[2] << 1); + t[4] = (a[2] >> 4) | (a[3] << 4); + t[5] = (a[3] >> 1); + t[6] = (a[3] >> 6) | (a[4] << 2); + t[7] = (a[4] >> 3); + a += 5; + + for(j=0;j<8;j++) + r->coeffs[8*i+j] = ((uint32_t)(t[j] & 31)*KYBER_Q + 16) >> 5; + } +#else +#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}" +#endif +} + +/************************************************* +* Name: poly_tobytes +* +* Description: Serialization of a polynomial +* +* Arguments: - uint8_t *r: pointer to output byte array +* (needs space for KYBER_POLYBYTES bytes) +* - const poly *a: pointer to input polynomial +**************************************************/ +void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a) +{ + unsigned int i; + uint16_t t0, t1; + + for(i=0;icoeffs[2*i]; + t0 += ((int16_t)t0 >> 15) & KYBER_Q; + t1 = a->coeffs[2*i+1]; + t1 += ((int16_t)t1 >> 15) & KYBER_Q; + r[3*i+0] = (t0 >> 0); + r[3*i+1] = (t0 >> 8) | (t1 << 4); + r[3*i+2] = (t1 >> 4); + } +} + +/************************************************* +* Name: poly_frombytes +* +* Description: De-serialization of a polynomial; +* inverse of 
poly_tobytes +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: pointer to input byte array +* (of KYBER_POLYBYTES bytes) +**************************************************/ +void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]) +{ + unsigned int i; + for(i=0;icoeffs[2*i] = ((a[3*i+0] >> 0) | ((uint16_t)a[3*i+1] << 8)) & 0xFFF; + r->coeffs[2*i+1] = ((a[3*i+1] >> 4) | ((uint16_t)a[3*i+2] << 4)) & 0xFFF; + } +} + +/************************************************* +* Name: poly_frommsg +* +* Description: Convert 32-byte message to polynomial +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *msg: pointer to input message +**************************************************/ +void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]) +{ + unsigned int i,j; + int16_t mask; + +#if (KYBER_INDCPA_MSGBYTES != KYBER_N/8) +#error "KYBER_INDCPA_MSGBYTES must be equal to KYBER_N/8 bytes!" +#endif + + for(i=0;i> j)&1); + r->coeffs[8*i+j] = mask & ((KYBER_Q+1)/2); + } + } +} + +/************************************************* +* Name: poly_tomsg +* +* Description: Convert polynomial to 32-byte message +* +* Arguments: - uint8_t *msg: pointer to output message +* - const poly *a: pointer to input polynomial +**************************************************/ +void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *a) +{ + unsigned int i,j; + uint32_t t; + + for(i=0;icoeffs[8*i+j]; + // t += ((int16_t)t >> 15) & KYBER_Q; + // t = (((t << 1) + KYBER_Q/2)/KYBER_Q) & 1; + t <<= 1; + t += 1665; + t *= 80635; + t >>= 28; + t &= 1; + msg[i] |= t << j; + } + } +} + +/************************************************* +* Name: poly_getnoise_eta1 +* +* Description: Sample a polynomial deterministically from a seed and a nonce, +* with output polynomial close to centered binomial distribution +* with parameter KYBER_ETA1 +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *seed: 
pointer to input seed +* (of length KYBER_SYMBYTES bytes) +* - uint8_t nonce: one-byte input nonce +**************************************************/ +void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) +{ + uint8_t buf[KYBER_ETA1*KYBER_N/4]; + prf(buf, sizeof(buf), seed, nonce); + poly_cbd_eta1(r, buf); +} + +/************************************************* +* Name: poly_getnoise_eta2 +* +* Description: Sample a polynomial deterministically from a seed and a nonce, +* with output polynomial close to centered binomial distribution +* with parameter KYBER_ETA2 +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *seed: pointer to input seed +* (of length KYBER_SYMBYTES bytes) +* - uint8_t nonce: one-byte input nonce +**************************************************/ +void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) +{ + uint8_t buf[KYBER_ETA2*KYBER_N/4]; + prf(buf, sizeof(buf), seed, nonce); + poly_cbd_eta2(r, buf); +} + + +/************************************************* +* Name: poly_ntt +* +* Description: Computes negacyclic number-theoretic transform (NTT) of +* a polynomial in place; +* inputs assumed to be in normal order, output in bitreversed order +* +* Arguments: - uint16_t *r: pointer to in/output polynomial +**************************************************/ +void poly_ntt(poly *r) +{ + ntt(r->coeffs); + poly_reduce(r); +} + +/************************************************* +* Name: poly_invntt_tomont +* +* Description: Computes inverse of negacyclic number-theoretic transform (NTT) +* of a polynomial in place; +* inputs assumed to be in bitreversed order, output in normal order +* +* Arguments: - uint16_t *a: pointer to in/output polynomial +**************************************************/ +void poly_invntt_tomont(poly *r) +{ + invntt(r->coeffs); +} + +/************************************************* +* Name: poly_basemul_montgomery +* +* 
Description: Multiplication of two polynomials in NTT domain +* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_basemul_montgomery(poly *r, const poly *a, const poly *b) +{ + unsigned int i; + for(i=0;icoeffs[4*i], &a->coeffs[4*i], &b->coeffs[4*i], zetas[64+i]); + basemul(&r->coeffs[4*i+2], &a->coeffs[4*i+2], &b->coeffs[4*i+2], -zetas[64+i]); + } +} + +/************************************************* +* Name: poly_tomont +* +* Description: Inplace conversion of all coefficients of a polynomial +* from normal domain to Montgomery domain +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void poly_tomont(poly *r) +{ + unsigned int i; + const int16_t f = (1ULL << 32) % KYBER_Q; + for(i=0;icoeffs[i] = montgomery_reduce((int32_t)r->coeffs[i]*f); +} + +/************************************************* +* Name: poly_reduce +* +* Description: Applies Barrett reduction to all coefficients of a polynomial +* for details of the Barrett reduction see comments in reduce.c +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void poly_reduce(poly *r) +{ + unsigned int i; + for(i=0;icoeffs[i] = barrett_reduce(r->coeffs[i]); +} + +/************************************************* +* Name: poly_add +* +* Description: Add two polynomials; no modular reduction is performed +* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_add(poly *r, const poly *a, const poly *b) +{ + unsigned int i; + for(i=0;icoeffs[i] = a->coeffs[i] + b->coeffs[i]; +} + +/************************************************* +* Name: 
poly_sub +* +* Description: Subtract two polynomials; no modular reduction is performed +* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_sub(poly *r, const poly *a, const poly *b) +{ + unsigned int i; + for(i=0;icoeffs[i] = a->coeffs[i] - b->coeffs[i]; +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/poly.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/poly.h new file mode 100644 index 0000000000..9a99c7cdad --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/poly.h @@ -0,0 +1,53 @@ +#ifndef POLY_H +#define POLY_H + +#include +#include "params.h" + +/* + * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial + * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1] + */ +typedef struct{ + int16_t coeffs[KYBER_N]; +} poly; + +#define poly_compress KYBER_NAMESPACE(poly_compress) +void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a); +#define poly_decompress KYBER_NAMESPACE(poly_decompress) +void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]); + +#define poly_tobytes KYBER_NAMESPACE(poly_tobytes) +void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a); +#define poly_frombytes KYBER_NAMESPACE(poly_frombytes) +void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]); + +#define poly_frommsg KYBER_NAMESPACE(poly_frommsg) +void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]); +#define poly_tomsg KYBER_NAMESPACE(poly_tomsg) +void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r); + +#define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1) +void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); + +#define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2) +void poly_getnoise_eta2(poly *r, 
const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); + +#define poly_ntt KYBER_NAMESPACE(poly_ntt) +void poly_ntt(poly *r); +#define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont) +void poly_invntt_tomont(poly *r); +#define poly_basemul_montgomery KYBER_NAMESPACE(poly_basemul_montgomery) +void poly_basemul_montgomery(poly *r, const poly *a, const poly *b); +#define poly_tomont KYBER_NAMESPACE(poly_tomont) +void poly_tomont(poly *r); + +#define poly_reduce KYBER_NAMESPACE(poly_reduce) +void poly_reduce(poly *r); + +#define poly_add KYBER_NAMESPACE(poly_add) +void poly_add(poly *r, const poly *a, const poly *b); +#define poly_sub KYBER_NAMESPACE(poly_sub) +void poly_sub(poly *r, const poly *a, const poly *b); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/polyvec.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/polyvec.c new file mode 100644 index 0000000000..661c71ec32 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/polyvec.c @@ -0,0 +1,247 @@ +#include +#include "params.h" +#include "poly.h" +#include "polyvec.h" + +/************************************************* +* Name: polyvec_compress +* +* Description: Compress and serialize vector of polynomials +* +* Arguments: - uint8_t *r: pointer to output byte array +* (needs space for KYBER_POLYVECCOMPRESSEDBYTES) +* - const polyvec *a: pointer to input vector of polynomials +**************************************************/ +void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) +{ + unsigned int i,j,k; + uint64_t d0; + +#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) + uint16_t t[8]; + for(i=0;ivec[i].coeffs[8*j+k]; + t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; +/* t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */ + d0 = t[k]; + d0 <<= 11; + d0 += 1664; + d0 *= 645084; + d0 >>= 31; + t[k] = d0 & 0x7ff; + + } + + r[ 0] = (t[0] >> 0); + r[ 1] = (t[0] >> 8) | (t[1] << 3); + r[ 2] = 
(t[1] >> 5) | (t[2] << 6); + r[ 3] = (t[2] >> 2); + r[ 4] = (t[2] >> 10) | (t[3] << 1); + r[ 5] = (t[3] >> 7) | (t[4] << 4); + r[ 6] = (t[4] >> 4) | (t[5] << 7); + r[ 7] = (t[5] >> 1); + r[ 8] = (t[5] >> 9) | (t[6] << 2); + r[ 9] = (t[6] >> 6) | (t[7] << 5); + r[10] = (t[7] >> 3); + r += 11; + } + } +#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) + uint16_t t[4]; + for(i=0;ivec[i].coeffs[4*j+k]; + t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; +/* t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */ + d0 = t[k]; + d0 <<= 10; + d0 += 1665; + d0 *= 1290167; + d0 >>= 32; + t[k] = d0 & 0x3ff; + } + + r[0] = (t[0] >> 0); + r[1] = (t[0] >> 8) | (t[1] << 2); + r[2] = (t[1] >> 6) | (t[2] << 4); + r[3] = (t[2] >> 4) | (t[3] << 6); + r[4] = (t[3] >> 2); + r += 5; + } + } +#else +#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" +#endif +} + +/************************************************* +* Name: polyvec_decompress +* +* Description: De-serialize and decompress vector of polynomials; +* approximate inverse of polyvec_compress +* +* Arguments: - polyvec *r: pointer to output vector of polynomials +* - const uint8_t *a: pointer to input byte array +* (of length KYBER_POLYVECCOMPRESSEDBYTES) +**************************************************/ +void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]) +{ + unsigned int i,j,k; + +#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) + uint16_t t[8]; + for(i=0;i> 0) | ((uint16_t)a[ 1] << 8); + t[1] = (a[1] >> 3) | ((uint16_t)a[ 2] << 5); + t[2] = (a[2] >> 6) | ((uint16_t)a[ 3] << 2) | ((uint16_t)a[4] << 10); + t[3] = (a[4] >> 1) | ((uint16_t)a[ 5] << 7); + t[4] = (a[5] >> 4) | ((uint16_t)a[ 6] << 4); + t[5] = (a[6] >> 7) | ((uint16_t)a[ 7] << 1) | ((uint16_t)a[8] << 9); + t[6] = (a[8] >> 2) | ((uint16_t)a[ 9] << 6); + t[7] = (a[9] >> 5) | ((uint16_t)a[10] << 3); + a += 11; + + for(k=0;k<8;k++) + r->vec[i].coeffs[8*j+k] = ((uint32_t)(t[k] & 0x7FF)*KYBER_Q 
+ 1024) >> 11; + } + } +#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) + uint16_t t[4]; + for(i=0;i> 0) | ((uint16_t)a[1] << 8); + t[1] = (a[1] >> 2) | ((uint16_t)a[2] << 6); + t[2] = (a[2] >> 4) | ((uint16_t)a[3] << 4); + t[3] = (a[3] >> 6) | ((uint16_t)a[4] << 2); + a += 5; + + for(k=0;k<4;k++) + r->vec[i].coeffs[4*j+k] = ((uint32_t)(t[k] & 0x3FF)*KYBER_Q + 512) >> 10; + } + } +#else +#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" +#endif +} + +/************************************************* +* Name: polyvec_tobytes +* +* Description: Serialize vector of polynomials +* +* Arguments: - uint8_t *r: pointer to output byte array +* (needs space for KYBER_POLYVECBYTES) +* - const polyvec *a: pointer to input vector of polynomials +**************************************************/ +void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_frombytes +* +* Description: De-serialize vector of polynomials; +* inverse of polyvec_tobytes +* +* Arguments: - uint8_t *r: pointer to output byte array +* - const polyvec *a: pointer to input vector of polynomials +* (of length KYBER_POLYVECBYTES) +**************************************************/ +void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]) +{ + unsigned int i; + for(i=0;ivec[i], a+i*KYBER_POLYBYTES); +} + +/************************************************* +* Name: polyvec_ntt +* +* Description: Apply forward NTT to all elements of a vector of polynomials +* +* Arguments: - polyvec *r: pointer to in/output vector of polynomials +**************************************************/ +void polyvec_ntt(polyvec *r) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_invntt_tomont +* +* Description: Apply inverse NTT to all elements of a vector of polynomials +* and multiply 
by Montgomery factor 2^16 +* +* Arguments: - polyvec *r: pointer to in/output vector of polynomials +**************************************************/ +void polyvec_invntt_tomont(polyvec *r) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_basemul_acc_montgomery +* +* Description: Multiply elements of a and b in NTT domain, accumulate into r, +* and multiply by 2^-16. +* +* Arguments: - poly *r: pointer to output polynomial +* - const polyvec *a: pointer to first input vector of polynomials +* - const polyvec *b: pointer to second input vector of polynomials +**************************************************/ +void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) +{ + unsigned int i; + poly t; + + poly_basemul_montgomery(r, &a->vec[0], &b->vec[0]); + for(i=1;ivec[i], &b->vec[i]); + poly_add(r, r, &t); + } + + poly_reduce(r); +} + +/************************************************* +* Name: polyvec_reduce +* +* Description: Applies Barrett reduction to each coefficient +* of each element of a vector of polynomials; +* for details of the Barrett reduction see comments in reduce.c +* +* Arguments: - polyvec *r: pointer to input/output polynomial +**************************************************/ +void polyvec_reduce(polyvec *r) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_add +* +* Description: Add vectors of polynomials +* +* Arguments: - polyvec *r: pointer to output vector of polynomials +* - const polyvec *a: pointer to first input vector of polynomials +* - const polyvec *b: pointer to second input vector of polynomials +**************************************************/ +void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) +{ + unsigned int i; + for(i=0;ivec[i], &a->vec[i], &b->vec[i]); +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/polyvec.h 
b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/polyvec.h new file mode 100644 index 0000000000..57b605494e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/polyvec.h @@ -0,0 +1,36 @@ +#ifndef POLYVEC_H +#define POLYVEC_H + +#include +#include "params.h" +#include "poly.h" + +typedef struct{ + poly vec[KYBER_K]; +} polyvec; + +#define polyvec_compress KYBER_NAMESPACE(polyvec_compress) +void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a); +#define polyvec_decompress KYBER_NAMESPACE(polyvec_decompress) +void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]); + +#define polyvec_tobytes KYBER_NAMESPACE(polyvec_tobytes) +void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a); +#define polyvec_frombytes KYBER_NAMESPACE(polyvec_frombytes) +void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]); + +#define polyvec_ntt KYBER_NAMESPACE(polyvec_ntt) +void polyvec_ntt(polyvec *r); +#define polyvec_invntt_tomont KYBER_NAMESPACE(polyvec_invntt_tomont) +void polyvec_invntt_tomont(polyvec *r); + +#define polyvec_basemul_acc_montgomery KYBER_NAMESPACE(polyvec_basemul_acc_montgomery) +void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b); + +#define polyvec_reduce KYBER_NAMESPACE(polyvec_reduce) +void polyvec_reduce(polyvec *r); + +#define polyvec_add KYBER_NAMESPACE(polyvec_add) +void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/reduce.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/reduce.c new file mode 100644 index 0000000000..9d8e7edf83 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/reduce.c @@ -0,0 +1,42 @@ +#include +#include "params.h" +#include "reduce.h" + +/************************************************* +* Name: montgomery_reduce +* +* Description: Montgomery 
reduction; given a 32-bit integer a, computes +* 16-bit integer congruent to a * R^-1 mod q, where R=2^16 +* +* Arguments: - int32_t a: input integer to be reduced; +* has to be in {-q2^15,...,q2^15-1} +* +* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. +**************************************************/ +int16_t montgomery_reduce(int32_t a) +{ + int16_t t; + + t = (int16_t)a*QINV; + t = (a - (int32_t)t*KYBER_Q) >> 16; + return t; +} + +/************************************************* +* Name: barrett_reduce +* +* Description: Barrett reduction; given a 16-bit integer a, computes +* centered representative congruent to a mod q in {-(q-1)/2,...,(q-1)/2} +* +* Arguments: - int16_t a: input integer to be reduced +* +* Returns: integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q. +**************************************************/ +int16_t barrett_reduce(int16_t a) { + int16_t t; + const int16_t v = ((1<<26) + KYBER_Q/2)/KYBER_Q; + + t = ((int32_t)v*a + (1<<25)) >> 26; + t *= KYBER_Q; + return a - t; +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/reduce.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/reduce.h new file mode 100644 index 0000000000..c1bc1e4c7b --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/reduce.h @@ -0,0 +1,16 @@ +#ifndef REDUCE_H +#define REDUCE_H + +#include +#include "params.h" + +#define MONT -1044 // 2^16 mod q +#define QINV -3327 // q^-1 mod 2^16 + +#define montgomery_reduce KYBER_NAMESPACE(montgomery_reduce) +int16_t montgomery_reduce(int32_t a); + +#define barrett_reduce KYBER_NAMESPACE(barrett_reduce) +int16_t barrett_reduce(int16_t a); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/symmetric-shake.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/symmetric-shake.c new file mode 100644 index 0000000000..20f451882e --- /dev/null +++ 
b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/symmetric-shake.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include "params.h" +#include "symmetric.h" +#include "fips202.h" + +/************************************************* +* Name: kyber_shake128_absorb +* +* Description: Absorb step of the SHAKE128 specialized for the Kyber context. +* +* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state +* - const uint8_t *seed: pointer to KYBER_SYMBYTES input to be absorbed into state +* - uint8_t i: additional byte of input +* - uint8_t j: additional byte of input +**************************************************/ +void kyber_shake128_absorb(shake128incctx *state, + const uint8_t seed[KYBER_SYMBYTES], + uint8_t x, + uint8_t y) +{ + uint8_t extseed[KYBER_SYMBYTES+2]; + + memcpy(extseed, seed, KYBER_SYMBYTES); + extseed[KYBER_SYMBYTES+0] = x; + extseed[KYBER_SYMBYTES+1] = y; + + shake128_absorb_once(state, extseed, sizeof(extseed)); +} + +/************************************************* +* Name: kyber_shake256_prf +* +* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input +* and then generates outlen bytes of SHAKE256 output +* +* Arguments: - uint8_t *out: pointer to output +* - size_t outlen: number of requested output bytes +* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) +* - uint8_t nonce: single-byte nonce (public PRF input) +**************************************************/ +void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce) +{ + uint8_t extkey[KYBER_SYMBYTES+1]; + + memcpy(extkey, key, KYBER_SYMBYTES); + extkey[KYBER_SYMBYTES] = nonce; + + shake256(out, outlen, extkey, sizeof(extkey)); +} + +/************************************************* +* Name: kyber_shake256_prf +* +* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input +* and then generates outlen bytes of SHAKE256 output +* +* 
Arguments: - uint8_t *out: pointer to output +* - size_t outlen: number of requested output bytes +* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) +* - uint8_t nonce: single-byte nonce (public PRF input) +**************************************************/ +void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]) +{ + shake256incctx s; + + shake256_inc_init(&s); + shake256_inc_absorb(&s, key, KYBER_SYMBYTES); + shake256_inc_absorb(&s, input, KYBER_CIPHERTEXTBYTES); + shake256_inc_finalize(&s); + shake256_inc_squeeze(out, KYBER_SSBYTES, &s); + shake256_inc_ctx_release(&s); +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/symmetric.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/symmetric.h new file mode 100644 index 0000000000..2acc66f98d --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/symmetric.h @@ -0,0 +1,35 @@ +#ifndef SYMMETRIC_H +#define SYMMETRIC_H + +#include +#include +#include "params.h" + +#include "fips202.h" + +typedef shake128incctx xof_state; + +#define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb) +void kyber_shake128_absorb(shake128incctx *s, + const uint8_t seed[KYBER_SYMBYTES], + uint8_t x, + uint8_t y); + +#define kyber_shake256_prf KYBER_NAMESPACE(kyber_shake256_prf) +void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce); + +#define kyber_shake256_rkprf KYBER_NAMESPACE(kyber_shake256_rkprf) +void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]); + +#define XOF_BLOCKBYTES SHAKE128_RATE + +#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) +#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES) +#define xof_init(STATE, SEED) shake128_inc_init(STATE) +#define xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED, 
X, Y) +#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) +#define xof_release(STATE) shake128_inc_ctx_release(STATE) +#define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, NONCE) +#define rkprf(OUT, KEY, INPUT) kyber_shake256_rkprf(OUT, KEY, INPUT) + +#endif /* SYMMETRIC_H */ diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/verify.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/verify.c new file mode 100644 index 0000000000..ed4a6541f8 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/verify.c @@ -0,0 +1,47 @@ +#include +#include +#include "verify.h" + +/************************************************* +* Name: verify +* +* Description: Compare two arrays for equality in constant time. +* +* Arguments: const uint8_t *a: pointer to first byte array +* const uint8_t *b: pointer to second byte array +* size_t len: length of the byte arrays +* +* Returns 0 if the byte arrays are equal, 1 otherwise +**************************************************/ +int verify(const uint8_t *a, const uint8_t *b, size_t len) +{ + size_t i; + uint8_t r = 0; + + for(i=0;i> 63; +} + +/************************************************* +* Name: cmov +* +* Description: Copy len bytes from x to r if b is 1; +* don't modify x if b is 0. Requires b to be in {0,1}; +* assumes two's complement representation of negative integers. +* Runs in constant time. 
+* +* Arguments: uint8_t *r: pointer to output byte array +* const uint8_t *x: pointer to input byte array +* size_t len: Amount of bytes to be copied +* uint8_t b: Condition bit; has to be in {0,1} +**************************************************/ +void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) +{ + size_t i; + + b = -b; + for(i=0;i +#include +#include "params.h" + +#define verify KYBER_NAMESPACE(verify) +int verify(const uint8_t *a, const uint8_t *b, size_t len); + +#define cmov KYBER_NAMESPACE(cmov) +void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/LICENSE b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/LICENSE new file mode 100644 index 0000000000..7922ab8007 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/LICENSE @@ -0,0 +1,6 @@ +Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/); +or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html). + +For Keccak and AES we are using public-domain +code from sources and by authors listed in +comments on top of the respective files. 
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/align.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/align.h new file mode 100644 index 0000000000..3463866f37 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/align.h @@ -0,0 +1,19 @@ +#ifndef ALIGN_H +#define ALIGN_H + +#include +#include + +#define ALIGNED_UINT8(N) \ + union { \ + uint8_t coeffs[N]; \ + __m256i vec[(N+31)/32]; \ + } + +#define ALIGNED_INT16(N) \ + union { \ + int16_t coeffs[N]; \ + __m256i vec[(N+15)/16]; \ + } + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/api.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/api.h new file mode 100644 index 0000000000..a154e80f1d --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/api.h @@ -0,0 +1,66 @@ +#ifndef API_H +#define API_H + +#include + +#define pqcrystals_kyber512_SECRETKEYBYTES 1632 +#define pqcrystals_kyber512_PUBLICKEYBYTES 800 +#define pqcrystals_kyber512_CIPHERTEXTBYTES 768 +#define pqcrystals_kyber512_KEYPAIRCOINBYTES 64 +#define pqcrystals_kyber512_ENCCOINBYTES 32 +#define pqcrystals_kyber512_BYTES 32 + +#define pqcrystals_kyber512_avx2_SECRETKEYBYTES pqcrystals_kyber512_SECRETKEYBYTES +#define pqcrystals_kyber512_avx2_PUBLICKEYBYTES pqcrystals_kyber512_PUBLICKEYBYTES +#define pqcrystals_kyber512_avx2_CIPHERTEXTBYTES pqcrystals_kyber512_CIPHERTEXTBYTES +#define pqcrystals_kyber512_avx2_KEYPAIRCOINBYTES pqcrystals_kyber512_KEYPAIRCOINBYTES +#define pqcrystals_kyber512_avx2_ENCCOINBYTES pqcrystals_kyber512_ENCCOINBYTES +#define pqcrystals_kyber512_avx2_BYTES pqcrystals_kyber512_BYTES + +int pqcrystals_kyber512_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); +int pqcrystals_kyber512_avx2_keypair(uint8_t *pk, uint8_t *sk); +int pqcrystals_kyber512_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); +int pqcrystals_kyber512_avx2_enc(uint8_t 
*ct, uint8_t *ss, const uint8_t *pk); +int pqcrystals_kyber512_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#define pqcrystals_kyber768_SECRETKEYBYTES 2400 +#define pqcrystals_kyber768_PUBLICKEYBYTES 1184 +#define pqcrystals_kyber768_CIPHERTEXTBYTES 1088 +#define pqcrystals_kyber768_KEYPAIRCOINBYTES 64 +#define pqcrystals_kyber768_ENCCOINBYTES 32 +#define pqcrystals_kyber768_BYTES 32 + +#define pqcrystals_kyber768_avx2_SECRETKEYBYTES pqcrystals_kyber768_SECRETKEYBYTES +#define pqcrystals_kyber768_avx2_PUBLICKEYBYTES pqcrystals_kyber768_PUBLICKEYBYTES +#define pqcrystals_kyber768_avx2_CIPHERTEXTBYTES pqcrystals_kyber768_CIPHERTEXTBYTES +#define pqcrystals_kyber768_avx2_KEYPAIRCOINBYTES pqcrystals_kyber768_KEYPAIRCOINBYTES +#define pqcrystals_kyber768_avx2_ENCCOINBYTES pqcrystals_kyber768_ENCCOINBYTES +#define pqcrystals_kyber768_avx2_BYTES pqcrystals_kyber768_BYTES + +int pqcrystals_kyber768_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); +int pqcrystals_kyber768_avx2_keypair(uint8_t *pk, uint8_t *sk); +int pqcrystals_kyber768_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); +int pqcrystals_kyber768_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int pqcrystals_kyber768_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#define pqcrystals_kyber1024_SECRETKEYBYTES 3168 +#define pqcrystals_kyber1024_PUBLICKEYBYTES 1568 +#define pqcrystals_kyber1024_CIPHERTEXTBYTES 1568 +#define pqcrystals_kyber1024_KEYPAIRCOINBYTES 64 +#define pqcrystals_kyber1024_ENCCOINBYTES 32 +#define pqcrystals_kyber1024_BYTES 32 + +#define pqcrystals_kyber1024_avx2_SECRETKEYBYTES pqcrystals_kyber1024_SECRETKEYBYTES +#define pqcrystals_kyber1024_avx2_PUBLICKEYBYTES pqcrystals_kyber1024_PUBLICKEYBYTES +#define pqcrystals_kyber1024_avx2_CIPHERTEXTBYTES pqcrystals_kyber1024_CIPHERTEXTBYTES +#define pqcrystals_kyber1024_avx2_KEYPAIRCOINBYTES pqcrystals_kyber1024_KEYPAIRCOINBYTES +#define 
pqcrystals_kyber1024_avx2_ENCCOINBYTES pqcrystals_kyber1024_ENCCOINBYTES +#define pqcrystals_kyber1024_avx2_BYTES pqcrystals_kyber1024_BYTES + +int pqcrystals_kyber1024_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); +int pqcrystals_kyber1024_avx2_keypair(uint8_t *pk, uint8_t *sk); +int pqcrystals_kyber1024_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); +int pqcrystals_kyber1024_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int pqcrystals_kyber1024_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/basemul.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/basemul.S new file mode 100644 index 0000000000..36990639b2 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/basemul.S @@ -0,0 +1,105 @@ +#include "consts.h" + +.macro schoolbook off +vmovdqa _16XQINV*2(%rcx),%ymm0 +vmovdqa (64*\off+ 0)*2(%rsi),%ymm1 # a0 +vmovdqa (64*\off+16)*2(%rsi),%ymm2 # b0 +vmovdqa (64*\off+32)*2(%rsi),%ymm3 # a1 +vmovdqa (64*\off+48)*2(%rsi),%ymm4 # b1 + +vpmullw %ymm0,%ymm1,%ymm9 # a0.lo +vpmullw %ymm0,%ymm2,%ymm10 # b0.lo +vpmullw %ymm0,%ymm3,%ymm11 # a1.lo +vpmullw %ymm0,%ymm4,%ymm12 # b1.lo + +vmovdqa (64*\off+ 0)*2(%rdx),%ymm5 # c0 +vmovdqa (64*\off+16)*2(%rdx),%ymm6 # d0 + +vpmulhw %ymm5,%ymm1,%ymm13 # a0c0.hi +vpmulhw %ymm6,%ymm1,%ymm1 # a0d0.hi +vpmulhw %ymm5,%ymm2,%ymm14 # b0c0.hi +vpmulhw %ymm6,%ymm2,%ymm2 # b0d0.hi + +vmovdqa (64*\off+32)*2(%rdx),%ymm7 # c1 +vmovdqa (64*\off+48)*2(%rdx),%ymm8 # d1 + +vpmulhw %ymm7,%ymm3,%ymm15 # a1c1.hi +vpmulhw %ymm8,%ymm3,%ymm3 # a1d1.hi +vpmulhw %ymm7,%ymm4,%ymm0 # b1c1.hi +vpmulhw %ymm8,%ymm4,%ymm4 # b1d1.hi + +vmovdqa %ymm13,(%rsp) + +vpmullw %ymm5,%ymm9,%ymm13 # a0c0.lo +vpmullw %ymm6,%ymm9,%ymm9 # a0d0.lo +vpmullw %ymm5,%ymm10,%ymm5 # b0c0.lo +vpmullw %ymm6,%ymm10,%ymm10 # b0d0.lo + +vpmullw %ymm7,%ymm11,%ymm6 # a1c1.lo +vpmullw 
%ymm8,%ymm11,%ymm11 # a1d1.lo +vpmullw %ymm7,%ymm12,%ymm7 # b1c1.lo +vpmullw %ymm8,%ymm12,%ymm12 # b1d1.lo + +vmovdqa _16XQ*2(%rcx),%ymm8 +vpmulhw %ymm8,%ymm13,%ymm13 +vpmulhw %ymm8,%ymm9,%ymm9 +vpmulhw %ymm8,%ymm5,%ymm5 +vpmulhw %ymm8,%ymm10,%ymm10 +vpmulhw %ymm8,%ymm6,%ymm6 +vpmulhw %ymm8,%ymm11,%ymm11 +vpmulhw %ymm8,%ymm7,%ymm7 +vpmulhw %ymm8,%ymm12,%ymm12 + +vpsubw (%rsp),%ymm13,%ymm13 # -a0c0 +vpsubw %ymm9,%ymm1,%ymm9 # a0d0 +vpsubw %ymm5,%ymm14,%ymm5 # b0c0 +vpsubw %ymm10,%ymm2,%ymm10 # b0d0 + +vpsubw %ymm6,%ymm15,%ymm6 # a1c1 +vpsubw %ymm11,%ymm3,%ymm11 # a1d1 +vpsubw %ymm7,%ymm0,%ymm7 # b1c1 +vpsubw %ymm12,%ymm4,%ymm12 # b1d1 + +vmovdqa (%r9),%ymm0 +vmovdqa 32(%r9),%ymm1 +vpmullw %ymm0,%ymm10,%ymm2 +vpmullw %ymm0,%ymm12,%ymm3 +vpmulhw %ymm1,%ymm10,%ymm10 +vpmulhw %ymm1,%ymm12,%ymm12 +vpmulhw %ymm8,%ymm2,%ymm2 +vpmulhw %ymm8,%ymm3,%ymm3 +vpsubw %ymm2,%ymm10,%ymm10 # rb0d0 +vpsubw %ymm3,%ymm12,%ymm12 # rb1d1 + +vpaddw %ymm5,%ymm9,%ymm9 +vpaddw %ymm7,%ymm11,%ymm11 +vpsubw %ymm13,%ymm10,%ymm13 +vpsubw %ymm12,%ymm6,%ymm6 + +vmovdqa %ymm13,(64*\off+ 0)*2(%rdi) +vmovdqa %ymm9,(64*\off+16)*2(%rdi) +vmovdqa %ymm6,(64*\off+32)*2(%rdi) +vmovdqa %ymm11,(64*\off+48)*2(%rdi) +.endm + +.text +.global cdecl(basemul_avx) +cdecl(basemul_avx): +mov %rsp,%r8 +and $-32,%rsp +sub $32,%rsp + +lea (_ZETAS_EXP+176)*2(%rcx),%r9 +schoolbook 0 + +add $32*2,%r9 +schoolbook 1 + +add $192*2,%r9 +schoolbook 2 + +add $32*2,%r9 +schoolbook 3 + +mov %r8,%rsp +ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/cbd.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/cbd.c new file mode 100644 index 0000000000..dad473c79e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/cbd.c @@ -0,0 +1,144 @@ +#include +#include +#include "params.h" +#include "cbd.h" + +/************************************************* +* Name: cbd2 +* +* Description: Given an array of uniformly random bytes, compute +* polynomial with coefficients 
distributed according to +* a centered binomial distribution with parameter eta=2 +* +* Arguments: - poly *r: pointer to output polynomial +* - const __m256i *buf: pointer to aligned input byte array +**************************************************/ +static void cbd2(poly * restrict r, const __m256i buf[2*KYBER_N/128]) +{ + unsigned int i; + __m256i f0, f1, f2, f3; + const __m256i mask55 = _mm256_set1_epi32(0x55555555); + const __m256i mask33 = _mm256_set1_epi32(0x33333333); + const __m256i mask03 = _mm256_set1_epi32(0x03030303); + const __m256i mask0F = _mm256_set1_epi32(0x0F0F0F0F); + + for(i = 0; i < KYBER_N/64; i++) { + f0 = _mm256_load_si256(&buf[i]); + + f1 = _mm256_srli_epi16(f0, 1); + f0 = _mm256_and_si256(mask55, f0); + f1 = _mm256_and_si256(mask55, f1); + f0 = _mm256_add_epi8(f0, f1); + + f1 = _mm256_srli_epi16(f0, 2); + f0 = _mm256_and_si256(mask33, f0); + f1 = _mm256_and_si256(mask33, f1); + f0 = _mm256_add_epi8(f0, mask33); + f0 = _mm256_sub_epi8(f0, f1); + + f1 = _mm256_srli_epi16(f0, 4); + f0 = _mm256_and_si256(mask0F, f0); + f1 = _mm256_and_si256(mask0F, f1); + f0 = _mm256_sub_epi8(f0, mask03); + f1 = _mm256_sub_epi8(f1, mask03); + + f2 = _mm256_unpacklo_epi8(f0, f1); + f3 = _mm256_unpackhi_epi8(f0, f1); + + f0 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(f2)); + f1 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(f2,1)); + f2 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(f3)); + f3 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(f3,1)); + + _mm256_store_si256(&r->vec[4*i+0], f0); + _mm256_store_si256(&r->vec[4*i+1], f2); + _mm256_store_si256(&r->vec[4*i+2], f1); + _mm256_store_si256(&r->vec[4*i+3], f3); + } +} + +#if KYBER_ETA1 == 3 +/************************************************* +* Name: cbd3 +* +* Description: Given an array of uniformly random bytes, compute +* polynomial with coefficients distributed according to +* a centered binomial distribution with parameter eta=3 +* This function is only needed for Kyber-512 +* +* Arguments: - poly
*r: pointer to output polynomial +* - const uint8_t *buf: pointer to input byte array (read with unaligned 32-byte loads at offset 24*i) +**************************************************/ +static void cbd3(poly * restrict r, const uint8_t buf[3*KYBER_N/4+8]) +{ + unsigned int i; + __m256i f0, f1, f2, f3; + const __m256i mask249 = _mm256_set1_epi32(0x249249); + const __m256i mask6DB = _mm256_set1_epi32(0x6DB6DB); + const __m256i mask07 = _mm256_set1_epi32(7); + const __m256i mask70 = _mm256_set1_epi32(7 << 16); + const __m256i mask3 = _mm256_set1_epi16(3); + const __m256i shufbidx = _mm256_set_epi8(-1,15,14,13,-1,12,11,10,-1, 9, 8, 7,-1, 6, 5, 4, + -1,11,10, 9,-1, 8, 7, 6,-1, 5, 4, 3,-1, 2, 1, 0); + + for(i = 0; i < KYBER_N/32; i++) { + f0 = _mm256_loadu_si256((__m256i *)&buf[24*i]); + f0 = _mm256_permute4x64_epi64(f0,0x94); + f0 = _mm256_shuffle_epi8(f0,shufbidx); + + f1 = _mm256_srli_epi32(f0,1); + f2 = _mm256_srli_epi32(f0,2); + f0 = _mm256_and_si256(mask249,f0); + f1 = _mm256_and_si256(mask249,f1); + f2 = _mm256_and_si256(mask249,f2); + f0 = _mm256_add_epi32(f0,f1); + f0 = _mm256_add_epi32(f0,f2); + + f1 = _mm256_srli_epi32(f0,3); + f0 = _mm256_add_epi32(f0,mask6DB); + f0 = _mm256_sub_epi32(f0,f1); + + f1 = _mm256_slli_epi32(f0,10); + f2 = _mm256_srli_epi32(f0,12); + f3 = _mm256_srli_epi32(f0, 2); + f0 = _mm256_and_si256(f0,mask07); + f1 = _mm256_and_si256(f1,mask70); + f2 = _mm256_and_si256(f2,mask07); + f3 = _mm256_and_si256(f3,mask70); + f0 = _mm256_add_epi16(f0,f1); + f1 = _mm256_add_epi16(f2,f3); + f0 = _mm256_sub_epi16(f0,mask3); + f1 = _mm256_sub_epi16(f1,mask3); + + f2 = _mm256_unpacklo_epi32(f0,f1); + f3 = _mm256_unpackhi_epi32(f0,f1); + + f0 = _mm256_permute2x128_si256(f2,f3,0x20); + f1 = _mm256_permute2x128_si256(f2,f3,0x31); + + _mm256_store_si256(&r->vec[2*i+0], f0); + _mm256_store_si256(&r->vec[2*i+1], f1); + } +} +#endif + +/* buf 32 bytes longer for cbd3 */ +void poly_cbd_eta1(poly *r, const __m256i buf[KYBER_ETA1*KYBER_N/128+1]) +{ +#if KYBER_ETA1 == 2 + cbd2(r, buf); +#elif KYBER_ETA1
== 3 + cbd3(r, (uint8_t *)buf); +#else +#error "This implementation requires eta1 in {2,3}" +#endif +} + +void poly_cbd_eta2(poly *r, const __m256i buf[KYBER_ETA2*KYBER_N/128]) +{ +#if KYBER_ETA2 == 2 + cbd2(r, buf); +#else +#error "This implementation requires eta2 = 2" +#endif +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/cbd.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/cbd.h new file mode 100644 index 0000000000..05788e06b4 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/cbd.h @@ -0,0 +1,15 @@ +#ifndef CBD_H +#define CBD_H + +#include +#include +#include "params.h" +#include "poly.h" + +#define poly_cbd_eta1 KYBER_NAMESPACE(poly_cbd_eta1) +void poly_cbd_eta1(poly *r, const __m256i buf[KYBER_ETA1*KYBER_N/128+1]); + +#define poly_cbd_eta2 KYBER_NAMESPACE(poly_cbd_eta2) +void poly_cbd_eta2(poly *r, const __m256i buf[KYBER_ETA2*KYBER_N/128]); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/consts.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/consts.c new file mode 100644 index 0000000000..84e596893d --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/consts.c @@ -0,0 +1,121 @@ +#include "align.h" +#include "params.h" +#include "consts.h" + +#define Q KYBER_Q +#define MONT -1044 // 2^16 mod q +#define QINV -3327 // q^-1 mod 2^16 +#define V 20159 // floor(2^26/q + 0.5) +#define FHI 1441 // mont^2/128 +#define FLO -10079 // qinv*FHI +#define MONTSQHI 1353 // mont^2 +#define MONTSQLO 20553 // qinv*MONTSQHI +#define MASK 4095 +#define SHIFT 32 + +const qdata_t qdata = {{ +#define _16XQ 0 + Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, + +#define _16XQINV 16 + QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, + QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, + +#define _16XV 32 + V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, + +#define _16XFLO 48 + FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO, + FLO, FLO, FLO, 
FLO, FLO, FLO, FLO, FLO, + +#define _16XFHI 64 + FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI, + FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI, + +#define _16XMONTSQLO 80 + MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, + MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, + MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, + MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, + +#define _16XMONTSQHI 96 + MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, + MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, + MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, + MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, + +#define _16XMASK 112 + MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK, + MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK, + +#define _REVIDXB 128 + 3854, 3340, 2826, 2312, 1798, 1284, 770, 256, + 3854, 3340, 2826, 2312, 1798, 1284, 770, 256, + +#define _REVIDXD 144 + 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0, 0, + +#define _ZETAS_EXP 160 + 31498, 31498, 31498, 31498, -758, -758, -758, -758, + 5237, 5237, 5237, 5237, 1397, 1397, 1397, 1397, + 14745, 14745, 14745, 14745, 14745, 14745, 14745, 14745, + 14745, 14745, 14745, 14745, 14745, 14745, 14745, 14745, + -359, -359, -359, -359, -359, -359, -359, -359, + -359, -359, -359, -359, -359, -359, -359, -359, + 13525, 13525, 13525, 13525, 13525, 13525, 13525, 13525, + -12402, -12402, -12402, -12402, -12402, -12402, -12402, -12402, + 1493, 1493, 1493, 1493, 1493, 1493, 1493, 1493, + 1422, 1422, 1422, 1422, 1422, 1422, 1422, 1422, + -20907, -20907, -20907, -20907, 27758, 27758, 27758, 27758, + -3799, -3799, -3799, -3799, -15690, -15690, -15690, -15690, + -171, -171, -171, -171, 622, 622, 622, 622, + 1577, 1577, 1577, 1577, 182, 182, 182, 182, + -5827, -5827, 17363, 17363, -26360, -26360, -29057, -29057, + 5571, 5571, -1102, -1102, 21438, 21438, -26242, -26242, + 573, 573, -1325, -1325, 264, 264, 383, 383, + -829, -829, 1458, 1458, -1602, -1602, -130, -130, + -5689, -6516, 1496, 30967, -23565, 20179, 20710, 25080, + -12796, 26616, 16064, -12442, 9134, -650, -25986, 27837, + 1223, 652, -552, 1015, -1293, 1491, 
-282, -1544, + 516, -8, -320, -666, -1618, -1162, 126, 1469, + -335, -11477, -32227, 20494, -27738, 945, -14883, 6182, + 32010, 10631, 29175, -28762, -18486, 17560, -14430, -5276, + -1103, 555, -1251, 1550, 422, 177, -291, 1574, + -246, 1159, -777, -602, -1590, -872, 418, -156, + 11182, 13387, -14233, -21655, 13131, -4587, 23092, 5493, + -32502, 30317, -18741, 12639, 20100, 18525, 19529, -12619, + 430, 843, 871, 105, 587, -235, -460, 1653, + 778, -147, 1483, 1119, 644, 349, 329, -75, + 787, 787, 787, 787, 787, 787, 787, 787, + 787, 787, 787, 787, 787, 787, 787, 787, + -1517, -1517, -1517, -1517, -1517, -1517, -1517, -1517, + -1517, -1517, -1517, -1517, -1517, -1517, -1517, -1517, + 28191, 28191, 28191, 28191, 28191, 28191, 28191, 28191, + -16694, -16694, -16694, -16694, -16694, -16694, -16694, -16694, + 287, 287, 287, 287, 287, 287, 287, 287, + 202, 202, 202, 202, 202, 202, 202, 202, + 10690, 10690, 10690, 10690, 1358, 1358, 1358, 1358, + -11202, -11202, -11202, -11202, 31164, 31164, 31164, 31164, + 962, 962, 962, 962, -1202, -1202, -1202, -1202, + -1474, -1474, -1474, -1474, 1468, 1468, 1468, 1468, + -28073, -28073, 24313, 24313, -10532, -10532, 8800, 8800, + 18426, 18426, 8859, 8859, 26675, 26675, -16163, -16163, + -681, -681, 1017, 1017, 732, 732, 608, 608, + -1542, -1542, 411, 411, -205, -205, -1571, -1571, + 19883, -28250, -15887, -8898, -28309, 9075, -30199, 18249, + 13426, 14017, -29156, -12757, 16832, 4311, -24155, -17915, + -853, -90, -271, 830, 107, -1421, -247, -951, + -398, 961, -1508, -725, 448, -1065, 677, -1275, + -31183, 25435, -7382, 24391, -20927, 10946, 24214, 16989, + 10335, -7934, -22502, 10906, 31636, 28644, 23998, -17422, + 817, 603, 1322, -1465, -1215, 1218, -874, -1187, + -1185, -1278, -1510, -870, -108, 996, 958, 1522, + 20297, 2146, 15355, -32384, -6280, -14903, -11044, 14469, + -21498, -20198, 23210, -17442, -23860, -20257, 7756, 23132, + 1097, 610, -1285, 384, -136, -1335, 220, -1659, + -1530, 794, -854, 478, -308, 991, -1460, 1628, + 
+#define _16XSHIFT 624 + SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, + SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT +}}; diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/consts.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/consts.h new file mode 100644 index 0000000000..f95899cd8e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/consts.h @@ -0,0 +1,43 @@ +#ifndef CONSTS_H +#define CONSTS_H + +#include "params.h" + +#define _16XQ 0 +#define _16XQINV 16 +#define _16XV 32 +#define _16XFLO 48 +#define _16XFHI 64 +#define _16XMONTSQLO 80 +#define _16XMONTSQHI 96 +#define _16XMASK 112 +#define _REVIDXB 128 +#define _REVIDXD 144 +#define _ZETAS_EXP 160 +#define _16XSHIFT 624 + +/* The C ABI on MacOS exports all symbols with a leading + * underscore. This means that any symbols we refer to from + * C files (functions) can't be found, and all symbols we + * refer to from ASM also can't be found. + * + * This define helps us get around this + */ +#ifdef __ASSEMBLER__ +#if defined(__WIN32__) || defined(__APPLE__) +#define decorate(s) _##s +#define cdecl2(s) decorate(s) +#define cdecl(s) cdecl2(KYBER_NAMESPACE(##s)) +#else +#define cdecl(s) KYBER_NAMESPACE(##s) +#endif +#endif + +#ifndef __ASSEMBLER__ +#include "align.h" +typedef ALIGNED_INT16(640) qdata_t; +#define qdata KYBER_NAMESPACE(qdata) +extern const qdata_t qdata; +#endif + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/fq.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/fq.S new file mode 100644 index 0000000000..3bb1ebd3d8 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/fq.S @@ -0,0 +1,88 @@ +#include "consts.h" +.include "fq.inc" + +.text +reduce128_avx: +#load +vmovdqa (%rdi),%ymm2 +vmovdqa 32(%rdi),%ymm3 +vmovdqa 64(%rdi),%ymm4 +vmovdqa 96(%rdi),%ymm5 +vmovdqa 128(%rdi),%ymm6 +vmovdqa 160(%rdi),%ymm7 +vmovdqa 192(%rdi),%ymm8 +vmovdqa 
224(%rdi),%ymm9 + +red16 2 +red16 3 +red16 4 +red16 5 +red16 6 +red16 7 +red16 8 +red16 9 + +#store +vmovdqa %ymm2,(%rdi) +vmovdqa %ymm3,32(%rdi) +vmovdqa %ymm4,64(%rdi) +vmovdqa %ymm5,96(%rdi) +vmovdqa %ymm6,128(%rdi) +vmovdqa %ymm7,160(%rdi) +vmovdqa %ymm8,192(%rdi) +vmovdqa %ymm9,224(%rdi) + +ret + +.global cdecl(reduce_avx) +cdecl(reduce_avx): +#consts +vmovdqa _16XQ*2(%rsi),%ymm0 +vmovdqa _16XV*2(%rsi),%ymm1 +call reduce128_avx +add $256,%rdi +call reduce128_avx +ret + +tomont128_avx: +#load +vmovdqa (%rdi),%ymm3 +vmovdqa 32(%rdi),%ymm4 +vmovdqa 64(%rdi),%ymm5 +vmovdqa 96(%rdi),%ymm6 +vmovdqa 128(%rdi),%ymm7 +vmovdqa 160(%rdi),%ymm8 +vmovdqa 192(%rdi),%ymm9 +vmovdqa 224(%rdi),%ymm10 + +fqmulprecomp 1,2,3,11 +fqmulprecomp 1,2,4,12 +fqmulprecomp 1,2,5,13 +fqmulprecomp 1,2,6,14 +fqmulprecomp 1,2,7,15 +fqmulprecomp 1,2,8,11 +fqmulprecomp 1,2,9,12 +fqmulprecomp 1,2,10,13 + +#store +vmovdqa %ymm3,(%rdi) +vmovdqa %ymm4,32(%rdi) +vmovdqa %ymm5,64(%rdi) +vmovdqa %ymm6,96(%rdi) +vmovdqa %ymm7,128(%rdi) +vmovdqa %ymm8,160(%rdi) +vmovdqa %ymm9,192(%rdi) +vmovdqa %ymm10,224(%rdi) + +ret + +.global cdecl(tomont_avx) +cdecl(tomont_avx): +#consts +vmovdqa _16XQ*2(%rsi),%ymm0 +vmovdqa _16XMONTSQLO*2(%rsi),%ymm1 +vmovdqa _16XMONTSQHI*2(%rsi),%ymm2 +call tomont128_avx +add $256,%rdi +call tomont128_avx +ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/fq.inc b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/fq.inc new file mode 100644 index 0000000000..4b7afc3118 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/fq.inc @@ -0,0 +1,30 @@ +.macro red16 r,rs=0,x=12 +vpmulhw %ymm1,%ymm\r,%ymm\x +.if \rs +vpmulhrsw %ymm\rs,%ymm\x,%ymm\x +.else +vpsraw $10,%ymm\x,%ymm\x +.endif +vpmullw %ymm0,%ymm\x,%ymm\x +vpsubw %ymm\x,%ymm\r,%ymm\r +.endm + +.macro csubq r,x=12 +vpsubw %ymm0,%ymm\r,%ymm\r +vpsraw $15,%ymm\r,%ymm\x +vpand %ymm0,%ymm\x,%ymm\x +vpaddw %ymm\x,%ymm\r,%ymm\r +.endm + +.macro caddq r,x=12 +vpsraw 
$15,%ymm\r,%ymm\x +vpand %ymm0,%ymm\x,%ymm\x +vpaddw %ymm\x,%ymm\r,%ymm\r +.endm + +.macro fqmulprecomp al,ah,b,x=12 +vpmullw %ymm\al,%ymm\b,%ymm\x +vpmulhw %ymm\ah,%ymm\b,%ymm\b +vpmulhw %ymm0,%ymm\x,%ymm\x +vpsubw %ymm\x,%ymm\b,%ymm\b +.endm diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/indcpa.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/indcpa.c new file mode 100644 index 0000000000..572ce49007 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/indcpa.c @@ -0,0 +1,566 @@ +#include +#include +#include +#include +#include "align.h" +#include "params.h" +#include "indcpa.h" +#include "polyvec.h" +#include "poly.h" +#include "ntt.h" +#include "cbd.h" +#include "rejsample.h" +#include "symmetric.h" +#include "randombytes.h" + +/************************************************* +* Name: pack_pk +* +* Description: Serialize the public key as concatenation of the +* serialized vector of polynomials pk and the +* public seed used to generate the matrix A. +* The polynomial coefficients in pk are assumed to +* lie in the interval [0,q], i.e. pk must be reduced +* by polyvec_reduce().
+* +* Arguments: uint8_t *r: pointer to the output serialized public key +* polyvec *pk: pointer to the input public-key polyvec +* const uint8_t *seed: pointer to the input public seed +**************************************************/ +static void pack_pk(uint8_t r[KYBER_INDCPA_PUBLICKEYBYTES], + polyvec *pk, + const uint8_t seed[KYBER_SYMBYTES]) +{ + polyvec_tobytes(r, pk); + memcpy(r+KYBER_POLYVECBYTES, seed, KYBER_SYMBYTES); +} + +/************************************************* +* Name: unpack_pk +* +* Description: De-serialize public key from a byte array; +* approximate inverse of pack_pk +* +* Arguments: - polyvec *pk: pointer to output public-key polynomial vector +* - uint8_t *seed: pointer to output seed to generate matrix A +* - const uint8_t *packedpk: pointer to input serialized public key +**************************************************/ +static void unpack_pk(polyvec *pk, + uint8_t seed[KYBER_SYMBYTES], + const uint8_t packedpk[KYBER_INDCPA_PUBLICKEYBYTES]) +{ + polyvec_frombytes(pk, packedpk); + memcpy(seed, packedpk+KYBER_POLYVECBYTES, KYBER_SYMBYTES); +} + +/************************************************* +* Name: pack_sk +* +* Description: Serialize the secret key. +* The polynomial coefficients in sk are assumed to +* lie in the interval [0,q], i.e. sk must be reduced +* by polyvec_reduce().
+* +* Arguments: - uint8_t *r: pointer to output serialized secret key +* - polyvec *sk: pointer to input vector of polynomials (secret key) +**************************************************/ +static void pack_sk(uint8_t r[KYBER_INDCPA_SECRETKEYBYTES], polyvec *sk) +{ + polyvec_tobytes(r, sk); +} + +/************************************************* +* Name: unpack_sk +* +* Description: De-serialize the secret key; inverse of pack_sk +* +* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret key) +* - const uint8_t *packedsk: pointer to input serialized secret key +**************************************************/ +static void unpack_sk(polyvec *sk, const uint8_t packedsk[KYBER_INDCPA_SECRETKEYBYTES]) +{ + polyvec_frombytes(sk, packedsk); +} + +/************************************************* +* Name: pack_ciphertext +* +* Description: Serialize the ciphertext as concatenation of the +* compressed and serialized vector of polynomials b +* and the compressed and serialized polynomial v. +* The polynomial coefficients in b and v are assumed to +* lie in the interval [0,q], i.e. b and v must be reduced +* by polyvec_reduce() and poly_reduce(), respectively.
+* +* Arguments: uint8_t *r: pointer to the output serialized ciphertext +* polyvec *b: pointer to the input vector of polynomials b +* poly *v: pointer to the input polynomial v +**************************************************/ +static void pack_ciphertext(uint8_t r[KYBER_INDCPA_BYTES], polyvec *b, poly *v) +{ + polyvec_compress(r, b); + poly_compress(r+KYBER_POLYVECCOMPRESSEDBYTES, v); +} + +/************************************************* +* Name: unpack_ciphertext +* +* Description: De-serialize and decompress ciphertext from a byte array; +* approximate inverse of pack_ciphertext +* +* Arguments: - polyvec *b: pointer to the output vector of polynomials b +* - poly *v: pointer to the output polynomial v +* - const uint8_t *c: pointer to the input serialized ciphertext +**************************************************/ +static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t c[KYBER_INDCPA_BYTES]) +{ + polyvec_decompress(b, c); + poly_decompress(v, c+KYBER_POLYVECCOMPRESSEDBYTES); +} + +/************************************************* +* Name: rej_uniform +* +* Description: Run rejection sampling on uniform random bytes to generate +* uniform random integers mod q +* +* Arguments: - int16_t *r: pointer to output array +* - unsigned int len: requested number of 16-bit integers (uniform mod q) +* - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes) +* - unsigned int buflen: length of input buffer in bytes +* +* Returns number of sampled 16-bit integers (at most len) +**************************************************/ +static unsigned int rej_uniform(int16_t *r, + unsigned int len, + const uint8_t *buf, + unsigned int buflen) +{ + unsigned int ctr, pos; + uint16_t val0, val1; + + ctr = pos = 0; + while(ctr < len && pos + 3 <= buflen) { // written as pos+3 so the unsigned compare cannot wrap when buflen < 3 + val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; + val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)) & 0xFFF; + pos += 3; + +
if(val0 < KYBER_Q) + r[ctr++] = val0; + if(ctr < len && val1 < KYBER_Q) + r[ctr++] = val1; + } + + return ctr; +} + +#define gen_a(A,B) gen_matrix(A,B,0) +#define gen_at(A,B) gen_matrix(A,B,1) + +/************************************************* +* Name: gen_matrix +* +* Description: Deterministically generate matrix A (or the transpose of A) +* from a seed. Entries of the matrix are polynomials that look +* uniformly random. Performs rejection sampling on output of +* a XOF +* +* Arguments: - polyvec *a: pointer to ouptput matrix A +* - const uint8_t *seed: pointer to input seed +* - int transposed: boolean deciding whether A or A^T is generated +**************************************************/ +#if KYBER_K == 2 +void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) +{ + unsigned int ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4]; + __m256i f; + shake128x4incctx state; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + if(transposed) { + buf[0].coeffs[32] = 0; + buf[0].coeffs[33] = 0; + buf[1].coeffs[32] = 0; + buf[1].coeffs[33] = 1; + buf[2].coeffs[32] = 1; + buf[2].coeffs[33] = 0; + buf[3].coeffs[32] = 1; + buf[3].coeffs[33] = 1; + } + else { + buf[0].coeffs[32] = 0; + buf[0].coeffs[33] = 0; + buf[1].coeffs[32] = 1; + buf[1].coeffs[33] = 0; + buf[2].coeffs[32] = 0; + buf[2].coeffs[33] = 1; + buf[3].coeffs[32] = 1; + buf[3].coeffs[33] = 1; + } + + shake128x4_inc_init(&state); + shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); + + ctr0 = rej_uniform_avx(a[0].vec[0].coeffs, buf[0].coeffs); + ctr1 = rej_uniform_avx(a[0].vec[1].coeffs, buf[1].coeffs); + ctr2 = rej_uniform_avx(a[1].vec[0].coeffs, 
buf[2].coeffs); + ctr3 = rej_uniform_avx(a[1].vec[1].coeffs, buf[3].coeffs); + + while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_uniform(a[0].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); + ctr1 += rej_uniform(a[0].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); + ctr2 += rej_uniform(a[1].vec[0].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_uniform(a[1].vec[1].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } + + poly_nttunpack(&a[0].vec[0]); + poly_nttunpack(&a[0].vec[1]); + poly_nttunpack(&a[1].vec[0]); + poly_nttunpack(&a[1].vec[1]); + shake128x4_inc_ctx_release(&state); +} +#elif KYBER_K == 3 +void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) +{ + unsigned int ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4]; + __m256i f; + shake128x4incctx state; + shake128incctx state1x; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + if(transposed) { + buf[0].coeffs[32] = 0; + buf[0].coeffs[33] = 0; + buf[1].coeffs[32] = 0; + buf[1].coeffs[33] = 1; + buf[2].coeffs[32] = 0; + buf[2].coeffs[33] = 2; + buf[3].coeffs[32] = 1; + buf[3].coeffs[33] = 0; + } + else { + buf[0].coeffs[32] = 0; + buf[0].coeffs[33] = 0; + buf[1].coeffs[32] = 1; + buf[1].coeffs[33] = 0; + buf[2].coeffs[32] = 2; + buf[2].coeffs[33] = 0; + buf[3].coeffs[32] = 0; + buf[3].coeffs[33] = 1; + } + + shake128x4_inc_init(&state); + shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); + + ctr0 = 
rej_uniform_avx(a[0].vec[0].coeffs, buf[0].coeffs); + ctr1 = rej_uniform_avx(a[0].vec[1].coeffs, buf[1].coeffs); + ctr2 = rej_uniform_avx(a[0].vec[2].coeffs, buf[2].coeffs); + ctr3 = rej_uniform_avx(a[1].vec[0].coeffs, buf[3].coeffs); + + while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_uniform(a[0].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); + ctr1 += rej_uniform(a[0].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); + ctr2 += rej_uniform(a[0].vec[2].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_uniform(a[1].vec[0].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } + + poly_nttunpack(&a[0].vec[0]); + poly_nttunpack(&a[0].vec[1]); + poly_nttunpack(&a[0].vec[2]); + poly_nttunpack(&a[1].vec[0]); + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + if(transposed) { + buf[0].coeffs[32] = 1; + buf[0].coeffs[33] = 1; + buf[1].coeffs[32] = 1; + buf[1].coeffs[33] = 2; + buf[2].coeffs[32] = 2; + buf[2].coeffs[33] = 0; + buf[3].coeffs[32] = 2; + buf[3].coeffs[33] = 1; + } + else { + buf[0].coeffs[32] = 1; + buf[0].coeffs[33] = 1; + buf[1].coeffs[32] = 2; + buf[1].coeffs[33] = 1; + buf[2].coeffs[32] = 0; + buf[2].coeffs[33] = 2; + buf[3].coeffs[32] = 1; + buf[3].coeffs[33] = 2; + } + + shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); + + ctr0 = rej_uniform_avx(a[1].vec[1].coeffs, buf[0].coeffs); + ctr1 = rej_uniform_avx(a[1].vec[2].coeffs, buf[1].coeffs); + ctr2 = rej_uniform_avx(a[2].vec[0].coeffs, buf[2].coeffs); + ctr3 = 
rej_uniform_avx(a[2].vec[1].coeffs, buf[3].coeffs); + + while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_uniform(a[1].vec[1].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); + ctr1 += rej_uniform(a[1].vec[2].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); + ctr2 += rej_uniform(a[2].vec[0].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_uniform(a[2].vec[1].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } + shake128x4_inc_ctx_release(&state); + + poly_nttunpack(&a[1].vec[1]); + poly_nttunpack(&a[1].vec[2]); + poly_nttunpack(&a[2].vec[0]); + poly_nttunpack(&a[2].vec[1]); + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + buf[0].coeffs[32] = 2; + buf[0].coeffs[33] = 2; + + shake128_inc_init(&state1x); + shake128_absorb_once(&state1x, buf[0].coeffs, 34); + shake128_squeezeblocks(buf[0].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state1x); + ctr0 = rej_uniform_avx(a[2].vec[2].coeffs, buf[0].coeffs); + while(ctr0 < KYBER_N) { + shake128_squeezeblocks(buf[0].coeffs, 1, &state1x); + ctr0 += rej_uniform(a[2].vec[2].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); + } + shake128_inc_ctx_release(&state1x); + + poly_nttunpack(&a[2].vec[2]); +} +#elif KYBER_K == 4 +void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) +{ + unsigned int i, ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4]; + __m256i f; + shake128x4incctx state; + shake128x4_inc_init(&state); + + for(i=0;i<4;i++) { + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + if(transposed) { + buf[0].coeffs[32] = i; + buf[0].coeffs[33] = 0; + buf[1].coeffs[32] = i; + buf[1].coeffs[33] = 
1; + buf[2].coeffs[32] = i; + buf[2].coeffs[33] = 2; + buf[3].coeffs[32] = i; + buf[3].coeffs[33] = 3; + } + else { + buf[0].coeffs[32] = 0; + buf[0].coeffs[33] = i; + buf[1].coeffs[32] = 1; + buf[1].coeffs[33] = i; + buf[2].coeffs[32] = 2; + buf[2].coeffs[33] = i; + buf[3].coeffs[32] = 3; + buf[3].coeffs[33] = i; + } + + shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); + + ctr0 = rej_uniform_avx(a[i].vec[0].coeffs, buf[0].coeffs); + ctr1 = rej_uniform_avx(a[i].vec[1].coeffs, buf[1].coeffs); + ctr2 = rej_uniform_avx(a[i].vec[2].coeffs, buf[2].coeffs); + ctr3 = rej_uniform_avx(a[i].vec[3].coeffs, buf[3].coeffs); + + while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_uniform(a[i].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); + ctr1 += rej_uniform(a[i].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); + ctr2 += rej_uniform(a[i].vec[2].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_uniform(a[i].vec[3].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } + + poly_nttunpack(&a[i].vec[0]); + poly_nttunpack(&a[i].vec[1]); + poly_nttunpack(&a[i].vec[2]); + poly_nttunpack(&a[i].vec[3]); + } + shake128x4_inc_ctx_release(&state); +} +#endif + +/************************************************* +* Name: indcpa_keypair_derand +* +* Description: Generates public and private key for the CPA-secure +* public-key encryption scheme underlying Kyber +* +* Arguments: - uint8_t *pk: pointer to output public key +* (of length KYBER_INDCPA_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key +* (of length KYBER_INDCPA_SECRETKEYBYTES bytes) +* - const uint8_t *coins: pointer to input 
randomness +* (of length KYBER_SYMBYTES bytes) +**************************************************/ +void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], + uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], + const uint8_t coins[KYBER_SYMBYTES]) +{ + unsigned int i; + uint8_t buf[2*KYBER_SYMBYTES]; + const uint8_t *publicseed = buf; + const uint8_t *noiseseed = buf + KYBER_SYMBYTES; + polyvec a[KYBER_K], e, pkpv, skpv; + + hash_g(buf, coins, KYBER_SYMBYTES); + + gen_a(a, publicseed); + +#if KYBER_K == 2 + poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, e.vec+0, e.vec+1, noiseseed, 0, 1, 2, 3); +#elif KYBER_K == 3 + poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, skpv.vec+2, e.vec+0, noiseseed, 0, 1, 2, 3); + poly_getnoise_eta1_4x(e.vec+1, e.vec+2, pkpv.vec+0, pkpv.vec+1, noiseseed, 4, 5, 6, 7); +#elif KYBER_K == 4 + poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, skpv.vec+2, skpv.vec+3, noiseseed, 0, 1, 2, 3); + poly_getnoise_eta1_4x(e.vec+0, e.vec+1, e.vec+2, e.vec+3, noiseseed, 4, 5, 6, 7); +#endif + + polyvec_ntt(&skpv); + polyvec_reduce(&skpv); + polyvec_ntt(&e); + + // matrix-vector multiplication + for(i=0;i +#include "params.h" +#include "polyvec.h" + +#define gen_matrix KYBER_NAMESPACE(gen_matrix) +void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed); + +#define indcpa_keypair_derand KYBER_NAMESPACE(indcpa_keypair_derand) +void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], + uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], + const uint8_t coins[KYBER_SYMBYTES]); + +#define indcpa_enc KYBER_NAMESPACE(indcpa_enc) +void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], + const uint8_t m[KYBER_INDCPA_MSGBYTES], + const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], + const uint8_t coins[KYBER_SYMBYTES]); + +#define indcpa_dec KYBER_NAMESPACE(indcpa_dec) +void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES], + const uint8_t c[KYBER_INDCPA_BYTES], + const uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES]); + +#endif diff --git 
a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/invntt.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/invntt.S new file mode 100644 index 0000000000..76d4189996 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/invntt.S @@ -0,0 +1,193 @@ +#include "consts.h" +.include "shuffle.inc" +.include "fq.inc" + +.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,zl0=2,zl1=2,zh0=3,zh1=3 +vpsubw %ymm\rl0,%ymm\rh0,%ymm12 +vpaddw %ymm\rh0,%ymm\rl0,%ymm\rl0 +vpsubw %ymm\rl1,%ymm\rh1,%ymm13 + +vpmullw %ymm\zl0,%ymm12,%ymm\rh0 +vpaddw %ymm\rh1,%ymm\rl1,%ymm\rl1 +vpsubw %ymm\rl2,%ymm\rh2,%ymm14 + +vpmullw %ymm\zl0,%ymm13,%ymm\rh1 +vpaddw %ymm\rh2,%ymm\rl2,%ymm\rl2 +vpsubw %ymm\rl3,%ymm\rh3,%ymm15 + +vpmullw %ymm\zl1,%ymm14,%ymm\rh2 +vpaddw %ymm\rh3,%ymm\rl3,%ymm\rl3 +vpmullw %ymm\zl1,%ymm15,%ymm\rh3 + +vpmulhw %ymm\zh0,%ymm12,%ymm12 +vpmulhw %ymm\zh0,%ymm13,%ymm13 + +vpmulhw %ymm\zh1,%ymm14,%ymm14 +vpmulhw %ymm\zh1,%ymm15,%ymm15 + +vpmulhw %ymm0,%ymm\rh0,%ymm\rh0 + +vpmulhw %ymm0,%ymm\rh1,%ymm\rh1 + +vpmulhw %ymm0,%ymm\rh2,%ymm\rh2 +vpmulhw %ymm0,%ymm\rh3,%ymm\rh3 + +# + +# + +vpsubw %ymm\rh0,%ymm12,%ymm\rh0 + +vpsubw %ymm\rh1,%ymm13,%ymm\rh1 + +vpsubw %ymm\rh2,%ymm14,%ymm\rh2 +vpsubw %ymm\rh3,%ymm15,%ymm\rh3 +.endm + +.macro intt_levels0t5 off +/* level 0 */ +vmovdqa _16XFLO*2(%rsi),%ymm2 +vmovdqa _16XFHI*2(%rsi),%ymm3 + +vmovdqa (128*\off+ 0)*2(%rdi),%ymm4 +vmovdqa (128*\off+ 32)*2(%rdi),%ymm6 +vmovdqa (128*\off+ 16)*2(%rdi),%ymm5 +vmovdqa (128*\off+ 48)*2(%rdi),%ymm7 + +fqmulprecomp 2,3,4 +fqmulprecomp 2,3,6 +fqmulprecomp 2,3,5 +fqmulprecomp 2,3,7 + +vmovdqa (128*\off+ 64)*2(%rdi),%ymm8 +vmovdqa (128*\off+ 96)*2(%rdi),%ymm10 +vmovdqa (128*\off+ 80)*2(%rdi),%ymm9 +vmovdqa (128*\off+112)*2(%rdi),%ymm11 + +fqmulprecomp 2,3,8 +fqmulprecomp 2,3,10 +fqmulprecomp 2,3,9 +fqmulprecomp 2,3,11 + +vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+208)*2(%rsi),%ymm15 +vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+176)*2(%rsi),%ymm1 +vpermq 
$0x4E,(_ZETAS_EXP+(1-\off)*224+224)*2(%rsi),%ymm2 +vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+192)*2(%rsi),%ymm3 +vmovdqa _REVIDXB*2(%rsi),%ymm12 +vpshufb %ymm12,%ymm15,%ymm15 +vpshufb %ymm12,%ymm1,%ymm1 +vpshufb %ymm12,%ymm2,%ymm2 +vpshufb %ymm12,%ymm3,%ymm3 + +butterfly 4,5,8,9,6,7,10,11,15,1,2,3 + +/* level 1 */ +vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+144)*2(%rsi),%ymm2 +vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+160)*2(%rsi),%ymm3 +vmovdqa _REVIDXB*2(%rsi),%ymm1 +vpshufb %ymm1,%ymm2,%ymm2 +vpshufb %ymm1,%ymm3,%ymm3 + +butterfly 4,5,6,7,8,9,10,11,2,2,3,3 + +shuffle1 4,5,3,5 +shuffle1 6,7,4,7 +shuffle1 8,9,6,9 +shuffle1 10,11,8,11 + +/* level 2 */ +vmovdqa _REVIDXD*2(%rsi),%ymm12 +vpermd (_ZETAS_EXP+(1-\off)*224+112)*2(%rsi),%ymm12,%ymm2 +vpermd (_ZETAS_EXP+(1-\off)*224+128)*2(%rsi),%ymm12,%ymm10 + +butterfly 3,4,6,8,5,7,9,11,2,2,10,10 + +vmovdqa _16XV*2(%rsi),%ymm1 +red16 3 + +shuffle2 3,4,10,4 +shuffle2 6,8,3,8 +shuffle2 5,7,6,7 +shuffle2 9,11,5,11 + +/* level 3 */ +vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+80)*2(%rsi),%ymm2 +vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+96)*2(%rsi),%ymm9 + +butterfly 10,3,6,5,4,8,7,11,2,2,9,9 + +shuffle4 10,3,9,3 +shuffle4 6,5,10,5 +shuffle4 4,8,6,8 +shuffle4 7,11,4,11 + +/* level 4 */ +vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+48)*2(%rsi),%ymm2 +vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+64)*2(%rsi),%ymm7 + +butterfly 9,10,6,4,3,5,8,11,2,2,7,7 + +red16 9 + +shuffle8 9,10,7,10 +shuffle8 6,4,9,4 +shuffle8 3,5,6,5 +shuffle8 8,11,3,11 + +/* level 5 */ +vmovdqa (_ZETAS_EXP+(1-\off)*224+16)*2(%rsi),%ymm2 +vmovdqa (_ZETAS_EXP+(1-\off)*224+32)*2(%rsi),%ymm8 + +butterfly 7,9,6,3,10,4,5,11,2,2,8,8 + +vmovdqa %ymm7,(128*\off+ 0)*2(%rdi) +vmovdqa %ymm9,(128*\off+ 16)*2(%rdi) +vmovdqa %ymm6,(128*\off+ 32)*2(%rdi) +vmovdqa %ymm3,(128*\off+ 48)*2(%rdi) +vmovdqa %ymm10,(128*\off+ 64)*2(%rdi) +vmovdqa %ymm4,(128*\off+ 80)*2(%rdi) +vmovdqa %ymm5,(128*\off+ 96)*2(%rdi) +vmovdqa %ymm11,(128*\off+112)*2(%rdi) +.endm + +.macro intt_level6 off +/* level 6 */ +vmovdqa (64*\off+ 
0)*2(%rdi),%ymm4 +vmovdqa (64*\off+128)*2(%rdi),%ymm8 +vmovdqa (64*\off+ 16)*2(%rdi),%ymm5 +vmovdqa (64*\off+144)*2(%rdi),%ymm9 +vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm2 + +vmovdqa (64*\off+ 32)*2(%rdi),%ymm6 +vmovdqa (64*\off+160)*2(%rdi),%ymm10 +vmovdqa (64*\off+ 48)*2(%rdi),%ymm7 +vmovdqa (64*\off+176)*2(%rdi),%ymm11 +vpbroadcastq (_ZETAS_EXP+4)*2(%rsi),%ymm3 + +butterfly 4,5,6,7,8,9,10,11 + +.if \off == 0 +red16 4 +.endif + +vmovdqa %ymm4,(64*\off+ 0)*2(%rdi) +vmovdqa %ymm5,(64*\off+ 16)*2(%rdi) +vmovdqa %ymm6,(64*\off+ 32)*2(%rdi) +vmovdqa %ymm7,(64*\off+ 48)*2(%rdi) +vmovdqa %ymm8,(64*\off+128)*2(%rdi) +vmovdqa %ymm9,(64*\off+144)*2(%rdi) +vmovdqa %ymm10,(64*\off+160)*2(%rdi) +vmovdqa %ymm11,(64*\off+176)*2(%rdi) +.endm + +.text +.global cdecl(invntt_avx) +cdecl(invntt_avx): +vmovdqa _16XQ*2(%rsi),%ymm0 + +intt_levels0t5 0 +intt_levels0t5 1 + +intt_level6 0 +intt_level6 1 +ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/kem.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/kem.c new file mode 100644 index 0000000000..63abc1029c --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/kem.c @@ -0,0 +1,169 @@ +#include +#include +#include +#include "params.h" +#include "kem.h" +#include "indcpa.h" +#include "verify.h" +#include "symmetric.h" +#include "randombytes.h" +/************************************************* +* Name: crypto_kem_keypair_derand +* +* Description: Generates public and private key +* for CCA-secure Kyber key encapsulation mechanism +* +* Arguments: - uint8_t *pk: pointer to output public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key +* (an already allocated array of KYBER_SECRETKEYBYTES bytes) +* - uint8_t *coins: pointer to input randomness +* (an already allocated array filled with 2*KYBER_SYMBYTES random bytes) +** +* Returns 0 (success) +**************************************************/ +int 
crypto_kem_keypair_derand(uint8_t *pk, + uint8_t *sk, + const uint8_t *coins) +{ + indcpa_keypair_derand(pk, sk, coins); + memcpy(sk+KYBER_INDCPA_SECRETKEYBYTES, pk, KYBER_PUBLICKEYBYTES); + hash_h(sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); + /* Value z for pseudo-random output on reject */ + memcpy(sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, coins+KYBER_SYMBYTES, KYBER_SYMBYTES); + return 0; +} + +/************************************************* +* Name: crypto_kem_keypair +* +* Description: Generates public and private key +* for CCA-secure Kyber key encapsulation mechanism +* +* Arguments: - uint8_t *pk: pointer to output public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key +* (an already allocated array of KYBER_SECRETKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int crypto_kem_keypair(uint8_t *pk, + uint8_t *sk) +{ + uint8_t coins[2*KYBER_SYMBYTES]; + randombytes(coins, 2*KYBER_SYMBYTES); + crypto_kem_keypair_derand(pk, sk, coins); + return 0; +} + +/************************************************* +* Name: crypto_kem_enc_derand +* +* Description: Generates cipher text and shared +* secret for given public key +* +* Arguments: - uint8_t *ct: pointer to output cipher text +* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) +* - uint8_t *ss: pointer to output shared secret +* (an already allocated array of KYBER_SSBYTES bytes) +* - const uint8_t *pk: pointer to input public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* - const uint8_t *coins: pointer to input randomness +* (an already allocated array filled with KYBER_SYMBYTES random bytes) +** +* Returns 0 (success) +**************************************************/ +int crypto_kem_enc_derand(uint8_t *ct, + uint8_t *ss, + const uint8_t *pk, + const uint8_t *coins) +{ + uint8_t buf[2*KYBER_SYMBYTES]; + /* Will contain key, coins */ + 
uint8_t kr[2*KYBER_SYMBYTES]; + + memcpy(buf, coins, KYBER_SYMBYTES); + + /* Multitarget countermeasure for coins + contributory KEM */ + hash_h(buf+KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); + hash_g(kr, buf, 2*KYBER_SYMBYTES); + + /* coins are in kr+KYBER_SYMBYTES */ + indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES); + + memcpy(ss,kr,KYBER_SYMBYTES); + return 0; +} + +/************************************************* +* Name: crypto_kem_enc +* +* Description: Generates cipher text and shared +* secret for given public key +* +* Arguments: - uint8_t *ct: pointer to output cipher text +* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) +* - uint8_t *ss: pointer to output shared secret +* (an already allocated array of KYBER_SSBYTES bytes) +* - const uint8_t *pk: pointer to input public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int crypto_kem_enc(uint8_t *ct, + uint8_t *ss, + const uint8_t *pk) +{ + uint8_t coins[KYBER_SYMBYTES]; + randombytes(coins, KYBER_SYMBYTES); + crypto_kem_enc_derand(ct, ss, pk, coins); + return 0; +} + +/************************************************* +* Name: crypto_kem_dec +* +* Description: Generates shared secret for given +* cipher text and private key +* +* Arguments: - uint8_t *ss: pointer to output shared secret +* (an already allocated array of KYBER_SSBYTES bytes) +* - const uint8_t *ct: pointer to input cipher text +* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) +* - const uint8_t *sk: pointer to input private key +* (an already allocated array of KYBER_SECRETKEYBYTES bytes) +* +* Returns 0. +* +* On failure, ss will contain a pseudo-random value. 
+**************************************************/ +int crypto_kem_dec(uint8_t *ss, + const uint8_t *ct, + const uint8_t *sk) +{ + int fail; + uint8_t buf[2*KYBER_SYMBYTES]; + /* Will contain key, coins */ + uint8_t kr[2*KYBER_SYMBYTES]; + uint8_t cmp[KYBER_CIPHERTEXTBYTES+KYBER_SYMBYTES]; + const uint8_t *pk = sk+KYBER_INDCPA_SECRETKEYBYTES; + + indcpa_dec(buf, ct, sk); + + /* Multitarget countermeasure for coins + contributory KEM */ + memcpy(buf+KYBER_SYMBYTES, sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, KYBER_SYMBYTES); + hash_g(kr, buf, 2*KYBER_SYMBYTES); + + /* coins are in kr+KYBER_SYMBYTES */ + indcpa_enc(cmp, buf, pk, kr+KYBER_SYMBYTES); + + fail = verify(ct, cmp, KYBER_CIPHERTEXTBYTES); + + /* Compute rejection key */ + rkprf(ss,sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES,ct); + + /* Copy true key to return buffer if fail is false */ + cmov(ss,kr,KYBER_SYMBYTES,!fail); + + return 0; +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/kem.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/kem.h new file mode 100644 index 0000000000..234f11966b --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/kem.h @@ -0,0 +1,35 @@ +#ifndef KEM_H +#define KEM_H + +#include +#include "params.h" + +#define CRYPTO_SECRETKEYBYTES KYBER_SECRETKEYBYTES +#define CRYPTO_PUBLICKEYBYTES KYBER_PUBLICKEYBYTES +#define CRYPTO_CIPHERTEXTBYTES KYBER_CIPHERTEXTBYTES +#define CRYPTO_BYTES KYBER_SSBYTES + +#if (KYBER_K == 2) +#define CRYPTO_ALGNAME "Kyber512" +#elif (KYBER_K == 3) +#define CRYPTO_ALGNAME "Kyber768" +#elif (KYBER_K == 4) +#define CRYPTO_ALGNAME "Kyber1024" +#endif + +#define crypto_kem_keypair_derand KYBER_NAMESPACE(keypair_derand) +int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); + +#define crypto_kem_keypair KYBER_NAMESPACE(keypair) +int crypto_kem_keypair(uint8_t *pk, uint8_t *sk); + +#define crypto_kem_enc_derand KYBER_NAMESPACE(enc_derand) +int crypto_kem_enc_derand(uint8_t 
*ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); + +#define crypto_kem_enc KYBER_NAMESPACE(enc) +int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); + +#define crypto_kem_dec KYBER_NAMESPACE(dec) +int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/ntt.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/ntt.S new file mode 100644 index 0000000000..0ce7b41297 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/ntt.S @@ -0,0 +1,189 @@ +#include "consts.h" +.include "shuffle.inc" + +.macro mul rh0,rh1,rh2,rh3,zl0=15,zl1=15,zh0=2,zh1=2 +vpmullw %ymm\zl0,%ymm\rh0,%ymm12 +vpmullw %ymm\zl0,%ymm\rh1,%ymm13 + +vpmullw %ymm\zl1,%ymm\rh2,%ymm14 +vpmullw %ymm\zl1,%ymm\rh3,%ymm15 + +vpmulhw %ymm\zh0,%ymm\rh0,%ymm\rh0 +vpmulhw %ymm\zh0,%ymm\rh1,%ymm\rh1 + +vpmulhw %ymm\zh1,%ymm\rh2,%ymm\rh2 +vpmulhw %ymm\zh1,%ymm\rh3,%ymm\rh3 +.endm + +.macro reduce +vpmulhw %ymm0,%ymm12,%ymm12 +vpmulhw %ymm0,%ymm13,%ymm13 + +vpmulhw %ymm0,%ymm14,%ymm14 +vpmulhw %ymm0,%ymm15,%ymm15 +.endm + +.macro update rln,rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 +vpaddw %ymm\rh0,%ymm\rl0,%ymm\rln +vpsubw %ymm\rh0,%ymm\rl0,%ymm\rh0 +vpaddw %ymm\rh1,%ymm\rl1,%ymm\rl0 + +vpsubw %ymm\rh1,%ymm\rl1,%ymm\rh1 +vpaddw %ymm\rh2,%ymm\rl2,%ymm\rl1 +vpsubw %ymm\rh2,%ymm\rl2,%ymm\rh2 + +vpaddw %ymm\rh3,%ymm\rl3,%ymm\rl2 +vpsubw %ymm\rh3,%ymm\rl3,%ymm\rh3 + +vpsubw %ymm12,%ymm\rln,%ymm\rln +vpaddw %ymm12,%ymm\rh0,%ymm\rh0 +vpsubw %ymm13,%ymm\rl0,%ymm\rl0 + +vpaddw %ymm13,%ymm\rh1,%ymm\rh1 +vpsubw %ymm14,%ymm\rl1,%ymm\rl1 +vpaddw %ymm14,%ymm\rh2,%ymm\rh2 + +vpsubw %ymm15,%ymm\rl2,%ymm\rl2 +vpaddw %ymm15,%ymm\rh3,%ymm\rh3 +.endm + +.macro level0 off +vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm15 +vmovdqa (64*\off+128)*2(%rdi),%ymm8 +vmovdqa (64*\off+144)*2(%rdi),%ymm9 +vmovdqa (64*\off+160)*2(%rdi),%ymm10 +vmovdqa (64*\off+176)*2(%rdi),%ymm11 +vpbroadcastq 
(_ZETAS_EXP+4)*2(%rsi),%ymm2 + +mul 8,9,10,11 + +vmovdqa (64*\off+ 0)*2(%rdi),%ymm4 +vmovdqa (64*\off+ 16)*2(%rdi),%ymm5 +vmovdqa (64*\off+ 32)*2(%rdi),%ymm6 +vmovdqa (64*\off+ 48)*2(%rdi),%ymm7 + +reduce +update 3,4,5,6,7,8,9,10,11 + +vmovdqa %ymm3,(64*\off+ 0)*2(%rdi) +vmovdqa %ymm4,(64*\off+ 16)*2(%rdi) +vmovdqa %ymm5,(64*\off+ 32)*2(%rdi) +vmovdqa %ymm6,(64*\off+ 48)*2(%rdi) +vmovdqa %ymm8,(64*\off+128)*2(%rdi) +vmovdqa %ymm9,(64*\off+144)*2(%rdi) +vmovdqa %ymm10,(64*\off+160)*2(%rdi) +vmovdqa %ymm11,(64*\off+176)*2(%rdi) +.endm + +.macro levels1t6 off +/* level 1 */ +vmovdqa (_ZETAS_EXP+224*\off+16)*2(%rsi),%ymm15 +vmovdqa (128*\off+ 64)*2(%rdi),%ymm8 +vmovdqa (128*\off+ 80)*2(%rdi),%ymm9 +vmovdqa (128*\off+ 96)*2(%rdi),%ymm10 +vmovdqa (128*\off+112)*2(%rdi),%ymm11 +vmovdqa (_ZETAS_EXP+224*\off+32)*2(%rsi),%ymm2 + +mul 8,9,10,11 + +vmovdqa (128*\off+ 0)*2(%rdi),%ymm4 +vmovdqa (128*\off+ 16)*2(%rdi),%ymm5 +vmovdqa (128*\off+ 32)*2(%rdi),%ymm6 +vmovdqa (128*\off+ 48)*2(%rdi),%ymm7 + +reduce +update 3,4,5,6,7,8,9,10,11 + +/* level 2 */ +shuffle8 5,10,7,10 +shuffle8 6,11,5,11 + +vmovdqa (_ZETAS_EXP+224*\off+48)*2(%rsi),%ymm15 +vmovdqa (_ZETAS_EXP+224*\off+64)*2(%rsi),%ymm2 + +mul 7,10,5,11 + +shuffle8 3,8,6,8 +shuffle8 4,9,3,9 + +reduce +update 4,6,8,3,9,7,10,5,11 + +/* level 3 */ +shuffle4 8,5,9,5 +shuffle4 3,11,8,11 + +vmovdqa (_ZETAS_EXP+224*\off+80)*2(%rsi),%ymm15 +vmovdqa (_ZETAS_EXP+224*\off+96)*2(%rsi),%ymm2 + +mul 9,5,8,11 + +shuffle4 4,7,3,7 +shuffle4 6,10,4,10 + +reduce +update 6,3,7,4,10,9,5,8,11 + +/* level 4 */ +shuffle2 7,8,10,8 +shuffle2 4,11,7,11 + +vmovdqa (_ZETAS_EXP+224*\off+112)*2(%rsi),%ymm15 +vmovdqa (_ZETAS_EXP+224*\off+128)*2(%rsi),%ymm2 + +mul 10,8,7,11 + +shuffle2 6,9,4,9 +shuffle2 3,5,6,5 + +reduce +update 3,4,9,6,5,10,8,7,11 + +/* level 5 */ +shuffle1 9,7,5,7 +shuffle1 6,11,9,11 + +vmovdqa (_ZETAS_EXP+224*\off+144)*2(%rsi),%ymm15 +vmovdqa (_ZETAS_EXP+224*\off+160)*2(%rsi),%ymm2 + +mul 5,7,9,11 + +shuffle1 3,10,6,10 +shuffle1 4,8,3,8 + 
+reduce +update 4,6,10,3,8,5,7,9,11 + +/* level 6 */ +vmovdqa (_ZETAS_EXP+224*\off+176)*2(%rsi),%ymm14 +vmovdqa (_ZETAS_EXP+224*\off+208)*2(%rsi),%ymm15 +vmovdqa (_ZETAS_EXP+224*\off+192)*2(%rsi),%ymm8 +vmovdqa (_ZETAS_EXP+224*\off+224)*2(%rsi),%ymm2 + +mul 10,3,9,11,14,15,8,2 + +reduce +update 8,4,6,5,7,10,3,9,11 + +vmovdqa %ymm8,(128*\off+ 0)*2(%rdi) +vmovdqa %ymm4,(128*\off+ 16)*2(%rdi) +vmovdqa %ymm10,(128*\off+ 32)*2(%rdi) +vmovdqa %ymm3,(128*\off+ 48)*2(%rdi) +vmovdqa %ymm6,(128*\off+ 64)*2(%rdi) +vmovdqa %ymm5,(128*\off+ 80)*2(%rdi) +vmovdqa %ymm9,(128*\off+ 96)*2(%rdi) +vmovdqa %ymm11,(128*\off+112)*2(%rdi) +.endm + +.text +.global cdecl(ntt_avx) +cdecl(ntt_avx): +vmovdqa _16XQ*2(%rsi),%ymm0 + +level0 0 +level0 1 + +levels1t6 0 +levels1t6 1 + +ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/ntt.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/ntt.h new file mode 100644 index 0000000000..a4f48e343b --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/ntt.h @@ -0,0 +1,28 @@ +#ifndef NTT_H +#define NTT_H + +#include +#include + +#define ntt_avx KYBER_NAMESPACE(ntt_avx) +void ntt_avx(__m256i *r, const __m256i *qdata); +#define invntt_avx KYBER_NAMESPACE(invntt_avx) +void invntt_avx(__m256i *r, const __m256i *qdata); + +#define nttpack_avx KYBER_NAMESPACE(nttpack_avx) +void nttpack_avx(__m256i *r, const __m256i *qdata); +#define nttunpack_avx KYBER_NAMESPACE(nttunpack_avx) +void nttunpack_avx(__m256i *r, const __m256i *qdata); + +#define basemul_avx KYBER_NAMESPACE(basemul_avx) +void basemul_avx(__m256i *r, + const __m256i *a, + const __m256i *b, + const __m256i *qdata); + +#define ntttobytes_avx KYBER_NAMESPACE(ntttobytes_avx) +void ntttobytes_avx(uint8_t *r, const __m256i *a, const __m256i *qdata); +#define nttfrombytes_avx KYBER_NAMESPACE(nttfrombytes_avx) +void nttfrombytes_avx(__m256i *r, const uint8_t *a, const __m256i *qdata); + +#endif diff --git 
a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/params.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/params.h new file mode 100644 index 0000000000..fdc688ea2b --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/params.h @@ -0,0 +1,68 @@ +#ifndef PARAMS_H +#define PARAMS_H + +#ifndef KYBER_K +#define KYBER_K 3 /* Change this for different security strengths */ +#endif + +//#define KYBER_90S /* Uncomment this if you want the 90S variant */ + +/* Don't change parameters below this line */ +#if (KYBER_K == 2) +#ifdef KYBER_90S +#define KYBER_NAMESPACE(s) pqcrystals_kyber512_90s_avx2_##s +#else +#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_512_ipd_avx2_##s +#endif +#elif (KYBER_K == 3) +#ifdef KYBER_90S +#define KYBER_NAMESPACE(s) pqcrystals_kyber768_90s_avx2_##s +#else +#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_768_ipd_avx2_##s +#endif +#elif (KYBER_K == 4) +#ifdef KYBER_90S +#define KYBER_NAMESPACE(s) pqcrystals_kyber1024_90s_avx2_##s +#else +#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_1024_ipd_avx2_##s +#endif +#else +#error "KYBER_K must be in {2,3,4}" +#endif + +#define KYBER_N 256 +#define KYBER_Q 3329 + +#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ +#define KYBER_SSBYTES 32 /* size in bytes of shared key */ + +#define KYBER_POLYBYTES 384 +#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) + +#if KYBER_K == 2 +#define KYBER_ETA1 3 +#define KYBER_POLYCOMPRESSEDBYTES 128 +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) +#elif KYBER_K == 3 +#define KYBER_ETA1 2 +#define KYBER_POLYCOMPRESSEDBYTES 128 +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) +#elif KYBER_K == 4 +#define KYBER_ETA1 2 +#define KYBER_POLYCOMPRESSEDBYTES 160 +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352) +#endif + +#define KYBER_ETA2 2 + +#define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES) +#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) +#define 
KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES) +#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) + +#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) +/* 32 bytes of additional space to save H(pk) */ +#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) +#define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES) + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/poly.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/poly.c new file mode 100644 index 0000000000..681fd6d23e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/poly.c @@ -0,0 +1,519 @@ +#include +#include +#include +#include "align.h" +#include "fips202x4.h" +#include "params.h" +#include "poly.h" +#include "ntt.h" +#include "consts.h" +#include "reduce.h" +#include "cbd.h" +#include "symmetric.h" + +/************************************************* +* Name: poly_compress +* +* Description: Compression and subsequent serialization of a polynomial. +* The coefficients of the input polynomial are assumed to +* lie in the invertal [0,q], i.e. the polynomial must be reduced +* by poly_reduce(). 
+* +* Arguments: - uint8_t *r: pointer to output byte array +* (of length KYBER_POLYCOMPRESSEDBYTES) +* - const poly *a: pointer to input polynomial +**************************************************/ +#if (KYBER_POLYCOMPRESSEDBYTES == 128) +void poly_compress(uint8_t r[128], const poly * restrict a) +{ + unsigned int i; + __m256i f0, f1, f2, f3; + const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); + const __m256i shift1 = _mm256_set1_epi16(1 << 9); + const __m256i mask = _mm256_set1_epi16(15); + const __m256i shift2 = _mm256_set1_epi16((16 << 8) + 1); + const __m256i permdidx = _mm256_set_epi32(7,3,6,2,5,1,4,0); + + for(i=0;ivec[4*i+0]); + f1 = _mm256_load_si256(&a->vec[4*i+1]); + f2 = _mm256_load_si256(&a->vec[4*i+2]); + f3 = _mm256_load_si256(&a->vec[4*i+3]); + f0 = _mm256_mulhi_epi16(f0,v); + f1 = _mm256_mulhi_epi16(f1,v); + f2 = _mm256_mulhi_epi16(f2,v); + f3 = _mm256_mulhi_epi16(f3,v); + f0 = _mm256_mulhrs_epi16(f0,shift1); + f1 = _mm256_mulhrs_epi16(f1,shift1); + f2 = _mm256_mulhrs_epi16(f2,shift1); + f3 = _mm256_mulhrs_epi16(f3,shift1); + f0 = _mm256_and_si256(f0,mask); + f1 = _mm256_and_si256(f1,mask); + f2 = _mm256_and_si256(f2,mask); + f3 = _mm256_and_si256(f3,mask); + f0 = _mm256_packus_epi16(f0,f1); + f2 = _mm256_packus_epi16(f2,f3); + f0 = _mm256_maddubs_epi16(f0,shift2); + f2 = _mm256_maddubs_epi16(f2,shift2); + f0 = _mm256_packus_epi16(f0,f2); + f0 = _mm256_permutevar8x32_epi32(f0,permdidx); + _mm256_storeu_si256((__m256i *)&r[32*i],f0); + } +} + +void poly_decompress(poly * restrict r, const uint8_t a[128]) +{ + unsigned int i; + __m128i t; + __m256i f; + const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]); + const __m256i shufbidx = _mm256_set_epi8(7,7,7,7,6,6,6,6,5,5,5,5,4,4,4,4, + 3,3,3,3,2,2,2,2,1,1,1,1,0,0,0,0); + const __m256i mask = _mm256_set1_epi32(0x00F0000F); + const __m256i shift = _mm256_set1_epi32((128 << 16) + 2048); + + for(i=0;ivec[i],f); + } +} + +#elif (KYBER_POLYCOMPRESSEDBYTES == 160) +void poly_compress(uint8_t 
r[160], const poly * restrict a) +{ + unsigned int i; + __m256i f0, f1; + __m128i t0, t1; + const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); + const __m256i shift1 = _mm256_set1_epi16(1 << 10); + const __m256i mask = _mm256_set1_epi16(31); + const __m256i shift2 = _mm256_set1_epi16((32 << 8) + 1); + const __m256i shift3 = _mm256_set1_epi32((1024 << 16) + 1); + const __m256i sllvdidx = _mm256_set1_epi64x(12); + const __m256i shufbidx = _mm256_set_epi8( 8,-1,-1,-1,-1,-1, 4, 3, 2, 1, 0,-1,12,11,10, 9, + -1,12,11,10, 9, 8,-1,-1,-1,-1,-1 ,4, 3, 2, 1, 0); + + for(i=0;ivec[2*i+0]); + f1 = _mm256_load_si256(&a->vec[2*i+1]); + f0 = _mm256_mulhi_epi16(f0,v); + f1 = _mm256_mulhi_epi16(f1,v); + f0 = _mm256_mulhrs_epi16(f0,shift1); + f1 = _mm256_mulhrs_epi16(f1,shift1); + f0 = _mm256_and_si256(f0,mask); + f1 = _mm256_and_si256(f1,mask); + f0 = _mm256_packus_epi16(f0,f1); + f0 = _mm256_maddubs_epi16(f0,shift2); // a0 a1 a2 a3 b0 b1 b2 b3 a4 a5 a6 a7 b4 b5 b6 b7 + f0 = _mm256_madd_epi16(f0,shift3); // a0 a1 b0 b1 a2 a3 b2 b3 + f0 = _mm256_sllv_epi32(f0,sllvdidx); + f0 = _mm256_srlv_epi64(f0,sllvdidx); + f0 = _mm256_shuffle_epi8(f0,shufbidx); + t0 = _mm256_castsi256_si128(f0); + t1 = _mm256_extracti128_si256(f0,1); + t0 = _mm_blendv_epi8(t0,t1,_mm256_castsi256_si128(shufbidx)); + _mm_storeu_si128((__m128i *)&r[20*i+ 0],t0); + memcpy(&r[20*i+16],&t1,4); + } +} + +void poly_decompress(poly * restrict r, const uint8_t a[160]) +{ + unsigned int i; + __m128i t; + __m256i f; + int16_t ti; + const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]); + const __m256i shufbidx = _mm256_set_epi8(9,9,9,8,8,8,8,7,7,6,6,6,6,5,5,5, + 4,4,4,3,3,3,3,2,2,1,1,1,1,0,0,0); + const __m256i mask = _mm256_set_epi16(248,1984,62,496,3968,124,992,31, + 248,1984,62,496,3968,124,992,31); + const __m256i shift = _mm256_set_epi16(128,16,512,64,8,256,32,1024, + 128,16,512,64,8,256,32,1024); + + for(i=0;ivec[i],f); + } +} + +#endif + +/************************************************* +* Name: 
poly_tobytes +* +* Description: Serialization of a polynomial in NTT representation. +* The coefficients of the input polynomial are assumed to +* lie in the invertal [0,q], i.e. the polynomial must be reduced +* by poly_reduce(). The coefficients are orderd as output by +* poly_ntt(); the serialized output coefficients are in bitreversed +* order. +* +* Arguments: - uint8_t *r: pointer to output byte array +* (needs space for KYBER_POLYBYTES bytes) +* - poly *a: pointer to input polynomial +**************************************************/ +void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a) +{ + ntttobytes_avx(r, a->vec, qdata.vec); +} + +/************************************************* +* Name: poly_frombytes +* +* Description: De-serialization of a polynomial; +* inverse of poly_tobytes +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: pointer to input byte array +* (of KYBER_POLYBYTES bytes) +**************************************************/ +void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]) +{ + nttfrombytes_avx(r->vec, a, qdata.vec); +} + +/************************************************* +* Name: poly_frommsg +* +* Description: Convert 32-byte message to polynomial +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *msg: pointer to input message +**************************************************/ +void poly_frommsg(poly * restrict r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]) +{ +#if (KYBER_INDCPA_MSGBYTES != 32) +#error "KYBER_INDCPA_MSGBYTES must be equal to 32!" 
+#endif + __m256i f, g0, g1, g2, g3, h0, h1, h2, h3; + const __m256i shift = _mm256_broadcastsi128_si256(_mm_set_epi32(0,1,2,3)); + const __m256i idx = _mm256_broadcastsi128_si256(_mm_set_epi8(15,14,11,10,7,6,3,2,13,12,9,8,5,4,1,0)); + const __m256i hqs = _mm256_set1_epi16((KYBER_Q+1)/2); + +#define FROMMSG64(i) \ + g3 = _mm256_shuffle_epi32(f,0x55*i); \ + g3 = _mm256_sllv_epi32(g3,shift); \ + g3 = _mm256_shuffle_epi8(g3,idx); \ + g0 = _mm256_slli_epi16(g3,12); \ + g1 = _mm256_slli_epi16(g3,8); \ + g2 = _mm256_slli_epi16(g3,4); \ + g0 = _mm256_srai_epi16(g0,15); \ + g1 = _mm256_srai_epi16(g1,15); \ + g2 = _mm256_srai_epi16(g2,15); \ + g3 = _mm256_srai_epi16(g3,15); \ + g0 = _mm256_and_si256(g0,hqs); /* 19 18 17 16 3 2 1 0 */ \ + g1 = _mm256_and_si256(g1,hqs); /* 23 22 21 20 7 6 5 4 */ \ + g2 = _mm256_and_si256(g2,hqs); /* 27 26 25 24 11 10 9 8 */ \ + g3 = _mm256_and_si256(g3,hqs); /* 31 30 29 28 15 14 13 12 */ \ + h0 = _mm256_unpacklo_epi64(g0,g1); \ + h2 = _mm256_unpackhi_epi64(g0,g1); \ + h1 = _mm256_unpacklo_epi64(g2,g3); \ + h3 = _mm256_unpackhi_epi64(g2,g3); \ + g0 = _mm256_permute2x128_si256(h0,h1,0x20); \ + g2 = _mm256_permute2x128_si256(h0,h1,0x31); \ + g1 = _mm256_permute2x128_si256(h2,h3,0x20); \ + g3 = _mm256_permute2x128_si256(h2,h3,0x31); \ + _mm256_store_si256(&r->vec[0+2*i+0],g0); \ + _mm256_store_si256(&r->vec[0+2*i+1],g1); \ + _mm256_store_si256(&r->vec[8+2*i+0],g2); \ + _mm256_store_si256(&r->vec[8+2*i+1],g3) + + f = _mm256_loadu_si256((__m256i *)msg); + FROMMSG64(0); + FROMMSG64(1); + FROMMSG64(2); + FROMMSG64(3); +} + +/************************************************* +* Name: poly_tomsg +* +* Description: Convert polynomial to 32-byte message. +* The coefficients of the input polynomial are assumed to +* lie in the invertal [0,q], i.e. the polynomial must be reduced +* by poly_reduce(). 
+* +* Arguments: - uint8_t *msg: pointer to output message +* - poly *a: pointer to input polynomial +**************************************************/ +void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly * restrict a) +{ + unsigned int i; + uint32_t small; + __m256i f0, f1, g0, g1; + const __m256i hq = _mm256_set1_epi16((KYBER_Q - 1)/2); + const __m256i hhq = _mm256_set1_epi16((KYBER_Q - 1)/4); + + for(i=0;ivec[2*i+0]); + f1 = _mm256_load_si256(&a->vec[2*i+1]); + f0 = _mm256_sub_epi16(hq, f0); + f1 = _mm256_sub_epi16(hq, f1); + g0 = _mm256_srai_epi16(f0, 15); + g1 = _mm256_srai_epi16(f1, 15); + f0 = _mm256_xor_si256(f0, g0); + f1 = _mm256_xor_si256(f1, g1); + f0 = _mm256_sub_epi16(f0, hhq); + f1 = _mm256_sub_epi16(f1, hhq); + f0 = _mm256_packs_epi16(f0, f1); + f0 = _mm256_permute4x64_epi64(f0, 0xD8); + small = _mm256_movemask_epi8(f0); + memcpy(&msg[4*i], &small, 4); + } +} + +/************************************************* +* Name: poly_getnoise_eta1 +* +* Description: Sample a polynomial deterministically from a seed and a nonce, +* with output polynomial close to centered binomial distribution +* with parameter KYBER_ETA1 +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *seed: pointer to input seed +* (of length KYBER_SYMBYTES bytes) +* - uint8_t nonce: one-byte input nonce +**************************************************/ +void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) +{ + ALIGNED_UINT8(KYBER_ETA1*KYBER_N/4+32) buf; // +32 bytes as required by poly_cbd_eta1 + prf(buf.coeffs, KYBER_ETA1*KYBER_N/4, seed, nonce); + poly_cbd_eta1(r, buf.vec); +} + +/************************************************* +* Name: poly_getnoise_eta2 +* +* Description: Sample a polynomial deterministically from a seed and a nonce, +* with output polynomial close to centered binomial distribution +* with parameter KYBER_ETA2 +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *seed: 
pointer to input seed +* (of length KYBER_SYMBYTES bytes) +* - uint8_t nonce: one-byte input nonce +**************************************************/ +void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) +{ + ALIGNED_UINT8(KYBER_ETA2*KYBER_N/4) buf; + prf(buf.coeffs, KYBER_ETA2*KYBER_N/4, seed, nonce); + poly_cbd_eta2(r, buf.vec); +} + +#ifndef KYBER_90S +#define NOISE_NBLOCKS ((KYBER_ETA1*KYBER_N/4+SHAKE256_RATE-1)/SHAKE256_RATE) +void poly_getnoise_eta1_4x(poly *r0, + poly *r1, + poly *r2, + poly *r3, + const uint8_t seed[32], + uint8_t nonce0, + uint8_t nonce1, + uint8_t nonce2, + uint8_t nonce3) +{ + ALIGNED_UINT8(NOISE_NBLOCKS*SHAKE256_RATE) buf[4]; + __m256i f; + shake256x4incctx state; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + buf[0].coeffs[32] = nonce0; + buf[1].coeffs[32] = nonce1; + buf[2].coeffs[32] = nonce2; + buf[3].coeffs[32] = nonce3; + + shake256x4_inc_init(&state); + shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 33); + shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, NOISE_NBLOCKS, &state); + shake256x4_inc_ctx_release(&state); + + poly_cbd_eta1(r0, buf[0].vec); + poly_cbd_eta1(r1, buf[1].vec); + poly_cbd_eta1(r2, buf[2].vec); + poly_cbd_eta1(r3, buf[3].vec); +} + +#if KYBER_K == 2 +void poly_getnoise_eta1122_4x(poly *r0, + poly *r1, + poly *r2, + poly *r3, + const uint8_t seed[32], + uint8_t nonce0, + uint8_t nonce1, + uint8_t nonce2, + uint8_t nonce3) +{ + ALIGNED_UINT8(NOISE_NBLOCKS*SHAKE256_RATE) buf[4]; + __m256i f; + shake256x4incctx state; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + buf[0].coeffs[32] = nonce0; + 
buf[1].coeffs[32] = nonce1; + buf[2].coeffs[32] = nonce2; + buf[3].coeffs[32] = nonce3; + + shake256x4_inc_init(&state); + shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 33); + shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, NOISE_NBLOCKS, &state); + shake256x4_inc_ctx_release(&state); + + poly_cbd_eta1(r0, buf[0].vec); + poly_cbd_eta1(r1, buf[1].vec); + poly_cbd_eta2(r2, buf[2].vec); + poly_cbd_eta2(r3, buf[3].vec); +} +#endif +#endif + +/************************************************* +* Name: poly_ntt +* +* Description: Computes negacyclic number-theoretic transform (NTT) of +* a polynomial in place. +* Input coefficients assumed to be in normal order, +* output coefficients are in special order that is natural +* for the vectorization. Input coefficients are assumed to be +* bounded by q in absolute value, output coefficients are bounded +* by 16118 in absolute value. +* +* Arguments: - poly *r: pointer to in/output polynomial +**************************************************/ +void poly_ntt(poly *r) +{ + ntt_avx(r->vec, qdata.vec); +} + +/************************************************* +* Name: poly_invntt_tomont +* +* Description: Computes inverse of negacyclic number-theoretic transform (NTT) +* of a polynomial in place; +* Input coefficients assumed to be in special order from vectorized +* forward ntt, output in normal order. Input coefficients can be +* arbitrary 16-bit integers, output coefficients are bounded by 14870 +* in absolute value. +* +* Arguments: - poly *a: pointer to in/output polynomial +**************************************************/ +void poly_invntt_tomont(poly *r) +{ + invntt_avx(r->vec, qdata.vec); +} + +void poly_nttunpack(poly *r) +{ + nttunpack_avx(r->vec, qdata.vec); +} + +/************************************************* +* Name: poly_basemul_montgomery +* +* Description: Multiplication of two polynomials in NTT domain. 
+* One of the input polynomials needs to have coefficients +* bounded by q, the other polynomial can have arbitrary +* coefficients. Output coefficients are bounded by 6656. +* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_basemul_montgomery(poly *r, const poly *a, const poly *b) +{ + basemul_avx(r->vec, a->vec, b->vec, qdata.vec); +} + +/************************************************* +* Name: poly_tomont +* +* Description: Inplace conversion of all coefficients of a polynomial +* from normal domain to Montgomery domain +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void poly_tomont(poly *r) +{ + tomont_avx(r->vec, qdata.vec); +} + +/************************************************* +* Name: poly_reduce +* +* Description: Applies Barrett reduction to all coefficients of a polynomial +* for details of the Barrett reduction see comments in reduce.c +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void poly_reduce(poly *r) +{ + reduce_avx(r->vec, qdata.vec); +} + +/************************************************* +* Name: poly_add +* +* Description: Add two polynomials. No modular reduction +* is performed. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_add(poly *r, const poly *a, const poly *b) +{ + unsigned int i; + __m256i f0, f1; + + for(i=0;ivec[i]); + f1 = _mm256_load_si256(&b->vec[i]); + f0 = _mm256_add_epi16(f0, f1); + _mm256_store_si256(&r->vec[i], f0); + } +} + +/************************************************* +* Name: poly_sub +* +* Description: Subtract two polynomials. No modular reduction +* is performed. +* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_sub(poly *r, const poly *a, const poly *b) +{ + unsigned int i; + __m256i f0, f1; + + for(i=0;ivec[i]); + f1 = _mm256_load_si256(&b->vec[i]); + f0 = _mm256_sub_epi16(f0, f1); + _mm256_store_si256(&r->vec[i], f0); + } +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/poly.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/poly.h new file mode 100644 index 0000000000..6a9cf71c70 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/poly.h @@ -0,0 +1,77 @@ +#ifndef POLY_H +#define POLY_H + +#include +#include "align.h" +#include "params.h" + +typedef ALIGNED_INT16(KYBER_N) poly; + +#define poly_compress KYBER_NAMESPACE(poly_compress) +void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a); +#define poly_decompress KYBER_NAMESPACE(poly_decompress) +void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]); + +#define poly_tobytes KYBER_NAMESPACE(poly_tobytes) +void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a); +#define poly_frombytes KYBER_NAMESPACE(poly_frombytes) +void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]); + +#define 
poly_frommsg KYBER_NAMESPACE(poly_frommsg) +void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]); +#define poly_tomsg KYBER_NAMESPACE(poly_tomsg) +void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r); + +#define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1) +void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); + +#define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2) +void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); + +#ifndef KYBER_90S +#define poly_getnoise_eta1_4x KYBER_NAMESPACE(poly_getnoise_eta2_4x) +void poly_getnoise_eta1_4x(poly *r0, + poly *r1, + poly *r2, + poly *r3, + const uint8_t seed[32], + uint8_t nonce0, + uint8_t nonce1, + uint8_t nonce2, + uint8_t nonce3); + +#if KYBER_K == 2 +#define poly_getnoise_eta1122_4x KYBER_NAMESPACE(poly_getnoise_eta1122_4x) +void poly_getnoise_eta1122_4x(poly *r0, + poly *r1, + poly *r2, + poly *r3, + const uint8_t seed[32], + uint8_t nonce0, + uint8_t nonce1, + uint8_t nonce2, + uint8_t nonce3); +#endif +#endif + + +#define poly_ntt KYBER_NAMESPACE(poly_ntt) +void poly_ntt(poly *r); +#define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont) +void poly_invntt_tomont(poly *r); +#define poly_nttunpack KYBER_NAMESPACE(poly_nttunpack) +void poly_nttunpack(poly *r); +#define poly_basemul_montgomery KYBER_NAMESPACE(poly_basemul_montgomery) +void poly_basemul_montgomery(poly *r, const poly *a, const poly *b); +#define poly_tomont KYBER_NAMESPACE(poly_tomont) +void poly_tomont(poly *r); + +#define poly_reduce KYBER_NAMESPACE(poly_reduce) +void poly_reduce(poly *r); + +#define poly_add KYBER_NAMESPACE(poly_add) +void poly_add(poly *r, const poly *a, const poly *b); +#define poly_sub KYBER_NAMESPACE(poly_sub) +void poly_sub(poly *r, const poly *a, const poly *b); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/polyvec.c 
b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/polyvec.c new file mode 100644 index 0000000000..a0174b7b3f --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/polyvec.c @@ -0,0 +1,307 @@ +#include +#include +#include +#include "params.h" +#include "polyvec.h" +#include "poly.h" +#include "ntt.h" +#include "consts.h" + +#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) +static void poly_compress10(uint8_t r[320], const poly * restrict a) +{ + unsigned int i; + __m256i f0, f1, f2; + __m128i t0, t1; + const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); + const __m256i v8 = _mm256_slli_epi16(v,3); + const __m256i off = _mm256_set1_epi16(15); + const __m256i shift1 = _mm256_set1_epi16(1 << 12); + const __m256i mask = _mm256_set1_epi16(1023); + const __m256i shift2 = _mm256_set1_epi64x((1024LL << 48) + (1LL << 32) + (1024 << 16) + 1); + const __m256i sllvdidx = _mm256_set1_epi64x(12); + const __m256i shufbidx = _mm256_set_epi8( 8, 4, 3, 2, 1, 0,-1,-1,-1,-1,-1,-1,12,11,10, 9, + -1,-1,-1,-1,-1,-1,12,11,10, 9, 8, 4, 3, 2, 1, 0); + + for(i=0;ivec[i]); + f1 = _mm256_mullo_epi16(f0,v8); + f2 = _mm256_add_epi16(f0,off); + f0 = _mm256_slli_epi16(f0,3); + f0 = _mm256_mulhi_epi16(f0,v); + f2 = _mm256_sub_epi16(f1,f2); + f1 = _mm256_andnot_si256(f1,f2); + f1 = _mm256_srli_epi16(f1,15); + f0 = _mm256_sub_epi16(f0,f1); + f0 = _mm256_mulhrs_epi16(f0,shift1); + f0 = _mm256_and_si256(f0,mask); + f0 = _mm256_madd_epi16(f0,shift2); + f0 = _mm256_sllv_epi32(f0,sllvdidx); + f0 = _mm256_srli_epi64(f0,12); + f0 = _mm256_shuffle_epi8(f0,shufbidx); + t0 = _mm256_castsi256_si128(f0); + t1 = _mm256_extracti128_si256(f0,1); + t0 = _mm_blend_epi16(t0,t1,0xE0); + _mm_storeu_si128((__m128i *)&r[20*i+ 0],t0); + memcpy(&r[20*i+16],&t1,4); + } +} + +static void poly_decompress10(poly * restrict r, const uint8_t a[320+12]) +{ + unsigned int i; + __m256i f; + const __m256i q = _mm256_set1_epi32((KYBER_Q << 16) + 4*KYBER_Q); + const __m256i shufbidx = 
_mm256_set_epi8(11,10,10, 9, 9, 8, 8, 7, + 6, 5, 5, 4, 4, 3, 3, 2, + 9, 8, 8, 7, 7, 6, 6, 5, + 4, 3, 3, 2, 2, 1, 1, 0); + const __m256i sllvdidx = _mm256_set1_epi64x(4); + const __m256i mask = _mm256_set1_epi32((32736 << 16) + 8184); + + for(i=0;ivec[i],f); + } +} + +#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) +static void poly_compress11(uint8_t r[352+2], const poly * restrict a) +{ + unsigned int i; + __m256i f0, f1, f2; + __m128i t0, t1; + const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); + const __m256i v8 = _mm256_slli_epi16(v,3); + const __m256i off = _mm256_set1_epi16(36); + const __m256i shift1 = _mm256_set1_epi16(1 << 13); + const __m256i mask = _mm256_set1_epi16(2047); + const __m256i shift2 = _mm256_set1_epi64x((2048LL << 48) + (1LL << 32) + (2048 << 16) + 1); + const __m256i sllvdidx = _mm256_set1_epi64x(10); + const __m256i srlvqidx = _mm256_set_epi64x(30,10,30,10); + const __m256i shufbidx = _mm256_set_epi8( 4, 3, 2, 1, 0, 0,-1,-1,-1,-1,10, 9, 8, 7, 6, 5, + -1,-1,-1,-1,-1,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + + for(i=0;ivec[i]); + f1 = _mm256_mullo_epi16(f0,v8); + f2 = _mm256_add_epi16(f0,off); + f0 = _mm256_slli_epi16(f0,3); + f0 = _mm256_mulhi_epi16(f0,v); + f2 = _mm256_sub_epi16(f1,f2); + f1 = _mm256_andnot_si256(f1,f2); + f1 = _mm256_srli_epi16(f1,15); + f0 = _mm256_sub_epi16(f0,f1); + f0 = _mm256_mulhrs_epi16(f0,shift1); + f0 = _mm256_and_si256(f0,mask); + f0 = _mm256_madd_epi16(f0,shift2); + f0 = _mm256_sllv_epi32(f0,sllvdidx); + f1 = _mm256_bsrli_epi128(f0,8); + f0 = _mm256_srlv_epi64(f0,srlvqidx); + f1 = _mm256_slli_epi64(f1,34); + f0 = _mm256_add_epi64(f0,f1); + f0 = _mm256_shuffle_epi8(f0,shufbidx); + t0 = _mm256_castsi256_si128(f0); + t1 = _mm256_extracti128_si256(f0,1); + t0 = _mm_blendv_epi8(t0,t1,_mm256_castsi256_si128(shufbidx)); + _mm_storeu_si128((__m128i *)&r[22*i+ 0],t0); + _mm_storel_epi64((__m128i *)&r[22*i+16],t1); + } +} + +static void poly_decompress11(poly * restrict r, const uint8_t a[352+10]) +{ + 
unsigned int i; + __m256i f; + const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]); + const __m256i shufbidx = _mm256_set_epi8(13,12,12,11,10, 9, 9, 8, + 8, 7, 6, 5, 5, 4, 4, 3, + 10, 9, 9, 8, 7, 6, 6, 5, + 5, 4, 3, 2, 2, 1, 1, 0); + const __m256i srlvdidx = _mm256_set_epi32(0,0,1,0,0,0,1,0); + const __m256i srlvqidx = _mm256_set_epi64x(2,0,2,0); + const __m256i shift = _mm256_set_epi16(4,32,1,8,32,1,4,32,4,32,1,8,32,1,4,32); + const __m256i mask = _mm256_set1_epi16(32752); + + for(i=0;ivec[i],f); + } +} + +#endif + +/************************************************* +* Name: polyvec_compress +* +* Description: Compress and serialize vector of polynomials +* +* Arguments: - uint8_t *r: pointer to output byte array +* (needs space for KYBER_POLYVECCOMPRESSEDBYTES) +* - polyvec *a: pointer to input vector of polynomials +**************************************************/ +void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES+2], const polyvec *a) +{ + unsigned int i; + +#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) + for(i=0;ivec[i]); +#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) + for(i=0;ivec[i]); +#endif +} + +/************************************************* +* Name: polyvec_decompress +* +* Description: De-serialize and decompress vector of polynomials; +* approximate inverse of polyvec_compress +* +* Arguments: - polyvec *r: pointer to output vector of polynomials +* - const uint8_t *a: pointer to input byte array +* (of length KYBER_POLYVECCOMPRESSEDBYTES) +**************************************************/ +void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES+12]) +{ + unsigned int i; + +#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) + for(i=0;ivec[i],&a[320*i]); +#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) + for(i=0;ivec[i],&a[352*i]); +#endif +} + +/************************************************* +* Name: polyvec_tobytes +* +* Description: Serialize vector of polynomials +* 
+* Arguments: - uint8_t *r: pointer to output byte array +* (needs space for KYBER_POLYVECBYTES) +* - polyvec *a: pointer to input vector of polynomials +**************************************************/ +void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_frombytes +* +* Description: De-serialize vector of polynomials; +* inverse of polyvec_tobytes +* +* Arguments: - uint8_t *r: pointer to output byte array +* - const polyvec *a: pointer to input vector of polynomials +* (of length KYBER_POLYVECBYTES) +**************************************************/ +void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]) +{ + unsigned int i; + for(i=0;ivec[i], a+i*KYBER_POLYBYTES); +} + +/************************************************* +* Name: polyvec_ntt +* +* Description: Apply forward NTT to all elements of a vector of polynomials +* +* Arguments: - polyvec *r: pointer to in/output vector of polynomials +**************************************************/ +void polyvec_ntt(polyvec *r) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_invntt_tomont +* +* Description: Apply inverse NTT to all elements of a vector of polynomials +* and multiply by Montgomery factor 2^16 +* +* Arguments: - polyvec *r: pointer to in/output vector of polynomials +**************************************************/ +void polyvec_invntt_tomont(polyvec *r) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_basemul_acc_montgomery +* +* Description: Multiply elements in a and b in NTT domain, accumulate into r, +* and multiply by 2^-16. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const polyvec *a: pointer to first input vector of polynomials +* - const polyvec *b: pointer to second input vector of polynomials +**************************************************/ +void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) +{ + unsigned int i; + poly tmp; + + poly_basemul_montgomery(r,&a->vec[0],&b->vec[0]); + for(i=1;ivec[i],&b->vec[i]); + poly_add(r,r,&tmp); + } +} + +/************************************************* +* Name: polyvec_reduce +* +* Description: Applies Barrett reduction to each coefficient +* of each element of a vector of polynomials; +* for details of the Barrett reduction see comments in reduce.c +* +* Arguments: - polyvec *r: pointer to input/output polynomial +**************************************************/ +void polyvec_reduce(polyvec *r) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_add +* +* Description: Add vectors of polynomials +* +* Arguments: - polyvec *r: pointer to output vector of polynomials +* - const polyvec *a: pointer to first input vector of polynomials +* - const polyvec *b: pointer to second input vector of polynomials +**************************************************/ +void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) +{ + unsigned int i; + for(i=0;ivec[i], &a->vec[i], &b->vec[i]); +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/polyvec.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/polyvec.h new file mode 100644 index 0000000000..2ce23c31ff --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/polyvec.h @@ -0,0 +1,36 @@ +#ifndef POLYVEC_H +#define POLYVEC_H + +#include +#include "params.h" +#include "poly.h" + +typedef struct{ + poly vec[KYBER_K]; +} polyvec; + +#define polyvec_compress KYBER_NAMESPACE(polyvec_compress) +void polyvec_compress(uint8_t 
r[KYBER_POLYVECCOMPRESSEDBYTES+2], const polyvec *a); +#define polyvec_decompress KYBER_NAMESPACE(polyvec_decompress) +void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES+12]); + +#define polyvec_tobytes KYBER_NAMESPACE(polyvec_tobytes) +void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a); +#define polyvec_frombytes KYBER_NAMESPACE(polyvec_frombytes) +void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]); + +#define polyvec_ntt KYBER_NAMESPACE(polyvec_ntt) +void polyvec_ntt(polyvec *r); +#define polyvec_invntt_tomont KYBER_NAMESPACE(polyvec_invntt_tomont) +void polyvec_invntt_tomont(polyvec *r); + +#define polyvec_basemul_acc_montgomery KYBER_NAMESPACE(polyvec_basemul_acc_montgomery) +void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b); + +#define polyvec_reduce KYBER_NAMESPACE(polyvec_reduce) +void polyvec_reduce(polyvec *r); + +#define polyvec_add KYBER_NAMESPACE(polyvec_add) +void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/reduce.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/reduce.h new file mode 100644 index 0000000000..5368185b5f --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/reduce.h @@ -0,0 +1,12 @@ +#ifndef REDUCE_H +#define REDUCE_H + +#include "params.h" +#include + +#define reduce_avx KYBER_NAMESPACE(reduce_avx) +void reduce_avx(__m256i *r, const __m256i *qdata); +#define tomont_avx KYBER_NAMESPACE(tomont_avx) +void tomont_avx(__m256i *r, const __m256i *qdata); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/rejsample.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/rejsample.c new file mode 100644 index 0000000000..9060a44cb9 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/rejsample.c @@ -0,0 +1,398 @@ +#include +#include 
+#include +#include "params.h" +#include "consts.h" +#include "rejsample.h" + +//#define BMI + +#ifndef BMI +static const uint8_t idx[256][8] = { + {-1, -1, -1, -1, -1, -1, -1, -1}, + { 0, -1, -1, -1, -1, -1, -1, -1}, + { 2, -1, -1, -1, -1, -1, -1, -1}, + { 0, 2, -1, -1, -1, -1, -1, -1}, + { 4, -1, -1, -1, -1, -1, -1, -1}, + { 0, 4, -1, -1, -1, -1, -1, -1}, + { 2, 4, -1, -1, -1, -1, -1, -1}, + { 0, 2, 4, -1, -1, -1, -1, -1}, + { 6, -1, -1, -1, -1, -1, -1, -1}, + { 0, 6, -1, -1, -1, -1, -1, -1}, + { 2, 6, -1, -1, -1, -1, -1, -1}, + { 0, 2, 6, -1, -1, -1, -1, -1}, + { 4, 6, -1, -1, -1, -1, -1, -1}, + { 0, 4, 6, -1, -1, -1, -1, -1}, + { 2, 4, 6, -1, -1, -1, -1, -1}, + { 0, 2, 4, 6, -1, -1, -1, -1}, + { 8, -1, -1, -1, -1, -1, -1, -1}, + { 0, 8, -1, -1, -1, -1, -1, -1}, + { 2, 8, -1, -1, -1, -1, -1, -1}, + { 0, 2, 8, -1, -1, -1, -1, -1}, + { 4, 8, -1, -1, -1, -1, -1, -1}, + { 0, 4, 8, -1, -1, -1, -1, -1}, + { 2, 4, 8, -1, -1, -1, -1, -1}, + { 0, 2, 4, 8, -1, -1, -1, -1}, + { 6, 8, -1, -1, -1, -1, -1, -1}, + { 0, 6, 8, -1, -1, -1, -1, -1}, + { 2, 6, 8, -1, -1, -1, -1, -1}, + { 0, 2, 6, 8, -1, -1, -1, -1}, + { 4, 6, 8, -1, -1, -1, -1, -1}, + { 0, 4, 6, 8, -1, -1, -1, -1}, + { 2, 4, 6, 8, -1, -1, -1, -1}, + { 0, 2, 4, 6, 8, -1, -1, -1}, + {10, -1, -1, -1, -1, -1, -1, -1}, + { 0, 10, -1, -1, -1, -1, -1, -1}, + { 2, 10, -1, -1, -1, -1, -1, -1}, + { 0, 2, 10, -1, -1, -1, -1, -1}, + { 4, 10, -1, -1, -1, -1, -1, -1}, + { 0, 4, 10, -1, -1, -1, -1, -1}, + { 2, 4, 10, -1, -1, -1, -1, -1}, + { 0, 2, 4, 10, -1, -1, -1, -1}, + { 6, 10, -1, -1, -1, -1, -1, -1}, + { 0, 6, 10, -1, -1, -1, -1, -1}, + { 2, 6, 10, -1, -1, -1, -1, -1}, + { 0, 2, 6, 10, -1, -1, -1, -1}, + { 4, 6, 10, -1, -1, -1, -1, -1}, + { 0, 4, 6, 10, -1, -1, -1, -1}, + { 2, 4, 6, 10, -1, -1, -1, -1}, + { 0, 2, 4, 6, 10, -1, -1, -1}, + { 8, 10, -1, -1, -1, -1, -1, -1}, + { 0, 8, 10, -1, -1, -1, -1, -1}, + { 2, 8, 10, -1, -1, -1, -1, -1}, + { 0, 2, 8, 10, -1, -1, -1, -1}, + { 4, 8, 10, -1, -1, -1, -1, -1}, + { 0, 4, 8, 10, 
-1, -1, -1, -1}, + { 2, 4, 8, 10, -1, -1, -1, -1}, + { 0, 2, 4, 8, 10, -1, -1, -1}, + { 6, 8, 10, -1, -1, -1, -1, -1}, + { 0, 6, 8, 10, -1, -1, -1, -1}, + { 2, 6, 8, 10, -1, -1, -1, -1}, + { 0, 2, 6, 8, 10, -1, -1, -1}, + { 4, 6, 8, 10, -1, -1, -1, -1}, + { 0, 4, 6, 8, 10, -1, -1, -1}, + { 2, 4, 6, 8, 10, -1, -1, -1}, + { 0, 2, 4, 6, 8, 10, -1, -1}, + {12, -1, -1, -1, -1, -1, -1, -1}, + { 0, 12, -1, -1, -1, -1, -1, -1}, + { 2, 12, -1, -1, -1, -1, -1, -1}, + { 0, 2, 12, -1, -1, -1, -1, -1}, + { 4, 12, -1, -1, -1, -1, -1, -1}, + { 0, 4, 12, -1, -1, -1, -1, -1}, + { 2, 4, 12, -1, -1, -1, -1, -1}, + { 0, 2, 4, 12, -1, -1, -1, -1}, + { 6, 12, -1, -1, -1, -1, -1, -1}, + { 0, 6, 12, -1, -1, -1, -1, -1}, + { 2, 6, 12, -1, -1, -1, -1, -1}, + { 0, 2, 6, 12, -1, -1, -1, -1}, + { 4, 6, 12, -1, -1, -1, -1, -1}, + { 0, 4, 6, 12, -1, -1, -1, -1}, + { 2, 4, 6, 12, -1, -1, -1, -1}, + { 0, 2, 4, 6, 12, -1, -1, -1}, + { 8, 12, -1, -1, -1, -1, -1, -1}, + { 0, 8, 12, -1, -1, -1, -1, -1}, + { 2, 8, 12, -1, -1, -1, -1, -1}, + { 0, 2, 8, 12, -1, -1, -1, -1}, + { 4, 8, 12, -1, -1, -1, -1, -1}, + { 0, 4, 8, 12, -1, -1, -1, -1}, + { 2, 4, 8, 12, -1, -1, -1, -1}, + { 0, 2, 4, 8, 12, -1, -1, -1}, + { 6, 8, 12, -1, -1, -1, -1, -1}, + { 0, 6, 8, 12, -1, -1, -1, -1}, + { 2, 6, 8, 12, -1, -1, -1, -1}, + { 0, 2, 6, 8, 12, -1, -1, -1}, + { 4, 6, 8, 12, -1, -1, -1, -1}, + { 0, 4, 6, 8, 12, -1, -1, -1}, + { 2, 4, 6, 8, 12, -1, -1, -1}, + { 0, 2, 4, 6, 8, 12, -1, -1}, + {10, 12, -1, -1, -1, -1, -1, -1}, + { 0, 10, 12, -1, -1, -1, -1, -1}, + { 2, 10, 12, -1, -1, -1, -1, -1}, + { 0, 2, 10, 12, -1, -1, -1, -1}, + { 4, 10, 12, -1, -1, -1, -1, -1}, + { 0, 4, 10, 12, -1, -1, -1, -1}, + { 2, 4, 10, 12, -1, -1, -1, -1}, + { 0, 2, 4, 10, 12, -1, -1, -1}, + { 6, 10, 12, -1, -1, -1, -1, -1}, + { 0, 6, 10, 12, -1, -1, -1, -1}, + { 2, 6, 10, 12, -1, -1, -1, -1}, + { 0, 2, 6, 10, 12, -1, -1, -1}, + { 4, 6, 10, 12, -1, -1, -1, -1}, + { 0, 4, 6, 10, 12, -1, -1, -1}, + { 2, 4, 6, 10, 12, -1, -1, -1}, + { 0, 2, 4, 6, 
10, 12, -1, -1}, + { 8, 10, 12, -1, -1, -1, -1, -1}, + { 0, 8, 10, 12, -1, -1, -1, -1}, + { 2, 8, 10, 12, -1, -1, -1, -1}, + { 0, 2, 8, 10, 12, -1, -1, -1}, + { 4, 8, 10, 12, -1, -1, -1, -1}, + { 0, 4, 8, 10, 12, -1, -1, -1}, + { 2, 4, 8, 10, 12, -1, -1, -1}, + { 0, 2, 4, 8, 10, 12, -1, -1}, + { 6, 8, 10, 12, -1, -1, -1, -1}, + { 0, 6, 8, 10, 12, -1, -1, -1}, + { 2, 6, 8, 10, 12, -1, -1, -1}, + { 0, 2, 6, 8, 10, 12, -1, -1}, + { 4, 6, 8, 10, 12, -1, -1, -1}, + { 0, 4, 6, 8, 10, 12, -1, -1}, + { 2, 4, 6, 8, 10, 12, -1, -1}, + { 0, 2, 4, 6, 8, 10, 12, -1}, + {14, -1, -1, -1, -1, -1, -1, -1}, + { 0, 14, -1, -1, -1, -1, -1, -1}, + { 2, 14, -1, -1, -1, -1, -1, -1}, + { 0, 2, 14, -1, -1, -1, -1, -1}, + { 4, 14, -1, -1, -1, -1, -1, -1}, + { 0, 4, 14, -1, -1, -1, -1, -1}, + { 2, 4, 14, -1, -1, -1, -1, -1}, + { 0, 2, 4, 14, -1, -1, -1, -1}, + { 6, 14, -1, -1, -1, -1, -1, -1}, + { 0, 6, 14, -1, -1, -1, -1, -1}, + { 2, 6, 14, -1, -1, -1, -1, -1}, + { 0, 2, 6, 14, -1, -1, -1, -1}, + { 4, 6, 14, -1, -1, -1, -1, -1}, + { 0, 4, 6, 14, -1, -1, -1, -1}, + { 2, 4, 6, 14, -1, -1, -1, -1}, + { 0, 2, 4, 6, 14, -1, -1, -1}, + { 8, 14, -1, -1, -1, -1, -1, -1}, + { 0, 8, 14, -1, -1, -1, -1, -1}, + { 2, 8, 14, -1, -1, -1, -1, -1}, + { 0, 2, 8, 14, -1, -1, -1, -1}, + { 4, 8, 14, -1, -1, -1, -1, -1}, + { 0, 4, 8, 14, -1, -1, -1, -1}, + { 2, 4, 8, 14, -1, -1, -1, -1}, + { 0, 2, 4, 8, 14, -1, -1, -1}, + { 6, 8, 14, -1, -1, -1, -1, -1}, + { 0, 6, 8, 14, -1, -1, -1, -1}, + { 2, 6, 8, 14, -1, -1, -1, -1}, + { 0, 2, 6, 8, 14, -1, -1, -1}, + { 4, 6, 8, 14, -1, -1, -1, -1}, + { 0, 4, 6, 8, 14, -1, -1, -1}, + { 2, 4, 6, 8, 14, -1, -1, -1}, + { 0, 2, 4, 6, 8, 14, -1, -1}, + {10, 14, -1, -1, -1, -1, -1, -1}, + { 0, 10, 14, -1, -1, -1, -1, -1}, + { 2, 10, 14, -1, -1, -1, -1, -1}, + { 0, 2, 10, 14, -1, -1, -1, -1}, + { 4, 10, 14, -1, -1, -1, -1, -1}, + { 0, 4, 10, 14, -1, -1, -1, -1}, + { 2, 4, 10, 14, -1, -1, -1, -1}, + { 0, 2, 4, 10, 14, -1, -1, -1}, + { 6, 10, 14, -1, -1, -1, -1, -1}, + { 0, 6, 10, 
14, -1, -1, -1, -1}, + { 2, 6, 10, 14, -1, -1, -1, -1}, + { 0, 2, 6, 10, 14, -1, -1, -1}, + { 4, 6, 10, 14, -1, -1, -1, -1}, + { 0, 4, 6, 10, 14, -1, -1, -1}, + { 2, 4, 6, 10, 14, -1, -1, -1}, + { 0, 2, 4, 6, 10, 14, -1, -1}, + { 8, 10, 14, -1, -1, -1, -1, -1}, + { 0, 8, 10, 14, -1, -1, -1, -1}, + { 2, 8, 10, 14, -1, -1, -1, -1}, + { 0, 2, 8, 10, 14, -1, -1, -1}, + { 4, 8, 10, 14, -1, -1, -1, -1}, + { 0, 4, 8, 10, 14, -1, -1, -1}, + { 2, 4, 8, 10, 14, -1, -1, -1}, + { 0, 2, 4, 8, 10, 14, -1, -1}, + { 6, 8, 10, 14, -1, -1, -1, -1}, + { 0, 6, 8, 10, 14, -1, -1, -1}, + { 2, 6, 8, 10, 14, -1, -1, -1}, + { 0, 2, 6, 8, 10, 14, -1, -1}, + { 4, 6, 8, 10, 14, -1, -1, -1}, + { 0, 4, 6, 8, 10, 14, -1, -1}, + { 2, 4, 6, 8, 10, 14, -1, -1}, + { 0, 2, 4, 6, 8, 10, 14, -1}, + {12, 14, -1, -1, -1, -1, -1, -1}, + { 0, 12, 14, -1, -1, -1, -1, -1}, + { 2, 12, 14, -1, -1, -1, -1, -1}, + { 0, 2, 12, 14, -1, -1, -1, -1}, + { 4, 12, 14, -1, -1, -1, -1, -1}, + { 0, 4, 12, 14, -1, -1, -1, -1}, + { 2, 4, 12, 14, -1, -1, -1, -1}, + { 0, 2, 4, 12, 14, -1, -1, -1}, + { 6, 12, 14, -1, -1, -1, -1, -1}, + { 0, 6, 12, 14, -1, -1, -1, -1}, + { 2, 6, 12, 14, -1, -1, -1, -1}, + { 0, 2, 6, 12, 14, -1, -1, -1}, + { 4, 6, 12, 14, -1, -1, -1, -1}, + { 0, 4, 6, 12, 14, -1, -1, -1}, + { 2, 4, 6, 12, 14, -1, -1, -1}, + { 0, 2, 4, 6, 12, 14, -1, -1}, + { 8, 12, 14, -1, -1, -1, -1, -1}, + { 0, 8, 12, 14, -1, -1, -1, -1}, + { 2, 8, 12, 14, -1, -1, -1, -1}, + { 0, 2, 8, 12, 14, -1, -1, -1}, + { 4, 8, 12, 14, -1, -1, -1, -1}, + { 0, 4, 8, 12, 14, -1, -1, -1}, + { 2, 4, 8, 12, 14, -1, -1, -1}, + { 0, 2, 4, 8, 12, 14, -1, -1}, + { 6, 8, 12, 14, -1, -1, -1, -1}, + { 0, 6, 8, 12, 14, -1, -1, -1}, + { 2, 6, 8, 12, 14, -1, -1, -1}, + { 0, 2, 6, 8, 12, 14, -1, -1}, + { 4, 6, 8, 12, 14, -1, -1, -1}, + { 0, 4, 6, 8, 12, 14, -1, -1}, + { 2, 4, 6, 8, 12, 14, -1, -1}, + { 0, 2, 4, 6, 8, 12, 14, -1}, + {10, 12, 14, -1, -1, -1, -1, -1}, + { 0, 10, 12, 14, -1, -1, -1, -1}, + { 2, 10, 12, 14, -1, -1, -1, -1}, + { 0, 2, 10, 12, 
14, -1, -1, -1}, + { 4, 10, 12, 14, -1, -1, -1, -1}, + { 0, 4, 10, 12, 14, -1, -1, -1}, + { 2, 4, 10, 12, 14, -1, -1, -1}, + { 0, 2, 4, 10, 12, 14, -1, -1}, + { 6, 10, 12, 14, -1, -1, -1, -1}, + { 0, 6, 10, 12, 14, -1, -1, -1}, + { 2, 6, 10, 12, 14, -1, -1, -1}, + { 0, 2, 6, 10, 12, 14, -1, -1}, + { 4, 6, 10, 12, 14, -1, -1, -1}, + { 0, 4, 6, 10, 12, 14, -1, -1}, + { 2, 4, 6, 10, 12, 14, -1, -1}, + { 0, 2, 4, 6, 10, 12, 14, -1}, + { 8, 10, 12, 14, -1, -1, -1, -1}, + { 0, 8, 10, 12, 14, -1, -1, -1}, + { 2, 8, 10, 12, 14, -1, -1, -1}, + { 0, 2, 8, 10, 12, 14, -1, -1}, + { 4, 8, 10, 12, 14, -1, -1, -1}, + { 0, 4, 8, 10, 12, 14, -1, -1}, + { 2, 4, 8, 10, 12, 14, -1, -1}, + { 0, 2, 4, 8, 10, 12, 14, -1}, + { 6, 8, 10, 12, 14, -1, -1, -1}, + { 0, 6, 8, 10, 12, 14, -1, -1}, + { 2, 6, 8, 10, 12, 14, -1, -1}, + { 0, 2, 6, 8, 10, 12, 14, -1}, + { 4, 6, 8, 10, 12, 14, -1, -1}, + { 0, 4, 6, 8, 10, 12, 14, -1}, + { 2, 4, 6, 8, 10, 12, 14, -1}, + { 0, 2, 4, 6, 8, 10, 12, 14} +}; +#endif + +#define _mm256_cmpge_epu16(a, b) _mm256_cmpeq_epi16(_mm256_max_epu16(a, b), a) +#define _mm_cmpge_epu16(a, b) _mm_cmpeq_epi16(_mm_max_epu16(a, b), a) + +unsigned int rej_uniform_avx(int16_t * restrict r, const uint8_t *buf) +{ + unsigned int ctr, pos; + uint16_t val0, val1; + uint32_t good; +#ifdef BMI + uint64_t idx0, idx1, idx2, idx3; +#endif + const __m256i bound = _mm256_load_si256(&qdata.vec[_16XQ/16]); + const __m256i ones = _mm256_set1_epi8(1); + const __m256i mask = _mm256_set1_epi16(0xFFF); + const __m256i idx8 = _mm256_set_epi8(15,14,14,13,12,11,11,10, + 9, 8, 8, 7, 6, 5, 5, 4, + 11,10,10, 9, 8, 7, 7, 6, + 5, 4, 4, 3, 2, 1, 1, 0); + __m256i f0, f1, g0, g1, g2, g3; + __m128i f, t, pilo, pihi; + + ctr = pos = 0; + while(ctr <= KYBER_N - 32 && pos <= REJ_UNIFORM_AVX_BUFLEN - 56) { + f0 = _mm256_loadu_si256((__m256i *)&buf[pos]); + f1 = _mm256_loadu_si256((__m256i *)&buf[pos+24]); + f0 = _mm256_permute4x64_epi64(f0, 0x94); + f1 = _mm256_permute4x64_epi64(f1, 0x94); + f0 = 
_mm256_shuffle_epi8(f0, idx8); + f1 = _mm256_shuffle_epi8(f1, idx8); + g0 = _mm256_srli_epi16(f0, 4); + g1 = _mm256_srli_epi16(f1, 4); + f0 = _mm256_blend_epi16(f0, g0, 0xAA); + f1 = _mm256_blend_epi16(f1, g1, 0xAA); + f0 = _mm256_and_si256(f0, mask); + f1 = _mm256_and_si256(f1, mask); + pos += 48; + + g0 = _mm256_cmpgt_epi16(bound, f0); + g1 = _mm256_cmpgt_epi16(bound, f1); + + g0 = _mm256_packs_epi16(g0, g1); + good = _mm256_movemask_epi8(g0); + +#ifdef BMI + idx0 = _pdep_u64(good >> 0, 0x0101010101010101); + idx1 = _pdep_u64(good >> 8, 0x0101010101010101); + idx2 = _pdep_u64(good >> 16, 0x0101010101010101); + idx3 = _pdep_u64(good >> 24, 0x0101010101010101); + idx0 = (idx0 << 8) - idx0; + idx0 = _pext_u64(0x0E0C0A0806040200, idx0); + idx1 = (idx1 << 8) - idx1; + idx1 = _pext_u64(0x0E0C0A0806040200, idx1); + idx2 = (idx2 << 8) - idx2; + idx2 = _pext_u64(0x0E0C0A0806040200, idx2); + idx3 = (idx3 << 8) - idx3; + idx3 = _pext_u64(0x0E0C0A0806040200, idx3); + + g0 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx0)); + g1 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx1)); + g0 = _mm256_inserti128_si256(g0, _mm_cvtsi64_si128(idx2), 1); + g1 = _mm256_inserti128_si256(g1, _mm_cvtsi64_si128(idx3), 1); +#else + g0 = _mm256_castsi128_si256(_mm_loadl_epi64((__m128i *)&idx[(good >> 0) & 0xFF])); + g1 = _mm256_castsi128_si256(_mm_loadl_epi64((__m128i *)&idx[(good >> 8) & 0xFF])); + g0 = _mm256_inserti128_si256(g0, _mm_loadl_epi64((__m128i *)&idx[(good >> 16) & 0xFF]), 1); + g1 = _mm256_inserti128_si256(g1, _mm_loadl_epi64((__m128i *)&idx[(good >> 24) & 0xFF]), 1); +#endif + + g2 = _mm256_add_epi8(g0, ones); + g3 = _mm256_add_epi8(g1, ones); + g0 = _mm256_unpacklo_epi8(g0, g2); + g1 = _mm256_unpacklo_epi8(g1, g3); + + f0 = _mm256_shuffle_epi8(f0, g0); + f1 = _mm256_shuffle_epi8(f1, g1); + + _mm_storeu_si128((__m128i *)&r[ctr], _mm256_castsi256_si128(f0)); + ctr += _mm_popcnt_u32((good >> 0) & 0xFF); + _mm_storeu_si128((__m128i *)&r[ctr], _mm256_extracti128_si256(f0, 1)); + ctr += 
_mm_popcnt_u32((good >> 16) & 0xFF); + _mm_storeu_si128((__m128i *)&r[ctr], _mm256_castsi256_si128(f1)); + ctr += _mm_popcnt_u32((good >> 8) & 0xFF); + _mm_storeu_si128((__m128i *)&r[ctr], _mm256_extracti128_si256(f1, 1)); + ctr += _mm_popcnt_u32((good >> 24) & 0xFF); + } + + while(ctr <= KYBER_N - 8 && pos <= REJ_UNIFORM_AVX_BUFLEN - 16) { + f = _mm_loadu_si128((__m128i *)&buf[pos]); + f = _mm_shuffle_epi8(f, _mm256_castsi256_si128(idx8)); + t = _mm_srli_epi16(f, 4); + f = _mm_blend_epi16(f, t, 0xAA); + f = _mm_and_si128(f, _mm256_castsi256_si128(mask)); + pos += 12; + + t = _mm_cmpgt_epi16(_mm256_castsi256_si128(bound), f); + good = _mm_movemask_epi8(t); + +#ifdef BMI + good &= 0x5555; + idx0 = _pdep_u64(good, 0x1111111111111111); + idx0 = (idx0 << 8) - idx0; + idx0 = _pext_u64(0x0E0C0A0806040200, idx0); + pilo = _mm_cvtsi64_si128(idx0); +#else + good = _pext_u32(good, 0x5555); + pilo = _mm_loadl_epi64((__m128i *)&idx[good]); +#endif + + pihi = _mm_add_epi8(pilo, _mm256_castsi256_si128(ones)); + pilo = _mm_unpacklo_epi8(pilo, pihi); + f = _mm_shuffle_epi8(f, pilo); + _mm_storeu_si128((__m128i *)&r[ctr], f); + ctr += _mm_popcnt_u32(good); + } + + while(ctr < KYBER_N && pos <= REJ_UNIFORM_AVX_BUFLEN - 3) { + val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; + val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)); + pos += 3; + + if(val0 < KYBER_Q) + r[ctr++] = val0; + if(val1 < KYBER_Q && ctr < KYBER_N) + r[ctr++] = val1; + } + + return ctr; +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/rejsample.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/rejsample.h new file mode 100644 index 0000000000..3be5e2192e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/rejsample.h @@ -0,0 +1,14 @@ +#ifndef REJSAMPLE_H +#define REJSAMPLE_H + +#include +#include "params.h" +#include "symmetric.h" + +#define REJ_UNIFORM_AVX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + 
XOF_BLOCKBYTES)/XOF_BLOCKBYTES) +#define REJ_UNIFORM_AVX_BUFLEN (REJ_UNIFORM_AVX_NBLOCKS*XOF_BLOCKBYTES) + +#define rej_uniform_avx KYBER_NAMESPACE(rej_uniform_avx) +unsigned int rej_uniform_avx(int16_t *r, const uint8_t *buf); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/shuffle.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/shuffle.S new file mode 100644 index 0000000000..18325ebec0 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/shuffle.S @@ -0,0 +1,255 @@ +#include "consts.h" +.include "fq.inc" +.include "shuffle.inc" + +/* +nttpack_avx: +#load +vmovdqa (%rdi),%ymm4 +vmovdqa 32(%rdi),%ymm5 +vmovdqa 64(%rdi),%ymm6 +vmovdqa 96(%rdi),%ymm7 +vmovdqa 128(%rdi),%ymm8 +vmovdqa 160(%rdi),%ymm9 +vmovdqa 192(%rdi),%ymm10 +vmovdqa 224(%rdi),%ymm11 + +shuffle1 4,5,3,5 +shuffle1 6,7,4,7 +shuffle1 8,9,6,9 +shuffle1 10,11,8,11 + +shuffle2 3,4,10,4 +shuffle2 6,8,3,8 +shuffle2 5,7,6,7 +shuffle2 9,11,5,11 + +shuffle4 10,3,9,3 +shuffle4 6,5,10,5 +shuffle4 4,8,6,8 +shuffle4 7,11,4,11 + +shuffle8 9,10,7,10 +shuffle8 6,4,9,4 +shuffle8 3,5,6,5 +shuffle8 8,11,3,11 + +#store +vmovdqa %ymm7,(%rdi) +vmovdqa %ymm9,32(%rdi) +vmovdqa %ymm6,64(%rdi) +vmovdqa %ymm3,96(%rdi) +vmovdqa %ymm10,128(%rdi) +vmovdqa %ymm4,160(%rdi) +vmovdqa %ymm5,192(%rdi) +vmovdqa %ymm11,224(%rdi) + +ret +*/ + +.text +nttunpack128_avx: +#load +vmovdqa (%rdi),%ymm4 +vmovdqa 32(%rdi),%ymm5 +vmovdqa 64(%rdi),%ymm6 +vmovdqa 96(%rdi),%ymm7 +vmovdqa 128(%rdi),%ymm8 +vmovdqa 160(%rdi),%ymm9 +vmovdqa 192(%rdi),%ymm10 +vmovdqa 224(%rdi),%ymm11 + +shuffle8 4,8,3,8 +shuffle8 5,9,4,9 +shuffle8 6,10,5,10 +shuffle8 7,11,6,11 + +shuffle4 3,5,7,5 +shuffle4 8,10,3,10 +shuffle4 4,6,8,6 +shuffle4 9,11,4,11 + +shuffle2 7,8,9,8 +shuffle2 5,6,7,6 +shuffle2 3,4,5,4 +shuffle2 10,11,3,11 + +shuffle1 9,5,10,5 +shuffle1 8,4,9,4 +shuffle1 7,3,8,3 +shuffle1 6,11,7,11 + +#store +vmovdqa %ymm10,(%rdi) +vmovdqa %ymm5,32(%rdi) +vmovdqa %ymm9,64(%rdi) +vmovdqa 
%ymm4,96(%rdi) +vmovdqa %ymm8,128(%rdi) +vmovdqa %ymm3,160(%rdi) +vmovdqa %ymm7,192(%rdi) +vmovdqa %ymm11,224(%rdi) + +ret + +.global cdecl(nttunpack_avx) +cdecl(nttunpack_avx): +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +ret + +ntttobytes128_avx: +#load +vmovdqa (%rsi),%ymm5 +vmovdqa 32(%rsi),%ymm6 +vmovdqa 64(%rsi),%ymm7 +vmovdqa 96(%rsi),%ymm8 +vmovdqa 128(%rsi),%ymm9 +vmovdqa 160(%rsi),%ymm10 +vmovdqa 192(%rsi),%ymm11 +vmovdqa 224(%rsi),%ymm12 + +#csubq +csubq 5,13 +csubq 6,13 +csubq 7,13 +csubq 8,13 +csubq 9,13 +csubq 10,13 +csubq 11,13 +csubq 12,13 + +#bitpack +vpsllw $12,%ymm6,%ymm4 +vpor %ymm4,%ymm5,%ymm4 + +vpsrlw $4,%ymm6,%ymm5 +vpsllw $8,%ymm7,%ymm6 +vpor %ymm5,%ymm6,%ymm5 + +vpsrlw $8,%ymm7,%ymm6 +vpsllw $4,%ymm8,%ymm7 +vpor %ymm6,%ymm7,%ymm6 + +vpsllw $12,%ymm10,%ymm7 +vpor %ymm7,%ymm9,%ymm7 + +vpsrlw $4,%ymm10,%ymm8 +vpsllw $8,%ymm11,%ymm9 +vpor %ymm8,%ymm9,%ymm8 + +vpsrlw $8,%ymm11,%ymm9 +vpsllw $4,%ymm12,%ymm10 +vpor %ymm9,%ymm10,%ymm9 + +shuffle1 4,5,3,5 +shuffle1 6,7,4,7 +shuffle1 8,9,6,9 + +shuffle2 3,4,8,4 +shuffle2 6,5,3,5 +shuffle2 7,9,6,9 + +shuffle4 8,3,7,3 +shuffle4 6,4,8,4 +shuffle4 5,9,6,9 + +shuffle8 7,8,5,8 +shuffle8 6,3,7,3 +shuffle8 4,9,6,9 + +#store +vmovdqu %ymm5,(%rdi) +vmovdqu %ymm7,32(%rdi) +vmovdqu %ymm6,64(%rdi) +vmovdqu %ymm8,96(%rdi) +vmovdqu %ymm3,128(%rdi) +vmovdqu %ymm9,160(%rdi) + +ret + +.global cdecl(ntttobytes_avx) +cdecl(ntttobytes_avx): +#consts +vmovdqa _16XQ*2(%rdx),%ymm0 +call ntttobytes128_avx +add $256,%rsi +add $192,%rdi +call ntttobytes128_avx +ret + +nttfrombytes128_avx: +#load +vmovdqu (%rsi),%ymm4 +vmovdqu 32(%rsi),%ymm5 +vmovdqu 64(%rsi),%ymm6 +vmovdqu 96(%rsi),%ymm7 +vmovdqu 128(%rsi),%ymm8 +vmovdqu 160(%rsi),%ymm9 + +shuffle8 4,7,3,7 +shuffle8 5,8,4,8 +shuffle8 6,9,5,9 + +shuffle4 3,8,6,8 +shuffle4 7,5,3,5 +shuffle4 4,9,7,9 + +shuffle2 6,5,4,5 +shuffle2 8,7,6,7 +shuffle2 3,9,8,9 + +shuffle1 4,7,10,7 +shuffle1 5,8,4,8 +shuffle1 6,9,5,9 + +#bitunpack +vpsrlw $12,%ymm10,%ymm11 +vpsllw 
$4,%ymm7,%ymm12 +vpor %ymm11,%ymm12,%ymm11 +vpand %ymm0,%ymm10,%ymm10 +vpand %ymm0,%ymm11,%ymm11 + +vpsrlw $8,%ymm7,%ymm12 +vpsllw $8,%ymm4,%ymm13 +vpor %ymm12,%ymm13,%ymm12 +vpand %ymm0,%ymm12,%ymm12 + +vpsrlw $4,%ymm4,%ymm13 +vpand %ymm0,%ymm13,%ymm13 + +vpsrlw $12,%ymm8,%ymm14 +vpsllw $4,%ymm5,%ymm15 +vpor %ymm14,%ymm15,%ymm14 +vpand %ymm0,%ymm8,%ymm8 +vpand %ymm0,%ymm14,%ymm14 + +vpsrlw $8,%ymm5,%ymm15 +vpsllw $8,%ymm9,%ymm1 +vpor %ymm15,%ymm1,%ymm15 +vpand %ymm0,%ymm15,%ymm15 + +vpsrlw $4,%ymm9,%ymm1 +vpand %ymm0,%ymm1,%ymm1 + +#store +vmovdqa %ymm10,(%rdi) +vmovdqa %ymm11,32(%rdi) +vmovdqa %ymm12,64(%rdi) +vmovdqa %ymm13,96(%rdi) +vmovdqa %ymm8,128(%rdi) +vmovdqa %ymm14,160(%rdi) +vmovdqa %ymm15,192(%rdi) +vmovdqa %ymm1,224(%rdi) + +ret + +.global cdecl(nttfrombytes_avx) +cdecl(nttfrombytes_avx): +#consts +vmovdqa _16XMASK*2(%rdx),%ymm0 +call nttfrombytes128_avx +add $256,%rdi +add $192,%rsi +call nttfrombytes128_avx +ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/shuffle.inc b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/shuffle.inc new file mode 100644 index 0000000000..73e9ffe03c --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/shuffle.inc @@ -0,0 +1,25 @@ +.macro shuffle8 r0,r1,r2,r3 +vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2 +vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle4 r0,r1,r2,r3 +vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2 +vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle2 r0,r1,r2,r3 +#vpsllq $32,%ymm\r1,%ymm\r2 +vmovsldup %ymm\r1,%ymm\r2 +vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 +vpsrlq $32,%ymm\r0,%ymm\r0 +#vmovshdup %ymm\r0,%ymm\r0 +vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle1 r0,r1,r2,r3 +vpslld $16,%ymm\r1,%ymm\r2 +vpblendw $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 +vpsrld $16,%ymm\r0,%ymm\r0 +vpblendw $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 +.endm diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/symmetric-shake.c 
b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/symmetric-shake.c new file mode 100644 index 0000000000..20f451882e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/symmetric-shake.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include "params.h" +#include "symmetric.h" +#include "fips202.h" + +/************************************************* +* Name: kyber_shake128_absorb +* +* Description: Absorb step of the SHAKE128 specialized for the Kyber context. +* +* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state +* - const uint8_t *seed: pointer to KYBER_SYMBYTES input to be absorbed into state +* - uint8_t i: additional byte of input +* - uint8_t j: additional byte of input +**************************************************/ +void kyber_shake128_absorb(shake128incctx *state, + const uint8_t seed[KYBER_SYMBYTES], + uint8_t x, + uint8_t y) +{ + uint8_t extseed[KYBER_SYMBYTES+2]; + + memcpy(extseed, seed, KYBER_SYMBYTES); + extseed[KYBER_SYMBYTES+0] = x; + extseed[KYBER_SYMBYTES+1] = y; + + shake128_absorb_once(state, extseed, sizeof(extseed)); +} + +/************************************************* +* Name: kyber_shake256_prf +* +* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input +* and then generates outlen bytes of SHAKE256 output +* +* Arguments: - uint8_t *out: pointer to output +* - size_t outlen: number of requested output bytes +* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) +* - uint8_t nonce: single-byte nonce (public PRF input) +**************************************************/ +void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce) +{ + uint8_t extkey[KYBER_SYMBYTES+1]; + + memcpy(extkey, key, KYBER_SYMBYTES); + extkey[KYBER_SYMBYTES] = nonce; + + shake256(out, outlen, extkey, sizeof(extkey)); +} + +/************************************************* +* Name: 
kyber_shake256_rkprf +* +* Description: Usage of SHAKE256 as a PRF, absorbs the key followed by the input +* and then squeezes KYBER_SSBYTES bytes of SHAKE256 output +* +* Arguments: - uint8_t *out: pointer to output (of length KYBER_SSBYTES) +* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) +* - const uint8_t *input: pointer to the input +* (of length KYBER_CIPHERTEXTBYTES) +**************************************************/ +void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]) +{ + shake256incctx s; + + shake256_inc_init(&s); + shake256_inc_absorb(&s, key, KYBER_SYMBYTES); + shake256_inc_absorb(&s, input, KYBER_CIPHERTEXTBYTES); + shake256_inc_finalize(&s); + shake256_inc_squeeze(out, KYBER_SSBYTES, &s); + shake256_inc_ctx_release(&s); +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/symmetric.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/symmetric.h new file mode 100644 index 0000000000..e4941f7a86 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/symmetric.h @@ -0,0 +1,34 @@ +#ifndef SYMMETRIC_H +#define SYMMETRIC_H + +#include +#include +#include "params.h" + +#include "fips202.h" +#include "fips202x4.h" + +typedef shake128incctx xof_state; + +#define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb) +void kyber_shake128_absorb(shake128incctx *s, + const uint8_t seed[KYBER_SYMBYTES], + uint8_t x, + uint8_t y); + +#define kyber_shake256_prf KYBER_NAMESPACE(kyber_shake256_prf) +void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce); + +#define kyber_shake256_rkprf KYBER_NAMESPACE(kyber_shake256_rkprf) +void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]); + +#define XOF_BLOCKBYTES SHAKE128_RATE + +#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) 
+#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES) +#define xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED, X, Y) +#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) +#define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, NONCE) +#define rkprf(OUT, KEY, INPUT) kyber_shake256_rkprf(OUT, KEY, INPUT) + +#endif /* SYMMETRIC_H */ diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/verify.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/verify.c new file mode 100644 index 0000000000..aa8e2850b1 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/verify.c @@ -0,0 +1,73 @@ +#include +#include +#include +#include "verify.h" + +/************************************************* +* Name: verify +* +* Description: Compare two arrays for equality in constant time. +* +* Arguments: const uint8_t *a: pointer to first byte array +* const uint8_t *b: pointer to second byte array +* size_t len: length of the byte arrays +* +* Returns 0 if the byte arrays are equal, 1 otherwise +**************************************************/ +int verify(const uint8_t *a, const uint8_t *b, size_t len) +{ + size_t i; + uint64_t r; + __m256i f, g, h; + + h = _mm256_setzero_si256(); + for(i=0;i> 63; + return r; +} + +/************************************************* +* Name: cmov +* +* Description: Copy len bytes from x to r if b is 1; +* don't modify r if b is 0. Requires b to be in {0,1}; +* assumes two's complement representation of negative integers. +* Runs in constant time. 
+* +* Arguments: uint8_t *r: pointer to output byte array +* const uint8_t *x: pointer to input byte array +* size_t len: Amount of bytes to be copied +* uint8_t b: Condition bit; has to be in {0,1} +**************************************************/ +void cmov(uint8_t * restrict r, const uint8_t *x, size_t len, uint8_t b) +{ + size_t i; + __m256i xvec, rvec, bvec; + + bvec = _mm256_set1_epi64x(-(uint64_t)b); + for(i=0;i +#include +#include "params.h" + +#define verify KYBER_NAMESPACE(verify) +int verify(const uint8_t *a, const uint8_t *b, size_t len); + +#define cmov KYBER_NAMESPACE(cmov) +void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/LICENSE b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/LICENSE new file mode 100644 index 0000000000..7922ab8007 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/LICENSE @@ -0,0 +1,6 @@ +Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/); +or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html). + +For Keccak and AES we are using public-domain +code from sources and by authors listed in +comments on top of the respective files. 
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/api.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/api.h new file mode 100644 index 0000000000..70d40f3f3e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/api.h @@ -0,0 +1,66 @@ +#ifndef API_H +#define API_H + +#include + +#define pqcrystals_kyber512_SECRETKEYBYTES 1632 +#define pqcrystals_kyber512_PUBLICKEYBYTES 800 +#define pqcrystals_kyber512_CIPHERTEXTBYTES 768 +#define pqcrystals_kyber512_KEYPAIRCOINBYTES 64 +#define pqcrystals_kyber512_ENCCOINBYTES 32 +#define pqcrystals_kyber512_BYTES 32 + +#define pqcrystals_kyber512_ref_SECRETKEYBYTES pqcrystals_kyber512_SECRETKEYBYTES +#define pqcrystals_kyber512_ref_PUBLICKEYBYTES pqcrystals_kyber512_PUBLICKEYBYTES +#define pqcrystals_kyber512_ref_CIPHERTEXTBYTES pqcrystals_kyber512_CIPHERTEXTBYTES +#define pqcrystals_kyber512_ref_KEYPAIRCOINBYTES pqcrystals_kyber512_KEYPAIRCOINBYTES +#define pqcrystals_kyber512_ref_ENCCOINBYTES pqcrystals_kyber512_ENCCOINBYTES +#define pqcrystals_kyber512_ref_BYTES pqcrystals_kyber512_BYTES + +int pqcrystals_kyber512_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); +int pqcrystals_kyber512_ref_keypair(uint8_t *pk, uint8_t *sk); +int pqcrystals_kyber512_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); +int pqcrystals_kyber512_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int pqcrystals_kyber512_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#define pqcrystals_kyber768_SECRETKEYBYTES 2400 +#define pqcrystals_kyber768_PUBLICKEYBYTES 1184 +#define pqcrystals_kyber768_CIPHERTEXTBYTES 1088 +#define pqcrystals_kyber768_KEYPAIRCOINBYTES 64 +#define pqcrystals_kyber768_ENCCOINBYTES 32 +#define pqcrystals_kyber768_BYTES 32 + +#define pqcrystals_kyber768_ref_SECRETKEYBYTES pqcrystals_kyber768_SECRETKEYBYTES +#define pqcrystals_kyber768_ref_PUBLICKEYBYTES pqcrystals_kyber768_PUBLICKEYBYTES +#define 
pqcrystals_kyber768_ref_CIPHERTEXTBYTES pqcrystals_kyber768_CIPHERTEXTBYTES +#define pqcrystals_kyber768_ref_KEYPAIRCOINBYTES pqcrystals_kyber768_KEYPAIRCOINBYTES +#define pqcrystals_kyber768_ref_ENCCOINBYTES pqcrystals_kyber768_ENCCOINBYTES +#define pqcrystals_kyber768_ref_BYTES pqcrystals_kyber768_BYTES + +int pqcrystals_kyber768_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); +int pqcrystals_kyber768_ref_keypair(uint8_t *pk, uint8_t *sk); +int pqcrystals_kyber768_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); +int pqcrystals_kyber768_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int pqcrystals_kyber768_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#define pqcrystals_kyber1024_SECRETKEYBYTES 3168 +#define pqcrystals_kyber1024_PUBLICKEYBYTES 1568 +#define pqcrystals_kyber1024_CIPHERTEXTBYTES 1568 +#define pqcrystals_kyber1024_KEYPAIRCOINBYTES 64 +#define pqcrystals_kyber1024_ENCCOINBYTES 32 +#define pqcrystals_kyber1024_BYTES 32 + +#define pqcrystals_kyber1024_ref_SECRETKEYBYTES pqcrystals_kyber1024_SECRETKEYBYTES +#define pqcrystals_kyber1024_ref_PUBLICKEYBYTES pqcrystals_kyber1024_PUBLICKEYBYTES +#define pqcrystals_kyber1024_ref_CIPHERTEXTBYTES pqcrystals_kyber1024_CIPHERTEXTBYTES +#define pqcrystals_kyber1024_ref_KEYPAIRCOINBYTES pqcrystals_kyber1024_KEYPAIRCOINBYTES +#define pqcrystals_kyber1024_ref_ENCCOINBYTES pqcrystals_kyber1024_ENCCOINBYTES +#define pqcrystals_kyber1024_ref_BYTES pqcrystals_kyber1024_BYTES + +int pqcrystals_kyber1024_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); +int pqcrystals_kyber1024_ref_keypair(uint8_t *pk, uint8_t *sk); +int pqcrystals_kyber1024_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); +int pqcrystals_kyber1024_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int pqcrystals_kyber1024_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#endif diff --git 
a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/cbd.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/cbd.c new file mode 100644 index 0000000000..1500ffea56 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/cbd.c @@ -0,0 +1,128 @@ +#include +#include "params.h" +#include "cbd.h" + +/************************************************* +* Name: load32_littleendian +* +* Description: load 4 bytes into a 32-bit integer +* in little-endian order +* +* Arguments: - const uint8_t *x: pointer to input byte array +* +* Returns 32-bit unsigned integer loaded from x +**************************************************/ +static uint32_t load32_littleendian(const uint8_t x[4]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + r |= (uint32_t)x[3] << 24; + return r; +} + +/************************************************* +* Name: load24_littleendian +* +* Description: load 3 bytes into a 32-bit integer +* in little-endian order. 
+* This function is only needed for Kyber-512 +* +* Arguments: - const uint8_t *x: pointer to input byte array +* +* Returns 32-bit unsigned integer loaded from x (most significant byte is zero) +**************************************************/ +#if KYBER_ETA1 == 3 +static uint32_t load24_littleendian(const uint8_t x[3]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + return r; +} +#endif + + +/************************************************* +* Name: cbd2 +* +* Description: Given an array of uniformly random bytes, compute +* polynomial with coefficients distributed according to +* a centered binomial distribution with parameter eta=2 +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *buf: pointer to input byte array +**************************************************/ +static void cbd2(poly *r, const uint8_t buf[2*KYBER_N/4]) +{ + unsigned int i,j; + uint32_t t,d; + int16_t a,b; + + for(i=0;i>1) & 0x55555555; + + for(j=0;j<8;j++) { + a = (d >> (4*j+0)) & 0x3; + b = (d >> (4*j+2)) & 0x3; + r->coeffs[8*i+j] = a - b; + } + } +} + +/************************************************* +* Name: cbd3 +* +* Description: Given an array of uniformly random bytes, compute +* polynomial with coefficients distributed according to +* a centered binomial distribution with parameter eta=3. 
+* This function is only needed for Kyber-512 +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *buf: pointer to input byte array +**************************************************/ +#if KYBER_ETA1 == 3 +static void cbd3(poly *r, const uint8_t buf[3*KYBER_N/4]) +{ + unsigned int i,j; + uint32_t t,d; + int16_t a,b; + + for(i=0;i>1) & 0x00249249; + d += (t>>2) & 0x00249249; + + for(j=0;j<4;j++) { + a = (d >> (6*j+0)) & 0x7; + b = (d >> (6*j+3)) & 0x7; + r->coeffs[4*i+j] = a - b; + } + } +} +#endif + +void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]) +{ +#if KYBER_ETA1 == 2 + cbd2(r, buf); +#elif KYBER_ETA1 == 3 + cbd3(r, buf); +#else +#error "This implementation requires eta1 in {2,3}" +#endif +} + +void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]) +{ +#if KYBER_ETA2 == 2 + cbd2(r, buf); +#else +#error "This implementation requires eta2 = 2" +#endif +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/cbd.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/cbd.h new file mode 100644 index 0000000000..7b677d745d --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/cbd.h @@ -0,0 +1,14 @@ +#ifndef CBD_H +#define CBD_H + +#include +#include "params.h" +#include "poly.h" + +#define poly_cbd_eta1 KYBER_NAMESPACE(poly_cbd_eta1) +void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]); + +#define poly_cbd_eta2 KYBER_NAMESPACE(poly_cbd_eta2) +void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/indcpa.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/indcpa.c new file mode 100644 index 0000000000..4a8b4c894f --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/indcpa.c @@ -0,0 +1,331 @@ +#include +#include +#include +#include "params.h" +#include "indcpa.h" +#include "polyvec.h" +#include "poly.h" 
+#include "ntt.h" +#include "symmetric.h" +#include "randombytes.h" + +/************************************************* +* Name: pack_pk +* +* Description: Serialize the public key as concatenation of the +* serialized vector of polynomials pk +* and the public seed used to generate the matrix A. +* +* Arguments: uint8_t *r: pointer to the output serialized public key +* polyvec *pk: pointer to the input public-key polyvec +* const uint8_t *seed: pointer to the input public seed +**************************************************/ +static void pack_pk(uint8_t r[KYBER_INDCPA_PUBLICKEYBYTES], + polyvec *pk, + const uint8_t seed[KYBER_SYMBYTES]) +{ + polyvec_tobytes(r, pk); + memcpy(r+KYBER_POLYVECBYTES, seed, KYBER_SYMBYTES); +} + +/************************************************* +* Name: unpack_pk +* +* Description: De-serialize public key from a byte array; +* approximate inverse of pack_pk +* +* Arguments: - polyvec *pk: pointer to output public-key polynomial vector +* - uint8_t *seed: pointer to output seed to generate matrix A +* - const uint8_t *packedpk: pointer to input serialized public key +**************************************************/ +static void unpack_pk(polyvec *pk, + uint8_t seed[KYBER_SYMBYTES], + const uint8_t packedpk[KYBER_INDCPA_PUBLICKEYBYTES]) +{ + polyvec_frombytes(pk, packedpk); + memcpy(seed, packedpk+KYBER_POLYVECBYTES, KYBER_SYMBYTES); +} + +/************************************************* +* Name: pack_sk +* +* Description: Serialize the secret key +* +* Arguments: - uint8_t *r: pointer to output serialized secret key +* - polyvec *sk: pointer to input vector of polynomials (secret key) +**************************************************/ +static void pack_sk(uint8_t r[KYBER_INDCPA_SECRETKEYBYTES], polyvec *sk) +{ + polyvec_tobytes(r, sk); +} + +/************************************************* +* Name: unpack_sk +* +* Description: De-serialize the secret key; inverse of pack_sk +* +* Arguments: - polyvec *sk: pointer to 
output vector of polynomials (secret key) +* - const uint8_t *packedsk: pointer to input serialized secret key +**************************************************/ +static void unpack_sk(polyvec *sk, const uint8_t packedsk[KYBER_INDCPA_SECRETKEYBYTES]) +{ + polyvec_frombytes(sk, packedsk); +} + +/************************************************* +* Name: pack_ciphertext +* +* Description: Serialize the ciphertext as concatenation of the +* compressed and serialized vector of polynomials b +* and the compressed and serialized polynomial v +* +* Arguments: uint8_t *r: pointer to the output serialized ciphertext +* polyvec *b: pointer to the input vector of polynomials b +* poly *v: pointer to the input polynomial v +**************************************************/ +static void pack_ciphertext(uint8_t r[KYBER_INDCPA_BYTES], polyvec *b, poly *v) +{ + polyvec_compress(r, b); + poly_compress(r+KYBER_POLYVECCOMPRESSEDBYTES, v); +} + +/************************************************* +* Name: unpack_ciphertext +* +* Description: De-serialize and decompress ciphertext from a byte array; +* approximate inverse of pack_ciphertext +* +* Arguments: - polyvec *b: pointer to the output vector of polynomials b +* - poly *v: pointer to the output polynomial v +* - const uint8_t *c: pointer to the input serialized ciphertext +**************************************************/ +static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t c[KYBER_INDCPA_BYTES]) +{ + polyvec_decompress(b, c); + poly_decompress(v, c+KYBER_POLYVECCOMPRESSEDBYTES); +} + +/************************************************* +* Name: rej_uniform +* +* Description: Run rejection sampling on uniform random bytes to generate +* uniform random integers mod q +* +* Arguments: - int16_t *r: pointer to output buffer +* - unsigned int len: requested number of 16-bit integers (uniform mod q) +* - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes) +* - unsigned int buflen: length 
of input buffer in bytes +* +* Returns number of sampled 16-bit integers (at most len) +**************************************************/ +static unsigned int rej_uniform(int16_t *r, + unsigned int len, + const uint8_t *buf, + unsigned int buflen) +{ + unsigned int ctr, pos; + uint16_t val0, val1; + + ctr = pos = 0; + while(ctr < len && pos + 3 <= buflen) { + val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; + val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)) & 0xFFF; + pos += 3; + + if(val0 < KYBER_Q) + r[ctr++] = val0; + if(ctr < len && val1 < KYBER_Q) + r[ctr++] = val1; + } + + return ctr; +} + +#define gen_a(A,B) gen_matrix(A,B,0) +#define gen_at(A,B) gen_matrix(A,B,1) + +/************************************************* +* Name: gen_matrix +* +* Description: Deterministically generate matrix A (or the transpose of A) +* from a seed. Entries of the matrix are polynomials that look +* uniformly random. Performs rejection sampling on output of +* a XOF +* +* Arguments: - polyvec *a: pointer to output matrix A +* - const uint8_t *seed: pointer to input seed +* - int transposed: boolean deciding whether A or A^T is generated +**************************************************/ +#define GEN_MATRIX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + XOF_BLOCKBYTES)/XOF_BLOCKBYTES) +// Not static for benchmarking +void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed) +{ + unsigned int ctr, i, j, k; + unsigned int buflen, off; + uint8_t buf[GEN_MATRIX_NBLOCKS*XOF_BLOCKBYTES+2]; + xof_state state; + xof_init(&state, seed); + + for(i=0;i +#include "params.h" +#include "polyvec.h" + +#define gen_matrix KYBER_NAMESPACE(gen_matrix) +void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed); + +#define indcpa_keypair_derand KYBER_NAMESPACE(indcpa_keypair_derand) +void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], + uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], + const uint8_t 
coins[KYBER_SYMBYTES]); + +#define indcpa_enc KYBER_NAMESPACE(indcpa_enc) +void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], + const uint8_t m[KYBER_INDCPA_MSGBYTES], + const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], + const uint8_t coins[KYBER_SYMBYTES]); + +#define indcpa_dec KYBER_NAMESPACE(indcpa_dec) +void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES], + const uint8_t c[KYBER_INDCPA_BYTES], + const uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES]); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/kem.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/kem.c new file mode 100644 index 0000000000..63abc1029c --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/kem.c @@ -0,0 +1,169 @@ +#include +#include +#include +#include "params.h" +#include "kem.h" +#include "indcpa.h" +#include "verify.h" +#include "symmetric.h" +#include "randombytes.h" +/************************************************* +* Name: crypto_kem_keypair_derand +* +* Description: Generates public and private key +* for CCA-secure Kyber key encapsulation mechanism +* +* Arguments: - uint8_t *pk: pointer to output public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key +* (an already allocated array of KYBER_SECRETKEYBYTES bytes) +* - uint8_t *coins: pointer to input randomness +* (an already allocated array filled with 2*KYBER_SYMBYTES random bytes) +** +* Returns 0 (success) +**************************************************/ +int crypto_kem_keypair_derand(uint8_t *pk, + uint8_t *sk, + const uint8_t *coins) +{ + indcpa_keypair_derand(pk, sk, coins); + memcpy(sk+KYBER_INDCPA_SECRETKEYBYTES, pk, KYBER_PUBLICKEYBYTES); + hash_h(sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); + /* Value z for pseudo-random output on reject */ + memcpy(sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, coins+KYBER_SYMBYTES, KYBER_SYMBYTES); + return 0; +} + 
+/************************************************* +* Name: crypto_kem_keypair +* +* Description: Generates public and private key +* for CCA-secure Kyber key encapsulation mechanism +* +* Arguments: - uint8_t *pk: pointer to output public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key +* (an already allocated array of KYBER_SECRETKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int crypto_kem_keypair(uint8_t *pk, + uint8_t *sk) +{ + uint8_t coins[2*KYBER_SYMBYTES]; + randombytes(coins, 2*KYBER_SYMBYTES); + crypto_kem_keypair_derand(pk, sk, coins); + return 0; +} + +/************************************************* +* Name: crypto_kem_enc_derand +* +* Description: Generates cipher text and shared +* secret for given public key +* +* Arguments: - uint8_t *ct: pointer to output cipher text +* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) +* - uint8_t *ss: pointer to output shared secret +* (an already allocated array of KYBER_SSBYTES bytes) +* - const uint8_t *pk: pointer to input public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* - const uint8_t *coins: pointer to input randomness +* (an already allocated array filled with KYBER_SYMBYTES random bytes) +** +* Returns 0 (success) +**************************************************/ +int crypto_kem_enc_derand(uint8_t *ct, + uint8_t *ss, + const uint8_t *pk, + const uint8_t *coins) +{ + uint8_t buf[2*KYBER_SYMBYTES]; + /* Will contain key, coins */ + uint8_t kr[2*KYBER_SYMBYTES]; + + memcpy(buf, coins, KYBER_SYMBYTES); + + /* Multitarget countermeasure for coins + contributory KEM */ + hash_h(buf+KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); + hash_g(kr, buf, 2*KYBER_SYMBYTES); + + /* coins are in kr+KYBER_SYMBYTES */ + indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES); + + memcpy(ss,kr,KYBER_SYMBYTES); + return 0; +} + +/************************************************* +* 
Name: crypto_kem_enc +* +* Description: Generates cipher text and shared +* secret for given public key +* +* Arguments: - uint8_t *ct: pointer to output cipher text +* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) +* - uint8_t *ss: pointer to output shared secret +* (an already allocated array of KYBER_SSBYTES bytes) +* - const uint8_t *pk: pointer to input public key +* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int crypto_kem_enc(uint8_t *ct, + uint8_t *ss, + const uint8_t *pk) +{ + uint8_t coins[KYBER_SYMBYTES]; + randombytes(coins, KYBER_SYMBYTES); + crypto_kem_enc_derand(ct, ss, pk, coins); + return 0; +} + +/************************************************* +* Name: crypto_kem_dec +* +* Description: Generates shared secret for given +* cipher text and private key +* +* Arguments: - uint8_t *ss: pointer to output shared secret +* (an already allocated array of KYBER_SSBYTES bytes) +* - const uint8_t *ct: pointer to input cipher text +* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) +* - const uint8_t *sk: pointer to input private key +* (an already allocated array of KYBER_SECRETKEYBYTES bytes) +* +* Returns 0. +* +* On failure, ss will contain a pseudo-random value. 
+**************************************************/ +int crypto_kem_dec(uint8_t *ss, + const uint8_t *ct, + const uint8_t *sk) +{ + int fail; + uint8_t buf[2*KYBER_SYMBYTES]; + /* Will contain key, coins */ + uint8_t kr[2*KYBER_SYMBYTES]; + uint8_t cmp[KYBER_CIPHERTEXTBYTES+KYBER_SYMBYTES]; + const uint8_t *pk = sk+KYBER_INDCPA_SECRETKEYBYTES; + + indcpa_dec(buf, ct, sk); + + /* Multitarget countermeasure for coins + contributory KEM */ + memcpy(buf+KYBER_SYMBYTES, sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, KYBER_SYMBYTES); + hash_g(kr, buf, 2*KYBER_SYMBYTES); + + /* coins are in kr+KYBER_SYMBYTES */ + indcpa_enc(cmp, buf, pk, kr+KYBER_SYMBYTES); + + fail = verify(ct, cmp, KYBER_CIPHERTEXTBYTES); + + /* Compute rejection key */ + rkprf(ss,sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES,ct); + + /* Copy true key to return buffer if fail is false */ + cmov(ss,kr,KYBER_SYMBYTES,!fail); + + return 0; +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/kem.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/kem.h new file mode 100644 index 0000000000..234f11966b --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/kem.h @@ -0,0 +1,35 @@ +#ifndef KEM_H +#define KEM_H + +#include +#include "params.h" + +#define CRYPTO_SECRETKEYBYTES KYBER_SECRETKEYBYTES +#define CRYPTO_PUBLICKEYBYTES KYBER_PUBLICKEYBYTES +#define CRYPTO_CIPHERTEXTBYTES KYBER_CIPHERTEXTBYTES +#define CRYPTO_BYTES KYBER_SSBYTES + +#if (KYBER_K == 2) +#define CRYPTO_ALGNAME "Kyber512" +#elif (KYBER_K == 3) +#define CRYPTO_ALGNAME "Kyber768" +#elif (KYBER_K == 4) +#define CRYPTO_ALGNAME "Kyber1024" +#endif + +#define crypto_kem_keypair_derand KYBER_NAMESPACE(keypair_derand) +int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); + +#define crypto_kem_keypair KYBER_NAMESPACE(keypair) +int crypto_kem_keypair(uint8_t *pk, uint8_t *sk); + +#define crypto_kem_enc_derand KYBER_NAMESPACE(enc_derand) +int crypto_kem_enc_derand(uint8_t *ct, 
uint8_t *ss, const uint8_t *pk, const uint8_t *coins); + +#define crypto_kem_enc KYBER_NAMESPACE(enc) +int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); + +#define crypto_kem_dec KYBER_NAMESPACE(dec) +int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/ntt.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/ntt.c new file mode 100644 index 0000000000..2f2eb10b2f --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/ntt.c @@ -0,0 +1,146 @@ +#include +#include "params.h" +#include "ntt.h" +#include "reduce.h" + +/* Code to generate zetas and zetas_inv used in the number-theoretic transform: + +#define KYBER_ROOT_OF_UNITY 17 + +static const uint8_t tree[128] = { + 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, + 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124, + 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122, + 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126, + 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121, + 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, + 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, + 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127 +}; + +void init_ntt() { + unsigned int i; + int16_t tmp[128]; + + tmp[0] = MONT; + for(i=1;i<128;i++) + tmp[i] = fqmul(tmp[i-1],MONT*KYBER_ROOT_OF_UNITY % KYBER_Q); + + for(i=0;i<128;i++) { + zetas[i] = tmp[tree[i]]; + if(zetas[i] > KYBER_Q/2) + zetas[i] -= KYBER_Q; + if(zetas[i] < -KYBER_Q/2) + zetas[i] += KYBER_Q; + } +} +*/ + +const int16_t zetas[128] = { + -1044, -758, -359, -1517, 1493, 1422, 287, 202, + -171, 622, 1577, 182, 962, -1202, -1474, 1468, + 573, -1325, 264, 383, -829, 1458, -1602, -130, + -681, 1017, 732, 608, -1542, 411, -205, -1571, + 1223, 652, -552, 1015, -1293, 1491, -282, -1544, + 516, -8, -320, 
-666, -1618, -1162, 126, 1469, + -853, -90, -271, 830, 107, -1421, -247, -951, + -398, 961, -1508, -725, 448, -1065, 677, -1275, + -1103, 430, 555, 843, -1251, 871, 1550, 105, + 422, 587, 177, -235, -291, -460, 1574, 1653, + -246, 778, 1159, -147, -777, 1483, -602, 1119, + -1590, 644, -872, 349, 418, 329, -156, -75, + 817, 1097, 603, 610, 1322, -1285, -1465, 384, + -1215, -136, 1218, -1335, -874, 220, -1187, -1659, + -1185, -1530, -1278, 794, -1510, -854, -870, 478, + -108, -308, 996, 991, 958, -1460, 1522, 1628 +}; + +/************************************************* +* Name: fqmul +* +* Description: Multiplication followed by Montgomery reduction +* +* Arguments: - int16_t a: first factor +* - int16_t b: second factor +* +* Returns 16-bit integer congruent to a*b*R^{-1} mod q +**************************************************/ +static int16_t fqmul(int16_t a, int16_t b) { + return montgomery_reduce((int32_t)a*b); +} + +/************************************************* +* Name: ntt +* +* Description: Inplace number-theoretic transform (NTT) in Rq. +* input is in standard order, output is in bitreversed order +* +* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq +**************************************************/ +void ntt(int16_t r[256]) { + unsigned int len, start, j, k; + int16_t t, zeta; + + k = 1; + for(len = 128; len >= 2; len >>= 1) { + for(start = 0; start < 256; start = j + len) { + zeta = zetas[k++]; + for(j = start; j < start + len; j++) { + t = fqmul(zeta, r[j + len]); + r[j + len] = r[j] - t; + r[j] = r[j] + t; + } + } + } +} + +/************************************************* +* Name: invntt_tomont +* +* Description: Inplace inverse number-theoretic transform in Rq and +* multiplication by Montgomery factor 2^16. 
+* Input is in bitreversed order, output is in standard order +* +* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq +**************************************************/ +void invntt(int16_t r[256]) { + unsigned int start, len, j, k; + int16_t t, zeta; + const int16_t f = 1441; // mont^2/128 + + k = 127; + for(len = 2; len <= 128; len <<= 1) { + for(start = 0; start < 256; start = j + len) { + zeta = zetas[k--]; + for(j = start; j < start + len; j++) { + t = r[j]; + r[j] = barrett_reduce(t + r[j + len]); + r[j + len] = r[j + len] - t; + r[j + len] = fqmul(zeta, r[j + len]); + } + } + } + + for(j = 0; j < 256; j++) + r[j] = fqmul(r[j], f); +} + +/************************************************* +* Name: basemul +* +* Description: Multiplication of polynomials in Zq[X]/(X^2-zeta) +* used for multiplication of elements in Rq in NTT domain +* +* Arguments: - int16_t r[2]: pointer to the output polynomial +* - const int16_t a[2]: pointer to the first factor +* - const int16_t b[2]: pointer to the second factor +* - int16_t zeta: integer defining the reduction polynomial +**************************************************/ +void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta) +{ + r[0] = fqmul(a[1], b[1]); + r[0] = fqmul(r[0], zeta); + r[0] += fqmul(a[0], b[0]); + r[1] = fqmul(a[0], b[1]); + r[1] += fqmul(a[1], b[0]); +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/ntt.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/ntt.h new file mode 100644 index 0000000000..227ea74f08 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/ntt.h @@ -0,0 +1,19 @@ +#ifndef NTT_H +#define NTT_H + +#include +#include "params.h" + +#define zetas KYBER_NAMESPACE(zetas) +extern const int16_t zetas[128]; + +#define ntt KYBER_NAMESPACE(ntt) +void ntt(int16_t poly[256]); + +#define invntt KYBER_NAMESPACE(invntt) +void invntt(int16_t poly[256]); + +#define basemul 
KYBER_NAMESPACE(basemul) +void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/params.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/params.h new file mode 100644 index 0000000000..36b2b987f3 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/params.h @@ -0,0 +1,55 @@ +#ifndef PARAMS_H +#define PARAMS_H + +#ifndef KYBER_K +#define KYBER_K 3 /* Change this for different security strengths */ +#endif + + +/* Don't change parameters below this line */ +#if (KYBER_K == 2) +#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_512_ipd_ref_##s +#elif (KYBER_K == 3) +#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_768_ipd_ref_##s +#elif (KYBER_K == 4) +#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_1024_ipd_ref_##s +#else +#error "KYBER_K must be in {2,3,4}" +#endif + +#define KYBER_N 256 +#define KYBER_Q 3329 + +#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ +#define KYBER_SSBYTES 32 /* size in bytes of shared key */ + +#define KYBER_POLYBYTES 384 +#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) + +#if KYBER_K == 2 +#define KYBER_ETA1 3 +#define KYBER_POLYCOMPRESSEDBYTES 128 +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) +#elif KYBER_K == 3 +#define KYBER_ETA1 2 +#define KYBER_POLYCOMPRESSEDBYTES 128 +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) +#elif KYBER_K == 4 +#define KYBER_ETA1 2 +#define KYBER_POLYCOMPRESSEDBYTES 160 +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352) +#endif + +#define KYBER_ETA2 2 + +#define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES) +#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) +#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES) +#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) + +#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) +/* 32 bytes of additional space to save H(pk) */ 
+#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) +#define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES) + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/poly.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/poly.c new file mode 100644 index 0000000000..0fe5a20f63 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/poly.c @@ -0,0 +1,360 @@ +#include +#include "params.h" +#include "poly.h" +#include "ntt.h" +#include "reduce.h" +#include "cbd.h" +#include "symmetric.h" + +/************************************************* +* Name: poly_compress +* +* Description: Compression and subsequent serialization of a polynomial +* +* Arguments: - uint8_t *r: pointer to output byte array +* (of length KYBER_POLYCOMPRESSEDBYTES) +* - const poly *a: pointer to input polynomial +**************************************************/ +void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) +{ + unsigned int i,j; + int32_t u; + uint32_t d0; + uint8_t t[8]; + +#if (KYBER_POLYCOMPRESSEDBYTES == 128) + + for(i=0;icoeffs[8*i+j]; + u += (u >> 15) & KYBER_Q; +/* t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */ + d0 = u << 4; + d0 += 1665; + d0 *= 80635; + d0 >>= 28; + t[j] = d0 & 0xf; + } + + r[0] = t[0] | (t[1] << 4); + r[1] = t[2] | (t[3] << 4); + r[2] = t[4] | (t[5] << 4); + r[3] = t[6] | (t[7] << 4); + r += 4; + } +#elif (KYBER_POLYCOMPRESSEDBYTES == 160) + for(i=0;icoeffs[8*i+j]; + u += (u >> 15) & KYBER_Q; +/* t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */ + d0 = u << 5; + d0 += 1664; + d0 *= 40318; + d0 >>= 27; + t[j] = d0 & 0x1f; + } + + r[0] = (t[0] >> 0) | (t[1] << 5); + r[1] = (t[1] >> 3) | (t[2] << 2) | (t[3] << 7); + r[2] = (t[3] >> 1) | (t[4] << 4); + r[3] = (t[4] >> 4) | (t[5] << 1) | (t[6] << 6); + r[4] = (t[6] >> 2) | (t[7] << 3); + r += 5; + } +#else +#error "KYBER_POLYCOMPRESSEDBYTES needs to be in 
{128, 160}" +#endif +} + +/************************************************* +* Name: poly_decompress +* +* Description: De-serialization and subsequent decompression of a polynomial; +* approximate inverse of poly_compress +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: pointer to input byte array +* (of length KYBER_POLYCOMPRESSEDBYTES bytes) +**************************************************/ +void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]) +{ + unsigned int i; + +#if (KYBER_POLYCOMPRESSEDBYTES == 128) + for(i=0;icoeffs[2*i+0] = (((uint16_t)(a[0] & 15)*KYBER_Q) + 8) >> 4; + r->coeffs[2*i+1] = (((uint16_t)(a[0] >> 4)*KYBER_Q) + 8) >> 4; + a += 1; + } +#elif (KYBER_POLYCOMPRESSEDBYTES == 160) + unsigned int j; + uint8_t t[8]; + for(i=0;i> 0); + t[1] = (a[0] >> 5) | (a[1] << 3); + t[2] = (a[1] >> 2); + t[3] = (a[1] >> 7) | (a[2] << 1); + t[4] = (a[2] >> 4) | (a[3] << 4); + t[5] = (a[3] >> 1); + t[6] = (a[3] >> 6) | (a[4] << 2); + t[7] = (a[4] >> 3); + a += 5; + + for(j=0;j<8;j++) + r->coeffs[8*i+j] = ((uint32_t)(t[j] & 31)*KYBER_Q + 16) >> 5; + } +#else +#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}" +#endif +} + +/************************************************* +* Name: poly_tobytes +* +* Description: Serialization of a polynomial +* +* Arguments: - uint8_t *r: pointer to output byte array +* (needs space for KYBER_POLYBYTES bytes) +* - const poly *a: pointer to input polynomial +**************************************************/ +void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a) +{ + unsigned int i; + uint16_t t0, t1; + + for(i=0;icoeffs[2*i]; + t0 += ((int16_t)t0 >> 15) & KYBER_Q; + t1 = a->coeffs[2*i+1]; + t1 += ((int16_t)t1 >> 15) & KYBER_Q; + r[3*i+0] = (t0 >> 0); + r[3*i+1] = (t0 >> 8) | (t1 << 4); + r[3*i+2] = (t1 >> 4); + } +} + +/************************************************* +* Name: poly_frombytes +* +* Description: De-serialization of a polynomial; +* inverse of 
poly_tobytes +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: pointer to input byte array +* (of KYBER_POLYBYTES bytes) +**************************************************/ +void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]) +{ + unsigned int i; + for(i=0;icoeffs[2*i] = ((a[3*i+0] >> 0) | ((uint16_t)a[3*i+1] << 8)) & 0xFFF; + r->coeffs[2*i+1] = ((a[3*i+1] >> 4) | ((uint16_t)a[3*i+2] << 4)) & 0xFFF; + } +} + +/************************************************* +* Name: poly_frommsg +* +* Description: Convert 32-byte message to polynomial +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *msg: pointer to input message +**************************************************/ +void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]) +{ + unsigned int i,j; + int16_t mask; + +#if (KYBER_INDCPA_MSGBYTES != KYBER_N/8) +#error "KYBER_INDCPA_MSGBYTES must be equal to KYBER_N/8 bytes!" +#endif + + for(i=0;i> j)&1); + r->coeffs[8*i+j] = mask & ((KYBER_Q+1)/2); + } + } +} + +/************************************************* +* Name: poly_tomsg +* +* Description: Convert polynomial to 32-byte message +* +* Arguments: - uint8_t *msg: pointer to output message +* - const poly *a: pointer to input polynomial +**************************************************/ +void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *a) +{ + unsigned int i,j; + uint32_t t; + + for(i=0;icoeffs[8*i+j]; + // t += ((int16_t)t >> 15) & KYBER_Q; + // t = (((t << 1) + KYBER_Q/2)/KYBER_Q) & 1; + t <<= 1; + t += 1665; + t *= 80635; + t >>= 28; + t &= 1; + msg[i] |= t << j; + } + } +} + +/************************************************* +* Name: poly_getnoise_eta1 +* +* Description: Sample a polynomial deterministically from a seed and a nonce, +* with output polynomial close to centered binomial distribution +* with parameter KYBER_ETA1 +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *seed: 
pointer to input seed +* (of length KYBER_SYMBYTES bytes) +* - uint8_t nonce: one-byte input nonce +**************************************************/ +void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) +{ + uint8_t buf[KYBER_ETA1*KYBER_N/4]; + prf(buf, sizeof(buf), seed, nonce); + poly_cbd_eta1(r, buf); +} + +/************************************************* +* Name: poly_getnoise_eta2 +* +* Description: Sample a polynomial deterministically from a seed and a nonce, +* with output polynomial close to centered binomial distribution +* with parameter KYBER_ETA2 +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *seed: pointer to input seed +* (of length KYBER_SYMBYTES bytes) +* - uint8_t nonce: one-byte input nonce +**************************************************/ +void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) +{ + uint8_t buf[KYBER_ETA2*KYBER_N/4]; + prf(buf, sizeof(buf), seed, nonce); + poly_cbd_eta2(r, buf); +} + + +/************************************************* +* Name: poly_ntt +* +* Description: Computes negacyclic number-theoretic transform (NTT) of +* a polynomial in place; +* inputs assumed to be in normal order, output in bitreversed order +* +* Arguments: - uint16_t *r: pointer to in/output polynomial +**************************************************/ +void poly_ntt(poly *r) +{ + ntt(r->coeffs); + poly_reduce(r); +} + +/************************************************* +* Name: poly_invntt_tomont +* +* Description: Computes inverse of negacyclic number-theoretic transform (NTT) +* of a polynomial in place; +* inputs assumed to be in bitreversed order, output in normal order +* +* Arguments: - uint16_t *a: pointer to in/output polynomial +**************************************************/ +void poly_invntt_tomont(poly *r) +{ + invntt(r->coeffs); +} + +/************************************************* +* Name: poly_basemul_montgomery +* +* 
Description: Multiplication of two polynomials in NTT domain +* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_basemul_montgomery(poly *r, const poly *a, const poly *b) +{ + unsigned int i; + for(i=0;icoeffs[4*i], &a->coeffs[4*i], &b->coeffs[4*i], zetas[64+i]); + basemul(&r->coeffs[4*i+2], &a->coeffs[4*i+2], &b->coeffs[4*i+2], -zetas[64+i]); + } +} + +/************************************************* +* Name: poly_tomont +* +* Description: Inplace conversion of all coefficients of a polynomial +* from normal domain to Montgomery domain +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void poly_tomont(poly *r) +{ + unsigned int i; + const int16_t f = (1ULL << 32) % KYBER_Q; + for(i=0;icoeffs[i] = montgomery_reduce((int32_t)r->coeffs[i]*f); +} + +/************************************************* +* Name: poly_reduce +* +* Description: Applies Barrett reduction to all coefficients of a polynomial +* for details of the Barrett reduction see comments in reduce.c +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void poly_reduce(poly *r) +{ + unsigned int i; + for(i=0;icoeffs[i] = barrett_reduce(r->coeffs[i]); +} + +/************************************************* +* Name: poly_add +* +* Description: Add two polynomials; no modular reduction is performed +* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_add(poly *r, const poly *a, const poly *b) +{ + unsigned int i; + for(i=0;icoeffs[i] = a->coeffs[i] + b->coeffs[i]; +} + +/************************************************* +* Name: 
poly_sub +* +* Description: Subtract two polynomials; no modular reduction is performed +* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_sub(poly *r, const poly *a, const poly *b) +{ + unsigned int i; + for(i=0;icoeffs[i] = a->coeffs[i] - b->coeffs[i]; +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/poly.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/poly.h new file mode 100644 index 0000000000..9a99c7cdad --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/poly.h @@ -0,0 +1,53 @@ +#ifndef POLY_H +#define POLY_H + +#include +#include "params.h" + +/* + * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial + * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1] + */ +typedef struct{ + int16_t coeffs[KYBER_N]; +} poly; + +#define poly_compress KYBER_NAMESPACE(poly_compress) +void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a); +#define poly_decompress KYBER_NAMESPACE(poly_decompress) +void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]); + +#define poly_tobytes KYBER_NAMESPACE(poly_tobytes) +void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a); +#define poly_frombytes KYBER_NAMESPACE(poly_frombytes) +void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]); + +#define poly_frommsg KYBER_NAMESPACE(poly_frommsg) +void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]); +#define poly_tomsg KYBER_NAMESPACE(poly_tomsg) +void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r); + +#define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1) +void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); + +#define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2) +void poly_getnoise_eta2(poly *r, 
const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); + +#define poly_ntt KYBER_NAMESPACE(poly_ntt) +void poly_ntt(poly *r); +#define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont) +void poly_invntt_tomont(poly *r); +#define poly_basemul_montgomery KYBER_NAMESPACE(poly_basemul_montgomery) +void poly_basemul_montgomery(poly *r, const poly *a, const poly *b); +#define poly_tomont KYBER_NAMESPACE(poly_tomont) +void poly_tomont(poly *r); + +#define poly_reduce KYBER_NAMESPACE(poly_reduce) +void poly_reduce(poly *r); + +#define poly_add KYBER_NAMESPACE(poly_add) +void poly_add(poly *r, const poly *a, const poly *b); +#define poly_sub KYBER_NAMESPACE(poly_sub) +void poly_sub(poly *r, const poly *a, const poly *b); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/polyvec.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/polyvec.c new file mode 100644 index 0000000000..661c71ec32 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/polyvec.c @@ -0,0 +1,247 @@ +#include +#include "params.h" +#include "poly.h" +#include "polyvec.h" + +/************************************************* +* Name: polyvec_compress +* +* Description: Compress and serialize vector of polynomials +* +* Arguments: - uint8_t *r: pointer to output byte array +* (needs space for KYBER_POLYVECCOMPRESSEDBYTES) +* - const polyvec *a: pointer to input vector of polynomials +**************************************************/ +void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) +{ + unsigned int i,j,k; + uint64_t d0; + +#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) + uint16_t t[8]; + for(i=0;ivec[i].coeffs[8*j+k]; + t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; +/* t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */ + d0 = t[k]; + d0 <<= 11; + d0 += 1664; + d0 *= 645084; + d0 >>= 31; + t[k] = d0 & 0x7ff; + + } + + r[ 0] = (t[0] >> 0); + r[ 1] = (t[0] >> 8) | (t[1] << 3); + r[ 2] = 
(t[1] >> 5) | (t[2] << 6); + r[ 3] = (t[2] >> 2); + r[ 4] = (t[2] >> 10) | (t[3] << 1); + r[ 5] = (t[3] >> 7) | (t[4] << 4); + r[ 6] = (t[4] >> 4) | (t[5] << 7); + r[ 7] = (t[5] >> 1); + r[ 8] = (t[5] >> 9) | (t[6] << 2); + r[ 9] = (t[6] >> 6) | (t[7] << 5); + r[10] = (t[7] >> 3); + r += 11; + } + } +#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) + uint16_t t[4]; + for(i=0;ivec[i].coeffs[4*j+k]; + t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; +/* t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */ + d0 = t[k]; + d0 <<= 10; + d0 += 1665; + d0 *= 1290167; + d0 >>= 32; + t[k] = d0 & 0x3ff; + } + + r[0] = (t[0] >> 0); + r[1] = (t[0] >> 8) | (t[1] << 2); + r[2] = (t[1] >> 6) | (t[2] << 4); + r[3] = (t[2] >> 4) | (t[3] << 6); + r[4] = (t[3] >> 2); + r += 5; + } + } +#else +#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" +#endif +} + +/************************************************* +* Name: polyvec_decompress +* +* Description: De-serialize and decompress vector of polynomials; +* approximate inverse of polyvec_compress +* +* Arguments: - polyvec *r: pointer to output vector of polynomials +* - const uint8_t *a: pointer to input byte array +* (of length KYBER_POLYVECCOMPRESSEDBYTES) +**************************************************/ +void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]) +{ + unsigned int i,j,k; + +#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) + uint16_t t[8]; + for(i=0;i> 0) | ((uint16_t)a[ 1] << 8); + t[1] = (a[1] >> 3) | ((uint16_t)a[ 2] << 5); + t[2] = (a[2] >> 6) | ((uint16_t)a[ 3] << 2) | ((uint16_t)a[4] << 10); + t[3] = (a[4] >> 1) | ((uint16_t)a[ 5] << 7); + t[4] = (a[5] >> 4) | ((uint16_t)a[ 6] << 4); + t[5] = (a[6] >> 7) | ((uint16_t)a[ 7] << 1) | ((uint16_t)a[8] << 9); + t[6] = (a[8] >> 2) | ((uint16_t)a[ 9] << 6); + t[7] = (a[9] >> 5) | ((uint16_t)a[10] << 3); + a += 11; + + for(k=0;k<8;k++) + r->vec[i].coeffs[8*j+k] = ((uint32_t)(t[k] & 0x7FF)*KYBER_Q 
+ 1024) >> 11; + } + } +#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) + uint16_t t[4]; + for(i=0;i> 0) | ((uint16_t)a[1] << 8); + t[1] = (a[1] >> 2) | ((uint16_t)a[2] << 6); + t[2] = (a[2] >> 4) | ((uint16_t)a[3] << 4); + t[3] = (a[3] >> 6) | ((uint16_t)a[4] << 2); + a += 5; + + for(k=0;k<4;k++) + r->vec[i].coeffs[4*j+k] = ((uint32_t)(t[k] & 0x3FF)*KYBER_Q + 512) >> 10; + } + } +#else +#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" +#endif +} + +/************************************************* +* Name: polyvec_tobytes +* +* Description: Serialize vector of polynomials +* +* Arguments: - uint8_t *r: pointer to output byte array +* (needs space for KYBER_POLYVECBYTES) +* - const polyvec *a: pointer to input vector of polynomials +**************************************************/ +void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_frombytes +* +* Description: De-serialize vector of polynomials; +* inverse of polyvec_tobytes +* +* Arguments: - uint8_t *r: pointer to output byte array +* - const polyvec *a: pointer to input vector of polynomials +* (of length KYBER_POLYVECBYTES) +**************************************************/ +void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]) +{ + unsigned int i; + for(i=0;ivec[i], a+i*KYBER_POLYBYTES); +} + +/************************************************* +* Name: polyvec_ntt +* +* Description: Apply forward NTT to all elements of a vector of polynomials +* +* Arguments: - polyvec *r: pointer to in/output vector of polynomials +**************************************************/ +void polyvec_ntt(polyvec *r) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_invntt_tomont +* +* Description: Apply inverse NTT to all elements of a vector of polynomials +* and multiply 
by Montgomery factor 2^16 +* +* Arguments: - polyvec *r: pointer to in/output vector of polynomials +**************************************************/ +void polyvec_invntt_tomont(polyvec *r) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_basemul_acc_montgomery +* +* Description: Multiply elements of a and b in NTT domain, accumulate into r, +* and multiply by 2^-16. +* +* Arguments: - poly *r: pointer to output polynomial +* - const polyvec *a: pointer to first input vector of polynomials +* - const polyvec *b: pointer to second input vector of polynomials +**************************************************/ +void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) +{ + unsigned int i; + poly t; + + poly_basemul_montgomery(r, &a->vec[0], &b->vec[0]); + for(i=1;ivec[i], &b->vec[i]); + poly_add(r, r, &t); + } + + poly_reduce(r); +} + +/************************************************* +* Name: polyvec_reduce +* +* Description: Applies Barrett reduction to each coefficient +* of each element of a vector of polynomials; +* for details of the Barrett reduction see comments in reduce.c +* +* Arguments: - polyvec *r: pointer to input/output polynomial +**************************************************/ +void polyvec_reduce(polyvec *r) +{ + unsigned int i; + for(i=0;ivec[i]); +} + +/************************************************* +* Name: polyvec_add +* +* Description: Add vectors of polynomials +* +* Arguments: - polyvec *r: pointer to output vector of polynomials +* - const polyvec *a: pointer to first input vector of polynomials +* - const polyvec *b: pointer to second input vector of polynomials +**************************************************/ +void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) +{ + unsigned int i; + for(i=0;ivec[i], &a->vec[i], &b->vec[i]); +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/polyvec.h 
b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/polyvec.h new file mode 100644 index 0000000000..57b605494e --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/polyvec.h @@ -0,0 +1,36 @@ +#ifndef POLYVEC_H +#define POLYVEC_H + +#include +#include "params.h" +#include "poly.h" + +typedef struct{ + poly vec[KYBER_K]; +} polyvec; + +#define polyvec_compress KYBER_NAMESPACE(polyvec_compress) +void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a); +#define polyvec_decompress KYBER_NAMESPACE(polyvec_decompress) +void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]); + +#define polyvec_tobytes KYBER_NAMESPACE(polyvec_tobytes) +void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a); +#define polyvec_frombytes KYBER_NAMESPACE(polyvec_frombytes) +void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]); + +#define polyvec_ntt KYBER_NAMESPACE(polyvec_ntt) +void polyvec_ntt(polyvec *r); +#define polyvec_invntt_tomont KYBER_NAMESPACE(polyvec_invntt_tomont) +void polyvec_invntt_tomont(polyvec *r); + +#define polyvec_basemul_acc_montgomery KYBER_NAMESPACE(polyvec_basemul_acc_montgomery) +void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b); + +#define polyvec_reduce KYBER_NAMESPACE(polyvec_reduce) +void polyvec_reduce(polyvec *r); + +#define polyvec_add KYBER_NAMESPACE(polyvec_add) +void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/reduce.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/reduce.c new file mode 100644 index 0000000000..9d8e7edf83 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/reduce.c @@ -0,0 +1,42 @@ +#include +#include "params.h" +#include "reduce.h" + +/************************************************* +* Name: montgomery_reduce +* +* Description: Montgomery 
reduction; given a 32-bit integer a, computes +* 16-bit integer congruent to a * R^-1 mod q, where R=2^16 +* +* Arguments: - int32_t a: input integer to be reduced; +* has to be in {-q2^15,...,q2^15-1} +* +* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. +**************************************************/ +int16_t montgomery_reduce(int32_t a) +{ + int16_t t; + + t = (int16_t)a*QINV; + t = (a - (int32_t)t*KYBER_Q) >> 16; + return t; +} + +/************************************************* +* Name: barrett_reduce +* +* Description: Barrett reduction; given a 16-bit integer a, computes +* centered representative congruent to a mod q in {-(q-1)/2,...,(q-1)/2} +* +* Arguments: - int16_t a: input integer to be reduced +* +* Returns: integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q. +**************************************************/ +int16_t barrett_reduce(int16_t a) { + int16_t t; + const int16_t v = ((1<<26) + KYBER_Q/2)/KYBER_Q; + + t = ((int32_t)v*a + (1<<25)) >> 26; + t *= KYBER_Q; + return a - t; +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/reduce.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/reduce.h new file mode 100644 index 0000000000..c1bc1e4c7b --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/reduce.h @@ -0,0 +1,16 @@ +#ifndef REDUCE_H +#define REDUCE_H + +#include +#include "params.h" + +#define MONT -1044 // 2^16 mod q +#define QINV -3327 // q^-1 mod 2^16 + +#define montgomery_reduce KYBER_NAMESPACE(montgomery_reduce) +int16_t montgomery_reduce(int32_t a); + +#define barrett_reduce KYBER_NAMESPACE(barrett_reduce) +int16_t barrett_reduce(int16_t a); + +#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/symmetric-shake.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/symmetric-shake.c new file mode 100644 index 0000000000..20f451882e --- /dev/null +++ 
b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/symmetric-shake.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include "params.h" +#include "symmetric.h" +#include "fips202.h" + +/************************************************* +* Name: kyber_shake128_absorb +* +* Description: Absorb step of the SHAKE128 specialized for the Kyber context. +* +* Arguments: - shake128incctx *state: pointer to (uninitialized) output Keccak state +* - const uint8_t *seed: pointer to KYBER_SYMBYTES input to be absorbed into state +* - uint8_t x: additional byte of input +* - uint8_t y: additional byte of input +**************************************************/ +void kyber_shake128_absorb(shake128incctx *state, + const uint8_t seed[KYBER_SYMBYTES], + uint8_t x, + uint8_t y) +{ + uint8_t extseed[KYBER_SYMBYTES+2]; + + memcpy(extseed, seed, KYBER_SYMBYTES); + extseed[KYBER_SYMBYTES+0] = x; + extseed[KYBER_SYMBYTES+1] = y; + + shake128_absorb_once(state, extseed, sizeof(extseed)); +} + +/************************************************* +* Name: kyber_shake256_prf +* +* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input +* and then generates outlen bytes of SHAKE256 output +* +* Arguments: - uint8_t *out: pointer to output +* - size_t outlen: number of requested output bytes +* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) +* - uint8_t nonce: single-byte nonce (public PRF input) +**************************************************/ +void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce) +{ + uint8_t extkey[KYBER_SYMBYTES+1]; + + memcpy(extkey, key, KYBER_SYMBYTES); + extkey[KYBER_SYMBYTES] = nonce; + + shake256(out, outlen, extkey, sizeof(extkey)); +} + +/************************************************* +* Name: kyber_shake256_rkprf +* +* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input +* and then generates KYBER_SSBYTES bytes of SHAKE256 output +* +*
Arguments: - uint8_t *out: pointer to output (of length KYBER_SSBYTES) +* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) +* - const uint8_t *input: pointer to the public input +* (of length KYBER_CIPHERTEXTBYTES) +**************************************************/ +void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]) +{ + shake256incctx s; + + shake256_inc_init(&s); + shake256_inc_absorb(&s, key, KYBER_SYMBYTES); + shake256_inc_absorb(&s, input, KYBER_CIPHERTEXTBYTES); + shake256_inc_finalize(&s); + shake256_inc_squeeze(out, KYBER_SSBYTES, &s); + shake256_inc_ctx_release(&s); +} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/symmetric.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/symmetric.h new file mode 100644 index 0000000000..2acc66f98d --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/symmetric.h @@ -0,0 +1,35 @@ +#ifndef SYMMETRIC_H +#define SYMMETRIC_H + +#include +#include +#include "params.h" + +#include "fips202.h" + +typedef shake128incctx xof_state; + +#define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb) +void kyber_shake128_absorb(shake128incctx *s, + const uint8_t seed[KYBER_SYMBYTES], + uint8_t x, + uint8_t y); + +#define kyber_shake256_prf KYBER_NAMESPACE(kyber_shake256_prf) +void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce); + +#define kyber_shake256_rkprf KYBER_NAMESPACE(kyber_shake256_rkprf) +void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]); + +#define XOF_BLOCKBYTES SHAKE128_RATE + +#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) +#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES) +#define xof_init(STATE, SEED) shake128_inc_init(STATE) +#define xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED,
X, Y) +#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) +#define xof_release(STATE) shake128_inc_ctx_release(STATE) +#define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, NONCE) +#define rkprf(OUT, KEY, INPUT) kyber_shake256_rkprf(OUT, KEY, INPUT) + +#endif /* SYMMETRIC_H */ diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/verify.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/verify.c new file mode 100644 index 0000000000..ed4a6541f8 --- /dev/null +++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768-ipd_ref/verify.c @@ -0,0 +1,47 @@ +#include +#include +#include "verify.h" + +/************************************************* +* Name: verify +* +* Description: Compare two arrays for equality in constant time. +* +* Arguments: const uint8_t *a: pointer to first byte array +* const uint8_t *b: pointer to second byte array +* size_t len: length of the byte arrays +* +* Returns 0 if the byte arrays are equal, 1 otherwise +**************************************************/ +int verify(const uint8_t *a, const uint8_t *b, size_t len) +{ + size_t i; + uint8_t r = 0; + + for(i=0;i> 63; +} + +/************************************************* +* Name: cmov +* +* Description: Copy len bytes from x to r if b is 1; +* don't modify r if b is 0. Requires b to be in {0,1}; +* assumes two's complement representation of negative integers. +* Runs in constant time.
+* +* Arguments: uint8_t *r: pointer to output byte array +* const uint8_t *x: pointer to input byte array +* size_t len: Amount of bytes to be copied +* uint8_t b: Condition bit; has to be in {0,1} +**************************************************/ +void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) +{ + size_t i; + + b = -b; + for(i=0;i +#include +#include "params.h" + +#define verify KYBER_NAMESPACE(verify) +int verify(const uint8_t *a, const uint8_t *b, size_t len); + +#define cmov KYBER_NAMESPACE(cmov) +void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b); + +#endif diff --git a/src/oqsconfig.h.cmake b/src/oqsconfig.h.cmake index 8fba8a160f..4abe5c2aed 100644 --- a/src/oqsconfig.h.cmake +++ b/src/oqsconfig.h.cmake @@ -109,6 +109,17 @@ #cmakedefine OQS_ENABLE_KEM_kyber_1024_avx2 1 #cmakedefine OQS_ENABLE_KEM_kyber_1024_aarch64 1 +#cmakedefine OQS_ENABLE_KEM_ML_KEM 1 +#cmakedefine OQS_ENABLE_KEM_ml_kem_512_ipd 1 +#cmakedefine OQS_ENABLE_KEM_ml_kem_512 1 +#cmakedefine OQS_ENABLE_KEM_ml_kem_512_ipd_avx2 1 +#cmakedefine OQS_ENABLE_KEM_ml_kem_768_ipd 1 +#cmakedefine OQS_ENABLE_KEM_ml_kem_768 1 +#cmakedefine OQS_ENABLE_KEM_ml_kem_768_ipd_avx2 1 +#cmakedefine OQS_ENABLE_KEM_ml_kem_1024_ipd 1 +#cmakedefine OQS_ENABLE_KEM_ml_kem_1024 1 +#cmakedefine OQS_ENABLE_KEM_ml_kem_1024_ipd_avx2 1 + #cmakedefine OQS_ENABLE_SIG_DILITHIUM 1 #cmakedefine OQS_ENABLE_SIG_dilithium_2 1 #cmakedefine OQS_ENABLE_SIG_dilithium_2_avx2 1 @@ -120,6 +131,17 @@ #cmakedefine OQS_ENABLE_SIG_dilithium_5_avx2 1 #cmakedefine OQS_ENABLE_SIG_dilithium_5_aarch64 1 +#cmakedefine OQS_ENABLE_SIG_ML_DSA 1 +#cmakedefine OQS_ENABLE_SIG_ml_dsa_44_ipd 1 +#cmakedefine OQS_ENABLE_SIG_ml_dsa_44 1 +#cmakedefine OQS_ENABLE_SIG_ml_dsa_44_ipd_avx2 1 +#cmakedefine OQS_ENABLE_SIG_ml_dsa_65_ipd 1 +#cmakedefine OQS_ENABLE_SIG_ml_dsa_65 1 +#cmakedefine OQS_ENABLE_SIG_ml_dsa_65_ipd_avx2 1 +#cmakedefine OQS_ENABLE_SIG_ml_dsa_87_ipd 1 +#cmakedefine OQS_ENABLE_SIG_ml_dsa_87 1 +#cmakedefine 
OQS_ENABLE_SIG_ml_dsa_87_ipd_avx2 1 + #cmakedefine OQS_ENABLE_SIG_FALCON 1 #cmakedefine OQS_ENABLE_SIG_falcon_512 1 #cmakedefine OQS_ENABLE_SIG_falcon_512_avx2 1 diff --git a/src/sig/dilithium/sig_dilithium.h b/src/sig/dilithium/sig_dilithium.h index b63ea73b4b..2e24f58fe8 100644 --- a/src/sig/dilithium/sig_dilithium.h +++ b/src/sig/dilithium/sig_dilithium.h @@ -5,7 +5,7 @@ #include -#ifdef OQS_ENABLE_SIG_dilithium_2 +#if defined(OQS_ENABLE_SIG_dilithium_2) #define OQS_SIG_dilithium_2_length_public_key 1312 #define OQS_SIG_dilithium_2_length_secret_key 2528 #define OQS_SIG_dilithium_2_length_signature 2420 @@ -16,7 +16,7 @@ OQS_API OQS_STATUS OQS_SIG_dilithium_2_sign(uint8_t *signature, size_t *signatur OQS_API OQS_STATUS OQS_SIG_dilithium_2_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key); #endif -#ifdef OQS_ENABLE_SIG_dilithium_3 +#if defined(OQS_ENABLE_SIG_dilithium_3) #define OQS_SIG_dilithium_3_length_public_key 1952 #define OQS_SIG_dilithium_3_length_secret_key 4000 #define OQS_SIG_dilithium_3_length_signature 3293 @@ -27,7 +27,7 @@ OQS_API OQS_STATUS OQS_SIG_dilithium_3_sign(uint8_t *signature, size_t *signatur OQS_API OQS_STATUS OQS_SIG_dilithium_3_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key); #endif -#ifdef OQS_ENABLE_SIG_dilithium_5 +#if defined(OQS_ENABLE_SIG_dilithium_5) #define OQS_SIG_dilithium_5_length_public_key 2592 #define OQS_SIG_dilithium_5_length_secret_key 4864 #define OQS_SIG_dilithium_5_length_signature 4595 diff --git a/src/sig/falcon/sig_falcon.h b/src/sig/falcon/sig_falcon.h index 95b1d2bcfc..dfd43e88be 100644 --- a/src/sig/falcon/sig_falcon.h +++ b/src/sig/falcon/sig_falcon.h @@ -5,7 +5,7 @@ #include -#ifdef OQS_ENABLE_SIG_falcon_512 +#if defined(OQS_ENABLE_SIG_falcon_512) #define OQS_SIG_falcon_512_length_public_key 897 #define OQS_SIG_falcon_512_length_secret_key 1281 #define 
OQS_SIG_falcon_512_length_signature 666 @@ -16,7 +16,7 @@ OQS_API OQS_STATUS OQS_SIG_falcon_512_sign(uint8_t *signature, size_t *signature OQS_API OQS_STATUS OQS_SIG_falcon_512_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key); #endif -#ifdef OQS_ENABLE_SIG_falcon_1024 +#if defined(OQS_ENABLE_SIG_falcon_1024) #define OQS_SIG_falcon_1024_length_public_key 1793 #define OQS_SIG_falcon_1024_length_secret_key 2305 #define OQS_SIG_falcon_1024_length_signature 1280 diff --git a/src/sig/ml_dsa/CMakeLists.txt b/src/sig/ml_dsa/CMakeLists.txt new file mode 100644 index 0000000000..128cd0cf2a --- /dev/null +++ b/src/sig/ml_dsa/CMakeLists.txt @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: MIT + +# This file was generated by +# scripts/copy_from_upstream/copy_from_upstream.py + +set(_ML_DSA_OBJS "") # object files of every enabled ML-DSA variant are appended below + +if(OQS_ENABLE_SIG_ml_dsa_44_ipd) + add_library(ml_dsa_44_ipd_ref OBJECT sig_ml_dsa_44_ipd.c pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/ntt.c pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/packing.c pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/poly.c pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/polyvec.c pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/reduce.c pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/rounding.c pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/sign.c pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/symmetric-shake.c) + target_compile_options(ml_dsa_44_ipd_ref PUBLIC -DDILITHIUM_MODE=2) + target_include_directories(ml_dsa_44_ipd_ref PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref) + target_include_directories(ml_dsa_44_ipd_ref PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) + target_compile_options(ml_dsa_44_ipd_ref PUBLIC -DDILITHIUM_MODE=2) + set(_ML_DSA_OBJS ${_ML_DSA_OBJS} $<TARGET_OBJECTS:ml_dsa_44_ipd_ref>) +endif() + +if(OQS_ENABLE_SIG_ml_dsa_44_ipd_avx2) + add_library(ml_dsa_44_ipd_avx2 OBJECT pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/consts.c
pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/invntt.S pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/ntt.S pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/packing.c pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/pointwise.S pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/poly.c pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/polyvec.c pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/rejsample.c pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/rounding.c pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/shuffle.S pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/sign.c pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/symmetric-shake.c) + target_include_directories(ml_dsa_44_ipd_avx2 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2) + target_include_directories(ml_dsa_44_ipd_avx2 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) + target_compile_options(ml_dsa_44_ipd_avx2 PRIVATE -mavx2 -mpopcnt) + target_compile_options(ml_dsa_44_ipd_avx2 PUBLIC -DDILITHIUM_MODE=2) + set(_ML_DSA_OBJS ${_ML_DSA_OBJS} $<TARGET_OBJECTS:ml_dsa_44_ipd_avx2>) +endif() + +if(OQS_ENABLE_SIG_ml_dsa_65_ipd) + add_library(ml_dsa_65_ipd_ref OBJECT sig_ml_dsa_65_ipd.c pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/ntt.c pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/packing.c pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/poly.c pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/polyvec.c pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/reduce.c pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/rounding.c pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/sign.c pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/symmetric-shake.c) + target_compile_options(ml_dsa_65_ipd_ref PUBLIC -DDILITHIUM_MODE=3) + target_include_directories(ml_dsa_65_ipd_ref PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref) + target_include_directories(ml_dsa_65_ipd_ref PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) + target_compile_options(ml_dsa_65_ipd_ref PUBLIC
-DDILITHIUM_MODE=3) + set(_ML_DSA_OBJS ${_ML_DSA_OBJS} $<TARGET_OBJECTS:ml_dsa_65_ipd_ref>) +endif() + +if(OQS_ENABLE_SIG_ml_dsa_65_ipd_avx2) + add_library(ml_dsa_65_ipd_avx2 OBJECT pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/consts.c pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/invntt.S pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/ntt.S pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/packing.c pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/pointwise.S pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/poly.c pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/polyvec.c pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/rejsample.c pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/rounding.c pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/shuffle.S pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/sign.c pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/symmetric-shake.c) + target_include_directories(ml_dsa_65_ipd_avx2 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2) + target_include_directories(ml_dsa_65_ipd_avx2 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) + target_compile_options(ml_dsa_65_ipd_avx2 PRIVATE -mavx2 -mpopcnt) + target_compile_options(ml_dsa_65_ipd_avx2 PUBLIC -DDILITHIUM_MODE=3) + set(_ML_DSA_OBJS ${_ML_DSA_OBJS} $<TARGET_OBJECTS:ml_dsa_65_ipd_avx2>) +endif() + +if(OQS_ENABLE_SIG_ml_dsa_87_ipd) + add_library(ml_dsa_87_ipd_ref OBJECT sig_ml_dsa_87_ipd.c pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/ntt.c pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/packing.c pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/poly.c pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/polyvec.c pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/reduce.c pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/rounding.c pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/sign.c pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/symmetric-shake.c) + target_compile_options(ml_dsa_87_ipd_ref PUBLIC -DDILITHIUM_MODE=5) + target_include_directories(ml_dsa_87_ipd_ref PRIVATE
${CMAKE_CURRENT_LIST_DIR}/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref) + target_include_directories(ml_dsa_87_ipd_ref PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) + target_compile_options(ml_dsa_87_ipd_ref PUBLIC -DDILITHIUM_MODE=5) + set(_ML_DSA_OBJS ${_ML_DSA_OBJS} $<TARGET_OBJECTS:ml_dsa_87_ipd_ref>) +endif() + +if(OQS_ENABLE_SIG_ml_dsa_87_ipd_avx2) + add_library(ml_dsa_87_ipd_avx2 OBJECT pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/consts.c pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/invntt.S pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/ntt.S pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/packing.c pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/pointwise.S pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/poly.c pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/polyvec.c pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/rejsample.c pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/rounding.c pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/shuffle.S pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/sign.c pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/symmetric-shake.c) + target_include_directories(ml_dsa_87_ipd_avx2 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2) + target_include_directories(ml_dsa_87_ipd_avx2 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) + target_compile_options(ml_dsa_87_ipd_avx2 PRIVATE -mavx2 -mpopcnt) + target_compile_options(ml_dsa_87_ipd_avx2 PUBLIC -DDILITHIUM_MODE=5) + set(_ML_DSA_OBJS ${_ML_DSA_OBJS} $<TARGET_OBJECTS:ml_dsa_87_ipd_avx2>) +endif() + +set(ML_DSA_OBJS ${_ML_DSA_OBJS} PARENT_SCOPE) diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/LICENSE b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/LICENSE new file mode 100644 index 0000000000..cddfe615c6 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/LICENSE @@ -0,0 +1,7 @@ +Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/); +or Apache 2.0 License
(https://www.apache.org/licenses/LICENSE-2.0.html). + +For Keccak and the random number generator +we are using public-domain code from sources +and by authors listed in comments on top of +the respective files. diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/align.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/align.h new file mode 100644 index 0000000000..33fac1d968 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/align.h @@ -0,0 +1,19 @@ +#ifndef ALIGN_H +#define ALIGN_H + +#include +#include + +#define ALIGNED_UINT8(N) \ + union { \ + uint8_t coeffs[N]; \ + __m256i vec[(N+31)/32]; \ + } + +#define ALIGNED_INT32(N) \ + union { \ + int32_t coeffs[N]; \ + __m256i vec[(N+7)/8]; \ + } + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/api.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/api.h new file mode 100644 index 0000000000..55b637669d --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/api.h @@ -0,0 +1,88 @@ +#ifndef API_H +#define API_H + +#include +#include + +#define pqcrystals_dilithium2_PUBLICKEYBYTES 1312 +#define pqcrystals_dilithium2_SECRETKEYBYTES 2560 +#define pqcrystals_dilithium2_BYTES 2420 + +#define pqcrystals_dilithium2_avx2_PUBLICKEYBYTES pqcrystals_dilithium2_PUBLICKEYBYTES +#define pqcrystals_dilithium2_avx2_SECRETKEYBYTES pqcrystals_dilithium2_SECRETKEYBYTES +#define pqcrystals_dilithium2_avx2_BYTES pqcrystals_dilithium2_BYTES + +int pqcrystals_dilithium2_avx2_keypair(uint8_t *pk, uint8_t *sk); + +int pqcrystals_dilithium2_avx2_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium2_avx2(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium2_avx2_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int 
pqcrystals_dilithium2_avx2_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#define pqcrystals_dilithium3_PUBLICKEYBYTES 1952 +#define pqcrystals_dilithium3_SECRETKEYBYTES 4032 +#define pqcrystals_dilithium3_BYTES 3309 + +#define pqcrystals_dilithium3_avx2_PUBLICKEYBYTES pqcrystals_dilithium3_PUBLICKEYBYTES +#define pqcrystals_dilithium3_avx2_SECRETKEYBYTES pqcrystals_dilithium3_SECRETKEYBYTES +#define pqcrystals_dilithium3_avx2_BYTES pqcrystals_dilithium3_BYTES + +int pqcrystals_dilithium3_avx2_keypair(uint8_t *pk, uint8_t *sk); + +int pqcrystals_dilithium3_avx2_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium3_avx2(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium3_avx2_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int pqcrystals_dilithium3_avx2_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#define pqcrystals_dilithium5_PUBLICKEYBYTES 2592 +#define pqcrystals_dilithium5_SECRETKEYBYTES 4896 +#define pqcrystals_dilithium5_BYTES 4627 + +#define pqcrystals_dilithium5_avx2_PUBLICKEYBYTES pqcrystals_dilithium5_PUBLICKEYBYTES +#define pqcrystals_dilithium5_avx2_SECRETKEYBYTES pqcrystals_dilithium5_SECRETKEYBYTES +#define pqcrystals_dilithium5_avx2_BYTES pqcrystals_dilithium5_BYTES + +int pqcrystals_dilithium5_avx2_keypair(uint8_t *pk, uint8_t *sk); + +int pqcrystals_dilithium5_avx2_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium5_avx2(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium5_avx2_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int pqcrystals_dilithium5_avx2_open(uint8_t *m, size_t *mlen, + const 
uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/config.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/config.h new file mode 100644 index 0000000000..e59f81a5e8 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/config.h @@ -0,0 +1,27 @@ +#ifndef CONFIG_H +#define CONFIG_H + +//#define DILITHIUM_MODE 2 +#define DILITHIUM_RANDOMIZED_SIGNING +//#define USE_RDPMC +//#define DBENCH + +#ifndef DILITHIUM_MODE +#define DILITHIUM_MODE 2 +#endif + +#if DILITHIUM_MODE == 2 +#define CRYPTO_ALGNAME "ML-DSA-44-ipd" +#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_44_ipd_avx2 +#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_44_ipd_avx2_##s +#elif DILITHIUM_MODE == 3 +#define CRYPTO_ALGNAME "ML-DSA-65-ipd" +#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_65_ipd_avx2 +#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_65_ipd_avx2_##s +#elif DILITHIUM_MODE == 5 +#define CRYPTO_ALGNAME "ML-DSA-87-ipd" +#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_87_ipd_avx2 +#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_87_ipd_avx2_##s +#endif + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/consts.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/consts.c new file mode 100644 index 0000000000..414d99eceb --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/consts.c @@ -0,0 +1,100 @@ +#include +#include "params.h" +#include "consts.h" + +#define QINV 58728449 // q^(-1) mod 2^32 +#define MONT -4186625 // 2^32 mod q +#define DIV 41978 // mont^2/256 +#define DIV_QINV -8395782 + +const qdata_t qdata = {{ +#define _8XQ 0 + Q, Q, Q, Q, Q, Q, Q, Q, + +#define _8XQINV 8 + QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, + +#define _8XDIV_QINV 16 + DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, + +#define _8XDIV 24 + DIV, DIV, DIV, DIV, DIV, 
DIV, DIV, DIV, + +#define _ZETAS_QINV 32 + -151046689, 1830765815, -1929875198, -1927777021, 1640767044, 1477910808, 1612161320, 1640734244, + 308362795, 308362795, 308362795, 308362795, -1815525077, -1815525077, -1815525077, -1815525077, + -1374673747, -1374673747, -1374673747, -1374673747, -1091570561, -1091570561, -1091570561, -1091570561, + -1929495947, -1929495947, -1929495947, -1929495947, 515185417, 515185417, 515185417, 515185417, + -285697463, -285697463, -285697463, -285697463, 625853735, 625853735, 625853735, 625853735, + 1727305304, 1727305304, 2082316400, 2082316400, -1364982364, -1364982364, 858240904, 858240904, + 1806278032, 1806278032, 222489248, 222489248, -346752664, -346752664, 684667771, 684667771, + 1654287830, 1654287830, -878576921, -878576921, -1257667337, -1257667337, -748618600, -748618600, + 329347125, 329347125, 1837364258, 1837364258, -1443016191, -1443016191, -1170414139, -1170414139, + -1846138265, -1631226336, -1404529459, 1838055109, 1594295555, -1076973524, -1898723372, -594436433, + -202001019, -475984260, -561427818, 1797021249, -1061813248, 2059733581, -1661512036, -1104976547, + -1750224323, -901666090, 418987550, 1831915353, -1925356481, 992097815, 879957084, 2024403852, + 1484874664, -1636082790, -285388938, -1983539117, -1495136972, -950076368, -1714807468, -952438995, + -1574918427, 1350681039, -2143979939, 1599739335, -1285853323, -993005454, -1440787840, 568627424, + -783134478, -588790216, 289871779, -1262003603, 2135294594, -1018755525, -889861155, 1665705315, + 1321868265, 1225434135, -1784632064, 666258756, 675310538, -1555941048, -1999506068, -1499481951, + -695180180, -1375177022, 1777179795, 334803717, -178766299, -518252220, 1957047970, 1146323031, + -654783359, -1974159335, 1651689966, 140455867, -1039411342, 1955560694, 1529189038, -2131021878, + -247357819, 1518161567, -86965173, 1708872713, 1787797779, 1638590967, -120646188, -1669960606, + -916321552, 1155548552, 2143745726, 1210558298, -1261461890, 
-318346816, 628664287, -1729304568, + 1422575624, 1424130038, -1185330464, 235321234, 168022240, 1206536194, 985155484, -894060583, + -898413, -1363460238, -605900043, 2027833504, 14253662, 1014493059, 863641633, 1819892093, + 2124962073, -1223601433, -1920467227, -1637785316, -1536588520, 694382729, 235104446, -1045062172, + 831969619, -300448763, 756955444, -260312805, 1554794072, 1339088280, -2040058690, -853476187, + -2047270596, -1723816713, -1591599803, -440824168, 1119856484, 1544891539, 155290192, -973777462, + 991903578, 912367099, -44694137, 1176904444, -421552614, -818371958, 1747917558, -325927722, + 908452108, 1851023419, -1176751719, -1354528380, -72690498, -314284737, 985022747, 963438279, + -1078959975, 604552167, -1021949428, 608791570, 173440395, -2126092136, -1316619236, -1039370342, + 6087993, -110126092, 565464272, -1758099917, -1600929361, 879867909, -1809756372, 400711272, + 1363007700, 30313375, -326425360, 1683520342, -517299994, 2027935492, -1372618620, 128353682, + -1123881663, 137583815, -635454918, -642772911, 45766801, 671509323, -2070602178, 419615363, + 1216882040, -270590488, -1276805128, 371462360, -1357098057, -384158533, 827959816, -596344473, + 702390549, -279505433, -260424530, -71875110, -1208667171, -1499603926, 2036925262, -540420426, + 746144248, -1420958686, 2032221021, 1904936414, 1257750362, 1926727420, 1931587462, 1258381762, + 885133339, 1629985060, 1967222129, 6363718, -1287922800, 1136965286, 1779436847, 1116720494, + 1042326957, 1405999311, 713994583, 940195359, -1542497137, 2061661095, -883155599, 1726753853, + -1547952704, 394851342, 283780712, 776003547, 1123958025, 201262505, 1934038751, 374860238, + +#define _ZETAS 328 + -3975713, 25847, -2608894, -518909, 237124, -777960, -876248, 466468, + 1826347, 1826347, 1826347, 1826347, 2353451, 2353451, 2353451, 2353451, + -359251, -359251, -359251, -359251, -2091905, -2091905, -2091905, -2091905, + 3119733, 3119733, 3119733, 3119733, -2884855, -2884855, -2884855, 
-2884855, + 3111497, 3111497, 3111497, 3111497, 2680103, 2680103, 2680103, 2680103, + 2725464, 2725464, 1024112, 1024112, -1079900, -1079900, 3585928, 3585928, + -549488, -549488, -1119584, -1119584, 2619752, 2619752, -2108549, -2108549, + -2118186, -2118186, -3859737, -3859737, -1399561, -1399561, -3277672, -3277672, + 1757237, 1757237, -19422, -19422, 4010497, 4010497, 280005, 280005, + 2706023, 95776, 3077325, 3530437, -1661693, -3592148, -2537516, 3915439, + -3861115, -3043716, 3574422, -2867647, 3539968, -300467, 2348700, -539299, + -1699267, -1643818, 3505694, -3821735, 3507263, -2140649, -1600420, 3699596, + 811944, 531354, 954230, 3881043, 3900724, -2556880, 2071892, -2797779, + -3930395, -3677745, -1452451, 2176455, -1257611, -4083598, -3190144, -3632928, + 3412210, 2147896, -2967645, -411027, -671102, -22981, -381987, 1852771, + -3343383, 508951, 44288, 904516, -3724342, 1653064, 2389356, 759969, + 189548, 3159746, -2409325, 1315589, 1285669, -812732, -3019102, -3628969, + -1528703, -3041255, 3475950, -1585221, 1939314, -1000202, -3157330, 126922, + -983419, 2715295, -3693493, -2477047, -1228525, -1308169, 1349076, -1430430, + 264944, 3097992, -1100098, 3958618, -8578, -3249728, -210977, -1316856, + -3553272, -1851402, -177440, 1341330, -1584928, -1439742, -3881060, 3839961, + 2091667, -3342478, 266997, -3520352, 900702, 495491, -655327, -3556995, + 342297, 3437287, 2842341, 4055324, -3767016, -2994039, -1333058, -451100, + -1279661, 1500165, -542412, -2584293, -2013608, 1957272, -3183426, 810149, + -3038916, 2213111, -426683, -1667432, -2939036, 183443, -554416, 3937738, + 3407706, 2244091, 2434439, -3759364, 1859098, -1613174, -3122442, -525098, + 286988, -3342277, 2691481, 1247620, 1250494, 1869119, 1237275, 1312455, + 1917081, 777191, -2831860, -3724270, 2432395, 3369112, 162844, 1652634, + 3523897, -975884, 1723600, -1104333, -2235985, -976891, 3919660, 1400424, + 2316500, -2446433, -1235728, -1197226, 909542, -43260, 2031748, -768622, + -2437823, 
1735879, -2590150, 2486353, 2635921, 1903435, -3318210, 3306115, + -2546312, 2235880, -1671176, 594136, 2454455, 185531, 1616392, -3694233, + 3866901, 1717735, -1803090, -260646, -420899, 1612842, -48306, -846154, + 3817976, -3562462, 3513181, -3193378, 819034, -522500, 3207046, -3595838, + 4108315, 203044, 1265009, 1595974, -3548272, -1050970, -1430225, -1962642, + -1374803, 3406031, -1846953, -3776993, -164721, -1207385, 3014001, -1799107, + 269760, 472078, 1910376, -3833893, -2286327, -3545687, -1362209, 1976782, +}}; diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/consts.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/consts.h new file mode 100644 index 0000000000..930d2f09b3 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/consts.h @@ -0,0 +1,38 @@ +#ifndef CONSTS_H +#define CONSTS_H + +#include "params.h" + +#define _8XQ 0 +#define _8XQINV 8 +#define _8XDIV_QINV 16 +#define _8XDIV 24 +#define _ZETAS_QINV 32 +#define _ZETAS 328 + +/* The C ABI on MacOS exports all symbols with a leading + * underscore. This means that any symbols we refer to from + * C files (functions) can't be found, and all symbols we + * refer to from ASM also can't be found. 
+ * + * This define helps us get around this + */ +#if defined(__WIN32__) || defined(__APPLE__) +#define decorate(s) _##s +#define _cdecl(s) decorate(s) +#define cdecl(s) _cdecl(DILITHIUM_NAMESPACE(##s)) +#else +#define cdecl(s) DILITHIUM_NAMESPACE(##s) +#endif + +#ifndef __ASSEMBLER__ + +#include "align.h" + +typedef ALIGNED_INT32(624) qdata_t; + +#define qdata DILITHIUM_NAMESPACE(qdata) +extern const qdata_t qdata; + +#endif +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/invntt.S b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/invntt.S new file mode 100644 index 0000000000..3e9864c994 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/invntt.S @@ -0,0 +1,238 @@ +#include "consts.h" +.include "shuffle.inc" + +.macro butterfly l,h,zl0=1,zl1=1,zh0=2,zh1=2 +vpsubd %ymm\l,%ymm\h,%ymm12 +vpaddd %ymm\h,%ymm\l,%ymm\l + +vpmuldq %ymm\zl0,%ymm12,%ymm13 +vmovshdup %ymm12,%ymm\h +vpmuldq %ymm\zl1,%ymm\h,%ymm14 + +vpmuldq %ymm\zh0,%ymm12,%ymm12 +vpmuldq %ymm\zh1,%ymm\h,%ymm\h + +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 + +vpsubd %ymm13,%ymm12,%ymm12 +vpsubd %ymm14,%ymm\h,%ymm\h + +vmovshdup %ymm12,%ymm12 +vpblendd $0xAA,%ymm\h,%ymm12,%ymm\h +.endm + +.macro levels0t5 off +vmovdqa 256*\off+ 0(%rdi),%ymm4 +vmovdqa 256*\off+ 32(%rdi),%ymm5 +vmovdqa 256*\off+ 64(%rdi),%ymm6 +vmovdqa 256*\off+ 96(%rdi),%ymm7 +vmovdqa 256*\off+128(%rdi),%ymm8 +vmovdqa 256*\off+160(%rdi),%ymm9 +vmovdqa 256*\off+192(%rdi),%ymm10 +vmovdqa 256*\off+224(%rdi),%ymm11 + +/* level 0 */ +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-8)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-8)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 4,5,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-40)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-40)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 6,7,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-72)*4(%rsi),%ymm3 +vpermq 
$0x1B,(_ZETAS+296-8*\off-72)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 8,9,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-104)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-104)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 10,11,1,3,2,15 + +/* level 1 */ +vpermq $0x1B,(_ZETAS_QINV+168-8*\off-8)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+168-8*\off-8)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 4,6,1,3,2,15 +butterfly 5,7,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+168-8*\off-40)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+168-8*\off-40)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 8,10,1,3,2,15 +butterfly 9,11,1,3,2,15 + +/* level 2 */ +vpermq $0x1B,(_ZETAS_QINV+104-8*\off-8)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+104-8*\off-8)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 4,8,1,3,2,15 +butterfly 5,9,1,3,2,15 +butterfly 6,10,1,3,2,15 +butterfly 7,11,1,3,2,15 + +/* level 3 */ +shuffle2 4,5,3,5 +shuffle2 6,7,4,7 +shuffle2 8,9,6,9 +shuffle2 10,11,8,11 + +vpermq $0x1B,(_ZETAS_QINV+72-8*\off-8)*4(%rsi),%ymm1 +vpermq $0x1B,(_ZETAS+72-8*\off-8)*4(%rsi),%ymm2 +butterfly 3,5 +butterfly 4,7 +butterfly 6,9 +butterfly 8,11 + +/* level 4 */ +shuffle4 3,4,10,4 +shuffle4 6,8,3,8 +shuffle4 5,7,6,7 +shuffle4 9,11,5,11 + +vpermq $0x1B,(_ZETAS_QINV+40-8*\off-8)*4(%rsi),%ymm1 +vpermq $0x1B,(_ZETAS+40-8*\off-8)*4(%rsi),%ymm2 +butterfly 10,4 +butterfly 3,8 +butterfly 6,7 +butterfly 5,11 + +/* level 5 */ +shuffle8 10,3,9,3 +shuffle8 6,5,10,5 +shuffle8 4,8,6,8 +shuffle8 7,11,4,11 + +vpbroadcastd (_ZETAS_QINV+7-\off)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+7-\off)*4(%rsi),%ymm2 +butterfly 9,3 +butterfly 10,5 +butterfly 6,8 +butterfly 4,11 + +vmovdqa %ymm9,256*\off+ 0(%rdi) +vmovdqa %ymm10,256*\off+ 32(%rdi) +vmovdqa %ymm6,256*\off+ 64(%rdi) +vmovdqa %ymm4,256*\off+ 96(%rdi) +vmovdqa %ymm3,256*\off+128(%rdi) +vmovdqa %ymm5,256*\off+160(%rdi) +vmovdqa %ymm8,256*\off+192(%rdi) 
+vmovdqa %ymm11,256*\off+224(%rdi) +.endm + +.macro levels6t7 off +vmovdqa 0+32*\off(%rdi),%ymm4 +vmovdqa 128+32*\off(%rdi),%ymm5 +vmovdqa 256+32*\off(%rdi),%ymm6 +vmovdqa 384+32*\off(%rdi),%ymm7 +vmovdqa 512+32*\off(%rdi),%ymm8 +vmovdqa 640+32*\off(%rdi),%ymm9 +vmovdqa 768+32*\off(%rdi),%ymm10 +vmovdqa 896+32*\off(%rdi),%ymm11 + +/* level 6 */ +vpbroadcastd (_ZETAS_QINV+3)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+3)*4(%rsi),%ymm2 +butterfly 4,6 +butterfly 5,7 + +vpbroadcastd (_ZETAS_QINV+2)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+2)*4(%rsi),%ymm2 +butterfly 8,10 +butterfly 9,11 + +/* level 7 */ +vpbroadcastd (_ZETAS_QINV+0)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+0)*4(%rsi),%ymm2 + +butterfly 4,8 +butterfly 5,9 +butterfly 6,10 +butterfly 7,11 + +vmovdqa %ymm8,512+32*\off(%rdi) +vmovdqa %ymm9,640+32*\off(%rdi) +vmovdqa %ymm10,768+32*\off(%rdi) +vmovdqa %ymm11,896+32*\off(%rdi) + +vmovdqa (_8XDIV_QINV)*4(%rsi),%ymm1 +vmovdqa (_8XDIV)*4(%rsi),%ymm2 +vpmuldq %ymm1,%ymm4,%ymm12 +vpmuldq %ymm1,%ymm5,%ymm13 +vmovshdup %ymm4,%ymm8 +vmovshdup %ymm5,%ymm9 +vpmuldq %ymm1,%ymm8,%ymm14 +vpmuldq %ymm1,%ymm9,%ymm15 +vpmuldq %ymm2,%ymm4,%ymm4 +vpmuldq %ymm2,%ymm5,%ymm5 +vpmuldq %ymm2,%ymm8,%ymm8 +vpmuldq %ymm2,%ymm9,%ymm9 +vpmuldq %ymm0,%ymm12,%ymm12 +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 +vpmuldq %ymm0,%ymm15,%ymm15 +vpsubd %ymm12,%ymm4,%ymm4 +vpsubd %ymm13,%ymm5,%ymm5 +vpsubd %ymm14,%ymm8,%ymm8 +vpsubd %ymm15,%ymm9,%ymm9 +vmovshdup %ymm4,%ymm4 +vmovshdup %ymm5,%ymm5 +vpblendd $0xAA,%ymm8,%ymm4,%ymm4 +vpblendd $0xAA,%ymm9,%ymm5,%ymm5 + +vpmuldq %ymm1,%ymm6,%ymm12 +vpmuldq %ymm1,%ymm7,%ymm13 +vmovshdup %ymm6,%ymm8 +vmovshdup %ymm7,%ymm9 +vpmuldq %ymm1,%ymm8,%ymm14 +vpmuldq %ymm1,%ymm9,%ymm15 +vpmuldq %ymm2,%ymm6,%ymm6 +vpmuldq %ymm2,%ymm7,%ymm7 +vpmuldq %ymm2,%ymm8,%ymm8 +vpmuldq %ymm2,%ymm9,%ymm9 +vpmuldq %ymm0,%ymm12,%ymm12 +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 +vpmuldq %ymm0,%ymm15,%ymm15 +vpsubd %ymm12,%ymm6,%ymm6 +vpsubd %ymm13,%ymm7,%ymm7 +vpsubd 
%ymm14,%ymm8,%ymm8 +vpsubd %ymm15,%ymm9,%ymm9 +vmovshdup %ymm6,%ymm6 +vmovshdup %ymm7,%ymm7 +vpblendd $0xAA,%ymm8,%ymm6,%ymm6 +vpblendd $0xAA,%ymm9,%ymm7,%ymm7 + +vmovdqa %ymm4, 0+32*\off(%rdi) +vmovdqa %ymm5,128+32*\off(%rdi) +vmovdqa %ymm6,256+32*\off(%rdi) +vmovdqa %ymm7,384+32*\off(%rdi) +.endm + +.text +.global cdecl(invntt_avx) +cdecl(invntt_avx): +vmovdqa _8XQ*4(%rsi),%ymm0 + +levels0t5 0 +levels0t5 1 +levels0t5 2 +levels0t5 3 + +levels6t7 0 +levels6t7 1 +levels6t7 2 +levels6t7 3 + +ret diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/ntt.S b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/ntt.S new file mode 100644 index 0000000000..38415de893 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/ntt.S @@ -0,0 +1,197 @@ +#include "consts.h" +.include "shuffle.inc" + +.macro butterfly l,h,zl0=1,zl1=1,zh0=2,zh1=2 +vpmuldq %ymm\zl0,%ymm\h,%ymm13 +vmovshdup %ymm\h,%ymm12 +vpmuldq %ymm\zl1,%ymm12,%ymm14 + +vpmuldq %ymm\zh0,%ymm\h,%ymm\h +vpmuldq %ymm\zh1,%ymm12,%ymm12 + +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 + +vmovshdup %ymm\h,%ymm\h +vpblendd $0xAA,%ymm12,%ymm\h,%ymm\h + +vpsubd %ymm\h,%ymm\l,%ymm12 +vpaddd %ymm\h,%ymm\l,%ymm\l + +vmovshdup %ymm13,%ymm13 +vpblendd $0xAA,%ymm14,%ymm13,%ymm13 + +vpaddd %ymm13,%ymm12,%ymm\h +vpsubd %ymm13,%ymm\l,%ymm\l +.endm + +.macro levels0t1 off +/* level 0 */ +vpbroadcastd (_ZETAS_QINV+1)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+1)*4(%rsi),%ymm2 + +vmovdqa 0+32*\off(%rdi),%ymm4 +vmovdqa 128+32*\off(%rdi),%ymm5 +vmovdqa 256+32*\off(%rdi),%ymm6 +vmovdqa 384+32*\off(%rdi),%ymm7 +vmovdqa 512+32*\off(%rdi),%ymm8 +vmovdqa 640+32*\off(%rdi),%ymm9 +vmovdqa 768+32*\off(%rdi),%ymm10 +vmovdqa 896+32*\off(%rdi),%ymm11 + +butterfly 4,8 +butterfly 5,9 +butterfly 6,10 +butterfly 7,11 + +/* level 1 */ +vpbroadcastd (_ZETAS_QINV+2)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+2)*4(%rsi),%ymm2 +butterfly 4,6 +butterfly 5,7 + +vpbroadcastd 
(_ZETAS_QINV+3)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+3)*4(%rsi),%ymm2 +butterfly 8,10 +butterfly 9,11 + +vmovdqa %ymm4, 0+32*\off(%rdi) +vmovdqa %ymm5,128+32*\off(%rdi) +vmovdqa %ymm6,256+32*\off(%rdi) +vmovdqa %ymm7,384+32*\off(%rdi) +vmovdqa %ymm8,512+32*\off(%rdi) +vmovdqa %ymm9,640+32*\off(%rdi) +vmovdqa %ymm10,768+32*\off(%rdi) +vmovdqa %ymm11,896+32*\off(%rdi) +.endm + +.macro levels2t7 off +/* level 2 */ +vmovdqa 256*\off+ 0(%rdi),%ymm4 +vmovdqa 256*\off+ 32(%rdi),%ymm5 +vmovdqa 256*\off+ 64(%rdi),%ymm6 +vmovdqa 256*\off+ 96(%rdi),%ymm7 +vmovdqa 256*\off+128(%rdi),%ymm8 +vmovdqa 256*\off+160(%rdi),%ymm9 +vmovdqa 256*\off+192(%rdi),%ymm10 +vmovdqa 256*\off+224(%rdi),%ymm11 + +vpbroadcastd (_ZETAS_QINV+4+\off)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+4+\off)*4(%rsi),%ymm2 + +butterfly 4,8 +butterfly 5,9 +butterfly 6,10 +butterfly 7,11 + +shuffle8 4,8,3,8 +shuffle8 5,9,4,9 +shuffle8 6,10,5,10 +shuffle8 7,11,6,11 + +/* level 3 */ +vmovdqa (_ZETAS_QINV+8+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+8+8*\off)*4(%rsi),%ymm2 + +butterfly 3,5 +butterfly 8,10 +butterfly 4,6 +butterfly 9,11 + +shuffle4 3,5,7,5 +shuffle4 8,10,3,10 +shuffle4 4,6,8,6 +shuffle4 9,11,4,11 + +/* level 4 */ +vmovdqa (_ZETAS_QINV+40+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+40+8*\off)*4(%rsi),%ymm2 + +butterfly 7,8 +butterfly 5,6 +butterfly 3,4 +butterfly 10,11 + +shuffle2 7,8,9,8 +shuffle2 5,6,7,6 +shuffle2 3,4,5,4 +shuffle2 10,11,3,11 + +/* level 5 */ +vmovdqa (_ZETAS_QINV+72+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+72+8*\off)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 + +butterfly 9,5,1,10,2,15 +butterfly 8,4,1,10,2,15 +butterfly 7,3,1,10,2,15 +butterfly 6,11,1,10,2,15 + +/* level 6 */ +vmovdqa (_ZETAS_QINV+104+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+104+8*\off)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 9,7,1,10,2,15 +butterfly 8,6,1,10,2,15 + +vmovdqa (_ZETAS_QINV+104+8*\off+32)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+104+8*\off+32)*4(%rsi),%ymm2 +vpsrlq 
$32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 5,3,1,10,2,15 +butterfly 4,11,1,10,2,15 + +/* level 7 */ +vmovdqa (_ZETAS_QINV+168+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 9,8,1,10,2,15 + +vmovdqa (_ZETAS_QINV+168+8*\off+32)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off+32)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 7,6,1,10,2,15 + +vmovdqa (_ZETAS_QINV+168+8*\off+64)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off+64)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 5,4,1,10,2,15 + +vmovdqa (_ZETAS_QINV+168+8*\off+96)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off+96)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 3,11,1,10,2,15 + +vmovdqa %ymm9,256*\off+ 0(%rdi) +vmovdqa %ymm8,256*\off+ 32(%rdi) +vmovdqa %ymm7,256*\off+ 64(%rdi) +vmovdqa %ymm6,256*\off+ 96(%rdi) +vmovdqa %ymm5,256*\off+128(%rdi) +vmovdqa %ymm4,256*\off+160(%rdi) +vmovdqa %ymm3,256*\off+192(%rdi) +vmovdqa %ymm11,256*\off+224(%rdi) +.endm + +.text +.global cdecl(ntt_avx) +cdecl(ntt_avx): +vmovdqa _8XQ*4(%rsi),%ymm0 + +levels0t1 0 +levels0t1 1 +levels0t1 2 +levels0t1 3 + +levels2t7 0 +levels2t7 1 +levels2t7 2 +levels2t7 3 + +ret + diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/ntt.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/ntt.h new file mode 100644 index 0000000000..0c4fbdd342 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/ntt.h @@ -0,0 +1,19 @@ +#ifndef NTT_H +#define NTT_H + +#include + +#define ntt_avx DILITHIUM_NAMESPACE(ntt_avx) +void ntt_avx(__m256i *a, const __m256i *qdata); +#define invntt_avx DILITHIUM_NAMESPACE(invntt_avx) +void invntt_avx(__m256i *a, const __m256i *qdata); + +#define nttunpack_avx DILITHIUM_NAMESPACE(nttunpack_avx) +void nttunpack_avx(__m256i *a); + +#define pointwise_avx DILITHIUM_NAMESPACE(pointwise_avx) +void 
pointwise_avx(__m256i *c, const __m256i *a, const __m256i *b, const __m256i *qdata); +#define pointwise_acc_avx DILITHIUM_NAMESPACE(pointwise_acc_avx) +void pointwise_acc_avx(__m256i *c, const __m256i *a, const __m256i *b, const __m256i *qdata); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/packing.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/packing.c new file mode 100644 index 0000000000..039a686da3 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/packing.c @@ -0,0 +1,237 @@ +#include "params.h" +#include "packing.h" +#include "polyvec.h" +#include "poly.h" + +/************************************************* +* Name: pack_pk +* +* Description: Bit-pack public key pk = (rho, t1). +* +* Arguments: - uint8_t pk[]: output byte array +* - const uint8_t rho[]: byte array containing rho +* - const polyveck *t1: pointer to vector t1 +**************************************************/ +void pack_pk(uint8_t pk[CRYPTO_PUBLICKEYBYTES], + const uint8_t rho[SEEDBYTES], + const polyveck *t1) +{ + unsigned int i; + + for(i = 0; i < SEEDBYTES; ++i) + pk[i] = rho[i]; + pk += SEEDBYTES; + + for(i = 0; i < K; ++i) + polyt1_pack(pk + i*POLYT1_PACKEDBYTES, &t1->vec[i]); +} + +/************************************************* +* Name: unpack_pk +* +* Description: Unpack public key pk = (rho, t1). 
+* +* Arguments: - const uint8_t rho[]: output byte array for rho +* - const polyveck *t1: pointer to output vector t1 +* - uint8_t pk[]: byte array containing bit-packed pk +**************************************************/ +void unpack_pk(uint8_t rho[SEEDBYTES], + polyveck *t1, + const uint8_t pk[CRYPTO_PUBLICKEYBYTES]) +{ + unsigned int i; + + for(i = 0; i < SEEDBYTES; ++i) + rho[i] = pk[i]; + pk += SEEDBYTES; + + for(i = 0; i < K; ++i) + polyt1_unpack(&t1->vec[i], pk + i*POLYT1_PACKEDBYTES); +} + +/************************************************* +* Name: pack_sk +* +* Description: Bit-pack secret key sk = (rho, tr, key, t0, s1, s2). +* +* Arguments: - uint8_t sk[]: output byte array +* - const uint8_t rho[]: byte array containing rho +* - const uint8_t tr[]: byte array containing tr +* - const uint8_t key[]: byte array containing key +* - const polyveck *t0: pointer to vector t0 +* - const polyvecl *s1: pointer to vector s1 +* - const polyveck *s2: pointer to vector s2 +**************************************************/ +void pack_sk(uint8_t sk[CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[TRBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2) +{ + unsigned int i; + + for(i = 0; i < SEEDBYTES; ++i) + sk[i] = rho[i]; + sk += SEEDBYTES; + + for(i = 0; i < SEEDBYTES; ++i) + sk[i] = key[i]; + sk += SEEDBYTES; + + for(i = 0; i < TRBYTES; ++i) + sk[i] = tr[i]; + sk += TRBYTES; + + for(i = 0; i < L; ++i) + polyeta_pack(sk + i*POLYETA_PACKEDBYTES, &s1->vec[i]); + sk += L*POLYETA_PACKEDBYTES; + + for(i = 0; i < K; ++i) + polyeta_pack(sk + i*POLYETA_PACKEDBYTES, &s2->vec[i]); + sk += K*POLYETA_PACKEDBYTES; + + for(i = 0; i < K; ++i) + polyt0_pack(sk + i*POLYT0_PACKEDBYTES, &t0->vec[i]); +} + +/************************************************* +* Name: unpack_sk +* +* Description: Unpack secret key sk = (rho, tr, key, t0, s1, s2). 
+* +* Arguments: - const uint8_t rho[]: output byte array for rho +* - const uint8_t tr[]: output byte array for tr +* - const uint8_t key[]: output byte array for key +* - const polyveck *t0: pointer to output vector t0 +* - const polyvecl *s1: pointer to output vector s1 +* - const polyveck *s2: pointer to output vector s2 +* - uint8_t sk[]: byte array containing bit-packed sk +**************************************************/ +void unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[TRBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[CRYPTO_SECRETKEYBYTES]) +{ + unsigned int i; + + for(i = 0; i < SEEDBYTES; ++i) + rho[i] = sk[i]; + sk += SEEDBYTES; + + for(i = 0; i < SEEDBYTES; ++i) + key[i] = sk[i]; + sk += SEEDBYTES; + + for(i = 0; i < TRBYTES; ++i) + tr[i] = sk[i]; + sk += TRBYTES; + + for(i=0; i < L; ++i) + polyeta_unpack(&s1->vec[i], sk + i*POLYETA_PACKEDBYTES); + sk += L*POLYETA_PACKEDBYTES; + + for(i=0; i < K; ++i) + polyeta_unpack(&s2->vec[i], sk + i*POLYETA_PACKEDBYTES); + sk += K*POLYETA_PACKEDBYTES; + + for(i=0; i < K; ++i) + polyt0_unpack(&t0->vec[i], sk + i*POLYT0_PACKEDBYTES); +} + +/************************************************* +* Name: pack_sig +* +* Description: Bit-pack signature sig = (c, z, h). 
+* +* Arguments: - uint8_t sig[]: output byte array +* - const uint8_t *c: pointer to challenge hash of length CTILDEBYTES +* - const polyvecl *z: pointer to vector z +* - const polyveck *h: pointer to hint vector h +**************************************************/ +void pack_sig(uint8_t sig[CRYPTO_BYTES], + const uint8_t c[CTILDEBYTES], + const polyvecl *z, + const polyveck *h) +{ + unsigned int i, j, k; + + for(i=0; i < CTILDEBYTES; ++i) + sig[i] = c[i]; + sig += CTILDEBYTES; + + for(i = 0; i < L; ++i) + polyz_pack(sig + i*POLYZ_PACKEDBYTES, &z->vec[i]); + sig += L*POLYZ_PACKEDBYTES; + + /* Encode h */ + for(i = 0; i < OMEGA + K; ++i) + sig[i] = 0; + + k = 0; + for(i = 0; i < K; ++i) { + for(j = 0; j < N; ++j) + if(h->vec[i].coeffs[j] != 0) + sig[k++] = j; + + sig[OMEGA + i] = k; + } +} + +/************************************************* +* Name: unpack_sig +* +* Description: Unpack signature sig = (c, z, h). +* +* Arguments: - uint8_t *c: pointer to output challenge hash +* - polyvecl *z: pointer to output vector z +* - polyveck *h: pointer to output hint vector h +* - const uint8_t sig[]: byte array containing +* bit-packed signature +* +* Returns 1 in case of malformed signature; otherwise 0.
+**************************************************/ +int unpack_sig(uint8_t c[CTILDEBYTES], + polyvecl *z, + polyveck *h, + const uint8_t sig[CRYPTO_BYTES]) +{ + unsigned int i, j, k; + + for(i = 0; i < CTILDEBYTES; ++i) + c[i] = sig[i]; + sig += CTILDEBYTES; + + for(i = 0; i < L; ++i) + polyz_unpack(&z->vec[i], sig + i*POLYZ_PACKEDBYTES); + sig += L*POLYZ_PACKEDBYTES; + + /* Decode h */ + k = 0; + for(i = 0; i < K; ++i) { + for(j = 0; j < N; ++j) + h->vec[i].coeffs[j] = 0; + + if(sig[OMEGA + i] < k || sig[OMEGA + i] > OMEGA) + return 1; + + for(j = k; j < sig[OMEGA + i]; ++j) { + /* Coefficients are ordered for strong unforgeability */ + if(j > k && sig[j] <= sig[j-1]) return 1; + h->vec[i].coeffs[sig[j]] = 1; + } + + k = sig[OMEGA + i]; + } + + /* Extra indices are zero for strong unforgeability */ + for(j = k; j < OMEGA; ++j) + if(sig[j]) + return 1; + + return 0; +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/packing.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/packing.h new file mode 100644 index 0000000000..8e47728ce3 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/packing.h @@ -0,0 +1,38 @@ +#ifndef PACKING_H +#define PACKING_H + +#include +#include "params.h" +#include "polyvec.h" + +#define pack_pk DILITHIUM_NAMESPACE(pack_pk) +void pack_pk(uint8_t pk[CRYPTO_PUBLICKEYBYTES], const uint8_t rho[SEEDBYTES], const polyveck *t1); + +#define pack_sk DILITHIUM_NAMESPACE(pack_sk) +void pack_sk(uint8_t sk[CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[TRBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2); + +#define pack_sig DILITHIUM_NAMESPACE(pack_sig) +void pack_sig(uint8_t sig[CRYPTO_BYTES], const uint8_t c[CTILDEBYTES], const polyvecl *z, const polyveck *h); + +#define unpack_pk DILITHIUM_NAMESPACE(unpack_pk) +void unpack_pk(uint8_t rho[SEEDBYTES], polyveck *t1, const uint8_t 
pk[CRYPTO_PUBLICKEYBYTES]); + +#define unpack_sk DILITHIUM_NAMESPACE(unpack_sk) +void unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[TRBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[CRYPTO_SECRETKEYBYTES]); + +#define unpack_sig DILITHIUM_NAMESPACE(unpack_sig) +int unpack_sig(uint8_t c[CTILDEBYTES], polyvecl *z, polyveck *h, const uint8_t sig[CRYPTO_BYTES]); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/params.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/params.h new file mode 100644 index 0000000000..1e8a7b505b --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/params.h @@ -0,0 +1,80 @@ +#ifndef PARAMS_H +#define PARAMS_H + +#include "config.h" + +#define SEEDBYTES 32 +#define CRHBYTES 64 +#define TRBYTES 64 +#define RNDBYTES 32 +#define N 256 +#define Q 8380417 +#define D 13 +#define ROOT_OF_UNITY 1753 + +#if DILITHIUM_MODE == 2 +#define K 4 +#define L 4 +#define ETA 2 +#define TAU 39 +#define BETA 78 +#define GAMMA1 (1 << 17) +#define GAMMA2 ((Q-1)/88) +#define OMEGA 80 +#define CTILDEBYTES 32 + +#elif DILITHIUM_MODE == 3 +#define K 6 +#define L 5 +#define ETA 4 +#define TAU 49 +#define BETA 196 +#define GAMMA1 (1 << 19) +#define GAMMA2 ((Q-1)/32) +#define OMEGA 55 +#define CTILDEBYTES 48 + +#elif DILITHIUM_MODE == 5 +#define K 8 +#define L 7 +#define ETA 2 +#define TAU 60 +#define BETA 120 +#define GAMMA1 (1 << 19) +#define GAMMA2 ((Q-1)/32) +#define OMEGA 75 +#define CTILDEBYTES 64 + +#endif + +#define POLYT1_PACKEDBYTES 320 +#define POLYT0_PACKEDBYTES 416 +#define POLYVECH_PACKEDBYTES (OMEGA + K) + +#if GAMMA1 == (1 << 17) +#define POLYZ_PACKEDBYTES 576 +#elif GAMMA1 == (1 << 19) +#define POLYZ_PACKEDBYTES 640 +#endif + +#if GAMMA2 == (Q-1)/88 +#define POLYW1_PACKEDBYTES 192 +#elif GAMMA2 == (Q-1)/32 +#define POLYW1_PACKEDBYTES 128 +#endif + +#if ETA == 2 +#define POLYETA_PACKEDBYTES 96 +#elif ETA == 4 +#define 
POLYETA_PACKEDBYTES 128 +#endif + +#define CRYPTO_PUBLICKEYBYTES (SEEDBYTES + K*POLYT1_PACKEDBYTES) +#define CRYPTO_SECRETKEYBYTES (2*SEEDBYTES \ + + TRBYTES \ + + L*POLYETA_PACKEDBYTES \ + + K*POLYETA_PACKEDBYTES \ + + K*POLYT0_PACKEDBYTES) +#define CRYPTO_BYTES (CTILDEBYTES + L*POLYZ_PACKEDBYTES + POLYVECH_PACKEDBYTES) + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/pointwise.S b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/pointwise.S new file mode 100644 index 0000000000..ae7ff7995c --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/pointwise.S @@ -0,0 +1,211 @@ +#include "params.h" +#include "consts.h" + +.text +.global cdecl(pointwise_avx) +cdecl(pointwise_avx): +#consts +vmovdqa _8XQINV*4(%rcx),%ymm0 +vmovdqa _8XQ*4(%rcx),%ymm1 + +xor %eax,%eax +_looptop1: +#load +vmovdqa (%rsi),%ymm2 +vmovdqa 32(%rsi),%ymm4 +vmovdqa 64(%rsi),%ymm6 +vmovdqa (%rdx),%ymm10 +vmovdqa 32(%rdx),%ymm12 +vmovdqa 64(%rdx),%ymm14 +vpsrlq $32,%ymm2,%ymm3 +vpsrlq $32,%ymm4,%ymm5 +vmovshdup %ymm6,%ymm7 +vpsrlq $32,%ymm10,%ymm11 +vpsrlq $32,%ymm12,%ymm13 +vmovshdup %ymm14,%ymm15 + +#mul +vpmuldq %ymm2,%ymm10,%ymm2 +vpmuldq %ymm3,%ymm11,%ymm3 +vpmuldq %ymm4,%ymm12,%ymm4 +vpmuldq %ymm5,%ymm13,%ymm5 +vpmuldq %ymm6,%ymm14,%ymm6 +vpmuldq %ymm7,%ymm15,%ymm7 + +#reduce +vpmuldq %ymm0,%ymm2,%ymm10 +vpmuldq %ymm0,%ymm3,%ymm11 +vpmuldq %ymm0,%ymm4,%ymm12 +vpmuldq %ymm0,%ymm5,%ymm13 +vpmuldq %ymm0,%ymm6,%ymm14 +vpmuldq %ymm0,%ymm7,%ymm15 +vpmuldq %ymm1,%ymm10,%ymm10 +vpmuldq %ymm1,%ymm11,%ymm11 +vpmuldq %ymm1,%ymm12,%ymm12 +vpmuldq %ymm1,%ymm13,%ymm13 +vpmuldq %ymm1,%ymm14,%ymm14 +vpmuldq %ymm1,%ymm15,%ymm15 +vpsubq %ymm10,%ymm2,%ymm2 +vpsubq %ymm11,%ymm3,%ymm3 +vpsubq %ymm12,%ymm4,%ymm4 +vpsubq %ymm13,%ymm5,%ymm5 +vpsubq %ymm14,%ymm6,%ymm6 +vpsubq %ymm15,%ymm7,%ymm7 +vpsrlq $32,%ymm2,%ymm2 +vpsrlq $32,%ymm4,%ymm4 +vmovshdup %ymm6,%ymm6 + +#store +vpblendd $0xAA,%ymm3,%ymm2,%ymm2 +vpblendd 
$0xAA,%ymm5,%ymm4,%ymm4 +vpblendd $0xAA,%ymm7,%ymm6,%ymm6 +vmovdqa %ymm2,(%rdi) +vmovdqa %ymm4,32(%rdi) +vmovdqa %ymm6,64(%rdi) + +add $96,%rdi +add $96,%rsi +add $96,%rdx +add $1,%eax +cmp $10,%eax +jb _looptop1 + +vmovdqa (%rsi),%ymm2 +vmovdqa 32(%rsi),%ymm4 +vmovdqa (%rdx),%ymm10 +vmovdqa 32(%rdx),%ymm12 +vpsrlq $32,%ymm2,%ymm3 +vpsrlq $32,%ymm4,%ymm5 +vmovshdup %ymm10,%ymm11 +vmovshdup %ymm12,%ymm13 + +#mul +vpmuldq %ymm2,%ymm10,%ymm2 +vpmuldq %ymm3,%ymm11,%ymm3 +vpmuldq %ymm4,%ymm12,%ymm4 +vpmuldq %ymm5,%ymm13,%ymm5 + +#reduce +vpmuldq %ymm0,%ymm2,%ymm10 +vpmuldq %ymm0,%ymm3,%ymm11 +vpmuldq %ymm0,%ymm4,%ymm12 +vpmuldq %ymm0,%ymm5,%ymm13 +vpmuldq %ymm1,%ymm10,%ymm10 +vpmuldq %ymm1,%ymm11,%ymm11 +vpmuldq %ymm1,%ymm12,%ymm12 +vpmuldq %ymm1,%ymm13,%ymm13 +vpsubq %ymm10,%ymm2,%ymm2 +vpsubq %ymm11,%ymm3,%ymm3 +vpsubq %ymm12,%ymm4,%ymm4 +vpsubq %ymm13,%ymm5,%ymm5 +vpsrlq $32,%ymm2,%ymm2 +vmovshdup %ymm4,%ymm4 + +#store +vpblendd $0x55,%ymm2,%ymm3,%ymm2 +vpblendd $0x55,%ymm4,%ymm5,%ymm4 +vmovdqa %ymm2,(%rdi) +vmovdqa %ymm4,32(%rdi) + +ret + +.macro pointwise off +#load +vmovdqa \off(%rsi),%ymm6 +vmovdqa \off+32(%rsi),%ymm8 +vmovdqa \off(%rdx),%ymm10 +vmovdqa \off+32(%rdx),%ymm12 +vpsrlq $32,%ymm6,%ymm7 +vpsrlq $32,%ymm8,%ymm9 +vmovshdup %ymm10,%ymm11 +vmovshdup %ymm12,%ymm13 + +#mul +vpmuldq %ymm6,%ymm10,%ymm6 +vpmuldq %ymm7,%ymm11,%ymm7 +vpmuldq %ymm8,%ymm12,%ymm8 +vpmuldq %ymm9,%ymm13,%ymm9 +.endm + +.macro acc +vpaddq %ymm6,%ymm2,%ymm2 +vpaddq %ymm7,%ymm3,%ymm3 +vpaddq %ymm8,%ymm4,%ymm4 +vpaddq %ymm9,%ymm5,%ymm5 +.endm + +.global cdecl(pointwise_acc_avx) +cdecl(pointwise_acc_avx): +#consts +vmovdqa _8XQINV*4(%rcx),%ymm0 +vmovdqa _8XQ*4(%rcx),%ymm1 + +xor %eax,%eax +_looptop2: +pointwise 0 + +#mov +vmovdqa %ymm6,%ymm2 +vmovdqa %ymm7,%ymm3 +vmovdqa %ymm8,%ymm4 +vmovdqa %ymm9,%ymm5 + +pointwise 1024 +acc + +#if L >= 3 +pointwise 2048 +acc +#endif + +#if L >= 4 +pointwise 3072 +acc +#endif + +#if L >= 5 +pointwise 4096 +acc +#endif + +#if L >= 6 +pointwise 5120 +acc 
+#endif + +#if L >= 7 +pointwise 6144 +acc +#endif + +#reduce +vpmuldq %ymm0,%ymm2,%ymm6 +vpmuldq %ymm0,%ymm3,%ymm7 +vpmuldq %ymm0,%ymm4,%ymm8 +vpmuldq %ymm0,%ymm5,%ymm9 +vpmuldq %ymm1,%ymm6,%ymm6 +vpmuldq %ymm1,%ymm7,%ymm7 +vpmuldq %ymm1,%ymm8,%ymm8 +vpmuldq %ymm1,%ymm9,%ymm9 +vpsubq %ymm6,%ymm2,%ymm2 +vpsubq %ymm7,%ymm3,%ymm3 +vpsubq %ymm8,%ymm4,%ymm4 +vpsubq %ymm9,%ymm5,%ymm5 +vpsrlq $32,%ymm2,%ymm2 +vmovshdup %ymm4,%ymm4 + +#store +vpblendd $0xAA,%ymm3,%ymm2,%ymm2 +vpblendd $0xAA,%ymm5,%ymm4,%ymm4 + +vmovdqa %ymm2,(%rdi) +vmovdqa %ymm4,32(%rdi) + +add $64,%rsi +add $64,%rdx +add $64,%rdi +add $1,%eax +cmp $16,%eax +jb _looptop2 + +ret diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/poly.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/poly.c new file mode 100644 index 0000000000..25d36828ad --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/poly.c @@ -0,0 +1,1138 @@ +#include +#include +#include +#include "align.h" +#include "params.h" +#include "poly.h" +#include "ntt.h" +#include "rounding.h" +#include "rejsample.h" +#include "consts.h" +#include "symmetric.h" +#include "fips202x4.h" + +#ifdef DBENCH +#include "test/cpucycles.h" +extern const uint64_t timing_overhead; +extern uint64_t *tred, *tadd, *tmul, *tround, *tsample, *tpack; +#define DBENCH_START() uint64_t time = cpucycles() +#define DBENCH_STOP(t) t += cpucycles() - time - timing_overhead +#else +#define DBENCH_START() +#define DBENCH_STOP(t) +#endif + +#define _mm256_blendv_epi32(a,b,mask) \ + _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(a), \ + _mm256_castsi256_ps(b), \ + _mm256_castsi256_ps(mask))) + +/************************************************* +* Name: poly_reduce +* +* Description: Inplace reduction of all coefficients of polynomial to +* representative in [-6283009,6283007]. Assumes input +* coefficients to be at most 2^31 - 2^22 - 1 in absolute value. 
+* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_reduce(poly *a) { + unsigned int i; + __m256i f,g; + const __m256i q = _mm256_load_si256(&qdata.vec[_8XQ/8]); + const __m256i off = _mm256_set1_epi32(1<<22); + DBENCH_START(); + + for(i = 0; i < N/8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_add_epi32(f,off); + g = _mm256_srai_epi32(g,23); + g = _mm256_mullo_epi32(g,q); + f = _mm256_sub_epi32(f,g); + _mm256_store_si256(&a->vec[i],f); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: poly_caddq +* +* Description: For all coefficients of in/out polynomial add Q if +* coefficient is negative. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_caddq(poly *a) { + unsigned int i; + __m256i f,g; + const __m256i q = _mm256_load_si256(&qdata.vec[_8XQ/8]); + const __m256i zero = _mm256_setzero_si256(); + DBENCH_START(); + + for(i = 0; i < N/8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_blendv_epi32(zero,q,f); + f = _mm256_add_epi32(f,g); + _mm256_store_si256(&a->vec[i],f); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: poly_add +* +* Description: Add polynomials. No modular reduction is performed. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first summand +* - const poly *b: pointer to second summand +**************************************************/ +void poly_add(poly *c, const poly *a, const poly *b) { + unsigned int i; + __m256i f,g; + DBENCH_START(); + + for(i = 0; i < N/8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_load_si256(&b->vec[i]); + f = _mm256_add_epi32(f,g); + _mm256_store_si256(&c->vec[i],f); + } + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: poly_sub +* +* Description: Subtract polynomials.
No modular reduction is +* performed. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial to be +* subtracted from first input polynomial +**************************************************/ +void poly_sub(poly *c, const poly *a, const poly *b) { + unsigned int i; + __m256i f,g; + DBENCH_START(); + + for(i = 0; i < N/8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_load_si256(&b->vec[i]); + f = _mm256_sub_epi32(f,g); + _mm256_store_si256(&c->vec[i],f); + } + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: poly_shiftl +* +* Description: Multiply polynomial by 2^D without modular reduction. Assumes +* input coefficients to be less than 2^{31-D} in absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_shiftl(poly *a) { + unsigned int i; + __m256i f; + DBENCH_START(); + + for(i = 0; i < N/8; i++) { + f = _mm256_load_si256(&a->vec[i]); + f = _mm256_slli_epi32(f,D); + _mm256_store_si256(&a->vec[i],f); + } + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: poly_ntt +* +* Description: Inplace forward NTT. Coefficients can grow by up to +* 8*Q in absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_ntt(poly *a) { + DBENCH_START(); + + ntt_avx(a->vec, qdata.vec); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: poly_invntt_tomont +* +* Description: Inplace inverse NTT and multiplication by 2^{32}. +* Input coefficients need to be less than Q in absolute +* value and output coefficients are again bounded by Q.
+* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_invntt_tomont(poly *a) { + DBENCH_START(); + + invntt_avx(a->vec, qdata.vec); + + DBENCH_STOP(*tmul); +} + +void poly_nttunpack(poly *a) { /* thin wrapper: runs nttunpack_avx in place on a->vec, timed under tmul */ + DBENCH_START(); + + nttunpack_avx(a->vec); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: poly_pointwise_montgomery +* +* Description: Pointwise multiplication of polynomials in NTT domain +* representation and multiplication of resulting polynomial +* by 2^{-32}. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_pointwise_montgomery(poly *c, const poly *a, const poly *b) { + DBENCH_START(); + + pointwise_avx(c->vec, a->vec, b->vec, qdata.vec); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: poly_power2round +* +* Description: For all coefficients c of the input polynomial, +* compute c0, c1 such that c mod^+ Q = c1*2^D + c0 +* with -2^{D-1} < c0 <= 2^{D-1}. Assumes coefficients to be +* positive standard representatives. +* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void poly_power2round(poly *a1, poly *a0, const poly *a) +{ + DBENCH_START(); + + power2round_avx(a1->vec, a0->vec, a->vec); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: poly_decompose +* +* Description: For all coefficients c of the input polynomial, +* compute high and low bits c0, c1 such that c mod^+ Q = c1*ALPHA + c0 +* with -ALPHA/2 < c0 <= ALPHA/2 except if c1 = (Q-1)/ALPHA where we +* set c1 = 0 and -ALPHA/2 <= c0 = c mod Q - Q < 0.
+* Assumes coefficients to be positive standard representatives. +* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void poly_decompose(poly *a1, poly *a0, const poly *a) +{ + DBENCH_START(); + + decompose_avx(a1->vec, a0->vec, a->vec); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: poly_make_hint +* +* Description: Compute hint array. The coefficients of which are the +* indices of the coefficients of the input polynomial +* whose low bits overflow into the high bits. +* +* Arguments: - uint8_t hint[N]: output hint array (preallocated of length N) +* - const poly *a0: pointer to low part of input polynomial +* - const poly *a1: pointer to high part of input polynomial +* +* Returns number of hints, i.e. length of hint array. +**************************************************/ +unsigned int poly_make_hint(uint8_t hint[N], const poly *a0, const poly *a1) +{ + unsigned int r; + DBENCH_START(); + + r = make_hint_avx(hint, a0->vec, a1->vec); /* count is kept in r so the timer can stop before returning */ + + DBENCH_STOP(*tround); + return r; +} + +/************************************************* +* Name: poly_use_hint +* +* Description: Use hint polynomial to correct the high bits of a polynomial. +* +* Arguments: - poly *b: pointer to output polynomial with corrected high bits +* - const poly *a: pointer to input polynomial +* - const poly *h: pointer to input hint polynomial +**************************************************/ +void poly_use_hint(poly *b, const poly *a, const poly *h) +{ + DBENCH_START(); + + use_hint_avx(b->vec, a->vec, h->vec); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: poly_chknorm +* +* Description: Check infinity norm of polynomial against given bound. +* Assumes input polynomial to be reduced by poly_reduce().
+* +* Arguments: - const poly *a: pointer to polynomial +* - int32_t B: norm bound +* +* Returns 0 if norm is strictly smaller than B <= (Q-1)/8 and 1 otherwise. +**************************************************/ +int poly_chknorm(const poly *a, int32_t B) { + unsigned int i; + int r; + __m256i f,t; + const __m256i bound = _mm256_set1_epi32(B-1); /* comparing |c| > B-1 is the same test as |c| >= B */ + DBENCH_START(); + + if(B > (Q-1)/8) + return 1; /* bound outside the supported range counts as a failed check; NOTE(review): this path returns before DBENCH_STOP */ + + t = _mm256_setzero_si256(); + for(i = 0; i < N/8; i++) { + f = _mm256_load_si256(&a->vec[i]); + f = _mm256_abs_epi32(f); + f = _mm256_cmpgt_epi32(f,bound); /* all-ones in every lane whose absolute value exceeds B-1 */ + t = _mm256_or_si256(t,f); /* accumulate violation masks over the whole polynomial */ + } + + r = 1 - _mm256_testz_si256(t,t); /* testz gives 1 only when t is all zero, so r is 1 if any lane violated the bound */ + DBENCH_STOP(*tsample); + return r; +} + +/************************************************* +* Name: rej_uniform +* +* Description: Sample uniformly random coefficients in [0, Q-1] by +* performing rejection sampling on array of random bytes. +* +* Arguments: - int32_t *a: pointer to output array (allocated) +* - unsigned int len: number of coefficients to be sampled +* - const uint8_t *buf: array of random bytes +* - unsigned int buflen: length of array of random bytes +* +* Returns number of sampled coefficients. Can be smaller than len if not enough +* random bytes were given.
+**************************************************/ +static unsigned int rej_uniform(int32_t *a, + unsigned int len, + const uint8_t *buf, + unsigned int buflen) +{ + unsigned int ctr, pos; + uint32_t t; + DBENCH_START(); + + ctr = pos = 0; + while(ctr < len && pos + 3 <= buflen) { /* each candidate consumes 3 bytes; stop when fewer than 3 remain */ + t = buf[pos++]; + t |= (uint32_t)buf[pos++] << 8; + t |= (uint32_t)buf[pos++] << 16; + t &= 0x7FFFFF; /* keep the low 23 bits of the little-endian 24-bit value */ + + if(t < Q) /* accept only candidates below Q, reject the rest */ + a[ctr++] = t; + } + + DBENCH_STOP(*tsample); + return ctr; +} + +/************************************************* +* Name: poly_uniform +* +* Description: Sample polynomial with uniformly random coefficients +* in [0,Q-1] by performing rejection sampling on the +* output stream of stream128(seed, nonce) +* +* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length SEEDBYTES +* - uint16_t nonce: 2-byte nonce +**************************************************/ +void poly_uniform_preinit(poly *a, stream128_state *state) /* variant taking an already initialised stream; it neither initialises nor releases state */ +{ + unsigned int ctr; + /* rej_uniform_avx reads up to 8 additional bytes */ + ALIGNED_UINT8(REJ_UNIFORM_BUFLEN+8) buf; + + stream128_squeezeblocks(buf.coeffs, REJ_UNIFORM_NBLOCKS, state); + ctr = rej_uniform_avx(a->coeffs, buf.coeffs); + + while(ctr < N) { /* scalar top-up, one extra block at a time, until all N coefficients are filled */ + /* length of buf is always divisible by 3; hence, no bytes left */ + stream128_squeezeblocks(buf.coeffs, 1, state); + ctr += rej_uniform(a->coeffs + ctr, N - ctr, buf.coeffs, STREAM128_BLOCKBYTES); + } +} + +void poly_uniform(poly *a, const uint8_t seed[SEEDBYTES], uint16_t nonce) /* init stream from (seed, nonce), sample, release */ +{ + stream128_state state; + stream128_init(&state, seed, nonce); + poly_uniform_preinit(a, &state); + stream128_release(&state); +} + +void poly_uniform_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[32], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3) +{ + unsigned int ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_BUFLEN+8) buf[4]; + shake128x4incctx state; + __m256i f; + + f = _mm256_loadu_si256((__m256i
*)seed); + _mm256_store_si256(buf[0].vec,f); /* the 32 loaded seed bytes go to the front of each of the four lane buffers */ + _mm256_store_si256(buf[1].vec,f); + _mm256_store_si256(buf[2].vec,f); + _mm256_store_si256(buf[3].vec,f); + + buf[0].coeffs[SEEDBYTES+0] = nonce0; /* 16-bit nonce appended right after the seed, low byte first */ + buf[0].coeffs[SEEDBYTES+1] = nonce0 >> 8; + buf[1].coeffs[SEEDBYTES+0] = nonce1; + buf[1].coeffs[SEEDBYTES+1] = nonce1 >> 8; + buf[2].coeffs[SEEDBYTES+0] = nonce2; + buf[2].coeffs[SEEDBYTES+1] = nonce2 >> 8; + buf[3].coeffs[SEEDBYTES+0] = nonce3; + buf[3].coeffs[SEEDBYTES+1] = nonce3 >> 8; + + shake128x4_inc_init(&state); + shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, SEEDBYTES + 2); /* seed plus the 2 nonce bytes per lane */ + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_NBLOCKS, &state); /* output overwrites the input buffers */ + + ctr0 = rej_uniform_avx(a0->coeffs, buf[0].coeffs); + ctr1 = rej_uniform_avx(a1->coeffs, buf[1].coeffs); + ctr2 = rej_uniform_avx(a2->coeffs, buf[2].coeffs); + ctr3 = rej_uniform_avx(a3->coeffs, buf[3].coeffs); + + while(ctr0 < N || ctr1 < N || ctr2 < N || ctr3 < N) { /* keep squeezing all four lanes until every polynomial holds N coefficients */ + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_uniform(a0->coeffs + ctr0, N - ctr0, buf[0].coeffs, SHAKE128_RATE); /* a lane that is already full passes len 0 and takes nothing */ + ctr1 += rej_uniform(a1->coeffs + ctr1, N - ctr1, buf[1].coeffs, SHAKE128_RATE); + ctr2 += rej_uniform(a2->coeffs + ctr2, N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_uniform(a3->coeffs + ctr3, N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } + shake128x4_inc_ctx_release(&state); +} + +/************************************************* +* Name: rej_eta +* +* Description: Sample uniformly random coefficients in [-ETA, ETA] by +* performing rejection sampling on array of random bytes. +* +* Arguments: - int32_t *a: pointer to output array (allocated) +* - unsigned int len: number of coefficients to be sampled +* - const uint8_t *buf: array of random bytes +* - unsigned int buflen: length of array of random bytes +* +* Returns number of sampled coefficients.
Can be smaller than len if not enough +* random bytes were given. +**************************************************/ +static unsigned int rej_eta(int32_t *a, + unsigned int len, + const uint8_t *buf, + unsigned int buflen) +{ + unsigned int ctr, pos; + uint32_t t0, t1; + DBENCH_START(); + + ctr = pos = 0; + while(ctr < len && pos < buflen) { /* every byte yields two 4-bit candidates */ + t0 = buf[pos] & 0x0F; /* low nibble */ + t1 = buf[pos++] >> 4; /* high nibble */ + +#if ETA == 2 + if(t0 < 15) { + t0 = t0 - (205*t0 >> 10)*5; /* t0 mod 5: (205*t0 >> 10) equals t0/5 for every t0 < 15 */ + a[ctr++] = 2 - t0; + } + if(t1 < 15 && ctr < len) { /* re-check ctr: the first nibble may have filled the last slot */ + t1 = t1 - (205*t1 >> 10)*5; + a[ctr++] = 2 - t1; + } +#elif ETA == 4 + if(t0 < 9) /* values 0..8 map onto 4..-4 */ + a[ctr++] = 4 - t0; + if(t1 < 9 && ctr < len) + a[ctr++] = 4 - t1; +#endif + } + + DBENCH_STOP(*tsample); + return ctr; +} + +/************************************************* +* Name: poly_uniform_eta +* +* Description: Sample polynomial with uniformly random coefficients +* in [-ETA,ETA] by performing rejection sampling using the +* output stream of SHAKE256(seed|nonce) +* +* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length CRHBYTES +* - uint16_t nonce: 2-byte nonce +**************************************************/ +void poly_uniform_eta_preinit(poly *a, stream256_state *state) /* variant taking an already initialised stream; it neither initialises nor releases state */ +{ + unsigned int ctr; + ALIGNED_UINT8(REJ_UNIFORM_ETA_BUFLEN) buf; + + stream256_squeezeblocks(buf.coeffs, REJ_UNIFORM_ETA_NBLOCKS, state); + ctr = rej_eta_avx(a->coeffs, buf.coeffs); + + while(ctr < N) { /* scalar top-up, one extra block at a time */ + stream256_squeezeblocks(buf.coeffs, 1, state); + ctr += rej_eta(a->coeffs + ctr, N - ctr, buf.coeffs, STREAM256_BLOCKBYTES); + } +} + +void poly_uniform_eta(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce) /* init stream from (seed, nonce), sample, release */ +{ + stream256_state state; + stream256_init(&state, seed, nonce); + poly_uniform_eta_preinit(a, &state); + stream256_release(&state); +} + +void poly_uniform_eta_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[64], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3) +{ + unsigned int
ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_ETA_BUFLEN) buf[4]; + + __m256i f; + shake256x4incctx state; + + f = _mm256_loadu_si256((__m256i *)&seed[0]); /* seed bytes 0..31, copied to the front of every lane buffer */ + _mm256_store_si256(&buf[0].vec[0],f); + _mm256_store_si256(&buf[1].vec[0],f); + _mm256_store_si256(&buf[2].vec[0],f); + _mm256_store_si256(&buf[3].vec[0],f); + f = _mm256_loadu_si256((__m256i *)&seed[32]); /* seed bytes 32..63 */ + _mm256_store_si256(&buf[0].vec[1],f); + _mm256_store_si256(&buf[1].vec[1],f); + _mm256_store_si256(&buf[2].vec[1],f); + _mm256_store_si256(&buf[3].vec[1],f); + + buf[0].coeffs[64] = nonce0; /* 16-bit nonce follows the 64 seed bytes, low byte first */ + buf[0].coeffs[65] = nonce0 >> 8; + buf[1].coeffs[64] = nonce1; + buf[1].coeffs[65] = nonce1 >> 8; + buf[2].coeffs[64] = nonce2; + buf[2].coeffs[65] = nonce2 >> 8; + buf[3].coeffs[64] = nonce3; + buf[3].coeffs[65] = nonce3 >> 8; + + shake256x4_inc_init(&state); + shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 66); /* 64 seed bytes + 2 nonce bytes per lane */ + shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_ETA_NBLOCKS, &state); /* output overwrites the input buffers */ + + ctr0 = rej_eta_avx(a0->coeffs, buf[0].coeffs); + ctr1 = rej_eta_avx(a1->coeffs, buf[1].coeffs); + ctr2 = rej_eta_avx(a2->coeffs, buf[2].coeffs); + ctr3 = rej_eta_avx(a3->coeffs, buf[3].coeffs); + + while(ctr0 < N || ctr1 < N || ctr2 < N || ctr3 < N) { /* keep squeezing all four lanes until every polynomial holds N coefficients */ + shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_eta(a0->coeffs + ctr0, N - ctr0, buf[0].coeffs, SHAKE256_RATE); /* a lane that is already full passes len 0 and takes nothing */ + ctr1 += rej_eta(a1->coeffs + ctr1, N - ctr1, buf[1].coeffs, SHAKE256_RATE); + ctr2 += rej_eta(a2->coeffs + ctr2, N - ctr2, buf[2].coeffs, SHAKE256_RATE); + ctr3 += rej_eta(a3->coeffs + ctr3, N - ctr3, buf[3].coeffs, SHAKE256_RATE); + } + shake256x4_inc_ctx_release(&state); +} + +/************************************************* +* Name: poly_uniform_gamma1 +* +* Description: Sample polynomial with uniformly random coefficients +* in [-(GAMMA1 - 1), GAMMA1] by unpacking output stream +* of SHAKE256(seed|nonce) +*
+* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length CRHBYTES +* - uint16_t nonce: 16-bit nonce +**************************************************/ +#define POLY_UNIFORM_GAMMA1_NBLOCKS ((POLYZ_PACKEDBYTES+STREAM256_BLOCKBYTES-1)/STREAM256_BLOCKBYTES) /* ceil(POLYZ_PACKEDBYTES / STREAM256_BLOCKBYTES) */ +void poly_uniform_gamma1_preinit(poly *a, stream256_state *state) /* variant taking an already initialised stream; it neither initialises nor releases state */ +{ + /* polyz_unpack reads 14 additional bytes */ + ALIGNED_UINT8(POLY_UNIFORM_GAMMA1_NBLOCKS*STREAM256_BLOCKBYTES+14) buf; + stream256_squeezeblocks(buf.coeffs, POLY_UNIFORM_GAMMA1_NBLOCKS, state); + polyz_unpack(a, buf.coeffs); /* no rejection step: the squeezed bytes are unpacked directly */ +} + +void poly_uniform_gamma1(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce) /* init stream from (seed, nonce), sample, release */ +{ + stream256_state state; + stream256_init(&state, seed, nonce); + poly_uniform_gamma1_preinit(a, &state); + stream256_release(&state); +} + +void poly_uniform_gamma1_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[64], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3) +{ + ALIGNED_UINT8(POLY_UNIFORM_GAMMA1_NBLOCKS*STREAM256_BLOCKBYTES+14) buf[4]; + shake256x4incctx state; + __m256i f; + + f = _mm256_loadu_si256((__m256i *)&seed[0]); /* seed bytes 0..31, copied to the front of every lane buffer */ + _mm256_store_si256(&buf[0].vec[0],f); + _mm256_store_si256(&buf[1].vec[0],f); + _mm256_store_si256(&buf[2].vec[0],f); + _mm256_store_si256(&buf[3].vec[0],f); + f = _mm256_loadu_si256((__m256i *)&seed[32]); /* seed bytes 32..63 */ + _mm256_store_si256(&buf[0].vec[1],f); + _mm256_store_si256(&buf[1].vec[1],f); + _mm256_store_si256(&buf[2].vec[1],f); + _mm256_store_si256(&buf[3].vec[1],f); + + buf[0].coeffs[64] = nonce0; /* 16-bit nonce follows the 64 seed bytes, low byte first */ + buf[0].coeffs[65] = nonce0 >> 8; + buf[1].coeffs[64] = nonce1; + buf[1].coeffs[65] = nonce1 >> 8; + buf[2].coeffs[64] = nonce2; + buf[2].coeffs[65] = nonce2 >> 8; + buf[3].coeffs[64] = nonce3; + buf[3].coeffs[65] = nonce3 >> 8; + + shake256x4_inc_init(&state); + shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 66); /* 64 seed bytes + 2 nonce bytes per lane */ + shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs,
buf[2].coeffs, buf[3].coeffs, POLY_UNIFORM_GAMMA1_NBLOCKS, &state); + shake256x4_inc_ctx_release(&state); /* released before unpacking: no further squeezing is needed */ + + polyz_unpack(a0, buf[0].coeffs); + polyz_unpack(a1, buf[1].coeffs); + polyz_unpack(a2, buf[2].coeffs); + polyz_unpack(a3, buf[3].coeffs); +} + +/************************************************* +* Name: poly_challenge +* +* Description: Implementation of H. Samples polynomial with TAU nonzero +* coefficients in {-1,1} using the output stream of +* SHAKE256(seed). +* +* Arguments: - poly *c: pointer to output polynomial +* - const uint8_t seed[]: byte array containing seed of length SEEDBYTES +**************************************************/ +void poly_challenge(poly * restrict c, const uint8_t seed[SEEDBYTES]) { + unsigned int i, b, pos; + uint64_t signs; + ALIGNED_UINT8(SHAKE256_RATE) buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, seed, SEEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf.coeffs, SHAKE256_RATE, &state); + + memcpy(&signs, buf.coeffs, 8); /* the first 8 squeezed bytes supply the sign bits, read in host byte order */ + pos = 8; + + memset(c->vec, 0, sizeof(poly)); + for(i = N-TAU; i < N; ++i) { /* TAU iterations, each placing one nonzero coefficient */ + do { + if(pos >= SHAKE256_RATE) { /* buffer exhausted: squeeze another block */ + shake256_squeezeblocks(buf.coeffs, 1, &state); + pos = 0; + } + + b = buf.coeffs[pos++]; + } while(b > i); /* reject bytes above i so that b lies in [0, i] */ + + c->coeffs[i] = c->coeffs[b]; /* move whatever sat at position b up to position i */ + c->coeffs[b] = 1 - 2*(signs & 1); /* +1 when the next sign bit is 0, -1 when it is 1 */ + signs >>= 1; + } + shake256_inc_ctx_release(&state); +} + +/************************************************* +* Name: polyeta_pack +* +* Description: Bit-pack polynomial with coefficients in [-ETA,ETA].
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYETA_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void polyeta_pack(uint8_t r[POLYETA_PACKEDBYTES], const poly * restrict a) { + unsigned int i; + uint8_t t[8]; + DBENCH_START(); + +#if ETA == 2 + for(i = 0; i < N/8; ++i) { /* 8 coefficients x 3 bits -> 3 bytes */ + t[0] = ETA - a->coeffs[8*i+0]; /* ETA - c shifts the stored value into [0, 2*ETA] */ + t[1] = ETA - a->coeffs[8*i+1]; + t[2] = ETA - a->coeffs[8*i+2]; + t[3] = ETA - a->coeffs[8*i+3]; + t[4] = ETA - a->coeffs[8*i+4]; + t[5] = ETA - a->coeffs[8*i+5]; + t[6] = ETA - a->coeffs[8*i+6]; + t[7] = ETA - a->coeffs[8*i+7]; + + r[3*i+0] = (t[0] >> 0) | (t[1] << 3) | (t[2] << 6); /* t[2] straddles bytes 0 and 1 */ + r[3*i+1] = (t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7); /* t[5] straddles bytes 1 and 2 */ + r[3*i+2] = (t[5] >> 1) | (t[6] << 2) | (t[7] << 5); + } +#elif ETA == 4 + for(i = 0; i < N/2; ++i) { /* 2 coefficients x 4 bits -> 1 byte */ + t[0] = ETA - a->coeffs[2*i+0]; + t[1] = ETA - a->coeffs[2*i+1]; + r[i] = t[0] | (t[1] << 4); /* even coefficient in the low nibble, odd one in the high nibble */ + } +#endif + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyeta_unpack +* +* Description: Unpack polynomial with coefficients in [-ETA,ETA].
+* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void polyeta_unpack(poly * restrict r, const uint8_t a[POLYETA_PACKEDBYTES]) { + unsigned int i; + DBENCH_START(); + +#if ETA == 2 + for(i = 0; i < N/8; ++i) { /* 3 bytes -> 8 coefficients of 3 bits each */ + r->coeffs[8*i+0] = (a[3*i+0] >> 0) & 7; + r->coeffs[8*i+1] = (a[3*i+0] >> 3) & 7; + r->coeffs[8*i+2] = ((a[3*i+0] >> 6) | (a[3*i+1] << 2)) & 7; /* field straddles bytes 0 and 1 */ + r->coeffs[8*i+3] = (a[3*i+1] >> 1) & 7; + r->coeffs[8*i+4] = (a[3*i+1] >> 4) & 7; + r->coeffs[8*i+5] = ((a[3*i+1] >> 7) | (a[3*i+2] << 1)) & 7; /* field straddles bytes 1 and 2 */ + r->coeffs[8*i+6] = (a[3*i+2] >> 2) & 7; + r->coeffs[8*i+7] = (a[3*i+2] >> 5) & 7; + + r->coeffs[8*i+0] = ETA - r->coeffs[8*i+0]; /* undo the ETA - c offset applied when packing */ + r->coeffs[8*i+1] = ETA - r->coeffs[8*i+1]; + r->coeffs[8*i+2] = ETA - r->coeffs[8*i+2]; + r->coeffs[8*i+3] = ETA - r->coeffs[8*i+3]; + r->coeffs[8*i+4] = ETA - r->coeffs[8*i+4]; + r->coeffs[8*i+5] = ETA - r->coeffs[8*i+5]; + r->coeffs[8*i+6] = ETA - r->coeffs[8*i+6]; + r->coeffs[8*i+7] = ETA - r->coeffs[8*i+7]; + } +#elif ETA == 4 + for(i = 0; i < N/2; ++i) { /* 1 byte -> 2 coefficients of 4 bits each */ + r->coeffs[2*i+0] = a[i] & 0x0F; + r->coeffs[2*i+1] = a[i] >> 4; + r->coeffs[2*i+0] = ETA - r->coeffs[2*i+0]; /* undo the ETA - c offset applied when packing */ + r->coeffs[2*i+1] = ETA - r->coeffs[2*i+1]; + } +#endif + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyt1_pack +* +* Description: Bit-pack polynomial t1 with coefficients fitting in 10 bits. +* Input coefficients are assumed to be positive standard representatives.
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYT1_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void polyt1_pack(uint8_t r[POLYT1_PACKEDBYTES], const poly * restrict a) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N/4; ++i) { /* 4 coefficients x 10 bits -> 5 bytes */ + r[5*i+0] = (a->coeffs[4*i+0] >> 0); /* assignment to uint8_t keeps only the low 8 bits */ + r[5*i+1] = (a->coeffs[4*i+0] >> 8) | (a->coeffs[4*i+1] << 2); + r[5*i+2] = (a->coeffs[4*i+1] >> 6) | (a->coeffs[4*i+2] << 4); + r[5*i+3] = (a->coeffs[4*i+2] >> 4) | (a->coeffs[4*i+3] << 6); + r[5*i+4] = (a->coeffs[4*i+3] >> 2); + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyt1_unpack +* +* Description: Unpack polynomial t1 with 10-bit coefficients. +* Output coefficients are positive standard representatives. +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void polyt1_unpack(poly * restrict r, const uint8_t a[POLYT1_PACKEDBYTES]) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N/4; ++i) { /* 5 bytes -> 4 coefficients; 0x3FF masks each to 10 bits */ + r->coeffs[4*i+0] = ((a[5*i+0] >> 0) | ((uint32_t)a[5*i+1] << 8)) & 0x3FF; + r->coeffs[4*i+1] = ((a[5*i+1] >> 2) | ((uint32_t)a[5*i+2] << 6)) & 0x3FF; + r->coeffs[4*i+2] = ((a[5*i+2] >> 4) | ((uint32_t)a[5*i+3] << 4)) & 0x3FF; + r->coeffs[4*i+3] = ((a[5*i+3] >> 6) | ((uint32_t)a[5*i+4] << 2)) & 0x3FF; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyt0_pack +* +* Description: Bit-pack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}].
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYT0_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void polyt0_pack(uint8_t r[POLYT0_PACKEDBYTES], const poly * restrict a) { + unsigned int i; + uint32_t t[8]; + DBENCH_START(); + + for(i = 0; i < N/8; ++i) { /* 8 coefficients x 13 bits -> 13 bytes */ + t[0] = (1 << (D-1)) - a->coeffs[8*i+0]; /* 2^{D-1} - c maps the documented input range to a non-negative value */ + t[1] = (1 << (D-1)) - a->coeffs[8*i+1]; + t[2] = (1 << (D-1)) - a->coeffs[8*i+2]; + t[3] = (1 << (D-1)) - a->coeffs[8*i+3]; + t[4] = (1 << (D-1)) - a->coeffs[8*i+4]; + t[5] = (1 << (D-1)) - a->coeffs[8*i+5]; + t[6] = (1 << (D-1)) - a->coeffs[8*i+6]; + t[7] = (1 << (D-1)) - a->coeffs[8*i+7]; + + r[13*i+ 0] = t[0]; /* each store keeps only the low 8 bits of its right-hand side */ + r[13*i+ 1] = t[0] >> 8; + r[13*i+ 1] |= t[1] << 5; /* bytes shared by two coefficients are written first and then OR-ed */ + r[13*i+ 2] = t[1] >> 3; + r[13*i+ 3] = t[1] >> 11; + r[13*i+ 3] |= t[2] << 2; + r[13*i+ 4] = t[2] >> 6; + r[13*i+ 4] |= t[3] << 7; + r[13*i+ 5] = t[3] >> 1; + r[13*i+ 6] = t[3] >> 9; + r[13*i+ 6] |= t[4] << 4; + r[13*i+ 7] = t[4] >> 4; + r[13*i+ 8] = t[4] >> 12; + r[13*i+ 8] |= t[5] << 1; + r[13*i+ 9] = t[5] >> 7; + r[13*i+ 9] |= t[6] << 6; + r[13*i+10] = t[6] >> 2; + r[13*i+11] = t[6] >> 10; + r[13*i+11] |= t[7] << 3; + r[13*i+12] = t[7] >> 5; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyt0_unpack +* +* Description: Unpack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}].
+* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void polyt0_unpack(poly * restrict r, const uint8_t a[POLYT0_PACKEDBYTES]) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N/8; ++i) { /* 13 bytes -> 8 coefficients; 0x1FFF masks each to 13 bits */ + r->coeffs[8*i+0] = a[13*i+0]; + r->coeffs[8*i+0] |= (uint32_t)a[13*i+1] << 8; + r->coeffs[8*i+0] &= 0x1FFF; + + r->coeffs[8*i+1] = a[13*i+1] >> 5; + r->coeffs[8*i+1] |= (uint32_t)a[13*i+2] << 3; + r->coeffs[8*i+1] |= (uint32_t)a[13*i+3] << 11; + r->coeffs[8*i+1] &= 0x1FFF; + + r->coeffs[8*i+2] = a[13*i+3] >> 2; + r->coeffs[8*i+2] |= (uint32_t)a[13*i+4] << 6; + r->coeffs[8*i+2] &= 0x1FFF; + + r->coeffs[8*i+3] = a[13*i+4] >> 7; + r->coeffs[8*i+3] |= (uint32_t)a[13*i+5] << 1; + r->coeffs[8*i+3] |= (uint32_t)a[13*i+6] << 9; + r->coeffs[8*i+3] &= 0x1FFF; + + r->coeffs[8*i+4] = a[13*i+6] >> 4; + r->coeffs[8*i+4] |= (uint32_t)a[13*i+7] << 4; + r->coeffs[8*i+4] |= (uint32_t)a[13*i+8] << 12; + r->coeffs[8*i+4] &= 0x1FFF; + + r->coeffs[8*i+5] = a[13*i+8] >> 1; + r->coeffs[8*i+5] |= (uint32_t)a[13*i+9] << 7; + r->coeffs[8*i+5] &= 0x1FFF; + + r->coeffs[8*i+6] = a[13*i+9] >> 6; + r->coeffs[8*i+6] |= (uint32_t)a[13*i+10] << 2; + r->coeffs[8*i+6] |= (uint32_t)a[13*i+11] << 10; + r->coeffs[8*i+6] &= 0x1FFF; + + r->coeffs[8*i+7] = a[13*i+11] >> 3; + r->coeffs[8*i+7] |= (uint32_t)a[13*i+12] << 5; + r->coeffs[8*i+7] &= 0x1FFF; + + r->coeffs[8*i+0] = (1 << (D-1)) - r->coeffs[8*i+0]; /* undo the 2^{D-1} - c offset applied when packing */ + r->coeffs[8*i+1] = (1 << (D-1)) - r->coeffs[8*i+1]; + r->coeffs[8*i+2] = (1 << (D-1)) - r->coeffs[8*i+2]; + r->coeffs[8*i+3] = (1 << (D-1)) - r->coeffs[8*i+3]; + r->coeffs[8*i+4] = (1 << (D-1)) - r->coeffs[8*i+4]; + r->coeffs[8*i+5] = (1 << (D-1)) - r->coeffs[8*i+5]; + r->coeffs[8*i+6] = (1 << (D-1)) - r->coeffs[8*i+6]; + r->coeffs[8*i+7] = (1 << (D-1)) - r->coeffs[8*i+7]; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyz_pack +* +*
Description: Bit-pack polynomial with coefficients +* in [-(GAMMA1 - 1), GAMMA1]. +* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYZ_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void polyz_pack(uint8_t r[POLYZ_PACKEDBYTES], const poly * restrict a) { + unsigned int i; + uint32_t t[4]; + DBENCH_START(); + +#if GAMMA1 == (1 << 17) + for(i = 0; i < N/4; ++i) { /* 4 coefficients x 18 bits -> 9 bytes */ + t[0] = GAMMA1 - a->coeffs[4*i+0]; /* GAMMA1 - c maps the documented input range to a non-negative value */ + t[1] = GAMMA1 - a->coeffs[4*i+1]; + t[2] = GAMMA1 - a->coeffs[4*i+2]; + t[3] = GAMMA1 - a->coeffs[4*i+3]; + + r[9*i+0] = t[0]; /* each store keeps only the low 8 bits of its right-hand side */ + r[9*i+1] = t[0] >> 8; + r[9*i+2] = t[0] >> 16; + r[9*i+2] |= t[1] << 2; + r[9*i+3] = t[1] >> 6; + r[9*i+4] = t[1] >> 14; + r[9*i+4] |= t[2] << 4; + r[9*i+5] = t[2] >> 4; + r[9*i+6] = t[2] >> 12; + r[9*i+6] |= t[3] << 6; + r[9*i+7] = t[3] >> 2; + r[9*i+8] = t[3] >> 10; + } +#elif GAMMA1 == (1 << 19) + for(i = 0; i < N/2; ++i) { /* 2 coefficients x 20 bits -> 5 bytes */ + t[0] = GAMMA1 - a->coeffs[2*i+0]; + t[1] = GAMMA1 - a->coeffs[2*i+1]; + + r[5*i+0] = t[0]; + r[5*i+1] = t[0] >> 8; + r[5*i+2] = t[0] >> 16; + r[5*i+2] |= t[1] << 4; + r[5*i+3] = t[1] >> 4; + r[5*i+4] = t[1] >> 12; + } +#endif + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyz_unpack +* +* Description: Unpack polynomial z with coefficients +* in [-(GAMMA1 - 1), GAMMA1].
+* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +#if GAMMA1 == (1 << 17) +void polyz_unpack(poly * restrict r, const uint8_t *a) { + unsigned int i; + __m256i f; + const __m256i shufbidx = _mm256_set_epi8(-1, 9, 8, 7,-1, 7, 6, 5,-1, 5, 4, 3,-1, 3, 2, 1, + -1, 8, 7, 6,-1, 6, 5, 4,-1, 4, 3, 2,-1, 2, 1, 0); + const __m256i srlvdidx = _mm256_set_epi32(6,4,2,0,6,4,2,0); /* per-lane right shift that aligns each 18-bit field to bit 0 */ + const __m256i mask = _mm256_set1_epi32(0x3FFFF); /* 18-bit mask */ + const __m256i gamma1 = _mm256_set1_epi32(GAMMA1); + DBENCH_START(); + + for(i = 0; i < N/8; i++) { /* 8 coefficients per step, input advances 18 bytes */ + f = _mm256_loadu_si256((__m256i *)&a[18*i]); /* 32-byte load on an 18-byte stride: reads 14 bytes beyond the 18 consumed */ + f = _mm256_permute4x64_epi64(f,0x94); + f = _mm256_shuffle_epi8(f,shufbidx); /* gather the bytes of each coefficient into its own 32-bit lane */ + f = _mm256_srlv_epi32(f,srlvdidx); + f = _mm256_and_si256(f,mask); + f = _mm256_sub_epi32(gamma1,f); /* coefficient = GAMMA1 - stored value */ + _mm256_store_si256(&r->vec[i],f); + } + + DBENCH_STOP(*tpack); +} + +#elif GAMMA1 == (1 << 19) +void polyz_unpack(poly * restrict r, const uint8_t *a) { + unsigned int i; + __m256i f; + const __m256i shufbidx = _mm256_set_epi8(-1,11,10, 9,-1, 9, 8, 7,-1, 6, 5, 4,-1, 4, 3, 2, + -1, 9, 8, 7,-1, 7, 6, 5,-1, 4, 3, 2,-1, 2, 1, 0); + const __m256i srlvdidx = _mm256_set1_epi64x((uint64_t)4 << 32); /* shift 0 for even 32-bit lanes, 4 for odd ones */ + const __m256i mask = _mm256_set1_epi32(0xFFFFF); /* 20-bit mask */ + const __m256i gamma1 = _mm256_set1_epi32(GAMMA1); + DBENCH_START(); + + for(i = 0; i < N/8; i++) { /* 8 coefficients per step, input advances 20 bytes */ + f = _mm256_loadu_si256((__m256i *)&a[20*i]); /* 32-byte load on a 20-byte stride: reads 12 bytes beyond the 20 consumed */ + f = _mm256_permute4x64_epi64(f,0x94); + f = _mm256_shuffle_epi8(f,shufbidx); /* gather the bytes of each coefficient into its own 32-bit lane */ + f = _mm256_srlv_epi32(f,srlvdidx); + f = _mm256_and_si256(f,mask); + f = _mm256_sub_epi32(gamma1,f); /* coefficient = GAMMA1 - stored value */ + _mm256_store_si256(&r->vec[i],f); + } + + DBENCH_STOP(*tpack); +} +#endif + +/************************************************* +* Name: polyw1_pack +* +* Description: Bit-pack polynomial w1 with coefficients in [0,15] or [0,43]. +* Input coefficients are assumed to be positive standard representatives.
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYW1_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +#if GAMMA2 == (Q-1)/88 +void polyw1_pack(uint8_t *r, const poly * restrict a) { + unsigned int i; + __m256i f0,f1,f2,f3; + const __m256i shift1 = _mm256_set1_epi16((64 << 8) + 1); /* byte pair weights 1 and 64: lo + 64*hi merges two 6-bit values */ + const __m256i shift2 = _mm256_set1_epi32((4096 << 16) + 1); /* 16-bit pair weights 1 and 4096: merges two 12-bit values into 24 bits */ + const __m256i shufdidx1 = _mm256_set_epi32(7,3,6,2,5,1,4,0); + const __m256i shufdidx2 = _mm256_set_epi32(-1,-1,6,5,4,2,1,0); + const __m256i shufbidx = _mm256_set_epi8(-1,-1,-1,-1,14,13,12,10, 9, 8, 6, 5, 4, 2, 1, 0, + -1,-1,-1,-1,14,13,12,10, 9, 8, 6, 5, 4, 2, 1, 0); + DBENCH_START(); + + for(i = 0; i < N/32; i++) { /* 32 coefficients per step, output advances 24 bytes */ + f0 = _mm256_load_si256(&a->vec[4*i+0]); + f1 = _mm256_load_si256(&a->vec[4*i+1]); + f2 = _mm256_load_si256(&a->vec[4*i+2]); + f3 = _mm256_load_si256(&a->vec[4*i+3]); + f0 = _mm256_packus_epi32(f0,f1); /* narrow 32-bit lanes to 16 bits */ + f1 = _mm256_packus_epi32(f2,f3); + f0 = _mm256_packus_epi16(f0,f1); /* narrow again to one byte per coefficient */ + f0 = _mm256_maddubs_epi16(f0,shift1); + f0 = _mm256_madd_epi16(f0,shift2); + f0 = _mm256_permutevar8x32_epi32(f0,shufdidx1); + f0 = _mm256_shuffle_epi8(f0,shufbidx); + f0 = _mm256_permutevar8x32_epi32(f0,shufdidx2); + _mm256_storeu_si256((__m256i *)&r[24*i],f0); /* NOTE(review): 32-byte store on a 24-byte stride, so the final store reaches 8 bytes past 24*(N/32) - confirm callers size r for that */ + } + + DBENCH_STOP(*tpack); +} + +#elif GAMMA2 == (Q-1)/32 +void polyw1_pack(uint8_t *r, const poly * restrict a) { + unsigned int i; + __m256i f0, f1, f2, f3, f4, f5, f6, f7; + const __m256i shift = _mm256_set1_epi16((16 << 8) + 1); /* byte pair weights 1 and 16: lo + 16*hi merges two 4-bit values into one byte */ + const __m256i shufbidx = _mm256_set_epi8(15,14, 7, 6,13,12, 5, 4,11,10, 3, 2, 9, 8, 1, 0, + 15,14, 7, 6,13,12, 5, 4,11,10, 3, 2, 9, 8, 1, 0); + DBENCH_START(); + + for(i = 0; i < N/64; ++i) { /* 64 coefficients per step, output advances 32 bytes */ + f0 = _mm256_load_si256(&a->vec[8*i+0]); + f1 = _mm256_load_si256(&a->vec[8*i+1]); + f2 = _mm256_load_si256(&a->vec[8*i+2]); + f3 = _mm256_load_si256(&a->vec[8*i+3]); + f4 = _mm256_load_si256(&a->vec[8*i+4]); + f5 = _mm256_load_si256(&a->vec[8*i+5]); + f6 =
_mm256_load_si256(&a->vec[8*i+6]); + f7 = _mm256_load_si256(&a->vec[8*i+7]); + f0 = _mm256_packus_epi32(f0,f1); /* narrow 32-bit lanes to 16 bits */ + f1 = _mm256_packus_epi32(f2,f3); + f2 = _mm256_packus_epi32(f4,f5); + f3 = _mm256_packus_epi32(f6,f7); + f0 = _mm256_packus_epi16(f0,f1); /* narrow again to one byte per coefficient */ + f1 = _mm256_packus_epi16(f2,f3); + f0 = _mm256_maddubs_epi16(f0,shift); /* lo + 16*hi for every byte pair */ + f1 = _mm256_maddubs_epi16(f1,shift); + f0 = _mm256_packus_epi16(f0,f1); + f0 = _mm256_permute4x64_epi64(f0,0xD8); + f0 = _mm256_shuffle_epi8(f0,shufbidx); + _mm256_storeu_si256((__m256i *)&r[32*i], f0); /* 32-byte store on a 32-byte stride */ + } + + DBENCH_STOP(*tpack); +} +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/poly.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/poly.h new file mode 100644 index 0000000000..7bcd8e5e03 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/poly.h @@ -0,0 +1,112 @@ +#ifndef POLY_H +#define POLY_H + +#include <stdint.h> +#include "align.h" +#include "params.h" +#include "symmetric.h" + +typedef ALIGNED_INT32(N) poly; /* N int32 coefficients; poly.c accesses them both as .coeffs and as __m256i .vec */ + +#define poly_reduce DILITHIUM_NAMESPACE(poly_reduce) /* every public name is routed through DILITHIUM_NAMESPACE */ +void poly_reduce(poly *a); +#define poly_caddq DILITHIUM_NAMESPACE(poly_caddq) +void poly_caddq(poly *a); + +#define poly_add DILITHIUM_NAMESPACE(poly_add) +void poly_add(poly *c, const poly *a, const poly *b); +#define poly_sub DILITHIUM_NAMESPACE(poly_sub) +void poly_sub(poly *c, const poly *a, const poly *b); +#define poly_shiftl DILITHIUM_NAMESPACE(poly_shiftl) +void poly_shiftl(poly *a); + +#define poly_ntt DILITHIUM_NAMESPACE(poly_ntt) +void poly_ntt(poly *a); +#define poly_invntt_tomont DILITHIUM_NAMESPACE(poly_invntt_tomont) +void poly_invntt_tomont(poly *a); +#define poly_nttunpack DILITHIUM_NAMESPACE(poly_nttunpack) +void poly_nttunpack(poly *a); +#define poly_pointwise_montgomery DILITHIUM_NAMESPACE(poly_pointwise_montgomery) +void poly_pointwise_montgomery(poly *c, const poly *a, const poly *b); + +#define poly_power2round DILITHIUM_NAMESPACE(poly_power2round) +void poly_power2round(poly
*a1, poly *a0, const poly *a); +#define poly_decompose DILITHIUM_NAMESPACE(poly_decompose) +void poly_decompose(poly *a1, poly *a0, const poly *a); +#define poly_make_hint DILITHIUM_NAMESPACE(poly_make_hint) +unsigned int poly_make_hint(uint8_t hint[N], const poly *a0, const poly *a1); /* returns the number of hints written to hint */ +#define poly_use_hint DILITHIUM_NAMESPACE(poly_use_hint) +void poly_use_hint(poly *b, const poly *a, const poly *h); + +#define poly_chknorm DILITHIUM_NAMESPACE(poly_chknorm) +int poly_chknorm(const poly *a, int32_t B); /* 0 if every |coefficient| < B, 1 otherwise; also 1 whenever B > (Q-1)/8 */ +#define poly_uniform_preinit DILITHIUM_NAMESPACE(poly_uniform_preinit) +void poly_uniform_preinit(poly *a, stream128_state *state); /* samples from a stream the caller has already initialised and will release */ +#define poly_uniform DILITHIUM_NAMESPACE(poly_uniform) +void poly_uniform(poly *a, const uint8_t seed[SEEDBYTES], uint16_t nonce); +#define poly_uniform_eta_preinit DILITHIUM_NAMESPACE(poly_uniform_eta_preinit) +void poly_uniform_eta_preinit(poly *a, stream256_state *state); /* samples from a stream the caller has already initialised and will release */ +#define poly_uniform_eta DILITHIUM_NAMESPACE(poly_uniform_eta) +void poly_uniform_eta(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce); +#define poly_uniform_gamma1_preinit DILITHIUM_NAMESPACE(poly_uniform_gamma1_preinit) +void poly_uniform_gamma1_preinit(poly *a, stream256_state *state); /* samples from a stream the caller has already initialised and will release */ +#define poly_uniform_gamma1 DILITHIUM_NAMESPACE(poly_uniform_gamma1) +void poly_uniform_gamma1(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce); +#define poly_challenge DILITHIUM_NAMESPACE(poly_challenge) +void poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]); + +#define poly_uniform_4x DILITHIUM_NAMESPACE(poly_uniform_4x) +void poly_uniform_4x(poly *a0, /* four polynomials from one seed, one nonce each */ + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[SEEDBYTES], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3); +#define poly_uniform_eta_4x DILITHIUM_NAMESPACE(poly_uniform_eta_4x) +void poly_uniform_eta_4x(poly *a0, /* four polynomials from one seed, one nonce each */ + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[CRHBYTES], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3); +#define
poly_uniform_gamma1_4x DILITHIUM_NAMESPACE(poly_uniform_gamma1_4x) +void poly_uniform_gamma1_4x(poly *a0, /* four polynomials from one seed, one nonce each */ + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[CRHBYTES], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3); + +#define polyeta_pack DILITHIUM_NAMESPACE(polyeta_pack) +void polyeta_pack(uint8_t r[POLYETA_PACKEDBYTES], const poly *a); +#define polyeta_unpack DILITHIUM_NAMESPACE(polyeta_unpack) +void polyeta_unpack(poly *r, const uint8_t a[POLYETA_PACKEDBYTES]); + +#define polyt1_pack DILITHIUM_NAMESPACE(polyt1_pack) +void polyt1_pack(uint8_t r[POLYT1_PACKEDBYTES], const poly *a); +#define polyt1_unpack DILITHIUM_NAMESPACE(polyt1_unpack) +void polyt1_unpack(poly *r, const uint8_t a[POLYT1_PACKEDBYTES]); + +#define polyt0_pack DILITHIUM_NAMESPACE(polyt0_pack) +void polyt0_pack(uint8_t r[POLYT0_PACKEDBYTES], const poly *a); +#define polyt0_unpack DILITHIUM_NAMESPACE(polyt0_unpack) +void polyt0_unpack(poly *r, const uint8_t a[POLYT0_PACKEDBYTES]); + +#define polyz_pack DILITHIUM_NAMESPACE(polyz_pack) +void polyz_pack(uint8_t r[POLYZ_PACKEDBYTES], const poly *a); +#define polyz_unpack DILITHIUM_NAMESPACE(polyz_unpack) +void polyz_unpack(poly *r, const uint8_t *a); /* unsized pointer: poly.c loads 32 bytes per step, so it reads past the packed length */ + +#define polyw1_pack DILITHIUM_NAMESPACE(polyw1_pack) +void polyw1_pack(uint8_t *r, const poly *a); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/polyvec.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/polyvec.c new file mode 100644 index 0000000000..6e2302168e --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/polyvec.c @@ -0,0 +1,588 @@ +#include <stdint.h> +#include "params.h" +#include "polyvec.h" +#include "poly.h" +#include "ntt.h" +#include "consts.h" + +/************************************************* +* Name: polyvec_matrix_expand +* +* Description: Implementation of ExpandA.
Generates matrix A with uniformly +* random coefficients a_{i,j} by performing rejection +* sampling on the output stream of SHAKE128(rho|j|i) +* +* Arguments: - polyvecl mat[K]: output matrix +* - const uint8_t rho[]: byte array containing seed rho +**************************************************/ + +#if K == 4 && L == 4 +/* K = L = 4: each row is exactly one 4-way poly_uniform_4x batch, so nothing spills into a following row and rowb is passed as NULL. */ void polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]) { + polyvec_matrix_expand_row0(&mat[0], NULL, rho); + polyvec_matrix_expand_row1(&mat[1], NULL, rho); + polyvec_matrix_expand_row2(&mat[2], NULL, rho); + polyvec_matrix_expand_row3(&mat[3], NULL, rho); +} + +/* Row 0: columns 0..3 use nonces 0..3, i.e. 256*row + column; each sampled polynomial is then passed through poly_nttunpack. */ void polyvec_matrix_expand_row0(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 0, 1, 2, 3); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); +} + +/* Row 1: nonces 256..259 = 256*1 + column. */ void polyvec_matrix_expand_row1(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 256, 257, 258, 259); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); +} + +/* Row 2: nonces 512..515 = 256*2 + column. */ void polyvec_matrix_expand_row2(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 512, 513, 514, 515); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); +} + +/* Row 3: nonces 768..771 = 256*3 + column. */ void polyvec_matrix_expand_row3(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 768, 769, 770, 771); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); +
poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); +} + +#elif K == 6 && L == 5 +/* K = 6, L = 5: rows are filled in two chains (rows 0-3 and rows 4-5). Samples come in batches of four, so each rowN call also writes the leading entries of the next row through rowb and the next call continues after them. row5's batch yields two polynomials past the last row; tmp is scratch space that receives them. */ void polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]) { + polyvecl tmp; + polyvec_matrix_expand_row0(&mat[0], &mat[1], rho); + polyvec_matrix_expand_row1(&mat[1], &mat[2], rho); + polyvec_matrix_expand_row2(&mat[2], &mat[3], rho); + polyvec_matrix_expand_row3(&mat[3], NULL, rho); + polyvec_matrix_expand_row4(&mat[4], &mat[5], rho); + polyvec_matrix_expand_row5(&mat[5], &tmp, rho); +} + +/* Fills all five entries of rowa plus rowb[0..2]; every nonce is 256*row + column. */ void polyvec_matrix_expand_row0(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 0, 1, 2, 3); + poly_uniform_4x(&rowa->vec[4], &rowb->vec[0], &rowb->vec[1], &rowb->vec[2], rho, 4, 256, 257, 258); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); + poly_nttunpack(&rowb->vec[2]); +} + +/* Writes only rowa[3..4] and rowb[0..1]; rowa[0..2] are expected to have been written by the preceding row0 call (through its rowb). */ void polyvec_matrix_expand_row1(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[3], &rowa->vec[4], &rowb->vec[0], &rowb->vec[1], rho, 259, 260, 512, 513); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); +} + +/* Writes only rowa[2..4] and rowb[0]; rowa[0..1] are expected to have been written by the preceding row1 call. */ void polyvec_matrix_expand_row2(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[2], &rowa->vec[3], &rowa->vec[4], &rowb->vec[0], rho, 514, 515, 516, 768); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowb->vec[0]); +} + +/* Writes only rowa[1..4]; rowa[0] is expected to have been written by the preceding row2 call. Ends the first chain, so rowb is unused. */ void polyvec_matrix_expand_row3(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[1], &rowa->vec[2], &rowa->vec[3], &rowa->vec[4], rho, 769, 770, 771, 772); +
poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); +} + +/* Starts the second chain: fills all five entries of rowa plus rowb[0..2]. */ void polyvec_matrix_expand_row4(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 1024, 1025, 1026, 1027); + poly_uniform_4x(&rowa->vec[4], &rowb->vec[0], &rowb->vec[1], &rowb->vec[2], rho, 1028, 1280, 1281, 1282); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); + poly_nttunpack(&rowb->vec[2]); +} + +/* Writes rowa[3..4]; rowa[0..2] are expected to have been written by the preceding row4 call. The batch also writes rowb[0..1] (nonces 1536, 1537), which are not unpacked here. */ void polyvec_matrix_expand_row5(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[3], &rowa->vec[4], &rowb->vec[0], &rowb->vec[1], rho, 1283, 1284, 1536, 1537); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); +} + +#elif K == 8 && L == 7 +/* K = 8, L = 7: two chains of four rows (0-3 and 4-7). Rows 3 and 7 end a chain exactly on a batch boundary and take no rowb. */ void polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]) { + polyvec_matrix_expand_row0(&mat[0], &mat[1], rho); + polyvec_matrix_expand_row1(&mat[1], &mat[2], rho); + polyvec_matrix_expand_row2(&mat[2], &mat[3], rho); + polyvec_matrix_expand_row3(&mat[3], NULL, rho); + polyvec_matrix_expand_row4(&mat[4], &mat[5], rho); + polyvec_matrix_expand_row5(&mat[5], &mat[6], rho); + polyvec_matrix_expand_row6(&mat[6], &mat[7], rho); + polyvec_matrix_expand_row7(&mat[7], NULL, rho); +} + +/* Fills all seven entries of rowa plus rowb[0]; every nonce is 256*row + column. */ void polyvec_matrix_expand_row0(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 0, 1, 2, 3); + poly_uniform_4x(&rowa->vec[4], &rowa->vec[5], &rowa->vec[6], &rowb->vec[0], rho, 4, 5, 6, 256); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); +
poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); + poly_nttunpack(&rowb->vec[0]); +} + +/* Writes rowa[1..6] and rowb[0..1]; rowa[0] is expected to have been written by the preceding row0 call. */ void polyvec_matrix_expand_row1(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[1], &rowa->vec[2], &rowa->vec[3], &rowa->vec[4], rho, 257, 258, 259, 260); + poly_uniform_4x(&rowa->vec[5], &rowa->vec[6], &rowb->vec[0], &rowb->vec[1], rho, 261, 262, 512, 513); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); +} + +/* Writes rowa[2..6] and rowb[0..2]; rowa[0..1] are expected to have been written by the preceding row1 call. */ void polyvec_matrix_expand_row2(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[2], &rowa->vec[3], &rowa->vec[4], &rowa->vec[5], rho, 514, 515, 516, 517); + poly_uniform_4x(&rowa->vec[6], &rowb->vec[0], &rowb->vec[1], &rowb->vec[2], rho, 518, 768, 769, 770); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); + poly_nttunpack(&rowb->vec[2]); +} + +/* Writes rowa[3..6]; rowa[0..2] are expected to have been written by the preceding row2 call. Ends the first chain, so rowb is unused. */ void polyvec_matrix_expand_row3(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[3], &rowa->vec[4], &rowa->vec[5], &rowa->vec[6], rho, 771, 772, 773, 774); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); +} + +/* Starts the second chain: fills all seven entries of rowa plus rowb[0]. */ void polyvec_matrix_expand_row4(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 1024, 1025, 1026, 1027); + poly_uniform_4x(&rowa->vec[4], &rowa->vec[5], &rowa->vec[6], &rowb->vec[0], rho, 1028, 1029, 1030, 1280); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); +
poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); + poly_nttunpack(&rowb->vec[0]); +} + +/* Writes rowa[1..6] and rowb[0..1]; rowa[0] is expected to have been written by the preceding row4 call. */ void polyvec_matrix_expand_row5(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[1], &rowa->vec[2], &rowa->vec[3], &rowa->vec[4], rho, 1281, 1282, 1283, 1284); + poly_uniform_4x(&rowa->vec[5], &rowa->vec[6], &rowb->vec[0], &rowb->vec[1], rho, 1285, 1286, 1536, 1537); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); +} + +/* Writes rowa[2..6] and rowb[0..2]; rowa[0..1] are expected to have been written by the preceding row5 call. */ void polyvec_matrix_expand_row6(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[2], &rowa->vec[3], &rowa->vec[4], &rowa->vec[5], rho, 1538, 1539, 1540, 1541); + poly_uniform_4x(&rowa->vec[6], &rowb->vec[0], &rowb->vec[1], &rowb->vec[2], rho, 1542, 1792, 1793, 1794); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); + poly_nttunpack(&rowb->vec[2]); +} + +/* Writes rowa[3..6]; rowa[0..2] are expected to have been written by the preceding row6 call. Ends the second chain, so rowb is unused. */ void polyvec_matrix_expand_row7(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[3], &rowa->vec[4], &rowa->vec[5], &rowa->vec[6], rho, 1795, 1796, 1797, 1798); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); +} + +#else +#error "polyvec_matrix_expand: unsupported (K, L) parameter set; expected (4,4), (6,5) or (8,7)" +#endif + +/* Matrix-vector product: entry i of t is polyvecl_pointwise_acc_montgomery applied to row i of mat and v, for all K rows. */ void polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + polyvecl_pointwise_acc_montgomery(&t->vec[i], &mat[i], v); +} +
+/**************************************************************/ +/************ Vectors of polynomials of length L **************/ +/**************************************************************/ + +/* Sample all L polynomials of v with poly_uniform_eta from the same seed; successive entries use nonce, nonce+1, ... (the local copy of nonce is post-incremented per entry). */ void polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_uniform_eta(&v->vec[i], seed, nonce++); +} + +/* Sample all L polynomials of v with poly_uniform_gamma1 from the same seed; entry i uses the per-polynomial nonce L*nonce + i. */ void polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_uniform_gamma1(&v->vec[i], seed, L*nonce + i); +} + +/* Apply poly_reduce to each of the L polynomials of v, in place. */ void polyvecl_reduce(polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_reduce(&v->vec[i]); +} + +/************************************************* +* Name: polyvecl_add +* +* Description: Add vectors of polynomials of length L. +* No modular reduction is performed. +* +* Arguments: - polyvecl *w: pointer to output vector +* - const polyvecl *u: pointer to first summand +* - const polyvecl *v: pointer to second summand +**************************************************/ +void polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v) { + unsigned int i; + + /* entry-wise: w[i] = u[i] + v[i] */ for(i = 0; i < L; ++i) + poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyvecl_ntt +* +* Description: Forward NTT of all polynomials in vector of length L. Output +* coefficients can be up to 16*Q larger than input coefficients.
+* +* Arguments: - polyvecl *v: pointer to input/output vector +**************************************************/ +void polyvecl_ntt(polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_ntt(&v->vec[i]); +} + +/* Apply poly_invntt_tomont to each of the L polynomials of v, in place. */ void polyvecl_invntt_tomont(polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_invntt_tomont(&v->vec[i]); +} + +/* Multiply every polynomial of v by the single polynomial a using poly_pointwise_montgomery; the product for entry i is stored in r->vec[i]. */ void polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]); +} + +/************************************************* +* Name: polyvecl_pointwise_acc_montgomery +* +* Description: Pointwise multiply vectors of polynomials of length L, multiply +* resulting vector by 2^{-32} and add (accumulate) polynomials +* in it. Input/output vectors are in NTT domain representation. +* +* Arguments: - poly *w: output polynomial +* - const polyvecl *u: pointer to first input vector +* - const polyvecl *v: pointer to second input vector +**************************************************/ +void polyvecl_pointwise_acc_montgomery(poly *w, const polyvecl *u, const polyvecl *v) { + /* u->vec->vec and v->vec->vec are the coefficient storage of the first polynomial of each vector; presumably pointwise_acc_avx walks all L polynomials from there -- confirm against its definition. qdata.vec is passed through as the routine's constants argument. */ pointwise_acc_avx(w->vec, u->vec->vec, v->vec->vec, qdata.vec); +} + +/************************************************* +* Name: polyvecl_chknorm +* +* Description: Check infinity norm of polynomials in vector of length L. +* Assumes input polyvecl to be reduced by polyvecl_reduce(). +* +* Arguments: - const polyvecl *v: pointer to vector +* - int32_t B: norm bound +* +* Returns 0 if norm of all polynomials is strictly smaller than B <= (Q-1)/8 +* and 1 otherwise.
+**************************************************/ +int polyvecl_chknorm(const polyvecl *v, int32_t bound) { + unsigned int i; + + for(i = 0; i < L; ++i) + /* stop at the first polynomial for which poly_chknorm reports a violation */ if(poly_chknorm(&v->vec[i], bound)) + return 1; + + return 0; +} + +/**************************************************************/ +/************ Vectors of polynomials of length K **************/ +/**************************************************************/ + +/* Sample all K polynomials of v with poly_uniform_eta from the same seed; successive entries use nonce, nonce+1, ... (the local copy of nonce is post-incremented per entry). */ void polyveck_uniform_eta(polyveck *v, const uint8_t seed[CRHBYTES], uint16_t nonce) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_uniform_eta(&v->vec[i], seed, nonce++); +} + +/************************************************* +* Name: polyveck_reduce +* +* Description: Reduce coefficients of polynomials in vector of length K +* to representatives in [-6283009,6283007]. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_reduce(polyveck *v) { + unsigned int i; + + /* in place, one poly_reduce call per entry */ for(i = 0; i < K; ++i) + poly_reduce(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_caddq +* +* Description: For all coefficients of polynomials in vector of length K +* add Q if coefficient is negative. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_caddq(polyveck *v) { + unsigned int i; + + /* in place, one poly_caddq call per entry */ for(i = 0; i < K; ++i) + poly_caddq(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_add +* +* Description: Add vectors of polynomials of length K. +* No modular reduction is performed.
+* +* Arguments: - polyveck *w: pointer to output vector +* - const polyveck *u: pointer to first summand +* - const polyveck *v: pointer to second summand +**************************************************/ +void polyveck_add(polyveck *w, const polyveck *u, const polyveck *v) { + unsigned int i; + + /* entry-wise: w[i] = u[i] + v[i] */ for(i = 0; i < K; ++i) + poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_sub +* +* Description: Subtract vectors of polynomials of length K. +* No modular reduction is performed. +* +* Arguments: - polyveck *w: pointer to output vector +* - const polyveck *u: pointer to first input vector +* - const polyveck *v: pointer to second input vector to be +* subtracted from first input vector +**************************************************/ +void polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v) { + unsigned int i; + + /* entry-wise: w[i] = u[i] - v[i] */ for(i = 0; i < K; ++i) + poly_sub(&w->vec[i], &u->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_shiftl +* +* Description: Multiply vector of polynomials of length K by 2^D without modular +* reduction. Assumes input coefficients to be less than 2^{31-D}. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_shiftl(polyveck *v) { + unsigned int i; + + /* in place, one poly_shiftl call per entry */ for(i = 0; i < K; ++i) + poly_shiftl(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_ntt +* +* Description: Forward NTT of all polynomials in vector of length K. Output +* coefficients can be up to 16*Q larger than input coefficients.
+* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_ntt(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_ntt(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_invntt_tomont +* +* Description: Inverse NTT and multiplication by 2^{32} of polynomials +* in vector of length K. Input coefficients need to be less +* than 2*Q. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_invntt_tomont(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_invntt_tomont(&v->vec[i]); +} + +/* Multiply every polynomial of v by the single polynomial a using poly_pointwise_montgomery; the product for entry i is stored in r->vec[i]. */ void polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]); +} + +/************************************************* +* Name: polyveck_chknorm +* +* Description: Check infinity norm of polynomials in vector of length K. +* Assumes input polyveck to be reduced by polyveck_reduce(). +* +* Arguments: - const polyveck *v: pointer to vector +* - int32_t bound: norm bound +* +* Returns 0 if the norm of every polynomial is strictly smaller than +* bound <= (Q-1)/8 and 1 otherwise. +**************************************************/ +int polyveck_chknorm(const polyveck *v, int32_t bound) { + unsigned int i; + + for(i = 0; i < K; ++i) + /* stop at the first polynomial for which poly_chknorm reports a violation */ if(poly_chknorm(&v->vec[i], bound)) + return 1; + + return 0; +} + +/************************************************* +* Name: polyveck_power2round +* +* Description: For all coefficients a of polynomials in vector of length K, +* compute a0, a1 such that a mod^+ Q = a1*2^D + a0 +* with -2^{D-1} < a0 <= 2^{D-1}. Assumes coefficients to be +* standard representatives.
+* +* Arguments: - polyveck *v1: pointer to output vector of polynomials with +* coefficients a1 +* - polyveck *v0: pointer to output vector of polynomials with +* coefficients a0 +* - const polyveck *v: pointer to input vector +**************************************************/ +void polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v) { + unsigned int i; + + /* entry-wise: split v[i] into v1[i] and v0[i] with poly_power2round */ for(i = 0; i < K; ++i) + poly_power2round(&v1->vec[i], &v0->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_decompose +* +* Description: For all coefficients a of polynomials in vector of length K, +* compute high and low bits a1, a0 such that a mod^+ Q = a1*ALPHA + a0 +* with -ALPHA/2 < a0 <= ALPHA/2 except a1 = (Q-1)/ALPHA where we +* set a1 = 0 and -ALPHA/2 <= a0 = a mod Q - Q < 0. +* Assumes coefficients to be standard representatives. +* +* Arguments: - polyveck *v1: pointer to output vector of polynomials with +* coefficients a1 +* - polyveck *v0: pointer to output vector of polynomials with +* coefficients a0 +* - const polyveck *v: pointer to input vector +**************************************************/ +void polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v) { + unsigned int i; + + /* entry-wise: split v[i] into v1[i] and v0[i] with poly_decompose */ for(i = 0; i < K; ++i) + poly_decompose(&v1->vec[i], &v0->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_make_hint +* +* Description: Compute hint vector. +* +* Arguments: - uint8_t *hint: pointer to output hint array +* - const polyveck *v0: pointer to low part of input vector +* - const polyveck *v1: pointer to high part of input vector +* +* Returns number of 1 bits.
+**************************************************/ +unsigned int polyveck_make_hint(uint8_t *hint, const polyveck *v0, const polyveck *v1) +{ + unsigned int i, n = 0; + + for(i = 0; i < K; ++i) + /* polynomial i writes its output starting at hint[n]; n then advances by poly_make_hint's return value, so the outputs of successive polynomials are laid out back to back */ n += poly_make_hint(&hint[n], &v0->vec[i], &v1->vec[i]); + + return n; +} + +/************************************************* +* Name: polyveck_use_hint +* +* Description: Use hint vector to correct the high bits of input vector. +* +* Arguments: - polyveck *w: pointer to output vector of polynomials with +* corrected high bits +* - const polyveck *u: pointer to input vector +* - const polyveck *h: pointer to input hint vector +**************************************************/ +void polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h) { + unsigned int i; + + /* entry-wise: w[i] = poly_use_hint applied to u[i] with hint polynomial h[i] */ for(i = 0; i < K; ++i) + poly_use_hint(&w->vec[i], &u->vec[i], &h->vec[i]); +} + +/* Pack each of the K polynomials of w1 with polyw1_pack; polynomial i goes to the POLYW1_PACKEDBYTES-sized slot starting at r[i*POLYW1_PACKEDBYTES]. */ void polyveck_pack_w1(uint8_t r[K*POLYW1_PACKEDBYTES], const polyveck *w1) { + unsigned int i; + + for(i = 0; i < K; ++i) + polyw1_pack(&r[i*POLYW1_PACKEDBYTES], &w1->vec[i]); +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/polyvec.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/polyvec.h new file mode 100644 index 0000000000..1b6dc87ac6 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/polyvec.h @@ -0,0 +1,105 @@ +#ifndef POLYVEC_H +#define POLYVEC_H + +#include +#include "params.h" +#include "poly.h" + +/* Vectors of polynomials of length L */ +typedef struct { + poly vec[L]; +} polyvecl; + +#define polyvecl_uniform_eta DILITHIUM_NAMESPACE(polyvecl_uniform_eta) +void polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define polyvecl_uniform_gamma1 DILITHIUM_NAMESPACE(polyvecl_uniform_gamma1) +void polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define polyvecl_reduce DILITHIUM_NAMESPACE(polyvecl_reduce) +void polyvecl_reduce(polyvecl *v); + +#define
polyvecl_add DILITHIUM_NAMESPACE(polyvecl_add) +void polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v); + +#define polyvecl_ntt DILITHIUM_NAMESPACE(polyvecl_ntt) +void polyvecl_ntt(polyvecl *v); +#define polyvecl_invntt_tomont DILITHIUM_NAMESPACE(polyvecl_invntt_tomont) +void polyvecl_invntt_tomont(polyvecl *v); +#define polyvecl_pointwise_poly_montgomery DILITHIUM_NAMESPACE(polyvecl_pointwise_poly_montgomery) +void polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v); +#define polyvecl_pointwise_acc_montgomery \ + DILITHIUM_NAMESPACE(polyvecl_pointwise_acc_montgomery) +void polyvecl_pointwise_acc_montgomery(poly *w, + const polyvecl *u, + const polyvecl *v); + +#define polyvecl_chknorm DILITHIUM_NAMESPACE(polyvecl_chknorm) +int polyvecl_chknorm(const polyvecl *v, int32_t B); + +/* Vectors of polynomials of length K */ +typedef struct { + poly vec[K]; +} polyveck; + +#define polyveck_uniform_eta DILITHIUM_NAMESPACE(polyveck_uniform_eta) +void polyveck_uniform_eta(polyveck *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define polyveck_reduce DILITHIUM_NAMESPACE(polyveck_reduce) +void polyveck_reduce(polyveck *v); +#define polyveck_caddq DILITHIUM_NAMESPACE(polyveck_caddq) +void polyveck_caddq(polyveck *v); + +#define polyveck_add DILITHIUM_NAMESPACE(polyveck_add) +void polyveck_add(polyveck *w, const polyveck *u, const polyveck *v); +#define polyveck_sub DILITHIUM_NAMESPACE(polyveck_sub) +void polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v); +#define polyveck_shiftl DILITHIUM_NAMESPACE(polyveck_shiftl) +void polyveck_shiftl(polyveck *v); + +#define polyveck_ntt DILITHIUM_NAMESPACE(polyveck_ntt) +void polyveck_ntt(polyveck *v); +#define polyveck_invntt_tomont DILITHIUM_NAMESPACE(polyveck_invntt_tomont) +void polyveck_invntt_tomont(polyveck *v); +#define polyveck_pointwise_poly_montgomery DILITHIUM_NAMESPACE(polyveck_pointwise_poly_montgomery) +void polyveck_pointwise_poly_montgomery(polyveck *r, const 
poly *a, const polyveck *v); + +#define polyveck_chknorm DILITHIUM_NAMESPACE(polyveck_chknorm) +int polyveck_chknorm(const polyveck *v, int32_t B); + +#define polyveck_power2round DILITHIUM_NAMESPACE(polyveck_power2round) +void polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v); +#define polyveck_decompose DILITHIUM_NAMESPACE(polyveck_decompose) +void polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v); +#define polyveck_make_hint DILITHIUM_NAMESPACE(polyveck_make_hint) +unsigned int polyveck_make_hint(uint8_t *hint, const polyveck *v0, const polyveck *v1); +#define polyveck_use_hint DILITHIUM_NAMESPACE(polyveck_use_hint) +void polyveck_use_hint(polyveck *w, const polyveck *v, const polyveck *h); + +#define polyveck_pack_w1 DILITHIUM_NAMESPACE(polyveck_pack_w1) +void polyveck_pack_w1(uint8_t r[K*POLYW1_PACKEDBYTES], const polyveck *w1); + +#define polyvec_matrix_expand DILITHIUM_NAMESPACE(polyvec_matrix_expand) +void polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]); + +#define polyvec_matrix_expand_row0 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row0) +void polyvec_matrix_expand_row0(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row1 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row1) +void polyvec_matrix_expand_row1(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row2 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row2) +void polyvec_matrix_expand_row2(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row3 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row3) +void polyvec_matrix_expand_row3(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row4 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row4) +void polyvec_matrix_expand_row4(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row5 
DILITHIUM_NAMESPACE(polyvec_matrix_expand_row5) +void polyvec_matrix_expand_row5(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row6 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row6) +void polyvec_matrix_expand_row6(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row7 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row7) +void polyvec_matrix_expand_row7(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); + +#define polyvec_matrix_pointwise_montgomery DILITHIUM_NAMESPACE(polyvec_matrix_pointwise_montgomery) +void polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/rejsample.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/rejsample.c new file mode 100644 index 0000000000..8b1dde4440 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/rejsample.c @@ -0,0 +1,476 @@ +#include +#include +#include "params.h" +#include "rejsample.h" +#include "symmetric.h" + +/* Compaction table: row m lists, in ascending order, the positions of the set bits of the 8-bit mask m, zero-padded to eight entries (e.g. row 5 = binary 101 -> {0, 2, 0, ...}). The rejection samplers in this file index it with an acceptance mask and use the row as a shuffle/permutation index. */ const uint8_t idxlut[256][8] = { + { 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0}, + { 1, 0, 0, 0, 0, 0, 0, 0}, + { 0, 1, 0, 0, 0, 0, 0, 0}, + { 2, 0, 0, 0, 0, 0, 0, 0}, + { 0, 2, 0, 0, 0, 0, 0, 0}, + { 1, 2, 0, 0, 0, 0, 0, 0}, + { 0, 1, 2, 0, 0, 0, 0, 0}, + { 3, 0, 0, 0, 0, 0, 0, 0}, + { 0, 3, 0, 0, 0, 0, 0, 0}, + { 1, 3, 0, 0, 0, 0, 0, 0}, + { 0, 1, 3, 0, 0, 0, 0, 0}, + { 2, 3, 0, 0, 0, 0, 0, 0}, + { 0, 2, 3, 0, 0, 0, 0, 0}, + { 1, 2, 3, 0, 0, 0, 0, 0}, + { 0, 1, 2, 3, 0, 0, 0, 0}, + { 4, 0, 0, 0, 0, 0, 0, 0}, + { 0, 4, 0, 0, 0, 0, 0, 0}, + { 1, 4, 0, 0, 0, 0, 0, 0}, + { 0, 1, 4, 0, 0, 0, 0, 0}, + { 2, 4, 0, 0, 0, 0, 0, 0}, + { 0, 2, 4, 0, 0, 0, 0, 0}, + { 1, 2, 4, 0, 0, 0, 0, 0}, + { 0, 1, 2, 4, 0, 0, 0, 0}, + { 3, 4, 0, 0, 0, 0, 0, 0}, + { 0, 3, 4, 0, 0, 0, 0, 0}, + { 1, 3, 4, 0, 0, 0, 0, 0}, + { 0, 1, 3, 4, 0, 0, 0, 0}, + { 2, 3, 4, 0,
0, 0, 0, 0}, + { 0, 2, 3, 4, 0, 0, 0, 0}, + { 1, 2, 3, 4, 0, 0, 0, 0}, + { 0, 1, 2, 3, 4, 0, 0, 0}, + { 5, 0, 0, 0, 0, 0, 0, 0}, + { 0, 5, 0, 0, 0, 0, 0, 0}, + { 1, 5, 0, 0, 0, 0, 0, 0}, + { 0, 1, 5, 0, 0, 0, 0, 0}, + { 2, 5, 0, 0, 0, 0, 0, 0}, + { 0, 2, 5, 0, 0, 0, 0, 0}, + { 1, 2, 5, 0, 0, 0, 0, 0}, + { 0, 1, 2, 5, 0, 0, 0, 0}, + { 3, 5, 0, 0, 0, 0, 0, 0}, + { 0, 3, 5, 0, 0, 0, 0, 0}, + { 1, 3, 5, 0, 0, 0, 0, 0}, + { 0, 1, 3, 5, 0, 0, 0, 0}, + { 2, 3, 5, 0, 0, 0, 0, 0}, + { 0, 2, 3, 5, 0, 0, 0, 0}, + { 1, 2, 3, 5, 0, 0, 0, 0}, + { 0, 1, 2, 3, 5, 0, 0, 0}, + { 4, 5, 0, 0, 0, 0, 0, 0}, + { 0, 4, 5, 0, 0, 0, 0, 0}, + { 1, 4, 5, 0, 0, 0, 0, 0}, + { 0, 1, 4, 5, 0, 0, 0, 0}, + { 2, 4, 5, 0, 0, 0, 0, 0}, + { 0, 2, 4, 5, 0, 0, 0, 0}, + { 1, 2, 4, 5, 0, 0, 0, 0}, + { 0, 1, 2, 4, 5, 0, 0, 0}, + { 3, 4, 5, 0, 0, 0, 0, 0}, + { 0, 3, 4, 5, 0, 0, 0, 0}, + { 1, 3, 4, 5, 0, 0, 0, 0}, + { 0, 1, 3, 4, 5, 0, 0, 0}, + { 2, 3, 4, 5, 0, 0, 0, 0}, + { 0, 2, 3, 4, 5, 0, 0, 0}, + { 1, 2, 3, 4, 5, 0, 0, 0}, + { 0, 1, 2, 3, 4, 5, 0, 0}, + { 6, 0, 0, 0, 0, 0, 0, 0}, + { 0, 6, 0, 0, 0, 0, 0, 0}, + { 1, 6, 0, 0, 0, 0, 0, 0}, + { 0, 1, 6, 0, 0, 0, 0, 0}, + { 2, 6, 0, 0, 0, 0, 0, 0}, + { 0, 2, 6, 0, 0, 0, 0, 0}, + { 1, 2, 6, 0, 0, 0, 0, 0}, + { 0, 1, 2, 6, 0, 0, 0, 0}, + { 3, 6, 0, 0, 0, 0, 0, 0}, + { 0, 3, 6, 0, 0, 0, 0, 0}, + { 1, 3, 6, 0, 0, 0, 0, 0}, + { 0, 1, 3, 6, 0, 0, 0, 0}, + { 2, 3, 6, 0, 0, 0, 0, 0}, + { 0, 2, 3, 6, 0, 0, 0, 0}, + { 1, 2, 3, 6, 0, 0, 0, 0}, + { 0, 1, 2, 3, 6, 0, 0, 0}, + { 4, 6, 0, 0, 0, 0, 0, 0}, + { 0, 4, 6, 0, 0, 0, 0, 0}, + { 1, 4, 6, 0, 0, 0, 0, 0}, + { 0, 1, 4, 6, 0, 0, 0, 0}, + { 2, 4, 6, 0, 0, 0, 0, 0}, + { 0, 2, 4, 6, 0, 0, 0, 0}, + { 1, 2, 4, 6, 0, 0, 0, 0}, + { 0, 1, 2, 4, 6, 0, 0, 0}, + { 3, 4, 6, 0, 0, 0, 0, 0}, + { 0, 3, 4, 6, 0, 0, 0, 0}, + { 1, 3, 4, 6, 0, 0, 0, 0}, + { 0, 1, 3, 4, 6, 0, 0, 0}, + { 2, 3, 4, 6, 0, 0, 0, 0}, + { 0, 2, 3, 4, 6, 0, 0, 0}, + { 1, 2, 3, 4, 6, 0, 0, 0}, + { 0, 1, 2, 3, 4, 6, 0, 0}, + { 5, 6, 0, 0, 0, 0, 0, 0}, + { 0, 5, 6, 
0, 0, 0, 0, 0}, + { 1, 5, 6, 0, 0, 0, 0, 0}, + { 0, 1, 5, 6, 0, 0, 0, 0}, + { 2, 5, 6, 0, 0, 0, 0, 0}, + { 0, 2, 5, 6, 0, 0, 0, 0}, + { 1, 2, 5, 6, 0, 0, 0, 0}, + { 0, 1, 2, 5, 6, 0, 0, 0}, + { 3, 5, 6, 0, 0, 0, 0, 0}, + { 0, 3, 5, 6, 0, 0, 0, 0}, + { 1, 3, 5, 6, 0, 0, 0, 0}, + { 0, 1, 3, 5, 6, 0, 0, 0}, + { 2, 3, 5, 6, 0, 0, 0, 0}, + { 0, 2, 3, 5, 6, 0, 0, 0}, + { 1, 2, 3, 5, 6, 0, 0, 0}, + { 0, 1, 2, 3, 5, 6, 0, 0}, + { 4, 5, 6, 0, 0, 0, 0, 0}, + { 0, 4, 5, 6, 0, 0, 0, 0}, + { 1, 4, 5, 6, 0, 0, 0, 0}, + { 0, 1, 4, 5, 6, 0, 0, 0}, + { 2, 4, 5, 6, 0, 0, 0, 0}, + { 0, 2, 4, 5, 6, 0, 0, 0}, + { 1, 2, 4, 5, 6, 0, 0, 0}, + { 0, 1, 2, 4, 5, 6, 0, 0}, + { 3, 4, 5, 6, 0, 0, 0, 0}, + { 0, 3, 4, 5, 6, 0, 0, 0}, + { 1, 3, 4, 5, 6, 0, 0, 0}, + { 0, 1, 3, 4, 5, 6, 0, 0}, + { 2, 3, 4, 5, 6, 0, 0, 0}, + { 0, 2, 3, 4, 5, 6, 0, 0}, + { 1, 2, 3, 4, 5, 6, 0, 0}, + { 0, 1, 2, 3, 4, 5, 6, 0}, + { 7, 0, 0, 0, 0, 0, 0, 0}, + { 0, 7, 0, 0, 0, 0, 0, 0}, + { 1, 7, 0, 0, 0, 0, 0, 0}, + { 0, 1, 7, 0, 0, 0, 0, 0}, + { 2, 7, 0, 0, 0, 0, 0, 0}, + { 0, 2, 7, 0, 0, 0, 0, 0}, + { 1, 2, 7, 0, 0, 0, 0, 0}, + { 0, 1, 2, 7, 0, 0, 0, 0}, + { 3, 7, 0, 0, 0, 0, 0, 0}, + { 0, 3, 7, 0, 0, 0, 0, 0}, + { 1, 3, 7, 0, 0, 0, 0, 0}, + { 0, 1, 3, 7, 0, 0, 0, 0}, + { 2, 3, 7, 0, 0, 0, 0, 0}, + { 0, 2, 3, 7, 0, 0, 0, 0}, + { 1, 2, 3, 7, 0, 0, 0, 0}, + { 0, 1, 2, 3, 7, 0, 0, 0}, + { 4, 7, 0, 0, 0, 0, 0, 0}, + { 0, 4, 7, 0, 0, 0, 0, 0}, + { 1, 4, 7, 0, 0, 0, 0, 0}, + { 0, 1, 4, 7, 0, 0, 0, 0}, + { 2, 4, 7, 0, 0, 0, 0, 0}, + { 0, 2, 4, 7, 0, 0, 0, 0}, + { 1, 2, 4, 7, 0, 0, 0, 0}, + { 0, 1, 2, 4, 7, 0, 0, 0}, + { 3, 4, 7, 0, 0, 0, 0, 0}, + { 0, 3, 4, 7, 0, 0, 0, 0}, + { 1, 3, 4, 7, 0, 0, 0, 0}, + { 0, 1, 3, 4, 7, 0, 0, 0}, + { 2, 3, 4, 7, 0, 0, 0, 0}, + { 0, 2, 3, 4, 7, 0, 0, 0}, + { 1, 2, 3, 4, 7, 0, 0, 0}, + { 0, 1, 2, 3, 4, 7, 0, 0}, + { 5, 7, 0, 0, 0, 0, 0, 0}, + { 0, 5, 7, 0, 0, 0, 0, 0}, + { 1, 5, 7, 0, 0, 0, 0, 0}, + { 0, 1, 5, 7, 0, 0, 0, 0}, + { 2, 5, 7, 0, 0, 0, 0, 0}, + { 0, 2, 5, 7, 0, 0, 0, 0}, + { 1, 2, 
5, 7, 0, 0, 0, 0}, + { 0, 1, 2, 5, 7, 0, 0, 0}, + { 3, 5, 7, 0, 0, 0, 0, 0}, + { 0, 3, 5, 7, 0, 0, 0, 0}, + { 1, 3, 5, 7, 0, 0, 0, 0}, + { 0, 1, 3, 5, 7, 0, 0, 0}, + { 2, 3, 5, 7, 0, 0, 0, 0}, + { 0, 2, 3, 5, 7, 0, 0, 0}, + { 1, 2, 3, 5, 7, 0, 0, 0}, + { 0, 1, 2, 3, 5, 7, 0, 0}, + { 4, 5, 7, 0, 0, 0, 0, 0}, + { 0, 4, 5, 7, 0, 0, 0, 0}, + { 1, 4, 5, 7, 0, 0, 0, 0}, + { 0, 1, 4, 5, 7, 0, 0, 0}, + { 2, 4, 5, 7, 0, 0, 0, 0}, + { 0, 2, 4, 5, 7, 0, 0, 0}, + { 1, 2, 4, 5, 7, 0, 0, 0}, + { 0, 1, 2, 4, 5, 7, 0, 0}, + { 3, 4, 5, 7, 0, 0, 0, 0}, + { 0, 3, 4, 5, 7, 0, 0, 0}, + { 1, 3, 4, 5, 7, 0, 0, 0}, + { 0, 1, 3, 4, 5, 7, 0, 0}, + { 2, 3, 4, 5, 7, 0, 0, 0}, + { 0, 2, 3, 4, 5, 7, 0, 0}, + { 1, 2, 3, 4, 5, 7, 0, 0}, + { 0, 1, 2, 3, 4, 5, 7, 0}, + { 6, 7, 0, 0, 0, 0, 0, 0}, + { 0, 6, 7, 0, 0, 0, 0, 0}, + { 1, 6, 7, 0, 0, 0, 0, 0}, + { 0, 1, 6, 7, 0, 0, 0, 0}, + { 2, 6, 7, 0, 0, 0, 0, 0}, + { 0, 2, 6, 7, 0, 0, 0, 0}, + { 1, 2, 6, 7, 0, 0, 0, 0}, + { 0, 1, 2, 6, 7, 0, 0, 0}, + { 3, 6, 7, 0, 0, 0, 0, 0}, + { 0, 3, 6, 7, 0, 0, 0, 0}, + { 1, 3, 6, 7, 0, 0, 0, 0}, + { 0, 1, 3, 6, 7, 0, 0, 0}, + { 2, 3, 6, 7, 0, 0, 0, 0}, + { 0, 2, 3, 6, 7, 0, 0, 0}, + { 1, 2, 3, 6, 7, 0, 0, 0}, + { 0, 1, 2, 3, 6, 7, 0, 0}, + { 4, 6, 7, 0, 0, 0, 0, 0}, + { 0, 4, 6, 7, 0, 0, 0, 0}, + { 1, 4, 6, 7, 0, 0, 0, 0}, + { 0, 1, 4, 6, 7, 0, 0, 0}, + { 2, 4, 6, 7, 0, 0, 0, 0}, + { 0, 2, 4, 6, 7, 0, 0, 0}, + { 1, 2, 4, 6, 7, 0, 0, 0}, + { 0, 1, 2, 4, 6, 7, 0, 0}, + { 3, 4, 6, 7, 0, 0, 0, 0}, + { 0, 3, 4, 6, 7, 0, 0, 0}, + { 1, 3, 4, 6, 7, 0, 0, 0}, + { 0, 1, 3, 4, 6, 7, 0, 0}, + { 2, 3, 4, 6, 7, 0, 0, 0}, + { 0, 2, 3, 4, 6, 7, 0, 0}, + { 1, 2, 3, 4, 6, 7, 0, 0}, + { 0, 1, 2, 3, 4, 6, 7, 0}, + { 5, 6, 7, 0, 0, 0, 0, 0}, + { 0, 5, 6, 7, 0, 0, 0, 0}, + { 1, 5, 6, 7, 0, 0, 0, 0}, + { 0, 1, 5, 6, 7, 0, 0, 0}, + { 2, 5, 6, 7, 0, 0, 0, 0}, + { 0, 2, 5, 6, 7, 0, 0, 0}, + { 1, 2, 5, 6, 7, 0, 0, 0}, + { 0, 1, 2, 5, 6, 7, 0, 0}, + { 3, 5, 6, 7, 0, 0, 0, 0}, + { 0, 3, 5, 6, 7, 0, 0, 0}, + { 1, 3, 5, 6, 7, 0, 0, 0}, + { 0, 
1, 3, 5, 6, 7, 0, 0}, + { 2, 3, 5, 6, 7, 0, 0, 0}, + { 0, 2, 3, 5, 6, 7, 0, 0}, + { 1, 2, 3, 5, 6, 7, 0, 0}, + { 0, 1, 2, 3, 5, 6, 7, 0}, + { 4, 5, 6, 7, 0, 0, 0, 0}, + { 0, 4, 5, 6, 7, 0, 0, 0}, + { 1, 4, 5, 6, 7, 0, 0, 0}, + { 0, 1, 4, 5, 6, 7, 0, 0}, + { 2, 4, 5, 6, 7, 0, 0, 0}, + { 0, 2, 4, 5, 6, 7, 0, 0}, + { 1, 2, 4, 5, 6, 7, 0, 0}, + { 0, 1, 2, 4, 5, 6, 7, 0}, + { 3, 4, 5, 6, 7, 0, 0, 0}, + { 0, 3, 4, 5, 6, 7, 0, 0}, + { 1, 3, 4, 5, 6, 7, 0, 0}, + { 0, 1, 3, 4, 5, 6, 7, 0}, + { 2, 3, 4, 5, 6, 7, 0, 0}, + { 0, 2, 3, 4, 5, 6, 7, 0}, + { 1, 2, 3, 4, 5, 6, 7, 0}, + { 0, 1, 2, 3, 4, 5, 6, 7} +}; + +unsigned int rej_uniform_avx(int32_t * restrict r, const uint8_t buf[REJ_UNIFORM_BUFLEN+8]) +{ + unsigned int ctr, pos; + uint32_t good; + __m256i d, tmp; + const __m256i bound = _mm256_set1_epi32(Q); + const __m256i mask = _mm256_set1_epi32(0x7FFFFF); + const __m256i idx8 = _mm256_set_epi8(-1,15,14,13,-1,12,11,10, + -1, 9, 8, 7,-1, 6, 5, 4, + -1,11,10, 9,-1, 8, 7, 6, + -1, 5, 4, 3,-1, 2, 1, 0); + + ctr = pos = 0; + while(pos <= REJ_UNIFORM_BUFLEN - 24) { + d = _mm256_loadu_si256((__m256i *)&buf[pos]); + d = _mm256_permute4x64_epi64(d, 0x94); + d = _mm256_shuffle_epi8(d, idx8); + d = _mm256_and_si256(d, mask); + pos += 24; + + tmp = _mm256_sub_epi32(d, bound); + good = _mm256_movemask_ps((__m256)tmp); + tmp = _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i *)&idxlut[good])); + d = _mm256_permutevar8x32_epi32(d, tmp); + + _mm256_storeu_si256((__m256i *)&r[ctr], d); + ctr += _mm_popcnt_u32(good); + + if(ctr > N - 8) break; + } + + uint32_t t; + while(ctr < N && pos <= REJ_UNIFORM_BUFLEN - 3) { + t = buf[pos++]; + t |= (uint32_t)buf[pos++] << 8; + t |= (uint32_t)buf[pos++] << 16; + t &= 0x7FFFFF; + + if(t < Q) + r[ctr++] = t; + } + + return ctr; +} + +#if ETA == 2 +unsigned int rej_eta_avx(int32_t * restrict r, const uint8_t buf[REJ_UNIFORM_ETA_BUFLEN]) { + unsigned int ctr, pos; + uint32_t good; + __m256i f0, f1, f2; + __m128i g0, g1; + const __m256i mask = 
_mm256_set1_epi8(15); + const __m256i eta = _mm256_set1_epi8(ETA); + const __m256i bound = mask; + const __m256i v = _mm256_set1_epi32(-6560); + const __m256i p = _mm256_set1_epi32(5); + + ctr = pos = 0; + while(ctr <= N - 8 && pos <= REJ_UNIFORM_ETA_BUFLEN - 16) { + f0 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i *)&buf[pos])); + f1 = _mm256_slli_epi16(f0,4); + f0 = _mm256_or_si256(f0,f1); + f0 = _mm256_and_si256(f0,mask); + + f1 = _mm256_sub_epi8(f0,bound); + f0 = _mm256_sub_epi8(eta,f0); + good = _mm256_movemask_epi8(f1); + + g0 = _mm256_castsi256_si128(f0); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + f2 = _mm256_mulhrs_epi16(f1,v); + f2 = _mm256_mullo_epi16(f2,p); + f1 = _mm256_add_epi32(f1,f2); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if(ctr > N - 8) break; + g0 = _mm_bsrli_si128(g0,8); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + f2 = _mm256_mulhrs_epi16(f1,v); + f2 = _mm256_mullo_epi16(f2,p); + f1 = _mm256_add_epi32(f1,f2); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if(ctr > N - 8) break; + g0 = _mm256_extracti128_si256(f0,1); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + f2 = _mm256_mulhrs_epi16(f1,v); + f2 = _mm256_mullo_epi16(f2,p); + f1 = _mm256_add_epi32(f1,f2); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if(ctr > N - 8) break; + g0 = _mm_bsrli_si128(g0,8); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + f2 = _mm256_mulhrs_epi16(f1,v); + f2 = _mm256_mullo_epi16(f2,p); + f1 = _mm256_add_epi32(f1,f2); + _mm256_storeu_si256((__m256i 
*)&r[ctr],f1); + ctr += _mm_popcnt_u32(good); + pos += 4; + } + + uint32_t t0, t1; + while(ctr < N && pos < REJ_UNIFORM_ETA_BUFLEN) { + t0 = buf[pos] & 0x0F; + t1 = buf[pos++] >> 4; + + if(t0 < 15) { + t0 = t0 - (205*t0 >> 10)*5; + r[ctr++] = 2 - t0; + } + if(t1 < 15 && ctr < N) { + t1 = t1 - (205*t1 >> 10)*5; + r[ctr++] = 2 - t1; + } + } + + return ctr; +} + +#elif ETA == 4 +unsigned int rej_eta_avx(int32_t * restrict r, const uint8_t buf[REJ_UNIFORM_ETA_BUFLEN]) { + unsigned int ctr, pos; + uint32_t good; + __m256i f0, f1; + __m128i g0, g1; + const __m256i mask = _mm256_set1_epi8(15); + const __m256i eta = _mm256_set1_epi8(4); + const __m256i bound = _mm256_set1_epi8(9); + + ctr = pos = 0; + while(ctr <= N - 8 && pos <= REJ_UNIFORM_ETA_BUFLEN - 16) { + f0 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i *)&buf[pos])); + f1 = _mm256_slli_epi16(f0,4); + f0 = _mm256_or_si256(f0,f1); + f0 = _mm256_and_si256(f0,mask); + + f1 = _mm256_sub_epi8(f0,bound); + f0 = _mm256_sub_epi8(eta,f0); + good = _mm256_movemask_epi8(f1); + + g0 = _mm256_castsi256_si128(f0); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if(ctr > N - 8) break; + g0 = _mm_bsrli_si128(g0,8); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if(ctr > N - 8) break; + g0 = _mm256_extracti128_si256(f0,1); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if(ctr > N - 8) break; + g0 = _mm_bsrli_si128(g0,8); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good]); + g1 
= _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good); + pos += 4; + } + + uint32_t t0, t1; + while(ctr < N && pos < REJ_UNIFORM_ETA_BUFLEN) { + t0 = buf[pos] & 0x0F; + t1 = buf[pos++] >> 4; + + if(t0 < 9) + r[ctr++] = 4 - t0; + if(t1 < 9 && ctr < N) + r[ctr++] = 4 - t1; + } + + return ctr; +} +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/rejsample.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/rejsample.h new file mode 100644 index 0000000000..61f3f357a5 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/rejsample.h @@ -0,0 +1,28 @@ +#ifndef REJSAMPLE_H +#define REJSAMPLE_H + +#include +#include "params.h" +#include "symmetric.h" + +#define REJ_UNIFORM_NBLOCKS ((768+STREAM128_BLOCKBYTES-1)/STREAM128_BLOCKBYTES) +#define REJ_UNIFORM_BUFLEN (REJ_UNIFORM_NBLOCKS*STREAM128_BLOCKBYTES) + +#if ETA == 2 +#define REJ_UNIFORM_ETA_NBLOCKS ((136+STREAM256_BLOCKBYTES-1)/STREAM256_BLOCKBYTES) +#elif ETA == 4 +#define REJ_UNIFORM_ETA_NBLOCKS ((227+STREAM256_BLOCKBYTES-1)/STREAM256_BLOCKBYTES) +#endif +#define REJ_UNIFORM_ETA_BUFLEN (REJ_UNIFORM_ETA_NBLOCKS*STREAM256_BLOCKBYTES) + +#define idxlut DILITHIUM_NAMESPACE(idxlut) +extern const uint8_t idxlut[256][8]; + +#define rej_uniform_avx DILITHIUM_NAMESPACE(rej_uniform_avx) +unsigned int rej_uniform_avx(int32_t *r, const uint8_t buf[REJ_UNIFORM_BUFLEN+8]); + +#define rej_eta_avx DILITHIUM_NAMESPACE(rej_eta_avx) +unsigned int rej_eta_avx(int32_t *r, const uint8_t buf[REJ_UNIFORM_ETA_BUFLEN]); + +#endif + diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/rounding.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/rounding.c new file mode 100644 index 0000000000..3ada656776 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/rounding.c @@ -0,0 +1,200 @@ +#include +#include +#include 
+#include "params.h" +#include "rounding.h" +#include "rejsample.h" +#include "consts.h" + +#define _mm256_blendv_epi32(a,b,mask) \ + _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(a), \ + _mm256_castsi256_ps(b), \ + _mm256_castsi256_ps(mask))) + +/************************************************* +* Name: power2round +* +* Description: For finite field elements a, compute a0, a1 such that +* a mod^+ Q = a1*2^D + a0 with -2^{D-1} < a0 <= 2^{D-1}. +* Assumes a to be positive standard representative. +* +* Arguments: - __m256i *a1: output array of length N/8 with high bits +* - __m256i *a0: output array of length N/8 with low bits a0 +* - const __m256i *a: input array of length N/8 +* +**************************************************/ +void power2round_avx(__m256i *a1, __m256i *a0, const __m256i *a) +{ + unsigned int i; + __m256i f,f0,f1; + const __m256i mask = _mm256_set1_epi32(-(1 << D)); + const __m256i half = _mm256_set1_epi32((1 << (D-1)) - 1); + + for(i = 0; i < N/8; ++i) { + f = _mm256_load_si256(&a[i]); + f1 = _mm256_add_epi32(f,half); + f0 = _mm256_and_si256(f1,mask); + f1 = _mm256_srli_epi32(f1,D); + f0 = _mm256_sub_epi32(f,f0); + _mm256_store_si256(&a1[i],f1); + _mm256_store_si256(&a0[i],f0); + } +} + +/************************************************* +* Name: decompose +* +* Description: For finite field element a, compute high and low parts a0, a1 such +* that a mod^+ Q = a1*ALPHA + a0 with -ALPHA/2 < a0 <= ALPHA/2 except +* if a1 = (Q-1)/ALPHA where we set a1 = 0 and +* -ALPHA/2 <= a0 = a mod Q - Q < 0. Assumes a to be positive standard +* representative. 
+* +* Arguments: - __m256i *a1: output array of length N/8 with high parts +* - __m256i *a0: output array of length N/8 with low parts a0 +* - const __m256i *a: input array of length N/8 +* +**************************************************/ +#if GAMMA2 == (Q-1)/32 +void decompose_avx(__m256i *a1, __m256i *a0, const __m256i *a) +{ + unsigned int i; + __m256i f,f0,f1; + const __m256i q = _mm256_load_si256(&qdata.vec[_8XQ/8]); + const __m256i hq = _mm256_srli_epi32(q,1); + const __m256i v = _mm256_set1_epi32(1025); + const __m256i alpha = _mm256_set1_epi32(2*GAMMA2); + const __m256i off = _mm256_set1_epi32(127); + const __m256i shift = _mm256_set1_epi32(512); + const __m256i mask = _mm256_set1_epi32(15); + + for(i=0;i +#include +#include "params.h" + +#define power2round_avx DILITHIUM_NAMESPACE(power2round_avx) +void power2round_avx(__m256i *a1, __m256i *a0, const __m256i *a); +#define decompose_avx DILITHIUM_NAMESPACE(decompose_avx) +void decompose_avx(__m256i *a1, __m256i *a0, const __m256i *a); +#define make_hint_avx DILITHIUM_NAMESPACE(make_hint_avx) +unsigned int make_hint_avx(uint8_t hint[N], const __m256i *a0, const __m256i *a1); +#define use_hint_avx DILITHIUM_NAMESPACE(use_hint_avx) +void use_hint_avx(__m256i *b, const __m256i *a, const __m256i *hint); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/shuffle.S b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/shuffle.S new file mode 100644 index 0000000000..133e05132b --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/shuffle.S @@ -0,0 +1,52 @@ +#include "consts.h" +.include "shuffle.inc" + +.text +nttunpack128_avx: +#load +vmovdqa (%rdi),%ymm4 +vmovdqa 32(%rdi),%ymm5 +vmovdqa 64(%rdi),%ymm6 +vmovdqa 96(%rdi),%ymm7 +vmovdqa 128(%rdi),%ymm8 +vmovdqa 160(%rdi),%ymm9 +vmovdqa 192(%rdi),%ymm10 +vmovdqa 224(%rdi),%ymm11 + +shuffle8 4,8,3,8 +shuffle8 5,9,4,9 +shuffle8 6,10,5,10 +shuffle8 7,11,6,11 + +shuffle4 3,5,7,5 +shuffle4 
8,10,3,10 +shuffle4 4,6,8,6 +shuffle4 9,11,4,11 + +shuffle2 7,8,9,8 +shuffle2 5,6,7,6 +shuffle2 3,4,5,4 +shuffle2 10,11,3,11 + +#store +vmovdqa %ymm9,(%rdi) +vmovdqa %ymm8,32(%rdi) +vmovdqa %ymm7,64(%rdi) +vmovdqa %ymm6,96(%rdi) +vmovdqa %ymm5,128(%rdi) +vmovdqa %ymm4,160(%rdi) +vmovdqa %ymm3,192(%rdi) +vmovdqa %ymm11,224(%rdi) + +ret + +.global cdecl(nttunpack_avx) +cdecl(nttunpack_avx): +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +ret diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/shuffle.inc b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/shuffle.inc new file mode 100644 index 0000000000..73e9ffe03c --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/shuffle.inc @@ -0,0 +1,25 @@ +.macro shuffle8 r0,r1,r2,r3 +vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2 +vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle4 r0,r1,r2,r3 +vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2 +vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle2 r0,r1,r2,r3 +#vpsllq $32,%ymm\r1,%ymm\r2 +vmovsldup %ymm\r1,%ymm\r2 +vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 +vpsrlq $32,%ymm\r0,%ymm\r0 +#vmovshdup %ymm\r0,%ymm\r0 +vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle1 r0,r1,r2,r3 +vpslld $16,%ymm\r1,%ymm\r2 +vpblendw $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 +vpsrld $16,%ymm\r0,%ymm\r0 +vpblendw $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 +.endm diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/sign.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/sign.c new file mode 100644 index 0000000000..a39f8515c4 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/sign.c @@ -0,0 +1,445 @@ +#include +#include +#include "align.h" +#include "params.h" +#include "sign.h" +#include "packing.h" +#include "polyvec.h" +#include "poly.h" +#include "randombytes.h" +#include 
"symmetric.h" +#include "fips202.h" + +static inline void polyvec_matrix_expand_row(polyvecl **row, polyvecl buf[2], const uint8_t rho[SEEDBYTES], unsigned int i) { + switch(i) { + case 0: + polyvec_matrix_expand_row0(buf, buf + 1, rho); + *row = buf; + break; + case 1: + polyvec_matrix_expand_row1(buf + 1, buf, rho); + *row = buf + 1; + break; + case 2: + polyvec_matrix_expand_row2(buf, buf + 1, rho); + *row = buf; + break; + case 3: + polyvec_matrix_expand_row3(buf + 1, buf, rho); + *row = buf + 1; + break; +#if K > 4 + case 4: + polyvec_matrix_expand_row4(buf, buf + 1, rho); + *row = buf; + break; + case 5: + polyvec_matrix_expand_row5(buf + 1, buf, rho); + *row = buf + 1; + break; +#endif +#if K > 6 + case 6: + polyvec_matrix_expand_row6(buf, buf + 1, rho); + *row = buf; + break; + case 7: + polyvec_matrix_expand_row7(buf + 1, buf, rho); + *row = buf + 1; + break; +#endif + } +} + +/************************************************* +* Name: crypto_sign_keypair +* +* Description: Generates public and private key. 
+* +* Arguments: - uint8_t *pk: pointer to output public key (allocated +* array of CRYPTO_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key (allocated +* array of CRYPTO_SECRETKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { + unsigned int i; + uint8_t seedbuf[2*SEEDBYTES + CRHBYTES]; + const uint8_t *rho, *rhoprime, *key; + polyvecl rowbuf[2]; + polyvecl s1, *row = rowbuf; + polyveck s2; + poly t1, t0; + + /* Get randomness for rho, rhoprime and key */ + randombytes(seedbuf, SEEDBYTES); + shake256(seedbuf, 2*SEEDBYTES + CRHBYTES, seedbuf, SEEDBYTES); + rho = seedbuf; + rhoprime = rho + SEEDBYTES; + key = rhoprime + CRHBYTES; + + /* Store rho, key */ + memcpy(pk, rho, SEEDBYTES); + memcpy(sk, rho, SEEDBYTES); + memcpy(sk + SEEDBYTES, key, SEEDBYTES); + + /* Sample short vectors s1 and s2 */ +#if K == 4 && L == 4 + poly_uniform_eta_4x(&s1.vec[0], &s1.vec[1], &s1.vec[2], &s1.vec[3], rhoprime, 0, 1, 2, 3); + poly_uniform_eta_4x(&s2.vec[0], &s2.vec[1], &s2.vec[2], &s2.vec[3], rhoprime, 4, 5, 6, 7); +#elif K == 6 && L == 5 + poly_uniform_eta_4x(&s1.vec[0], &s1.vec[1], &s1.vec[2], &s1.vec[3], rhoprime, 0, 1, 2, 3); + poly_uniform_eta_4x(&s1.vec[4], &s2.vec[0], &s2.vec[1], &s2.vec[2], rhoprime, 4, 5, 6, 7); + poly_uniform_eta_4x(&s2.vec[3], &s2.vec[4], &s2.vec[5], &t0, rhoprime, 8, 9, 10, 11); +#elif K == 8 && L == 7 + poly_uniform_eta_4x(&s1.vec[0], &s1.vec[1], &s1.vec[2], &s1.vec[3], rhoprime, 0, 1, 2, 3); + poly_uniform_eta_4x(&s1.vec[4], &s1.vec[5], &s1.vec[6], &s2.vec[0], rhoprime, 4, 5, 6, 7); + poly_uniform_eta_4x(&s2.vec[1], &s2.vec[2], &s2.vec[3], &s2.vec[4], rhoprime, 8, 9, 10, 11); + poly_uniform_eta_4x(&s2.vec[5], &s2.vec[6], &s2.vec[7], &t0, rhoprime, 12, 13, 14, 15); +#else +#error +#endif + + /* Pack secret vectors */ + for(i = 0; i < L; i++) + polyeta_pack(sk + 2*SEEDBYTES + TRBYTES + i*POLYETA_PACKEDBYTES, &s1.vec[i]); + for(i = 0; i < 
K; i++) + polyeta_pack(sk + 2*SEEDBYTES + TRBYTES + (L + i)*POLYETA_PACKEDBYTES, &s2.vec[i]); + + /* Transform s1 */ + polyvecl_ntt(&s1); + + for(i = 0; i < K; i++) { + /* Expand matrix row */ + polyvec_matrix_expand_row(&row, rowbuf, rho, i); + + /* Compute inner-product */ + polyvecl_pointwise_acc_montgomery(&t1, row, &s1); + poly_invntt_tomont(&t1); + + /* Add error polynomial */ + poly_add(&t1, &t1, &s2.vec[i]); + + /* Round t and pack t1, t0 */ + poly_caddq(&t1); + poly_power2round(&t1, &t0, &t1); + polyt1_pack(pk + SEEDBYTES + i*POLYT1_PACKEDBYTES, &t1); + polyt0_pack(sk + 2*SEEDBYTES + TRBYTES + (L+K)*POLYETA_PACKEDBYTES + i*POLYT0_PACKEDBYTES, &t0); + } + + /* Compute H(rho, t1) and store in secret key */ + shake256(sk + 2*SEEDBYTES, TRBYTES, pk, CRYPTO_PUBLICKEYBYTES); + + return 0; +} + +/************************************************* +* Name: crypto_sign_signature +* +* Description: Computes signature. +* +* Arguments: - uint8_t *sig: pointer to output signature (of length CRYPTO_BYTES) +* - size_t *siglen: pointer to output length of signature +* - uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int crypto_sign_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t mlen, const uint8_t *sk) { + unsigned int i, n, pos; + uint8_t seedbuf[2*SEEDBYTES + TRBYTES + RNDBYTES + 2*CRHBYTES]; + uint8_t *rho, *tr, *key, *rnd, *mu, *rhoprime; + uint8_t hintbuf[N]; + uint8_t *hint = sig + CTILDEBYTES + L*POLYZ_PACKEDBYTES; + uint64_t nonce = 0; + polyvecl mat[K], s1, z; + polyveck t0, s2, w1; + poly c, tmp; + union { + polyvecl y; + polyveck w0; + } tmpv; + shake256incctx state; + + rho = seedbuf; + tr = rho + SEEDBYTES; + key = tr + TRBYTES; + rnd = key + SEEDBYTES; + mu = rnd + RNDBYTES; + rhoprime = mu + CRHBYTES; + unpack_sk(rho, tr, key, &t0, &s1, &s2, sk); + + /* Compute CRH(tr, msg) */ + 
shake256_inc_init(&state); + shake256_inc_absorb(&state, tr, TRBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(mu, CRHBYTES, &state); + +#ifdef DILITHIUM_RANDOMIZED_SIGNING + randombytes(rnd, RNDBYTES); +#else + memset(rnd, 0, RNDBYTES); +#endif + shake256(rhoprime, CRHBYTES, key, SEEDBYTES + RNDBYTES + CRHBYTES); + + /* Expand matrix and transform vectors */ + polyvec_matrix_expand(mat, rho); + polyvecl_ntt(&s1); + polyveck_ntt(&s2); + polyveck_ntt(&t0); + +rej: + /* Sample intermediate vector y */ +#if L == 4 + poly_uniform_gamma1_4x(&z.vec[0], &z.vec[1], &z.vec[2], &z.vec[3], + rhoprime, nonce, nonce + 1, nonce + 2, nonce + 3); + nonce += 4; +#elif L == 5 + poly_uniform_gamma1_4x(&z.vec[0], &z.vec[1], &z.vec[2], &z.vec[3], + rhoprime, nonce, nonce + 1, nonce + 2, nonce + 3); + poly_uniform_gamma1(&z.vec[4], rhoprime, nonce + 4); + nonce += 5; +#elif L == 7 + poly_uniform_gamma1_4x(&z.vec[0], &z.vec[1], &z.vec[2], &z.vec[3], + rhoprime, nonce, nonce + 1, nonce + 2, nonce + 3); + poly_uniform_gamma1_4x(&z.vec[4], &z.vec[5], &z.vec[6], &tmp, + rhoprime, nonce + 4, nonce + 5, nonce + 6, 0); + nonce += 7; +#else +#error +#endif + + /* Matrix-vector product */ + tmpv.y = z; + polyvecl_ntt(&tmpv.y); + polyvec_matrix_pointwise_montgomery(&w1, mat, &tmpv.y); + polyveck_invntt_tomont(&w1); + + /* Decompose w and call the random oracle */ + polyveck_caddq(&w1); + polyveck_decompose(&w1, &tmpv.w0, &w1); + polyveck_pack_w1(sig, &w1); + + shake256_inc_ctx_reset(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, sig, K*POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(sig, CTILDEBYTES, &state); + poly_challenge(&c, sig); + poly_ntt(&c); + + /* Compute z, reject if it reveals secret */ + for(i = 0; i < L; i++) { + poly_pointwise_montgomery(&tmp, &c, &s1.vec[i]); + poly_invntt_tomont(&tmp); + poly_add(&z.vec[i], &z.vec[i], &tmp); + poly_reduce(&z.vec[i]); + 
if(poly_chknorm(&z.vec[i], GAMMA1 - BETA)) + goto rej; + } + + /* Zero hint vector in signature */ + pos = 0; + memset(hint, 0, OMEGA); + + for(i = 0; i < K; i++) { + /* Check that subtracting cs2 does not change high bits of w and low bits + * do not reveal secret information */ + poly_pointwise_montgomery(&tmp, &c, &s2.vec[i]); + poly_invntt_tomont(&tmp); + poly_sub(&tmpv.w0.vec[i], &tmpv.w0.vec[i], &tmp); + poly_reduce(&tmpv.w0.vec[i]); + if(poly_chknorm(&tmpv.w0.vec[i], GAMMA2 - BETA)) + goto rej; + + /* Compute hints */ + poly_pointwise_montgomery(&tmp, &c, &t0.vec[i]); + poly_invntt_tomont(&tmp); + poly_reduce(&tmp); + if(poly_chknorm(&tmp, GAMMA2)) + goto rej; + + poly_add(&tmpv.w0.vec[i], &tmpv.w0.vec[i], &tmp); + n = poly_make_hint(hintbuf, &tmpv.w0.vec[i], &w1.vec[i]); + if(pos + n > OMEGA) + goto rej; + + /* Store hints in signature */ + memcpy(&hint[pos], hintbuf, n); + hint[OMEGA + i] = pos = pos + n; + } + + shake256_inc_ctx_release(&state); + /* Pack z into signature */ + for(i = 0; i < L; i++) + polyz_pack(sig + CTILDEBYTES + i*POLYZ_PACKEDBYTES, &z.vec[i]); + + *siglen = CRYPTO_BYTES; + return 0; +} + +/************************************************* +* Name: crypto_sign +* +* Description: Compute signed message. 
+* +* Arguments: - uint8_t *sm: pointer to output signed message (allocated +* array with CRYPTO_BYTES + mlen bytes), +* can be equal to m +* - size_t *smlen: pointer to output length of signed +* message +* - const uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - const uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int crypto_sign(uint8_t *sm, size_t *smlen, const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t i; + + for(i = 0; i < mlen; ++i) + sm[CRYPTO_BYTES + mlen - 1 - i] = m[mlen - 1 - i]; + crypto_sign_signature(sm, smlen, sm + CRYPTO_BYTES, mlen, sk); + *smlen += mlen; + return 0; +} + +/************************************************* +* Name: crypto_sign_verify +* +* Description: Verifies signature. +* +* Arguments: - uint8_t *m: pointer to input signature +* - size_t siglen: length of signature +* - const uint8_t *m: pointer to message +* - size_t mlen: length of message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signature could be verified correctly and -1 otherwise +**************************************************/ +int crypto_sign_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size_t mlen, const uint8_t *pk) { + unsigned int i, j, pos = 0; + /* polyw1_pack writes additional 14 bytes */ + ALIGNED_UINT8(K*POLYW1_PACKEDBYTES+14) buf; + uint8_t mu[CRHBYTES]; + const uint8_t *hint = sig + CTILDEBYTES + L*POLYZ_PACKEDBYTES; + polyvecl rowbuf[2]; + polyvecl *row = rowbuf; + polyvecl z; + poly c, w1, h; + shake256incctx state; + + if(siglen != CRYPTO_BYTES) + return -1; + + /* Compute CRH(H(rho, t1), msg) */ + shake256(mu, CRHBYTES, pk, CRYPTO_PUBLICKEYBYTES); + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(mu, CRHBYTES, &state); + shake256_inc_ctx_release(&state); + + 
/* Expand challenge */ + poly_challenge(&c, sig); + poly_ntt(&c); + + /* Unpack z; shortness follows from unpacking */ + for(i = 0; i < L; i++) { + polyz_unpack(&z.vec[i], sig + CTILDEBYTES + i*POLYZ_PACKEDBYTES); + poly_ntt(&z.vec[i]); + } + + for(i = 0; i < K; i++) { + /* Expand matrix row */ + polyvec_matrix_expand_row(&row, rowbuf, pk, i); + + /* Compute i-th row of Az - c2^Dt1 */ + polyvecl_pointwise_acc_montgomery(&w1, row, &z); + + polyt1_unpack(&h, pk + SEEDBYTES + i*POLYT1_PACKEDBYTES); + poly_shiftl(&h); + poly_ntt(&h); + poly_pointwise_montgomery(&h, &c, &h); + + poly_sub(&w1, &w1, &h); + poly_reduce(&w1); + poly_invntt_tomont(&w1); + + /* Get hint polynomial and reconstruct w1 */ + memset(h.vec, 0, sizeof(poly)); + if(hint[OMEGA + i] < pos || hint[OMEGA + i] > OMEGA) + return -1; + + for(j = pos; j < hint[OMEGA + i]; ++j) { + /* Coefficients are ordered for strong unforgeability */ + if(j > pos && hint[j] <= hint[j-1]) return -1; + h.coeffs[hint[j]] = 1; + } + pos = hint[OMEGA + i]; + + poly_caddq(&w1); + poly_use_hint(&w1, &w1, &h); + polyw1_pack(buf.coeffs + i*POLYW1_PACKEDBYTES, &w1); + } + + /* Extra indices are zero for strong unforgeability */ + for(j = pos; j < OMEGA; ++j) + if(hint[j]) return -1; + + /* Call random oracle and verify challenge */ + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, buf.coeffs, K*POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf.coeffs, CTILDEBYTES, &state); + shake256_inc_ctx_release(&state); + for(i = 0; i < CTILDEBYTES; ++i) + if(buf.coeffs[i] != sig[i]) + return -1; + + return 0; +} + +/************************************************* +* Name: crypto_sign_open +* +* Description: Verify signed message. 
+* +* Arguments: - uint8_t *m: pointer to output message (allocated +* array with smlen bytes), can be equal to sm +* - size_t *mlen: pointer to output length of message +* - const uint8_t *sm: pointer to signed message +* - size_t smlen: length of signed message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signed message could be verified correctly and -1 otherwise +**************************************************/ +int crypto_sign_open(uint8_t *m, size_t *mlen, const uint8_t *sm, size_t smlen, const uint8_t *pk) { + size_t i; + + if(smlen < CRYPTO_BYTES) + goto badsig; + + *mlen = smlen - CRYPTO_BYTES; + if(crypto_sign_verify(sm, CRYPTO_BYTES, sm + CRYPTO_BYTES, *mlen, pk)) + goto badsig; + else { + /* All good, copy msg, return 0 */ + for(i = 0; i < *mlen; ++i) + m[i] = sm[CRYPTO_BYTES + i]; + return 0; + } + +badsig: + /* Signature verification failed */ + *mlen = -1; + for(i = 0; i < smlen; ++i) + m[i] = 0; + + return -1; +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/sign.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/sign.h new file mode 100644 index 0000000000..295f378c00 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/sign.h @@ -0,0 +1,36 @@ +#ifndef SIGN_H +#define SIGN_H + +#include +#include +#include "params.h" +#include "polyvec.h" +#include "poly.h" + +#define challenge DILITHIUM_NAMESPACE(challenge) +void challenge(poly *c, const uint8_t seed[SEEDBYTES]); + +#define crypto_sign_keypair DILITHIUM_NAMESPACE(keypair) +int crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + +#define crypto_sign_signature DILITHIUM_NAMESPACE(signature) +int crypto_sign_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +#define crypto_sign DILITHIUM_NAMESPACETOP +int crypto_sign(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +#define crypto_sign_verify 
DILITHIUM_NAMESPACE(verify) +int crypto_sign_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +#define crypto_sign_open DILITHIUM_NAMESPACE(open) +int crypto_sign_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/symmetric-shake.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/symmetric-shake.c new file mode 100644 index 0000000000..963f649817 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/symmetric-shake.c @@ -0,0 +1,28 @@ +#include +#include "params.h" +#include "symmetric.h" +#include "fips202.h" + +void dilithium_shake128_stream_init(shake128incctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce) +{ + uint8_t t[2]; + t[0] = nonce; + t[1] = nonce >> 8; + + shake128_inc_init(state); + shake128_inc_absorb(state, seed, SEEDBYTES); + shake128_inc_absorb(state, t, 2); + shake128_inc_finalize(state); +} + +void dilithium_shake256_stream_init(shake256incctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce) +{ + uint8_t t[2]; + t[0] = nonce; + t[1] = nonce >> 8; + + shake256_inc_init(state); + shake256_inc_absorb(state, seed, CRHBYTES); + shake256_inc_absorb(state, t, 2); + shake256_inc_finalize(state); +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/symmetric.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/symmetric.h new file mode 100644 index 0000000000..fa49963ae3 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_avx2/symmetric.h @@ -0,0 +1,28 @@ +#ifndef SYMMETRIC_H +#define SYMMETRIC_H + +#include +#include "params.h" + +#include "fips202.h" + +typedef shake128incctx stream128_state; +typedef shake256incctx stream256_state; + +#define dilithium_shake128_stream_init DILITHIUM_NAMESPACE(dilithium_shake128_stream_init) +void 
dilithium_shake128_stream_init(shake128incctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce); + +#define dilithium_shake256_stream_init DILITHIUM_NAMESPACE(dilithium_shake256_stream_init) +void dilithium_shake256_stream_init(shake256incctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define STREAM128_BLOCKBYTES SHAKE128_RATE +#define STREAM256_BLOCKBYTES SHAKE256_RATE + +#define stream128_init(STATE, SEED, NONCE) dilithium_shake128_stream_init(STATE, SEED, NONCE) +#define stream128_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) +#define stream128_release(STATE) shake128_inc_ctx_release(STATE) +#define stream256_init(STATE, SEED, NONCE) dilithium_shake256_stream_init(STATE, SEED, NONCE) +#define stream256_squeezeblocks(OUT, OUTBLOCKS, STATE) shake256_squeezeblocks(OUT, OUTBLOCKS, STATE) +#define stream256_release(STATE) shake256_inc_ctx_release(STATE) + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/LICENSE b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/LICENSE new file mode 100644 index 0000000000..cddfe615c6 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/LICENSE @@ -0,0 +1,7 @@ +Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/); +or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html). + +For Keccak and the random number generator +we are using public-domain code from sources +and by authors listed in comments on top of +the respective files. 
diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/api.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/api.h new file mode 100644 index 0000000000..78caa5c728 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/api.h @@ -0,0 +1,88 @@ +#ifndef API_H +#define API_H + +#include +#include + +#define pqcrystals_dilithium2_PUBLICKEYBYTES 1312 +#define pqcrystals_dilithium2_SECRETKEYBYTES 2560 +#define pqcrystals_dilithium2_BYTES 2420 + +#define pqcrystals_dilithium2_ref_PUBLICKEYBYTES pqcrystals_dilithium2_PUBLICKEYBYTES +#define pqcrystals_dilithium2_ref_SECRETKEYBYTES pqcrystals_dilithium2_SECRETKEYBYTES +#define pqcrystals_dilithium2_ref_BYTES pqcrystals_dilithium2_BYTES + +int pqcrystals_dilithium2_ref_keypair(uint8_t *pk, uint8_t *sk); + +int pqcrystals_dilithium2_ref_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium2_ref(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium2_ref_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int pqcrystals_dilithium2_ref_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#define pqcrystals_dilithium3_PUBLICKEYBYTES 1952 +#define pqcrystals_dilithium3_SECRETKEYBYTES 4032 +#define pqcrystals_dilithium3_BYTES 3309 + +#define pqcrystals_dilithium3_ref_PUBLICKEYBYTES pqcrystals_dilithium3_PUBLICKEYBYTES +#define pqcrystals_dilithium3_ref_SECRETKEYBYTES pqcrystals_dilithium3_SECRETKEYBYTES +#define pqcrystals_dilithium3_ref_BYTES pqcrystals_dilithium3_BYTES + +int pqcrystals_dilithium3_ref_keypair(uint8_t *pk, uint8_t *sk); + +int pqcrystals_dilithium3_ref_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium3_ref(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + 
const uint8_t *sk); + +int pqcrystals_dilithium3_ref_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int pqcrystals_dilithium3_ref_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#define pqcrystals_dilithium5_PUBLICKEYBYTES 2592 +#define pqcrystals_dilithium5_SECRETKEYBYTES 4896 +#define pqcrystals_dilithium5_BYTES 4627 + +#define pqcrystals_dilithium5_ref_PUBLICKEYBYTES pqcrystals_dilithium5_PUBLICKEYBYTES +#define pqcrystals_dilithium5_ref_SECRETKEYBYTES pqcrystals_dilithium5_SECRETKEYBYTES +#define pqcrystals_dilithium5_ref_BYTES pqcrystals_dilithium5_BYTES + +int pqcrystals_dilithium5_ref_keypair(uint8_t *pk, uint8_t *sk); + +int pqcrystals_dilithium5_ref_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium5_ref(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium5_ref_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int pqcrystals_dilithium5_ref_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/config.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/config.h new file mode 100644 index 0000000000..eddf13f5ea --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/config.h @@ -0,0 +1,27 @@ +#ifndef CONFIG_H +#define CONFIG_H + +//#define DILITHIUM_MODE 2 +#define DILITHIUM_RANDOMIZED_SIGNING +//#define USE_RDPMC +//#define DBENCH +/* Fall back to mode 2 unless the build has already defined DILITHIUM_MODE. */ +#ifndef DILITHIUM_MODE +#define DILITHIUM_MODE 2 +#endif +/* Select the algorithm name and the symbol prefix for the chosen mode (2, 3 or 5). */ +#if DILITHIUM_MODE == 2 +#define CRYPTO_ALGNAME "ML-DSA-44-ipd" +#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_44_ipd_ref +#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_44_ipd_ref_##s +#elif DILITHIUM_MODE == 3 +#define
CRYPTO_ALGNAME "ML-DSA-65-ipd" +#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_65_ipd_ref +#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_65_ipd_ref_##s +#elif DILITHIUM_MODE == 5 +#define CRYPTO_ALGNAME "ML-DSA-87-ipd" +#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_87_ipd_ref +#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_87_ipd_ref_##s +#endif + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/ntt.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/ntt.c new file mode 100644 index 0000000000..5ea8b530e1 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/ntt.c @@ -0,0 +1,98 @@ +#include <stdint.h> +#include "params.h" +#include "ntt.h" +#include "reduce.h" +/* Multiplier table: ntt() reads it as zetas[++k], invntt_tomont() reads it negated as -zetas[--k]. */ +static const int32_t zetas[N] = { + 0, 25847, -2608894, -518909, 237124, -777960, -876248, 466468, + 1826347, 2353451, -359251, -2091905, 3119733, -2884855, 3111497, 2680103, + 2725464, 1024112, -1079900, 3585928, -549488, -1119584, 2619752, -2108549, + -2118186, -3859737, -1399561, -3277672, 1757237, -19422, 4010497, 280005, + 2706023, 95776, 3077325, 3530437, -1661693, -3592148, -2537516, 3915439, + -3861115, -3043716, 3574422, -2867647, 3539968, -300467, 2348700, -539299, + -1699267, -1643818, 3505694, -3821735, 3507263, -2140649, -1600420, 3699596, + 811944, 531354, 954230, 3881043, 3900724, -2556880, 2071892, -2797779, + -3930395, -1528703, -3677745, -3041255, -1452451, 3475950, 2176455, -1585221, + -1257611, 1939314, -4083598, -1000202, -3190144, -3157330, -3632928, 126922, + 3412210, -983419, 2147896, 2715295, -2967645, -3693493, -411027, -2477047, + -671102, -1228525, -22981, -1308169, -381987, 1349076, 1852771, -1430430, + -3343383, 264944, 508951, 3097992, 44288, -1100098, 904516, 3958618, + -3724342, -8578, 1653064, -3249728, 2389356, -210977, 759969, -1316856, + 189548, -3553272, 3159746, -1851402, -2409325, -177440, 1315589, 1341330, + 1285669, -1584928, -812732, -1439742, -3019102, -3881060, -3628969, 3839961, +
2091667, 3407706, 2316500, 3817976, -3342478, 2244091, -2446433, -3562462, + 266997, 2434439, -1235728, 3513181, -3520352, -3759364, -1197226, -3193378, + 900702, 1859098, 909542, 819034, 495491, -1613174, -43260, -522500, + -655327, -3122442, 2031748, 3207046, -3556995, -525098, -768622, -3595838, + 342297, 286988, -2437823, 4108315, 3437287, -3342277, 1735879, 203044, + 2842341, 2691481, -2590150, 1265009, 4055324, 1247620, 2486353, 1595974, + -3767016, 1250494, 2635921, -3548272, -2994039, 1869119, 1903435, -1050970, + -1333058, 1237275, -3318210, -1430225, -451100, 1312455, 3306115, -1962642, + -1279661, 1917081, -2546312, -1374803, 1500165, 777191, 2235880, 3406031, + -542412, -2831860, -1671176, -1846953, -2584293, -3724270, 594136, -3776993, + -2013608, 2432395, 2454455, -164721, 1957272, 3369112, 185531, -1207385, + -3183426, 162844, 1616392, 3014001, 810149, 1652634, -3694233, -1799107, + -3038916, 3523897, 3866901, 269760, 2213111, -975884, 1717735, 472078, + -426683, 1723600, -1803090, 1910376, -1667432, -1104333, -260646, -3833893, + -2939036, -2235985, -420899, -2286327, 183443, -976891, 1612842, -3545687, + -554416, 3919660, -48306, -1362209, 3937738, 1400424, -846154, 1976782 +}; +/* zetas[0] is never read: ntt() pre-increments k from 0 and invntt_tomont() pre-decrements k from 256, each performing 255 reads (indices 1..255). */ +/************************************************* +* Name: ntt +* +* Description: Forward NTT, in-place. No modular reduction is performed after +* additions or subtractions. Output vector is in bitreversed order.
+* +* Arguments: - int32_t a[N]: input/output coefficient array +**************************************************/ +void ntt(int32_t a[N]) { + unsigned int len, start, j, k; + int32_t zeta, t; + + k = 0; + for(len = 128; len > 0; len >>= 1) { + for(start = 0; start < N; start = j + len) { + zeta = zetas[++k]; + for(j = start; j < start + len; ++j) { + t = montgomery_reduce((int64_t)zeta * a[j + len]); + a[j + len] = a[j] - t; + a[j] = a[j] + t; + } + } + } +} + +/************************************************* +* Name: invntt_tomont +* +* Description: Inverse NTT and multiplication by Montgomery factor 2^32. +* In-place. No modular reductions after additions or +* subtractions; input coefficients need to be smaller than +* Q in absolute value. Output coefficients are smaller than Q in +* absolute value. +* +* Arguments: - int32_t a[N]: input/output coefficient array +**************************************************/ +void invntt_tomont(int32_t a[N]) { + unsigned int start, len, j, k; + int32_t t, zeta; + const int32_t f = 41978; // mont^2/256 + + k = 256; + for(len = 1; len < N; len <<= 1) { + for(start = 0; start < N; start = j + len) { + zeta = -zetas[--k]; + for(j = start; j < start + len; ++j) { + t = a[j]; + a[j] = t + a[j + len]; + a[j + len] = t - a[j + len]; + a[j + len] = montgomery_reduce((int64_t)zeta * a[j + len]); + } + } + } + + for(j = 0; j < N; ++j) { + a[j] = montgomery_reduce((int64_t)f * a[j]); + } +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/ntt.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/ntt.h new file mode 100644 index 0000000000..731132d5cd --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/ntt.h @@ -0,0 +1,13 @@ +#ifndef NTT_H +#define NTT_H + +#include <stdint.h> +#include "params.h" + +#define ntt DILITHIUM_NAMESPACE(ntt) +void ntt(int32_t a[N]); + +#define invntt_tomont DILITHIUM_NAMESPACE(invntt_tomont) +void invntt_tomont(int32_t a[N]); + +#endif diff
--git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/packing.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/packing.c new file mode 100644 index 0000000000..039a686da3 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/packing.c @@ -0,0 +1,237 @@ +#include "params.h" +#include "packing.h" +#include "polyvec.h" +#include "poly.h" + +/************************************************* +* Name: pack_pk +* +* Description: Bit-pack public key pk = (rho, t1). +* +* Arguments: - uint8_t pk[]: output byte array +* - const uint8_t rho[]: byte array containing rho +* - const polyveck *t1: pointer to vector t1 +**************************************************/ +void pack_pk(uint8_t pk[CRYPTO_PUBLICKEYBYTES], + const uint8_t rho[SEEDBYTES], + const polyveck *t1) +{ + unsigned int i; + + for(i = 0; i < SEEDBYTES; ++i) + pk[i] = rho[i]; + pk += SEEDBYTES; + + for(i = 0; i < K; ++i) + polyt1_pack(pk + i*POLYT1_PACKEDBYTES, &t1->vec[i]); +} + +/************************************************* +* Name: unpack_pk +* +* Description: Unpack public key pk = (rho, t1). +* +* Arguments: - uint8_t rho[]: output byte array for rho +* - polyveck *t1: pointer to output vector t1 +* - const uint8_t pk[]: byte array containing bit-packed pk +**************************************************/ +void unpack_pk(uint8_t rho[SEEDBYTES], + polyveck *t1, + const uint8_t pk[CRYPTO_PUBLICKEYBYTES]) +{ + unsigned int i; + + for(i = 0; i < SEEDBYTES; ++i) + rho[i] = pk[i]; + pk += SEEDBYTES; + + for(i = 0; i < K; ++i) + polyt1_unpack(&t1->vec[i], pk + i*POLYT1_PACKEDBYTES); +} + +/************************************************* +* Name: pack_sk +* +* Description: Bit-pack secret key sk = (rho, key, tr, s1, s2, t0).
+* +* Arguments: - uint8_t sk[]: output byte array +* - const uint8_t rho[]: byte array containing rho +* - const uint8_t tr[]: byte array containing tr +* - const uint8_t key[]: byte array containing key +* - const polyveck *t0: pointer to vector t0 +* - const polyvecl *s1: pointer to vector s1 +* - const polyveck *s2: pointer to vector s2 +**************************************************/ +void pack_sk(uint8_t sk[CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[TRBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2) +{ + unsigned int i; + + for(i = 0; i < SEEDBYTES; ++i) + sk[i] = rho[i]; + sk += SEEDBYTES; + + for(i = 0; i < SEEDBYTES; ++i) + sk[i] = key[i]; + sk += SEEDBYTES; + + for(i = 0; i < TRBYTES; ++i) + sk[i] = tr[i]; + sk += TRBYTES; + + for(i = 0; i < L; ++i) + polyeta_pack(sk + i*POLYETA_PACKEDBYTES, &s1->vec[i]); + sk += L*POLYETA_PACKEDBYTES; + + for(i = 0; i < K; ++i) + polyeta_pack(sk + i*POLYETA_PACKEDBYTES, &s2->vec[i]); + sk += K*POLYETA_PACKEDBYTES; + + for(i = 0; i < K; ++i) + polyt0_pack(sk + i*POLYT0_PACKEDBYTES, &t0->vec[i]); +} + +/************************************************* +* Name: unpack_sk +* +* Description: Unpack secret key sk = (rho, key, tr, s1, s2, t0).
+* +* Arguments: - uint8_t rho[]: output byte array for rho +* - uint8_t tr[]: output byte array for tr +* - uint8_t key[]: output byte array for key +* - polyveck *t0: pointer to output vector t0 +* - polyvecl *s1: pointer to output vector s1 +* - polyveck *s2: pointer to output vector s2 +* - const uint8_t sk[]: byte array containing bit-packed sk +**************************************************/ +void unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[TRBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[CRYPTO_SECRETKEYBYTES]) +{ + unsigned int i; + + for(i = 0; i < SEEDBYTES; ++i) + rho[i] = sk[i]; + sk += SEEDBYTES; + + for(i = 0; i < SEEDBYTES; ++i) + key[i] = sk[i]; + sk += SEEDBYTES; + + for(i = 0; i < TRBYTES; ++i) + tr[i] = sk[i]; + sk += TRBYTES; + + for(i=0; i < L; ++i) + polyeta_unpack(&s1->vec[i], sk + i*POLYETA_PACKEDBYTES); + sk += L*POLYETA_PACKEDBYTES; + + for(i=0; i < K; ++i) + polyeta_unpack(&s2->vec[i], sk + i*POLYETA_PACKEDBYTES); + sk += K*POLYETA_PACKEDBYTES; + + for(i=0; i < K; ++i) + polyt0_unpack(&t0->vec[i], sk + i*POLYT0_PACKEDBYTES); +} + +/************************************************* +* Name: pack_sig +* +* Description: Bit-pack signature sig = (c, z, h).
+* +* Arguments: - uint8_t sig[]: output byte array +* - const uint8_t *c: pointer to challenge hash of length CTILDEBYTES +* - const polyvecl *z: pointer to vector z +* - const polyveck *h: pointer to hint vector h +**************************************************/ +void pack_sig(uint8_t sig[CRYPTO_BYTES], + const uint8_t c[CTILDEBYTES], + const polyvecl *z, + const polyveck *h) +{ + unsigned int i, j, k; + + for(i=0; i < CTILDEBYTES; ++i) + sig[i] = c[i]; + sig += CTILDEBYTES; + + for(i = 0; i < L; ++i) + polyz_pack(sig + i*POLYZ_PACKEDBYTES, &z->vec[i]); + sig += L*POLYZ_PACKEDBYTES; + + /* Encode h: first the indices of nonzero coefficients, then at sig[OMEGA + i] the running index count after polynomial i */ + for(i = 0; i < OMEGA + K; ++i) + sig[i] = 0; + + k = 0; + for(i = 0; i < K; ++i) { + for(j = 0; j < N; ++j) + if(h->vec[i].coeffs[j] != 0) + sig[k++] = j; + + sig[OMEGA + i] = k; + } +} + +/************************************************* +* Name: unpack_sig +* +* Description: Unpack signature sig = (c, z, h). +* +* Arguments: - uint8_t *c: pointer to output challenge hash +* - polyvecl *z: pointer to output vector z +* - polyveck *h: pointer to output hint vector h +* - const uint8_t sig[]: byte array containing +* bit-packed signature +* +* Returns 1 in case of malformed signature; otherwise 0.
+**************************************************/ +int unpack_sig(uint8_t c[CTILDEBYTES], + polyvecl *z, + polyveck *h, + const uint8_t sig[CRYPTO_BYTES]) +{ + unsigned int i, j, k; + + for(i = 0; i < CTILDEBYTES; ++i) + c[i] = sig[i]; + sig += CTILDEBYTES; + + for(i = 0; i < L; ++i) + polyz_unpack(&z->vec[i], sig + i*POLYZ_PACKEDBYTES); + sig += L*POLYZ_PACKEDBYTES; + + /* Decode h: sig[OMEGA + i] is the end offset of polynomial i's index list; it must not decrease and must not exceed OMEGA */ + k = 0; + for(i = 0; i < K; ++i) { + for(j = 0; j < N; ++j) + h->vec[i].coeffs[j] = 0; + + if(sig[OMEGA + i] < k || sig[OMEGA + i] > OMEGA) + return 1; + + for(j = k; j < sig[OMEGA + i]; ++j) { + /* Coefficients are ordered for strong unforgeability */ + if(j > k && sig[j] <= sig[j-1]) return 1; + h->vec[i].coeffs[sig[j]] = 1; + } + + k = sig[OMEGA + i]; + } + + /* Extra indices are zero for strong unforgeability */ + for(j = k; j < OMEGA; ++j) + if(sig[j]) + return 1; + + return 0; +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/packing.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/packing.h new file mode 100644 index 0000000000..8e47728ce3 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/packing.h @@ -0,0 +1,38 @@ +#ifndef PACKING_H +#define PACKING_H + +#include <stdint.h> +#include "params.h" +#include "polyvec.h" + +#define pack_pk DILITHIUM_NAMESPACE(pack_pk) +void pack_pk(uint8_t pk[CRYPTO_PUBLICKEYBYTES], const uint8_t rho[SEEDBYTES], const polyveck *t1); + +#define pack_sk DILITHIUM_NAMESPACE(pack_sk) +void pack_sk(uint8_t sk[CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[TRBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2); + +#define pack_sig DILITHIUM_NAMESPACE(pack_sig) +void pack_sig(uint8_t sig[CRYPTO_BYTES], const uint8_t c[CTILDEBYTES], const polyvecl *z, const polyveck *h); + +#define unpack_pk DILITHIUM_NAMESPACE(unpack_pk) +void unpack_pk(uint8_t rho[SEEDBYTES], polyveck *t1, const uint8_t
pk[CRYPTO_PUBLICKEYBYTES]); + +#define unpack_sk DILITHIUM_NAMESPACE(unpack_sk) +void unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[TRBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[CRYPTO_SECRETKEYBYTES]); + +#define unpack_sig DILITHIUM_NAMESPACE(unpack_sig) +int unpack_sig(uint8_t c[CTILDEBYTES], polyvecl *z, polyveck *h, const uint8_t sig[CRYPTO_BYTES]); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/params.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/params.h new file mode 100644 index 0000000000..1e8a7b505b --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/params.h @@ -0,0 +1,80 @@ +#ifndef PARAMS_H +#define PARAMS_H + +#include "config.h" +/* Constants shared by every DILITHIUM_MODE. */ +#define SEEDBYTES 32 +#define CRHBYTES 64 +#define TRBYTES 64 +#define RNDBYTES 32 +#define N 256 +#define Q 8380417 +#define D 13 +#define ROOT_OF_UNITY 1753 +/* Constants that depend on DILITHIUM_MODE; only modes 2, 3 and 5 define them. */ +#if DILITHIUM_MODE == 2 +#define K 4 +#define L 4 +#define ETA 2 +#define TAU 39 +#define BETA 78 +#define GAMMA1 (1 << 17) +#define GAMMA2 ((Q-1)/88) +#define OMEGA 80 +#define CTILDEBYTES 32 + +#elif DILITHIUM_MODE == 3 +#define K 6 +#define L 5 +#define ETA 4 +#define TAU 49 +#define BETA 196 +#define GAMMA1 (1 << 19) +#define GAMMA2 ((Q-1)/32) +#define OMEGA 55 +#define CTILDEBYTES 48 + +#elif DILITHIUM_MODE == 5 +#define K 8 +#define L 7 +#define ETA 2 +#define TAU 60 +#define BETA 120 +#define GAMMA1 (1 << 19) +#define GAMMA2 ((Q-1)/32) +#define OMEGA 75 +#define CTILDEBYTES 64 + +#endif +/* Packed sizes; the POLYZ, POLYW1 and POLYETA sizes are selected from GAMMA1, GAMMA2 and ETA below. */ +#define POLYT1_PACKEDBYTES 320 +#define POLYT0_PACKEDBYTES 416 +#define POLYVECH_PACKEDBYTES (OMEGA + K) + +#if GAMMA1 == (1 << 17) +#define POLYZ_PACKEDBYTES 576 +#elif GAMMA1 == (1 << 19) +#define POLYZ_PACKEDBYTES 640 +#endif + +#if GAMMA2 == (Q-1)/88 +#define POLYW1_PACKEDBYTES 192 +#elif GAMMA2 == (Q-1)/32 +#define POLYW1_PACKEDBYTES 128 +#endif + +#if ETA == 2 +#define POLYETA_PACKEDBYTES 96 +#elif ETA == 4 +#define
POLYETA_PACKEDBYTES 128 +#endif + +#define CRYPTO_PUBLICKEYBYTES (SEEDBYTES + K*POLYT1_PACKEDBYTES) +#define CRYPTO_SECRETKEYBYTES (2*SEEDBYTES \ + + TRBYTES \ + + L*POLYETA_PACKEDBYTES \ + + K*POLYETA_PACKEDBYTES \ + + K*POLYT0_PACKEDBYTES) +#define CRYPTO_BYTES (CTILDEBYTES + L*POLYZ_PACKEDBYTES + POLYVECH_PACKEDBYTES) + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/poly.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/poly.c new file mode 100644 index 0000000000..7983aacdd1 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/poly.c @@ -0,0 +1,911 @@ +#include <stdint.h> +#include "params.h" +#include "poly.h" +#include "ntt.h" +#include "reduce.h" +#include "rounding.h" +#include "symmetric.h" +/* DBENCH_START/DBENCH_STOP accumulate cpucycles() deltas when DBENCH is defined and expand to nothing otherwise. */ +#ifdef DBENCH +#include "test/cpucycles.h" +extern const uint64_t timing_overhead; +extern uint64_t *tred, *tadd, *tmul, *tround, *tsample, *tpack; +#define DBENCH_START() uint64_t time = cpucycles() +#define DBENCH_STOP(t) t += cpucycles() - time - timing_overhead +#else +#define DBENCH_START() +#define DBENCH_STOP(t) +#endif + +/************************************************* +* Name: poly_reduce +* +* Description: Inplace reduction of all coefficients of polynomial to +* representative in [-6283009,6283007]. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_reduce(poly *a) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N; ++i) + a->coeffs[i] = reduce32(a->coeffs[i]); + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: poly_caddq +* +* Description: For all coefficients of in/out polynomial add Q if +* coefficient is negative.
+* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_caddq(poly *a) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N; ++i) + a->coeffs[i] = caddq(a->coeffs[i]); + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: poly_add +* +* Description: Add polynomials coefficient-wise. No modular reduction is performed. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first summand +* - const poly *b: pointer to second summand +**************************************************/ +void poly_add(poly *c, const poly *a, const poly *b) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N; ++i) + c->coeffs[i] = a->coeffs[i] + b->coeffs[i]; + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: poly_sub +* +* Description: Subtract polynomials coefficient-wise. No modular reduction is +* performed. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial to be +* subtracted from first input polynomial +**************************************************/ +void poly_sub(poly *c, const poly *a, const poly *b) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N; ++i) + c->coeffs[i] = a->coeffs[i] - b->coeffs[i]; + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: poly_shiftl +* +* Description: Multiply polynomial by 2^D without modular reduction. Assumes +* input coefficients to be less than 2^{31-D} in absolute value.
+* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_shiftl(poly *a) { + unsigned int i; + DBENCH_START(); + /* NOTE(review): left-shifting a negative signed value is undefined behavior in ISO C; confirm callers only pass non-negative coefficients. */ + for(i = 0; i < N; ++i) + a->coeffs[i] <<= D; + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: poly_ntt +* +* Description: Inplace forward NTT. Coefficients can grow by +* 8*Q in absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_ntt(poly *a) { + DBENCH_START(); + + ntt(a->coeffs); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: poly_invntt_tomont +* +* Description: Inplace inverse NTT and multiplication by 2^{32}. +* Input coefficients need to be less than Q in absolute +* value and output coefficients are again bounded by Q. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_invntt_tomont(poly *a) { + DBENCH_START(); + + invntt_tomont(a->coeffs); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: poly_pointwise_montgomery +* +* Description: Pointwise multiplication of polynomials in NTT domain +* representation and multiplication of resulting polynomial +* by 2^{-32}.
+* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_pointwise_montgomery(poly *c, const poly *a, const poly *b) { + unsigned int i; + DBENCH_START(); + /* Widen to 64 bits before multiplying so the product of two int32_t coefficients is not truncated. */ + for(i = 0; i < N; ++i) + c->coeffs[i] = montgomery_reduce((int64_t)a->coeffs[i] * b->coeffs[i]); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: poly_power2round +* +* Description: For all coefficients c of the input polynomial, +* compute c0, c1 such that c mod Q = c1*2^D + c0 +* with -2^{D-1} < c0 <= 2^{D-1}. Assumes coefficients to be +* standard representatives. +* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void poly_power2round(poly *a1, poly *a0, const poly *a) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N; ++i) + a1->coeffs[i] = power2round(&a0->coeffs[i], a->coeffs[i]); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: poly_decompose +* +* Description: For all coefficients c of the input polynomial, +* compute low and high bits c0, c1 such that c mod Q = c1*ALPHA + c0 +* with -ALPHA/2 < c0 <= ALPHA/2, except if c1 = (Q-1)/ALPHA, where we +* set c1 = 0 and -ALPHA/2 <= c0 = c mod Q - Q < 0. +* Assumes coefficients to be standard representatives.
+* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void poly_decompose(poly *a1, poly *a0, const poly *a) { + unsigned int i; + DBENCH_START(); + /* decompose() returns the high part and writes the low part through its first argument. */ + for(i = 0; i < N; ++i) + a1->coeffs[i] = decompose(&a0->coeffs[i], a->coeffs[i]); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: poly_make_hint +* +* Description: Compute hint polynomial. The coefficients of which indicate +* whether the low bits of the corresponding coefficient of +* the input polynomial overflow into the high bits. +* +* Arguments: - poly *h: pointer to output hint polynomial +* - const poly *a0: pointer to low part of input polynomial +* - const poly *a1: pointer to high part of input polynomial +* +* Returns number of 1 bits, computed as the sum of the hint coefficients. +**************************************************/ +unsigned int poly_make_hint(poly *h, const poly *a0, const poly *a1) { + unsigned int i, s = 0; + DBENCH_START(); + + for(i = 0; i < N; ++i) { + h->coeffs[i] = make_hint(a0->coeffs[i], a1->coeffs[i]); + s += h->coeffs[i]; + } + + DBENCH_STOP(*tround); + return s; +} + +/************************************************* +* Name: poly_use_hint +* +* Description: Use hint polynomial to correct the high bits of a polynomial.
+* +* Arguments: - poly *b: pointer to output polynomial with corrected high bits +* - const poly *a: pointer to input polynomial +* - const poly *h: pointer to input hint polynomial +**************************************************/ +void poly_use_hint(poly *b, const poly *a, const poly *h) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N; ++i) + b->coeffs[i] = use_hint(a->coeffs[i], h->coeffs[i]); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: poly_chknorm +* +* Description: Check infinity norm of polynomial against given bound. +* Assumes input coefficients were reduced by reduce32(). +* +* Arguments: - const poly *a: pointer to polynomial +* - int32_t B: norm bound +* +* Returns 0 if norm is strictly smaller than B <= (Q-1)/8 and 1 otherwise. +**************************************************/ +int poly_chknorm(const poly *a, int32_t B) { + unsigned int i; + int32_t t; + DBENCH_START(); + /* Bounds above (Q-1)/8 are rejected outright, without inspecting any coefficient. */ + if(B > (Q-1)/8) + return 1; + + /* It is ok to leak which coefficient violates the bound since + the probability for each coefficient is independent of secret + data but we must not leak the sign of the centralized representative. */ + for(i = 0; i < N; ++i) { + /* Branch-free absolute value: t becomes a sign mask (assumes >> on a negative int32_t is an arithmetic shift), so 2*coeff is subtracted only for negative coefficients */ + t = a->coeffs[i] >> 31; + t = a->coeffs[i] - (t & 2*a->coeffs[i]); + + if(t >= B) { + DBENCH_STOP(*tsample); + return 1; + } + } + + DBENCH_STOP(*tsample); + return 0; +} + +/************************************************* +* Name: rej_uniform +* +* Description: Sample uniformly random coefficients in [0, Q-1] by +* performing rejection sampling on array of random bytes. +* +* Arguments: - int32_t *a: pointer to output array (allocated) +* - unsigned int len: number of coefficients to be sampled +* - const uint8_t *buf: array of random bytes +* - unsigned int buflen: length of array of random bytes +* +* Returns number of sampled coefficients. Can be smaller than len if not enough +* random bytes were given.
+**************************************************/ +static unsigned int rej_uniform(int32_t *a, + unsigned int len, + const uint8_t *buf, + unsigned int buflen) +{ + unsigned int ctr, pos; + uint32_t t; + DBENCH_START(); + /* Each candidate is 3 bytes read little-endian and masked to 23 bits; values >= Q are rejected. */ + ctr = pos = 0; + while(ctr < len && pos + 3 <= buflen) { + t = buf[pos++]; + t |= (uint32_t)buf[pos++] << 8; + t |= (uint32_t)buf[pos++] << 16; + t &= 0x7FFFFF; + + if(t < Q) + a[ctr++] = t; + } + + DBENCH_STOP(*tsample); + return ctr; +} + +/************************************************* +* Name: poly_uniform +* +* Description: Sample polynomial with uniformly random coefficients +* in [0,Q-1] by performing rejection sampling on the +* stream128 output for (seed, nonce). NOTE(review): previously documented as SHAKE256(seed|nonce) - confirm the XOF behind stream128. +* +* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length SEEDBYTES +* - uint16_t nonce: 2-byte nonce +**************************************************/ +#define POLY_UNIFORM_NBLOCKS ((768 + STREAM128_BLOCKBYTES - 1)/STREAM128_BLOCKBYTES) +void poly_uniform(poly *a, + const uint8_t seed[SEEDBYTES], + uint16_t nonce) +{ + unsigned int i, ctr, off; + unsigned int buflen = POLY_UNIFORM_NBLOCKS*STREAM128_BLOCKBYTES; + uint8_t buf[POLY_UNIFORM_NBLOCKS*STREAM128_BLOCKBYTES + 2]; + stream128_state state; + + stream128_init(&state, seed, nonce); + stream128_squeezeblocks(buf, POLY_UNIFORM_NBLOCKS, &state); + + ctr = rej_uniform(a->coeffs, N, buf, buflen); + /* Refill one block at a time; the buflen % 3 leftover bytes are moved to the front so no 3-byte group is split. */ + while(ctr < N) { + off = buflen % 3; + for(i = 0; i < off; ++i) + buf[i] = buf[buflen - off + i]; + + stream128_squeezeblocks(buf + off, 1, &state); + buflen = STREAM128_BLOCKBYTES + off; + ctr += rej_uniform(a->coeffs + ctr, N - ctr, buf, buflen); + } + stream128_release(&state); +} + +/************************************************* +* Name: rej_eta +* +* Description: Sample uniformly random coefficients in [-ETA, ETA] by +* performing rejection sampling on array of random bytes.
+* +* Arguments: - int32_t *a: pointer to output array (allocated) +* - unsigned int len: number of coefficients to be sampled +* - const uint8_t *buf: array of random bytes +* - unsigned int buflen: length of array of random bytes +* +* Returns number of sampled coefficients. Can be smaller than len if not enough +* random bytes were given. +**************************************************/ +static unsigned int rej_eta(int32_t *a, + unsigned int len, + const uint8_t *buf, + unsigned int buflen) +{ + unsigned int ctr, pos; + uint32_t t0, t1; + DBENCH_START(); + + ctr = pos = 0; + while(ctr < len && pos < buflen) { + t0 = buf[pos] & 0x0F; + t1 = buf[pos++] >> 4; + /* Each nibble is one candidate. For ETA == 2, (205*t >> 10) equals t/5 for t < 15, so t - (205*t >> 10)*5 is t mod 5. */ +#if ETA == 2 + if(t0 < 15) { + t0 = t0 - (205*t0 >> 10)*5; + a[ctr++] = 2 - t0; + } + if(t1 < 15 && ctr < len) { + t1 = t1 - (205*t1 >> 10)*5; + a[ctr++] = 2 - t1; + } +#elif ETA == 4 + if(t0 < 9) + a[ctr++] = 4 - t0; + if(t1 < 9 && ctr < len) + a[ctr++] = 4 - t1; +#endif + } + + DBENCH_STOP(*tsample); + return ctr; +} + +/************************************************* +* Name: poly_uniform_eta +* +* Description: Sample polynomial with uniformly random coefficients +* in [-ETA,ETA] by performing rejection sampling on the +* output stream from SHAKE256(seed|nonce) +* +* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length CRHBYTES +* - uint16_t nonce: 2-byte nonce +**************************************************/ +#if ETA == 2 +#define POLY_UNIFORM_ETA_NBLOCKS ((136 + STREAM256_BLOCKBYTES - 1)/STREAM256_BLOCKBYTES) +#elif ETA == 4 +#define POLY_UNIFORM_ETA_NBLOCKS ((227 + STREAM256_BLOCKBYTES - 1)/STREAM256_BLOCKBYTES) +#endif +void poly_uniform_eta(poly *a, + const uint8_t seed[CRHBYTES], + uint16_t nonce) +{ + unsigned int ctr; + unsigned int buflen = POLY_UNIFORM_ETA_NBLOCKS*STREAM256_BLOCKBYTES; + uint8_t buf[POLY_UNIFORM_ETA_NBLOCKS*STREAM256_BLOCKBYTES]; + stream256_state state; + + stream256_init(&state, seed, nonce); +
stream256_squeezeblocks(buf, POLY_UNIFORM_ETA_NBLOCKS, &state); + + ctr = rej_eta(a->coeffs, N, buf, buflen); + /* Squeeze one more block at a time until all N coefficients have been accepted. */ + while(ctr < N) { + stream256_squeezeblocks(buf, 1, &state); + ctr += rej_eta(a->coeffs + ctr, N - ctr, buf, STREAM256_BLOCKBYTES); + } + stream256_release(&state); +} + +/************************************************* +* Name: poly_uniform_gamma1 +* +* Description: Sample polynomial with uniformly random coefficients +* in [-(GAMMA1 - 1), GAMMA1] by unpacking output stream +* of SHAKE256(seed|nonce) +* +* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length CRHBYTES +* - uint16_t nonce: 16-bit nonce +**************************************************/ +#define POLY_UNIFORM_GAMMA1_NBLOCKS ((POLYZ_PACKEDBYTES + STREAM256_BLOCKBYTES - 1)/STREAM256_BLOCKBYTES) +void poly_uniform_gamma1(poly *a, + const uint8_t seed[CRHBYTES], + uint16_t nonce) +{ + uint8_t buf[POLY_UNIFORM_GAMMA1_NBLOCKS*STREAM256_BLOCKBYTES]; + stream256_state state; + + stream256_init(&state, seed, nonce); + stream256_squeezeblocks(buf, POLY_UNIFORM_GAMMA1_NBLOCKS, &state); + stream256_release(&state); + polyz_unpack(a, buf); +} + +/************************************************* +* Name: poly_challenge +* +* Description: Implementation of H. Samples polynomial with TAU nonzero +* coefficients in {-1,1} using the output stream of +* SHAKE256(seed).
+* +* Arguments: - poly *c: pointer to output polynomial +* - const uint8_t mu[]: byte array containing seed of length SEEDBYTES +**************************************************/ +void poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]) { + unsigned int i, b, pos; + uint64_t signs; + uint8_t buf[SHAKE256_RATE]; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, seed, SEEDBYTES); + shake256_inc_finalize(&state); + shake256_squeezeblocks(buf, 1, &state); + + signs = 0; + for(i = 0; i < 8; ++i) + signs |= (uint64_t)buf[i] << 8*i; + pos = 8; + + for(i = 0; i < N; ++i) + c->coeffs[i] = 0; + for(i = N-TAU; i < N; ++i) { + do { + if(pos >= SHAKE256_RATE) { + shake256_squeezeblocks(buf, 1, &state); + pos = 0; + } + + b = buf[pos++]; + } while(b > i); + + c->coeffs[i] = c->coeffs[b]; + c->coeffs[b] = 1 - 2*(signs & 1); + signs >>= 1; + } + shake256_inc_ctx_release(&state); +} + +/************************************************* +* Name: polyeta_pack +* +* Description: Bit-pack polynomial with coefficients in [-ETA,ETA]. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYETA_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void polyeta_pack(uint8_t *r, const poly *a) { + unsigned int i; + uint8_t t[8]; + DBENCH_START(); + +#if ETA == 2 + for(i = 0; i < N/8; ++i) { + t[0] = ETA - a->coeffs[8*i+0]; + t[1] = ETA - a->coeffs[8*i+1]; + t[2] = ETA - a->coeffs[8*i+2]; + t[3] = ETA - a->coeffs[8*i+3]; + t[4] = ETA - a->coeffs[8*i+4]; + t[5] = ETA - a->coeffs[8*i+5]; + t[6] = ETA - a->coeffs[8*i+6]; + t[7] = ETA - a->coeffs[8*i+7]; + + r[3*i+0] = (t[0] >> 0) | (t[1] << 3) | (t[2] << 6); + r[3*i+1] = (t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7); + r[3*i+2] = (t[5] >> 1) | (t[6] << 2) | (t[7] << 5); + } +#elif ETA == 4 + for(i = 0; i < N/2; ++i) { + t[0] = ETA - a->coeffs[2*i+0]; + t[1] = ETA - a->coeffs[2*i+1]; + r[i] = t[0] | (t[1] << 4); + } +#endif + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyeta_unpack +* +* Description: Unpack polynomial with coefficients in [-ETA,ETA]. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void polyeta_unpack(poly *r, const uint8_t *a) { + unsigned int i; + DBENCH_START(); + +#if ETA == 2 + for(i = 0; i < N/8; ++i) { + r->coeffs[8*i+0] = (a[3*i+0] >> 0) & 7; + r->coeffs[8*i+1] = (a[3*i+0] >> 3) & 7; + r->coeffs[8*i+2] = ((a[3*i+0] >> 6) | (a[3*i+1] << 2)) & 7; + r->coeffs[8*i+3] = (a[3*i+1] >> 1) & 7; + r->coeffs[8*i+4] = (a[3*i+1] >> 4) & 7; + r->coeffs[8*i+5] = ((a[3*i+1] >> 7) | (a[3*i+2] << 1)) & 7; + r->coeffs[8*i+6] = (a[3*i+2] >> 2) & 7; + r->coeffs[8*i+7] = (a[3*i+2] >> 5) & 7; + + r->coeffs[8*i+0] = ETA - r->coeffs[8*i+0]; + r->coeffs[8*i+1] = ETA - r->coeffs[8*i+1]; + r->coeffs[8*i+2] = ETA - r->coeffs[8*i+2]; + r->coeffs[8*i+3] = ETA - r->coeffs[8*i+3]; + r->coeffs[8*i+4] = ETA - r->coeffs[8*i+4]; + r->coeffs[8*i+5] = ETA - r->coeffs[8*i+5]; + r->coeffs[8*i+6] = ETA - r->coeffs[8*i+6]; + r->coeffs[8*i+7] = ETA - r->coeffs[8*i+7]; + } +#elif ETA == 4 + for(i = 0; i < N/2; ++i) { + r->coeffs[2*i+0] = a[i] & 0x0F; + r->coeffs[2*i+1] = a[i] >> 4; + r->coeffs[2*i+0] = ETA - r->coeffs[2*i+0]; + r->coeffs[2*i+1] = ETA - r->coeffs[2*i+1]; + } +#endif + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyt1_pack +* +* Description: Bit-pack polynomial t1 with coefficients fitting in 10 bits. +* Input coefficients are assumed to be standard representatives. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYT1_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void polyt1_pack(uint8_t *r, const poly *a) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N/4; ++i) { + r[5*i+0] = (a->coeffs[4*i+0] >> 0); + r[5*i+1] = (a->coeffs[4*i+0] >> 8) | (a->coeffs[4*i+1] << 2); + r[5*i+2] = (a->coeffs[4*i+1] >> 6) | (a->coeffs[4*i+2] << 4); + r[5*i+3] = (a->coeffs[4*i+2] >> 4) | (a->coeffs[4*i+3] << 6); + r[5*i+4] = (a->coeffs[4*i+3] >> 2); + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyt1_unpack +* +* Description: Unpack polynomial t1 with 10-bit coefficients. +* Output coefficients are standard representatives. +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void polyt1_unpack(poly *r, const uint8_t *a) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N/4; ++i) { + r->coeffs[4*i+0] = ((a[5*i+0] >> 0) | ((uint32_t)a[5*i+1] << 8)) & 0x3FF; + r->coeffs[4*i+1] = ((a[5*i+1] >> 2) | ((uint32_t)a[5*i+2] << 6)) & 0x3FF; + r->coeffs[4*i+2] = ((a[5*i+2] >> 4) | ((uint32_t)a[5*i+3] << 4)) & 0x3FF; + r->coeffs[4*i+3] = ((a[5*i+3] >> 6) | ((uint32_t)a[5*i+4] << 2)) & 0x3FF; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyt0_pack +* +* Description: Bit-pack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}]. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYT0_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void polyt0_pack(uint8_t *r, const poly *a) { + unsigned int i; + uint32_t t[8]; + DBENCH_START(); + + for(i = 0; i < N/8; ++i) { + t[0] = (1 << (D-1)) - a->coeffs[8*i+0]; + t[1] = (1 << (D-1)) - a->coeffs[8*i+1]; + t[2] = (1 << (D-1)) - a->coeffs[8*i+2]; + t[3] = (1 << (D-1)) - a->coeffs[8*i+3]; + t[4] = (1 << (D-1)) - a->coeffs[8*i+4]; + t[5] = (1 << (D-1)) - a->coeffs[8*i+5]; + t[6] = (1 << (D-1)) - a->coeffs[8*i+6]; + t[7] = (1 << (D-1)) - a->coeffs[8*i+7]; + + r[13*i+ 0] = t[0]; + r[13*i+ 1] = t[0] >> 8; + r[13*i+ 1] |= t[1] << 5; + r[13*i+ 2] = t[1] >> 3; + r[13*i+ 3] = t[1] >> 11; + r[13*i+ 3] |= t[2] << 2; + r[13*i+ 4] = t[2] >> 6; + r[13*i+ 4] |= t[3] << 7; + r[13*i+ 5] = t[3] >> 1; + r[13*i+ 6] = t[3] >> 9; + r[13*i+ 6] |= t[4] << 4; + r[13*i+ 7] = t[4] >> 4; + r[13*i+ 8] = t[4] >> 12; + r[13*i+ 8] |= t[5] << 1; + r[13*i+ 9] = t[5] >> 7; + r[13*i+ 9] |= t[6] << 6; + r[13*i+10] = t[6] >> 2; + r[13*i+11] = t[6] >> 10; + r[13*i+11] |= t[7] << 3; + r[13*i+12] = t[7] >> 5; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyt0_unpack +* +* Description: Unpack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}]. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void polyt0_unpack(poly *r, const uint8_t *a) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N/8; ++i) { + r->coeffs[8*i+0] = a[13*i+0]; + r->coeffs[8*i+0] |= (uint32_t)a[13*i+1] << 8; + r->coeffs[8*i+0] &= 0x1FFF; + + r->coeffs[8*i+1] = a[13*i+1] >> 5; + r->coeffs[8*i+1] |= (uint32_t)a[13*i+2] << 3; + r->coeffs[8*i+1] |= (uint32_t)a[13*i+3] << 11; + r->coeffs[8*i+1] &= 0x1FFF; + + r->coeffs[8*i+2] = a[13*i+3] >> 2; + r->coeffs[8*i+2] |= (uint32_t)a[13*i+4] << 6; + r->coeffs[8*i+2] &= 0x1FFF; + + r->coeffs[8*i+3] = a[13*i+4] >> 7; + r->coeffs[8*i+3] |= (uint32_t)a[13*i+5] << 1; + r->coeffs[8*i+3] |= (uint32_t)a[13*i+6] << 9; + r->coeffs[8*i+3] &= 0x1FFF; + + r->coeffs[8*i+4] = a[13*i+6] >> 4; + r->coeffs[8*i+4] |= (uint32_t)a[13*i+7] << 4; + r->coeffs[8*i+4] |= (uint32_t)a[13*i+8] << 12; + r->coeffs[8*i+4] &= 0x1FFF; + + r->coeffs[8*i+5] = a[13*i+8] >> 1; + r->coeffs[8*i+5] |= (uint32_t)a[13*i+9] << 7; + r->coeffs[8*i+5] &= 0x1FFF; + + r->coeffs[8*i+6] = a[13*i+9] >> 6; + r->coeffs[8*i+6] |= (uint32_t)a[13*i+10] << 2; + r->coeffs[8*i+6] |= (uint32_t)a[13*i+11] << 10; + r->coeffs[8*i+6] &= 0x1FFF; + + r->coeffs[8*i+7] = a[13*i+11] >> 3; + r->coeffs[8*i+7] |= (uint32_t)a[13*i+12] << 5; + r->coeffs[8*i+7] &= 0x1FFF; + + r->coeffs[8*i+0] = (1 << (D-1)) - r->coeffs[8*i+0]; + r->coeffs[8*i+1] = (1 << (D-1)) - r->coeffs[8*i+1]; + r->coeffs[8*i+2] = (1 << (D-1)) - r->coeffs[8*i+2]; + r->coeffs[8*i+3] = (1 << (D-1)) - r->coeffs[8*i+3]; + r->coeffs[8*i+4] = (1 << (D-1)) - r->coeffs[8*i+4]; + r->coeffs[8*i+5] = (1 << (D-1)) - r->coeffs[8*i+5]; + r->coeffs[8*i+6] = (1 << (D-1)) - r->coeffs[8*i+6]; + r->coeffs[8*i+7] = (1 << (D-1)) - r->coeffs[8*i+7]; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyz_pack +* +* Description: Bit-pack polynomial with 
coefficients +* in [-(GAMMA1 - 1), GAMMA1]. +* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYZ_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void polyz_pack(uint8_t *r, const poly *a) { + unsigned int i; + uint32_t t[4]; + DBENCH_START(); + +#if GAMMA1 == (1 << 17) + for(i = 0; i < N/4; ++i) { + t[0] = GAMMA1 - a->coeffs[4*i+0]; + t[1] = GAMMA1 - a->coeffs[4*i+1]; + t[2] = GAMMA1 - a->coeffs[4*i+2]; + t[3] = GAMMA1 - a->coeffs[4*i+3]; + + r[9*i+0] = t[0]; + r[9*i+1] = t[0] >> 8; + r[9*i+2] = t[0] >> 16; + r[9*i+2] |= t[1] << 2; + r[9*i+3] = t[1] >> 6; + r[9*i+4] = t[1] >> 14; + r[9*i+4] |= t[2] << 4; + r[9*i+5] = t[2] >> 4; + r[9*i+6] = t[2] >> 12; + r[9*i+6] |= t[3] << 6; + r[9*i+7] = t[3] >> 2; + r[9*i+8] = t[3] >> 10; + } +#elif GAMMA1 == (1 << 19) + for(i = 0; i < N/2; ++i) { + t[0] = GAMMA1 - a->coeffs[2*i+0]; + t[1] = GAMMA1 - a->coeffs[2*i+1]; + + r[5*i+0] = t[0]; + r[5*i+1] = t[0] >> 8; + r[5*i+2] = t[0] >> 16; + r[5*i+2] |= t[1] << 4; + r[5*i+3] = t[1] >> 4; + r[5*i+4] = t[1] >> 12; + } +#endif + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyz_unpack +* +* Description: Unpack polynomial z with coefficients +* in [-(GAMMA1 - 1), GAMMA1]. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void polyz_unpack(poly *r, const uint8_t *a) { + unsigned int i; + DBENCH_START(); + +#if GAMMA1 == (1 << 17) + for(i = 0; i < N/4; ++i) { + r->coeffs[4*i+0] = a[9*i+0]; + r->coeffs[4*i+0] |= (uint32_t)a[9*i+1] << 8; + r->coeffs[4*i+0] |= (uint32_t)a[9*i+2] << 16; + r->coeffs[4*i+0] &= 0x3FFFF; + + r->coeffs[4*i+1] = a[9*i+2] >> 2; + r->coeffs[4*i+1] |= (uint32_t)a[9*i+3] << 6; + r->coeffs[4*i+1] |= (uint32_t)a[9*i+4] << 14; + r->coeffs[4*i+1] &= 0x3FFFF; + + r->coeffs[4*i+2] = a[9*i+4] >> 4; + r->coeffs[4*i+2] |= (uint32_t)a[9*i+5] << 4; + r->coeffs[4*i+2] |= (uint32_t)a[9*i+6] << 12; + r->coeffs[4*i+2] &= 0x3FFFF; + + r->coeffs[4*i+3] = a[9*i+6] >> 6; + r->coeffs[4*i+3] |= (uint32_t)a[9*i+7] << 2; + r->coeffs[4*i+3] |= (uint32_t)a[9*i+8] << 10; + r->coeffs[4*i+3] &= 0x3FFFF; + + r->coeffs[4*i+0] = GAMMA1 - r->coeffs[4*i+0]; + r->coeffs[4*i+1] = GAMMA1 - r->coeffs[4*i+1]; + r->coeffs[4*i+2] = GAMMA1 - r->coeffs[4*i+2]; + r->coeffs[4*i+3] = GAMMA1 - r->coeffs[4*i+3]; + } +#elif GAMMA1 == (1 << 19) + for(i = 0; i < N/2; ++i) { + r->coeffs[2*i+0] = a[5*i+0]; + r->coeffs[2*i+0] |= (uint32_t)a[5*i+1] << 8; + r->coeffs[2*i+0] |= (uint32_t)a[5*i+2] << 16; + r->coeffs[2*i+0] &= 0xFFFFF; + + r->coeffs[2*i+1] = a[5*i+2] >> 4; + r->coeffs[2*i+1] |= (uint32_t)a[5*i+3] << 4; + r->coeffs[2*i+1] |= (uint32_t)a[5*i+4] << 12; + /* r->coeffs[2*i+1] &= 0xFFFFF; */ /* No effect, since we're anyway at 20 bits */ + + r->coeffs[2*i+0] = GAMMA1 - r->coeffs[2*i+0]; + r->coeffs[2*i+1] = GAMMA1 - r->coeffs[2*i+1]; + } +#endif + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyw1_pack +* +* Description: Bit-pack polynomial w1 with coefficients in [0,15] or [0,43]. +* Input coefficients are assumed to be standard representatives. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYW1_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void polyw1_pack(uint8_t *r, const poly *a) { + unsigned int i; + DBENCH_START(); + +#if GAMMA2 == (Q-1)/88 + for(i = 0; i < N/4; ++i) { + r[3*i+0] = a->coeffs[4*i+0]; + r[3*i+0] |= a->coeffs[4*i+1] << 6; + r[3*i+1] = a->coeffs[4*i+1] >> 2; + r[3*i+1] |= a->coeffs[4*i+2] << 4; + r[3*i+2] = a->coeffs[4*i+2] >> 4; + r[3*i+2] |= a->coeffs[4*i+3] << 2; + } +#elif GAMMA2 == (Q-1)/32 + for(i = 0; i < N/2; ++i) + r[i] = a->coeffs[2*i+0] | (a->coeffs[2*i+1] << 4); +#endif + + DBENCH_STOP(*tpack); +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/poly.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/poly.h new file mode 100644 index 0000000000..d2fd989b6a --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/poly.h @@ -0,0 +1,79 @@ +#ifndef POLY_H +#define POLY_H + +#include +#include "params.h" + +typedef struct { + int32_t coeffs[N]; +} poly; + +#define poly_reduce DILITHIUM_NAMESPACE(poly_reduce) +void poly_reduce(poly *a); +#define poly_caddq DILITHIUM_NAMESPACE(poly_caddq) +void poly_caddq(poly *a); + +#define poly_add DILITHIUM_NAMESPACE(poly_add) +void poly_add(poly *c, const poly *a, const poly *b); +#define poly_sub DILITHIUM_NAMESPACE(poly_sub) +void poly_sub(poly *c, const poly *a, const poly *b); +#define poly_shiftl DILITHIUM_NAMESPACE(poly_shiftl) +void poly_shiftl(poly *a); + +#define poly_ntt DILITHIUM_NAMESPACE(poly_ntt) +void poly_ntt(poly *a); +#define poly_invntt_tomont DILITHIUM_NAMESPACE(poly_invntt_tomont) +void poly_invntt_tomont(poly *a); +#define poly_pointwise_montgomery DILITHIUM_NAMESPACE(poly_pointwise_montgomery) +void poly_pointwise_montgomery(poly *c, const poly *a, const poly *b); + +#define poly_power2round DILITHIUM_NAMESPACE(poly_power2round) +void 
poly_power2round(poly *a1, poly *a0, const poly *a); +#define poly_decompose DILITHIUM_NAMESPACE(poly_decompose) +void poly_decompose(poly *a1, poly *a0, const poly *a); +#define poly_make_hint DILITHIUM_NAMESPACE(poly_make_hint) +unsigned int poly_make_hint(poly *h, const poly *a0, const poly *a1); +#define poly_use_hint DILITHIUM_NAMESPACE(poly_use_hint) +void poly_use_hint(poly *b, const poly *a, const poly *h); + +#define poly_chknorm DILITHIUM_NAMESPACE(poly_chknorm) +int poly_chknorm(const poly *a, int32_t B); +#define poly_uniform DILITHIUM_NAMESPACE(poly_uniform) +void poly_uniform(poly *a, + const uint8_t seed[SEEDBYTES], + uint16_t nonce); +#define poly_uniform_eta DILITHIUM_NAMESPACE(poly_uniform_eta) +void poly_uniform_eta(poly *a, + const uint8_t seed[CRHBYTES], + uint16_t nonce); +#define poly_uniform_gamma1 DILITHIUM_NAMESPACE(poly_uniform_gamma1) +void poly_uniform_gamma1(poly *a, + const uint8_t seed[CRHBYTES], + uint16_t nonce); +#define poly_challenge DILITHIUM_NAMESPACE(poly_challenge) +void poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]); + +#define polyeta_pack DILITHIUM_NAMESPACE(polyeta_pack) +void polyeta_pack(uint8_t *r, const poly *a); +#define polyeta_unpack DILITHIUM_NAMESPACE(polyeta_unpack) +void polyeta_unpack(poly *r, const uint8_t *a); + +#define polyt1_pack DILITHIUM_NAMESPACE(polyt1_pack) +void polyt1_pack(uint8_t *r, const poly *a); +#define polyt1_unpack DILITHIUM_NAMESPACE(polyt1_unpack) +void polyt1_unpack(poly *r, const uint8_t *a); + +#define polyt0_pack DILITHIUM_NAMESPACE(polyt0_pack) +void polyt0_pack(uint8_t *r, const poly *a); +#define polyt0_unpack DILITHIUM_NAMESPACE(polyt0_unpack) +void polyt0_unpack(poly *r, const uint8_t *a); + +#define polyz_pack DILITHIUM_NAMESPACE(polyz_pack) +void polyz_pack(uint8_t *r, const poly *a); +#define polyz_unpack DILITHIUM_NAMESPACE(polyz_unpack) +void polyz_unpack(poly *r, const uint8_t *a); + +#define polyw1_pack DILITHIUM_NAMESPACE(polyw1_pack) +void polyw1_pack(uint8_t 
*r, const poly *a); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/polyvec.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/polyvec.c new file mode 100644 index 0000000000..40032b656b --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/polyvec.c @@ -0,0 +1,389 @@ +#include +#include "params.h" +#include "polyvec.h" +#include "poly.h" + +/************************************************* +* Name: expand_mat +* +* Description: Implementation of ExpandA. Generates matrix A with uniformly +* random coefficients a_{i,j} by performing rejection +* sampling on the output stream of SHAKE128(rho|j|i) +* +* Arguments: - polyvecl mat[K]: output matrix +* - const uint8_t rho[]: byte array containing seed rho +**************************************************/ +void polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]) { + unsigned int i, j; + + for(i = 0; i < K; ++i) + for(j = 0; j < L; ++j) + poly_uniform(&mat[i].vec[j], rho, (i << 8) + j); +} + +void polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + polyvecl_pointwise_acc_montgomery(&t->vec[i], &mat[i], v); +} + +/**************************************************************/ +/************ Vectors of polynomials of length L **************/ +/**************************************************************/ + +void polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_uniform_eta(&v->vec[i], seed, nonce++); +} + +void polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_uniform_gamma1(&v->vec[i], seed, L*nonce + i); +} + +void polyvecl_reduce(polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_reduce(&v->vec[i]); +} + 
+/************************************************* +* Name: polyvecl_add +* +* Description: Add vectors of polynomials of length L. +* No modular reduction is performed. +* +* Arguments: - polyvecl *w: pointer to output vector +* - const polyvecl *u: pointer to first summand +* - const polyvecl *v: pointer to second summand +**************************************************/ +void polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyvecl_ntt +* +* Description: Forward NTT of all polynomials in vector of length L. Output +* coefficients can be up to 16*Q larger than input coefficients. +* +* Arguments: - polyvecl *v: pointer to input/output vector +**************************************************/ +void polyvecl_ntt(polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_ntt(&v->vec[i]); +} + +void polyvecl_invntt_tomont(polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_invntt_tomont(&v->vec[i]); +} + +void polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]); +} + +/************************************************* +* Name: polyvecl_pointwise_acc_montgomery +* +* Description: Pointwise multiply vectors of polynomials of length L, multiply +* resulting vector by 2^{-32} and add (accumulate) polynomials +* in it. Input/output vectors are in NTT domain representation. 
+* +* Arguments: - poly *w: output polynomial +* - const polyvecl *u: pointer to first input vector +* - const polyvecl *v: pointer to second input vector +**************************************************/ +void polyvecl_pointwise_acc_montgomery(poly *w, + const polyvecl *u, + const polyvecl *v) +{ + unsigned int i; + poly t; + + poly_pointwise_montgomery(w, &u->vec[0], &v->vec[0]); + for(i = 1; i < L; ++i) { + poly_pointwise_montgomery(&t, &u->vec[i], &v->vec[i]); + poly_add(w, w, &t); + } +} + +/************************************************* +* Name: polyvecl_chknorm +* +* Description: Check infinity norm of polynomials in vector of length L. +* Assumes input polyvecl to be reduced by polyvecl_reduce(). +* +* Arguments: - const polyvecl *v: pointer to vector +* - int32_t B: norm bound +* +* Returns 0 if norm of all polynomials is strictly smaller than B <= (Q-1)/8 +* and 1 otherwise. +**************************************************/ +int polyvecl_chknorm(const polyvecl *v, int32_t bound) { + unsigned int i; + + for(i = 0; i < L; ++i) + if(poly_chknorm(&v->vec[i], bound)) + return 1; + + return 0; +} + +/**************************************************************/ +/************ Vectors of polynomials of length K **************/ +/**************************************************************/ + +void polyveck_uniform_eta(polyveck *v, const uint8_t seed[CRHBYTES], uint16_t nonce) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_uniform_eta(&v->vec[i], seed, nonce++); +} + +/************************************************* +* Name: polyveck_reduce +* +* Description: Reduce coefficients of polynomials in vector of length K +* to representatives in [-6283009,6283007]. 
+* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_reduce(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_reduce(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_caddq +* +* Description: For all coefficients of polynomials in vector of length K +* add Q if coefficient is negative. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_caddq(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_caddq(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_add +* +* Description: Add vectors of polynomials of length K. +* No modular reduction is performed. +* +* Arguments: - polyveck *w: pointer to output vector +* - const polyveck *u: pointer to first summand +* - const polyveck *v: pointer to second summand +**************************************************/ +void polyveck_add(polyveck *w, const polyveck *u, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_sub +* +* Description: Subtract vectors of polynomials of length K. +* No modular reduction is performed. +* +* Arguments: - polyveck *w: pointer to output vector +* - const polyveck *u: pointer to first input vector +* - const polyveck *v: pointer to second input vector to be +* subtracted from first input vector +**************************************************/ +void polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_sub(&w->vec[i], &u->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_shiftl +* +* Description: Multiply vector of polynomials of Length K by 2^D without modular +* reduction. 
Assumes input coefficients to be less than 2^{31-D}. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_shiftl(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_shiftl(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_ntt +* +* Description: Forward NTT of all polynomials in vector of length K. Output +* coefficients can be up to 16*Q larger than input coefficients. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_ntt(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_ntt(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_invntt_tomont +* +* Description: Inverse NTT and multiplication by 2^{32} of polynomials +* in vector of length K. Input coefficients need to be less +* than 2*Q. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_invntt_tomont(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_invntt_tomont(&v->vec[i]); +} + +void polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]); +} + + +/************************************************* +* Name: polyveck_chknorm +* +* Description: Check infinity norm of polynomials in vector of length K. +* Assumes input polyveck to be reduced by polyveck_reduce(). +* +* Arguments: - const polyveck *v: pointer to vector +* - int32_t B: norm bound +* +* Returns 0 if norm of all polynomials are strictly smaller than B <= (Q-1)/8 +* and 1 otherwise. 
+**************************************************/ +int polyveck_chknorm(const polyveck *v, int32_t bound) { + unsigned int i; + + for(i = 0; i < K; ++i) + if(poly_chknorm(&v->vec[i], bound)) + return 1; + + return 0; +} + +/************************************************* +* Name: polyveck_power2round +* +* Description: For all coefficients a of polynomials in vector of length K, +* compute a0, a1 such that a mod^+ Q = a1*2^D + a0 +* with -2^{D-1} < a0 <= 2^{D-1}. Assumes coefficients to be +* standard representatives. +* +* Arguments: - polyveck *v1: pointer to output vector of polynomials with +* coefficients a1 +* - polyveck *v0: pointer to output vector of polynomials with +* coefficients a0 +* - const polyveck *v: pointer to input vector +**************************************************/ +void polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_power2round(&v1->vec[i], &v0->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_decompose +* +* Description: For all coefficients a of polynomials in vector of length K, +* compute high and low bits a0, a1 such a mod^+ Q = a1*ALPHA + a0 +* with -ALPHA/2 < a0 <= ALPHA/2 except a1 = (Q-1)/ALPHA where we +* set a1 = 0 and -ALPHA/2 <= a0 = a mod Q - Q < 0. +* Assumes coefficients to be standard representatives. +* +* Arguments: - polyveck *v1: pointer to output vector of polynomials with +* coefficients a1 +* - polyveck *v0: pointer to output vector of polynomials with +* coefficients a0 +* - const polyveck *v: pointer to input vector +**************************************************/ +void polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_decompose(&v1->vec[i], &v0->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_make_hint +* +* Description: Compute hint vector. 
+* +* Arguments: - polyveck *h: pointer to output vector +* - const polyveck *v0: pointer to low part of input vector +* - const polyveck *v1: pointer to high part of input vector +* +* Returns number of 1 bits. +**************************************************/ +unsigned int polyveck_make_hint(polyveck *h, + const polyveck *v0, + const polyveck *v1) +{ + unsigned int i, s = 0; + + for(i = 0; i < K; ++i) + s += poly_make_hint(&h->vec[i], &v0->vec[i], &v1->vec[i]); + + return s; +} + +/************************************************* +* Name: polyveck_use_hint +* +* Description: Use hint vector to correct the high bits of input vector. +* +* Arguments: - polyveck *w: pointer to output vector of polynomials with +* corrected high bits +* - const polyveck *u: pointer to input vector +* - const polyveck *h: pointer to input hint vector +**************************************************/ +void polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_use_hint(&w->vec[i], &u->vec[i], &h->vec[i]); +} + +void polyveck_pack_w1(uint8_t r[K*POLYW1_PACKEDBYTES], const polyveck *w1) { + unsigned int i; + + for(i = 0; i < K; ++i) + polyw1_pack(&r[i*POLYW1_PACKEDBYTES], &w1->vec[i]); +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/polyvec.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/polyvec.h new file mode 100644 index 0000000000..615ac52990 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/polyvec.h @@ -0,0 +1,93 @@ +#ifndef POLYVEC_H +#define POLYVEC_H + +#include +#include "params.h" +#include "poly.h" + +/* Vectors of polynomials of length L */ +typedef struct { + poly vec[L]; +} polyvecl; + +#define polyvecl_uniform_eta DILITHIUM_NAMESPACE(polyvecl_uniform_eta) +void polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define polyvecl_uniform_gamma1 
DILITHIUM_NAMESPACE(polyvecl_uniform_gamma1) +void polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define polyvecl_reduce DILITHIUM_NAMESPACE(polyvecl_reduce) +void polyvecl_reduce(polyvecl *v); + +#define polyvecl_add DILITHIUM_NAMESPACE(polyvecl_add) +void polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v); + +#define polyvecl_ntt DILITHIUM_NAMESPACE(polyvecl_ntt) +void polyvecl_ntt(polyvecl *v); +#define polyvecl_invntt_tomont DILITHIUM_NAMESPACE(polyvecl_invntt_tomont) +void polyvecl_invntt_tomont(polyvecl *v); +#define polyvecl_pointwise_poly_montgomery DILITHIUM_NAMESPACE(polyvecl_pointwise_poly_montgomery) +void polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v); +#define polyvecl_pointwise_acc_montgomery \ + DILITHIUM_NAMESPACE(polyvecl_pointwise_acc_montgomery) +void polyvecl_pointwise_acc_montgomery(poly *w, + const polyvecl *u, + const polyvecl *v); + + +#define polyvecl_chknorm DILITHIUM_NAMESPACE(polyvecl_chknorm) +int polyvecl_chknorm(const polyvecl *v, int32_t B); + + + +/* Vectors of polynomials of length K */ +typedef struct { + poly vec[K]; +} polyveck; + +#define polyveck_uniform_eta DILITHIUM_NAMESPACE(polyveck_uniform_eta) +void polyveck_uniform_eta(polyveck *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define polyveck_reduce DILITHIUM_NAMESPACE(polyveck_reduce) +void polyveck_reduce(polyveck *v); +#define polyveck_caddq DILITHIUM_NAMESPACE(polyveck_caddq) +void polyveck_caddq(polyveck *v); + +#define polyveck_add DILITHIUM_NAMESPACE(polyveck_add) +void polyveck_add(polyveck *w, const polyveck *u, const polyveck *v); +#define polyveck_sub DILITHIUM_NAMESPACE(polyveck_sub) +void polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v); +#define polyveck_shiftl DILITHIUM_NAMESPACE(polyveck_shiftl) +void polyveck_shiftl(polyveck *v); + +#define polyveck_ntt DILITHIUM_NAMESPACE(polyveck_ntt) +void polyveck_ntt(polyveck *v); +#define 
polyveck_invntt_tomont DILITHIUM_NAMESPACE(polyveck_invntt_tomont) +void polyveck_invntt_tomont(polyveck *v); +#define polyveck_pointwise_poly_montgomery DILITHIUM_NAMESPACE(polyveck_pointwise_poly_montgomery) +void polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v); + +#define polyveck_chknorm DILITHIUM_NAMESPACE(polyveck_chknorm) +int polyveck_chknorm(const polyveck *v, int32_t B); + +#define polyveck_power2round DILITHIUM_NAMESPACE(polyveck_power2round) +void polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v); +#define polyveck_decompose DILITHIUM_NAMESPACE(polyveck_decompose) +void polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v); +#define polyveck_make_hint DILITHIUM_NAMESPACE(polyveck_make_hint) +unsigned int polyveck_make_hint(polyveck *h, + const polyveck *v0, + const polyveck *v1); +#define polyveck_use_hint DILITHIUM_NAMESPACE(polyveck_use_hint) +void polyveck_use_hint(polyveck *w, const polyveck *v, const polyveck *h); + +#define polyveck_pack_w1 DILITHIUM_NAMESPACE(polyveck_pack_w1) +void polyveck_pack_w1(uint8_t r[K*POLYW1_PACKEDBYTES], const polyveck *w1); + +#define polyvec_matrix_expand DILITHIUM_NAMESPACE(polyvec_matrix_expand) +void polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]); + +#define polyvec_matrix_pointwise_montgomery DILITHIUM_NAMESPACE(polyvec_matrix_pointwise_montgomery) +void polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/reduce.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/reduce.c new file mode 100644 index 0000000000..75feff8bc5 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/reduce.c @@ -0,0 +1,69 @@ +#include +#include "params.h" +#include "reduce.h" + +/************************************************* +* Name: montgomery_reduce +* +* Description: For finite 
field element a with -2^{31}Q <= a <= Q*2^31, +* compute r \equiv a*2^{-32} (mod Q) such that -Q < r < Q. +* +* Arguments: - int64_t: finite field element a +* +* Returns r. +**************************************************/ +int32_t montgomery_reduce(int64_t a) { + int32_t t; + + t = (int64_t)(int32_t)a*QINV; + t = (a - (int64_t)t*Q) >> 32; + return t; +} + +/************************************************* +* Name: reduce32 +* +* Description: For finite field element a with a <= 2^{31} - 2^{22} - 1, +* compute r \equiv a (mod Q) such that -6283009 <= r <= 6283007. +* +* Arguments: - int32_t: finite field element a +* +* Returns r. +**************************************************/ +int32_t reduce32(int32_t a) { + int32_t t; + + t = (a + (1 << 22)) >> 23; + t = a - t*Q; + return t; +} + +/************************************************* +* Name: caddq +* +* Description: Add Q if input coefficient is negative. +* +* Arguments: - int32_t: finite field element a +* +* Returns r. +**************************************************/ +int32_t caddq(int32_t a) { + a += (a >> 31) & Q; + return a; +} + +/************************************************* +* Name: freeze +* +* Description: For finite field element a, compute standard +* representative r = a mod^+ Q. +* +* Arguments: - int32_t: finite field element a +* +* Returns r. 
+**************************************************/ +int32_t freeze(int32_t a) { + a = reduce32(a); + a = caddq(a); + return a; +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/reduce.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/reduce.h new file mode 100644 index 0000000000..26d9b4ee2e --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/reduce.h @@ -0,0 +1,22 @@ +#ifndef REDUCE_H +#define REDUCE_H + +#include +#include "params.h" + +#define MONT -4186625 // 2^32 % Q +#define QINV 58728449 // q^(-1) mod 2^32 + +#define montgomery_reduce DILITHIUM_NAMESPACE(montgomery_reduce) +int32_t montgomery_reduce(int64_t a); + +#define reduce32 DILITHIUM_NAMESPACE(reduce32) +int32_t reduce32(int32_t a); + +#define caddq DILITHIUM_NAMESPACE(caddq) +int32_t caddq(int32_t a); + +#define freeze DILITHIUM_NAMESPACE(freeze) +int32_t freeze(int32_t a); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/rounding.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/rounding.c new file mode 100644 index 0000000000..889f0a296b --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/rounding.c @@ -0,0 +1,102 @@ +#include +#include "params.h" +#include "rounding.h" + +/************************************************* +* Name: power2round +* +* Description: For finite field element a, compute a0, a1 such that +* a mod^+ Q = a1*2^D + a0 with -2^{D-1} < a0 <= 2^{D-1}. +* Assumes a to be standard representative. +* +* Arguments: - int32_t a: input element +* - int32_t *a0: pointer to output element a0 +* +* Returns a1. 
+**************************************************/ +int32_t power2round(int32_t *a0, int32_t a) { + int32_t a1; + + a1 = (a + (1 << (D-1)) - 1) >> D; + *a0 = a - (a1 << D); + return a1; +} + +/************************************************* +* Name: decompose +* +* Description: For finite field element a, compute high and low bits a0, a1 such +* that a mod^+ Q = a1*ALPHA + a0 with -ALPHA/2 < a0 <= ALPHA/2 except +* if a1 = (Q-1)/ALPHA where we set a1 = 0 and +* -ALPHA/2 <= a0 = a mod^+ Q - Q < 0. Assumes a to be standard +* representative. +* +* Arguments: - int32_t a: input element +* - int32_t *a0: pointer to output element a0 +* +* Returns a1. +**************************************************/ +int32_t decompose(int32_t *a0, int32_t a) { + int32_t a1; + + a1 = (a + 127) >> 7; +#if GAMMA2 == (Q-1)/32 + a1 = (a1*1025 + (1 << 21)) >> 22; + a1 &= 15; +#elif GAMMA2 == (Q-1)/88 + a1 = (a1*11275 + (1 << 23)) >> 24; + a1 ^= ((43 - a1) >> 31) & a1; +#endif + + *a0 = a - a1*2*GAMMA2; + *a0 -= (((Q-1)/2 - *a0) >> 31) & Q; + return a1; +} + +/************************************************* +* Name: make_hint +* +* Description: Compute hint bit indicating whether the low bits of the +* input element overflow into the high bits. +* +* Arguments: - int32_t a0: low bits of input element +* - int32_t a1: high bits of input element +* +* Returns 1 if overflow. +**************************************************/ +unsigned int make_hint(int32_t a0, int32_t a1) { + if(a0 > GAMMA2 || a0 < -GAMMA2 || (a0 == -GAMMA2 && a1 != 0)) + return 1; + + return 0; +} + +/************************************************* +* Name: use_hint +* +* Description: Correct high bits according to hint. +* +* Arguments: - int32_t a: input element +* - unsigned int hint: hint bit +* +* Returns corrected high bits. 
+**************************************************/ +int32_t use_hint(int32_t a, unsigned int hint) { + int32_t a0, a1; + + a1 = decompose(&a0, a); + if(hint == 0) + return a1; + +#if GAMMA2 == (Q-1)/32 + if(a0 > 0) + return (a1 + 1) & 15; + else + return (a1 - 1) & 15; +#elif GAMMA2 == (Q-1)/88 + if(a0 > 0) + return (a1 == 43) ? 0 : a1 + 1; + else + return (a1 == 0) ? 43 : a1 - 1; +#endif +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/rounding.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/rounding.h new file mode 100644 index 0000000000..b72e8e8d66 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/rounding.h @@ -0,0 +1,19 @@ +#ifndef ROUNDING_H +#define ROUNDING_H + +#include +#include "params.h" + +#define power2round DILITHIUM_NAMESPACE(power2round) +int32_t power2round(int32_t *a0, int32_t a); + +#define decompose DILITHIUM_NAMESPACE(decompose) +int32_t decompose(int32_t *a0, int32_t a); + +#define make_hint DILITHIUM_NAMESPACE(make_hint) +unsigned int make_hint(int32_t a0, int32_t a1); + +#define use_hint DILITHIUM_NAMESPACE(use_hint) +int32_t use_hint(int32_t a, unsigned int hint); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/sign.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/sign.c new file mode 100644 index 0000000000..9298ad2177 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/sign.c @@ -0,0 +1,341 @@ +#include +#include "params.h" +#include "sign.h" +#include "packing.h" +#include "polyvec.h" +#include "poly.h" +#include "randombytes.h" +#include "symmetric.h" +#include "fips202.h" + +/************************************************* +* Name: crypto_sign_keypair +* +* Description: Generates public and private key. 
+* +* Arguments: - uint8_t *pk: pointer to output public key (allocated +* array of CRYPTO_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key (allocated +* array of CRYPTO_SECRETKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { + uint8_t seedbuf[2*SEEDBYTES + CRHBYTES]; + uint8_t tr[TRBYTES]; + const uint8_t *rho, *rhoprime, *key; + polyvecl mat[K]; + polyvecl s1, s1hat; + polyveck s2, t1, t0; + + /* Get randomness for rho, rhoprime and key */ + randombytes(seedbuf, SEEDBYTES); + shake256(seedbuf, 2*SEEDBYTES + CRHBYTES, seedbuf, SEEDBYTES); + rho = seedbuf; + rhoprime = rho + SEEDBYTES; + key = rhoprime + CRHBYTES; + + /* Expand matrix */ + polyvec_matrix_expand(mat, rho); + + /* Sample short vectors s1 and s2 */ + polyvecl_uniform_eta(&s1, rhoprime, 0); + polyveck_uniform_eta(&s2, rhoprime, L); + + /* Matrix-vector multiplication */ + s1hat = s1; + polyvecl_ntt(&s1hat); + polyvec_matrix_pointwise_montgomery(&t1, mat, &s1hat); + polyveck_reduce(&t1); + polyveck_invntt_tomont(&t1); + + /* Add error vector s2 */ + polyveck_add(&t1, &t1, &s2); + + /* Extract t1 and write public key */ + polyveck_caddq(&t1); + polyveck_power2round(&t1, &t0, &t1); + pack_pk(pk, rho, &t1); + + /* Compute H(rho, t1) and write secret key */ + shake256(tr, TRBYTES, pk, CRYPTO_PUBLICKEYBYTES); + pack_sk(sk, rho, tr, key, &t0, &s1, &s2); + + return 0; +} + +/************************************************* +* Name: crypto_sign_signature +* +* Description: Computes signature. 
+* +* Arguments: - uint8_t *sig: pointer to output signature (of length CRYPTO_BYTES) +* - size_t *siglen: pointer to output length of signature +* - uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int crypto_sign_signature(uint8_t *sig, + size_t *siglen, + const uint8_t *m, + size_t mlen, + const uint8_t *sk) +{ + unsigned int n; + uint8_t seedbuf[2*SEEDBYTES + TRBYTES + RNDBYTES + 2*CRHBYTES]; + uint8_t *rho, *tr, *key, *mu, *rhoprime, *rnd; + uint16_t nonce = 0; + polyvecl mat[K], s1, y, z; + polyveck t0, s2, w1, w0, h; + poly cp; + shake256incctx state; + + rho = seedbuf; + tr = rho + SEEDBYTES; + key = tr + TRBYTES; + rnd = key + SEEDBYTES; + mu = rnd + RNDBYTES; + rhoprime = mu + CRHBYTES; + unpack_sk(rho, tr, key, &t0, &s1, &s2, sk); + + + /* Compute mu = CRH(tr, msg) */ + shake256_inc_init(&state); + shake256_inc_absorb(&state, tr, TRBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(mu, CRHBYTES, &state); + +#ifdef DILITHIUM_RANDOMIZED_SIGNING + randombytes(rnd, RNDBYTES); +#else + for(n=0;n OMEGA) + goto rej; + + shake256_inc_ctx_release(&state); + + /* Write signature */ + pack_sig(sig, sig, &z, &h); + *siglen = CRYPTO_BYTES; + return 0; +} + +/************************************************* +* Name: crypto_sign +* +* Description: Compute signed message. 
+* +* Arguments: - uint8_t *sm: pointer to output signed message (allocated +* array with CRYPTO_BYTES + mlen bytes), +* can be equal to m +* - size_t *smlen: pointer to output length of signed +* message +* - const uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - const uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int crypto_sign(uint8_t *sm, + size_t *smlen, + const uint8_t *m, + size_t mlen, + const uint8_t *sk) +{ + size_t i; + + for(i = 0; i < mlen; ++i) + sm[CRYPTO_BYTES + mlen - 1 - i] = m[mlen - 1 - i]; + crypto_sign_signature(sm, smlen, sm + CRYPTO_BYTES, mlen, sk); + *smlen += mlen; + return 0; +} + +/************************************************* +* Name: crypto_sign_verify +* +* Description: Verifies signature. +* +* Arguments: - uint8_t *m: pointer to input signature +* - size_t siglen: length of signature +* - const uint8_t *m: pointer to message +* - size_t mlen: length of message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signature could be verified correctly and -1 otherwise +**************************************************/ +int crypto_sign_verify(const uint8_t *sig, + size_t siglen, + const uint8_t *m, + size_t mlen, + const uint8_t *pk) +{ + unsigned int i; + uint8_t buf[K*POLYW1_PACKEDBYTES]; + uint8_t rho[SEEDBYTES]; + uint8_t mu[CRHBYTES]; + uint8_t c[CTILDEBYTES]; + uint8_t c2[CTILDEBYTES]; + poly cp; + polyvecl mat[K], z; + polyveck t1, w1, h; + shake256incctx state; + + if(siglen != CRYPTO_BYTES) + return -1; + + unpack_pk(rho, &t1, pk); + if(unpack_sig(c, &z, &h, sig)) + return -1; + if(polyvecl_chknorm(&z, GAMMA1 - BETA)) + return -1; + + /* Compute CRH(H(rho, t1), msg) */ + shake256(mu, CRHBYTES, pk, CRYPTO_PUBLICKEYBYTES); + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + 
shake256_inc_squeeze(mu, CRHBYTES, &state); + + /* Matrix-vector multiplication; compute Az - c2^dt1 */ + poly_challenge(&cp, c); /* uses only the first SEEDBYTES bytes of c */ + polyvec_matrix_expand(mat, rho); + + polyvecl_ntt(&z); + polyvec_matrix_pointwise_montgomery(&w1, mat, &z); + + poly_ntt(&cp); + polyveck_shiftl(&t1); + polyveck_ntt(&t1); + polyveck_pointwise_poly_montgomery(&t1, &cp, &t1); + + polyveck_sub(&w1, &w1, &t1); + polyveck_reduce(&w1); + polyveck_invntt_tomont(&w1); + + /* Reconstruct w1 */ + polyveck_caddq(&w1); + polyveck_use_hint(&w1, &w1, &h); + polyveck_pack_w1(buf, &w1); + + /* Call random oracle and verify challenge */ + shake256_inc_ctx_reset(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, buf, K*POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(c2, CTILDEBYTES, &state); + shake256_inc_ctx_release(&state); + for(i = 0; i < CTILDEBYTES; ++i) + if(c[i] != c2[i]) + return -1; + + return 0; +} + +/************************************************* +* Name: crypto_sign_open +* +* Description: Verify signed message. 
+* +* Arguments: - uint8_t *m: pointer to output message (allocated +* array with smlen bytes), can be equal to sm +* - size_t *mlen: pointer to output length of message +* - const uint8_t *sm: pointer to signed message +* - size_t smlen: length of signed message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signed message could be verified correctly and -1 otherwise +**************************************************/ +int crypto_sign_open(uint8_t *m, + size_t *mlen, + const uint8_t *sm, + size_t smlen, + const uint8_t *pk) +{ + size_t i; + + if(smlen < CRYPTO_BYTES) + goto badsig; + + *mlen = smlen - CRYPTO_BYTES; + if(crypto_sign_verify(sm, CRYPTO_BYTES, sm + CRYPTO_BYTES, *mlen, pk)) + goto badsig; + else { + /* All good, copy msg, return 0 */ + for(i = 0; i < *mlen; ++i) + m[i] = sm[CRYPTO_BYTES + i]; + return 0; + } + +badsig: + /* Signature verification failed */ + *mlen = -1; + for(i = 0; i < smlen; ++i) + m[i] = 0; + + return -1; +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/sign.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/sign.h new file mode 100644 index 0000000000..295f378c00 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/sign.h @@ -0,0 +1,36 @@ +#ifndef SIGN_H +#define SIGN_H + +#include +#include +#include "params.h" +#include "polyvec.h" +#include "poly.h" + +#define challenge DILITHIUM_NAMESPACE(challenge) +void challenge(poly *c, const uint8_t seed[SEEDBYTES]); + +#define crypto_sign_keypair DILITHIUM_NAMESPACE(keypair) +int crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + +#define crypto_sign_signature DILITHIUM_NAMESPACE(signature) +int crypto_sign_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +#define crypto_sign DILITHIUM_NAMESPACETOP +int crypto_sign(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +#define crypto_sign_verify 
DILITHIUM_NAMESPACE(verify) +int crypto_sign_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +#define crypto_sign_open DILITHIUM_NAMESPACE(open) +int crypto_sign_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/symmetric-shake.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/symmetric-shake.c new file mode 100644 index 0000000000..963f649817 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/symmetric-shake.c @@ -0,0 +1,28 @@ +#include +#include "params.h" +#include "symmetric.h" +#include "fips202.h" + +void dilithium_shake128_stream_init(shake128incctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce) +{ + uint8_t t[2]; + t[0] = nonce; + t[1] = nonce >> 8; + + shake128_inc_init(state); + shake128_inc_absorb(state, seed, SEEDBYTES); + shake128_inc_absorb(state, t, 2); + shake128_inc_finalize(state); +} + +void dilithium_shake256_stream_init(shake256incctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce) +{ + uint8_t t[2]; + t[0] = nonce; + t[1] = nonce >> 8; + + shake256_inc_init(state); + shake256_inc_absorb(state, seed, CRHBYTES); + shake256_inc_absorb(state, t, 2); + shake256_inc_finalize(state); +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/symmetric.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/symmetric.h new file mode 100644 index 0000000000..211de3b860 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-44-ipd_ref/symmetric.h @@ -0,0 +1,36 @@ +#ifndef SYMMETRIC_H +#define SYMMETRIC_H + +#include +#include "params.h" + +#include "fips202.h" + +typedef shake128incctx stream128_state; +typedef shake256incctx stream256_state; + +#define dilithium_shake128_stream_init DILITHIUM_NAMESPACE(dilithium_shake128_stream_init) +void 
dilithium_shake128_stream_init(shake128incctx *state, + const uint8_t seed[SEEDBYTES], + uint16_t nonce); + +#define dilithium_shake256_stream_init DILITHIUM_NAMESPACE(dilithium_shake256_stream_init) +void dilithium_shake256_stream_init(shake256incctx *state, + const uint8_t seed[CRHBYTES], + uint16_t nonce); + +#define STREAM128_BLOCKBYTES SHAKE128_RATE +#define STREAM256_BLOCKBYTES SHAKE256_RATE + +#define stream128_init(STATE, SEED, NONCE) \ + dilithium_shake128_stream_init(STATE, SEED, NONCE) +#define stream128_squeezeblocks(OUT, OUTBLOCKS, STATE) \ + shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) +#define stream128_release(STATE) shake128_inc_ctx_release(STATE) +#define stream256_init(STATE, SEED, NONCE) \ + dilithium_shake256_stream_init(STATE, SEED, NONCE) +#define stream256_squeezeblocks(OUT, OUTBLOCKS, STATE) \ + shake256_squeezeblocks(OUT, OUTBLOCKS, STATE) +#define stream256_release(STATE) shake256_inc_ctx_release(STATE) + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/LICENSE b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/LICENSE new file mode 100644 index 0000000000..cddfe615c6 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/LICENSE @@ -0,0 +1,7 @@ +Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/); +or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html). + +For Keccak and the random number generator +we are using public-domain code from sources +and by authors listed in comments on top of +the respective files. 
diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/align.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/align.h new file mode 100644 index 0000000000..33fac1d968 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/align.h @@ -0,0 +1,19 @@ +#ifndef ALIGN_H +#define ALIGN_H + +#include +#include + +#define ALIGNED_UINT8(N) \ + union { \ + uint8_t coeffs[N]; \ + __m256i vec[(N+31)/32]; \ + } + +#define ALIGNED_INT32(N) \ + union { \ + int32_t coeffs[N]; \ + __m256i vec[(N+7)/8]; \ + } + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/api.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/api.h new file mode 100644 index 0000000000..55b637669d --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/api.h @@ -0,0 +1,88 @@ +#ifndef API_H +#define API_H + +#include +#include + +#define pqcrystals_dilithium2_PUBLICKEYBYTES 1312 +#define pqcrystals_dilithium2_SECRETKEYBYTES 2560 +#define pqcrystals_dilithium2_BYTES 2420 + +#define pqcrystals_dilithium2_avx2_PUBLICKEYBYTES pqcrystals_dilithium2_PUBLICKEYBYTES +#define pqcrystals_dilithium2_avx2_SECRETKEYBYTES pqcrystals_dilithium2_SECRETKEYBYTES +#define pqcrystals_dilithium2_avx2_BYTES pqcrystals_dilithium2_BYTES + +int pqcrystals_dilithium2_avx2_keypair(uint8_t *pk, uint8_t *sk); + +int pqcrystals_dilithium2_avx2_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium2_avx2(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium2_avx2_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int pqcrystals_dilithium2_avx2_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#define pqcrystals_dilithium3_PUBLICKEYBYTES 1952 +#define 
pqcrystals_dilithium3_SECRETKEYBYTES 4032 +#define pqcrystals_dilithium3_BYTES 3309 + +#define pqcrystals_dilithium3_avx2_PUBLICKEYBYTES pqcrystals_dilithium3_PUBLICKEYBYTES +#define pqcrystals_dilithium3_avx2_SECRETKEYBYTES pqcrystals_dilithium3_SECRETKEYBYTES +#define pqcrystals_dilithium3_avx2_BYTES pqcrystals_dilithium3_BYTES + +int pqcrystals_dilithium3_avx2_keypair(uint8_t *pk, uint8_t *sk); + +int pqcrystals_dilithium3_avx2_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium3_avx2(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium3_avx2_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int pqcrystals_dilithium3_avx2_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#define pqcrystals_dilithium5_PUBLICKEYBYTES 2592 +#define pqcrystals_dilithium5_SECRETKEYBYTES 4896 +#define pqcrystals_dilithium5_BYTES 4627 + +#define pqcrystals_dilithium5_avx2_PUBLICKEYBYTES pqcrystals_dilithium5_PUBLICKEYBYTES +#define pqcrystals_dilithium5_avx2_SECRETKEYBYTES pqcrystals_dilithium5_SECRETKEYBYTES +#define pqcrystals_dilithium5_avx2_BYTES pqcrystals_dilithium5_BYTES + +int pqcrystals_dilithium5_avx2_keypair(uint8_t *pk, uint8_t *sk); + +int pqcrystals_dilithium5_avx2_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium5_avx2(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium5_avx2_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int pqcrystals_dilithium5_avx2_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/config.h 
b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/config.h new file mode 100644 index 0000000000..e59f81a5e8 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/config.h @@ -0,0 +1,27 @@ +#ifndef CONFIG_H +#define CONFIG_H + +//#define DILITHIUM_MODE 2 +#define DILITHIUM_RANDOMIZED_SIGNING +//#define USE_RDPMC +//#define DBENCH + +#ifndef DILITHIUM_MODE +#define DILITHIUM_MODE 2 +#endif + +#if DILITHIUM_MODE == 2 +#define CRYPTO_ALGNAME "ML-DSA-44-ipd" +#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_44_ipd_avx2 +#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_44_ipd_avx2_##s +#elif DILITHIUM_MODE == 3 +#define CRYPTO_ALGNAME "ML-DSA-65-ipd" +#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_65_ipd_avx2 +#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_65_ipd_avx2_##s +#elif DILITHIUM_MODE == 5 +#define CRYPTO_ALGNAME "ML-DSA-87-ipd" +#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_87_ipd_avx2 +#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_87_ipd_avx2_##s +#endif + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/consts.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/consts.c new file mode 100644 index 0000000000..414d99eceb --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/consts.c @@ -0,0 +1,100 @@ +#include +#include "params.h" +#include "consts.h" + +#define QINV 58728449 // q^(-1) mod 2^32 +#define MONT -4186625 // 2^32 mod q +#define DIV 41978 // mont^2/256 +#define DIV_QINV -8395782 + +const qdata_t qdata = {{ +#define _8XQ 0 + Q, Q, Q, Q, Q, Q, Q, Q, + +#define _8XQINV 8 + QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, + +#define _8XDIV_QINV 16 + DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, + +#define _8XDIV 24 + DIV, DIV, DIV, DIV, DIV, DIV, DIV, DIV, + +#define _ZETAS_QINV 32 + -151046689, 1830765815, -1929875198, -1927777021, 1640767044, 1477910808, 1612161320, 1640734244, + 
308362795, 308362795, 308362795, 308362795, -1815525077, -1815525077, -1815525077, -1815525077, + -1374673747, -1374673747, -1374673747, -1374673747, -1091570561, -1091570561, -1091570561, -1091570561, + -1929495947, -1929495947, -1929495947, -1929495947, 515185417, 515185417, 515185417, 515185417, + -285697463, -285697463, -285697463, -285697463, 625853735, 625853735, 625853735, 625853735, + 1727305304, 1727305304, 2082316400, 2082316400, -1364982364, -1364982364, 858240904, 858240904, + 1806278032, 1806278032, 222489248, 222489248, -346752664, -346752664, 684667771, 684667771, + 1654287830, 1654287830, -878576921, -878576921, -1257667337, -1257667337, -748618600, -748618600, + 329347125, 329347125, 1837364258, 1837364258, -1443016191, -1443016191, -1170414139, -1170414139, + -1846138265, -1631226336, -1404529459, 1838055109, 1594295555, -1076973524, -1898723372, -594436433, + -202001019, -475984260, -561427818, 1797021249, -1061813248, 2059733581, -1661512036, -1104976547, + -1750224323, -901666090, 418987550, 1831915353, -1925356481, 992097815, 879957084, 2024403852, + 1484874664, -1636082790, -285388938, -1983539117, -1495136972, -950076368, -1714807468, -952438995, + -1574918427, 1350681039, -2143979939, 1599739335, -1285853323, -993005454, -1440787840, 568627424, + -783134478, -588790216, 289871779, -1262003603, 2135294594, -1018755525, -889861155, 1665705315, + 1321868265, 1225434135, -1784632064, 666258756, 675310538, -1555941048, -1999506068, -1499481951, + -695180180, -1375177022, 1777179795, 334803717, -178766299, -518252220, 1957047970, 1146323031, + -654783359, -1974159335, 1651689966, 140455867, -1039411342, 1955560694, 1529189038, -2131021878, + -247357819, 1518161567, -86965173, 1708872713, 1787797779, 1638590967, -120646188, -1669960606, + -916321552, 1155548552, 2143745726, 1210558298, -1261461890, -318346816, 628664287, -1729304568, + 1422575624, 1424130038, -1185330464, 235321234, 168022240, 1206536194, 985155484, -894060583, + -898413, 
-1363460238, -605900043, 2027833504, 14253662, 1014493059, 863641633, 1819892093, + 2124962073, -1223601433, -1920467227, -1637785316, -1536588520, 694382729, 235104446, -1045062172, + 831969619, -300448763, 756955444, -260312805, 1554794072, 1339088280, -2040058690, -853476187, + -2047270596, -1723816713, -1591599803, -440824168, 1119856484, 1544891539, 155290192, -973777462, + 991903578, 912367099, -44694137, 1176904444, -421552614, -818371958, 1747917558, -325927722, + 908452108, 1851023419, -1176751719, -1354528380, -72690498, -314284737, 985022747, 963438279, + -1078959975, 604552167, -1021949428, 608791570, 173440395, -2126092136, -1316619236, -1039370342, + 6087993, -110126092, 565464272, -1758099917, -1600929361, 879867909, -1809756372, 400711272, + 1363007700, 30313375, -326425360, 1683520342, -517299994, 2027935492, -1372618620, 128353682, + -1123881663, 137583815, -635454918, -642772911, 45766801, 671509323, -2070602178, 419615363, + 1216882040, -270590488, -1276805128, 371462360, -1357098057, -384158533, 827959816, -596344473, + 702390549, -279505433, -260424530, -71875110, -1208667171, -1499603926, 2036925262, -540420426, + 746144248, -1420958686, 2032221021, 1904936414, 1257750362, 1926727420, 1931587462, 1258381762, + 885133339, 1629985060, 1967222129, 6363718, -1287922800, 1136965286, 1779436847, 1116720494, + 1042326957, 1405999311, 713994583, 940195359, -1542497137, 2061661095, -883155599, 1726753853, + -1547952704, 394851342, 283780712, 776003547, 1123958025, 201262505, 1934038751, 374860238, + +#define _ZETAS 328 + -3975713, 25847, -2608894, -518909, 237124, -777960, -876248, 466468, + 1826347, 1826347, 1826347, 1826347, 2353451, 2353451, 2353451, 2353451, + -359251, -359251, -359251, -359251, -2091905, -2091905, -2091905, -2091905, + 3119733, 3119733, 3119733, 3119733, -2884855, -2884855, -2884855, -2884855, + 3111497, 3111497, 3111497, 3111497, 2680103, 2680103, 2680103, 2680103, + 2725464, 2725464, 1024112, 1024112, -1079900, -1079900, 
3585928, 3585928, + -549488, -549488, -1119584, -1119584, 2619752, 2619752, -2108549, -2108549, + -2118186, -2118186, -3859737, -3859737, -1399561, -1399561, -3277672, -3277672, + 1757237, 1757237, -19422, -19422, 4010497, 4010497, 280005, 280005, + 2706023, 95776, 3077325, 3530437, -1661693, -3592148, -2537516, 3915439, + -3861115, -3043716, 3574422, -2867647, 3539968, -300467, 2348700, -539299, + -1699267, -1643818, 3505694, -3821735, 3507263, -2140649, -1600420, 3699596, + 811944, 531354, 954230, 3881043, 3900724, -2556880, 2071892, -2797779, + -3930395, -3677745, -1452451, 2176455, -1257611, -4083598, -3190144, -3632928, + 3412210, 2147896, -2967645, -411027, -671102, -22981, -381987, 1852771, + -3343383, 508951, 44288, 904516, -3724342, 1653064, 2389356, 759969, + 189548, 3159746, -2409325, 1315589, 1285669, -812732, -3019102, -3628969, + -1528703, -3041255, 3475950, -1585221, 1939314, -1000202, -3157330, 126922, + -983419, 2715295, -3693493, -2477047, -1228525, -1308169, 1349076, -1430430, + 264944, 3097992, -1100098, 3958618, -8578, -3249728, -210977, -1316856, + -3553272, -1851402, -177440, 1341330, -1584928, -1439742, -3881060, 3839961, + 2091667, -3342478, 266997, -3520352, 900702, 495491, -655327, -3556995, + 342297, 3437287, 2842341, 4055324, -3767016, -2994039, -1333058, -451100, + -1279661, 1500165, -542412, -2584293, -2013608, 1957272, -3183426, 810149, + -3038916, 2213111, -426683, -1667432, -2939036, 183443, -554416, 3937738, + 3407706, 2244091, 2434439, -3759364, 1859098, -1613174, -3122442, -525098, + 286988, -3342277, 2691481, 1247620, 1250494, 1869119, 1237275, 1312455, + 1917081, 777191, -2831860, -3724270, 2432395, 3369112, 162844, 1652634, + 3523897, -975884, 1723600, -1104333, -2235985, -976891, 3919660, 1400424, + 2316500, -2446433, -1235728, -1197226, 909542, -43260, 2031748, -768622, + -2437823, 1735879, -2590150, 2486353, 2635921, 1903435, -3318210, 3306115, + -2546312, 2235880, -1671176, 594136, 2454455, 185531, 1616392, -3694233, + 
3866901, 1717735, -1803090, -260646, -420899, 1612842, -48306, -846154, + 3817976, -3562462, 3513181, -3193378, 819034, -522500, 3207046, -3595838, + 4108315, 203044, 1265009, 1595974, -3548272, -1050970, -1430225, -1962642, + -1374803, 3406031, -1846953, -3776993, -164721, -1207385, 3014001, -1799107, + 269760, 472078, 1910376, -3833893, -2286327, -3545687, -1362209, 1976782, +}}; diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/consts.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/consts.h new file mode 100644 index 0000000000..930d2f09b3 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/consts.h @@ -0,0 +1,38 @@ +#ifndef CONSTS_H +#define CONSTS_H + +#include "params.h" + +#define _8XQ 0 +#define _8XQINV 8 +#define _8XDIV_QINV 16 +#define _8XDIV 24 +#define _ZETAS_QINV 32 +#define _ZETAS 328 + +/* The C ABI on MacOS exports all symbols with a leading + * underscore. This means that any symbols we refer to from + * C files (functions) can't be found, and all symbols we + * refer to from ASM also can't be found. 
+ * + * This define helps us get around this + */ +#if defined(__WIN32__) || defined(__APPLE__) +#define decorate(s) _##s +#define _cdecl(s) decorate(s) +#define cdecl(s) _cdecl(DILITHIUM_NAMESPACE(##s)) +#else +#define cdecl(s) DILITHIUM_NAMESPACE(##s) +#endif + +#ifndef __ASSEMBLER__ + +#include "align.h" + +typedef ALIGNED_INT32(624) qdata_t; + +#define qdata DILITHIUM_NAMESPACE(qdata) +extern const qdata_t qdata; + +#endif +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/invntt.S b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/invntt.S new file mode 100644 index 0000000000..3e9864c994 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/invntt.S @@ -0,0 +1,238 @@ +#include "consts.h" +.include "shuffle.inc" + +.macro butterfly l,h,zl0=1,zl1=1,zh0=2,zh1=2 +vpsubd %ymm\l,%ymm\h,%ymm12 +vpaddd %ymm\h,%ymm\l,%ymm\l + +vpmuldq %ymm\zl0,%ymm12,%ymm13 +vmovshdup %ymm12,%ymm\h +vpmuldq %ymm\zl1,%ymm\h,%ymm14 + +vpmuldq %ymm\zh0,%ymm12,%ymm12 +vpmuldq %ymm\zh1,%ymm\h,%ymm\h + +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 + +vpsubd %ymm13,%ymm12,%ymm12 +vpsubd %ymm14,%ymm\h,%ymm\h + +vmovshdup %ymm12,%ymm12 +vpblendd $0xAA,%ymm\h,%ymm12,%ymm\h +.endm + +.macro levels0t5 off +vmovdqa 256*\off+ 0(%rdi),%ymm4 +vmovdqa 256*\off+ 32(%rdi),%ymm5 +vmovdqa 256*\off+ 64(%rdi),%ymm6 +vmovdqa 256*\off+ 96(%rdi),%ymm7 +vmovdqa 256*\off+128(%rdi),%ymm8 +vmovdqa 256*\off+160(%rdi),%ymm9 +vmovdqa 256*\off+192(%rdi),%ymm10 +vmovdqa 256*\off+224(%rdi),%ymm11 + +/* level 0 */ +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-8)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-8)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 4,5,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-40)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-40)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 6,7,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-72)*4(%rsi),%ymm3 +vpermq 
$0x1B,(_ZETAS+296-8*\off-72)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 8,9,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-104)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-104)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 10,11,1,3,2,15 + +/* level 1: each twiddle load is shared by two butterflies */ +vpermq $0x1B,(_ZETAS_QINV+168-8*\off-8)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+168-8*\off-8)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 4,6,1,3,2,15 +butterfly 5,7,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+168-8*\off-40)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+168-8*\off-40)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 8,10,1,3,2,15 +butterfly 9,11,1,3,2,15 + +/* level 2: one twiddle load is shared by all four butterflies */ +vpermq $0x1B,(_ZETAS_QINV+104-8*\off-8)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+104-8*\off-8)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 4,8,1,3,2,15 +butterfly 5,9,1,3,2,15 +butterfly 6,10,1,3,2,15 +butterfly 7,11,1,3,2,15 + +/* level 3: shuffle2 first; the butterflies take their twiddles from ymm1/ymm2 (the macro defaults) */ +shuffle2 4,5,3,5 +shuffle2 6,7,4,7 +shuffle2 8,9,6,9 +shuffle2 10,11,8,11 + +vpermq $0x1B,(_ZETAS_QINV+72-8*\off-8)*4(%rsi),%ymm1 +vpermq $0x1B,(_ZETAS+72-8*\off-8)*4(%rsi),%ymm2 +butterfly 3,5 +butterfly 4,7 +butterfly 6,9 +butterfly 8,11 + +/* level 4: shuffle4 first; twiddles are again in ymm1/ymm2 */ +shuffle4 3,4,10,4 +shuffle4 6,8,3,8 +shuffle4 5,7,6,7 +shuffle4 9,11,5,11 + +vpermq $0x1B,(_ZETAS_QINV+40-8*\off-8)*4(%rsi),%ymm1 +vpermq $0x1B,(_ZETAS+40-8*\off-8)*4(%rsi),%ymm2 +butterfly 10,4 +butterfly 3,8 +butterfly 6,7 +butterfly 5,11 + +/* level 5: shuffle8 first; a single twiddle is broadcast to every lane */ +shuffle8 10,3,9,3 +shuffle8 6,5,10,5 +shuffle8 4,8,6,8 +shuffle8 7,11,4,11 + +vpbroadcastd (_ZETAS_QINV+7-\off)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+7-\off)*4(%rsi),%ymm2 +butterfly 9,3 +butterfly 10,5 +butterfly 6,8 +butterfly 4,11 + +vmovdqa %ymm9,256*\off+ 0(%rdi) +vmovdqa %ymm10,256*\off+ 32(%rdi) +vmovdqa %ymm6,256*\off+ 64(%rdi) +vmovdqa %ymm4,256*\off+ 96(%rdi) +vmovdqa %ymm3,256*\off+128(%rdi) +vmovdqa %ymm5,256*\off+160(%rdi) +vmovdqa %ymm8,256*\off+192(%rdi)
+vmovdqa %ymm11,256*\off+224(%rdi) +.endm + +.macro levels6t7 off +vmovdqa 0+32*\off(%rdi),%ymm4 +vmovdqa 128+32*\off(%rdi),%ymm5 +vmovdqa 256+32*\off(%rdi),%ymm6 +vmovdqa 384+32*\off(%rdi),%ymm7 +vmovdqa 512+32*\off(%rdi),%ymm8 +vmovdqa 640+32*\off(%rdi),%ymm9 +vmovdqa 768+32*\off(%rdi),%ymm10 +vmovdqa 896+32*\off(%rdi),%ymm11 + +/* level 6: twiddle index 3 for pairs (4,6),(5,7), index 2 for pairs (8,10),(9,11) */ +vpbroadcastd (_ZETAS_QINV+3)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+3)*4(%rsi),%ymm2 +butterfly 4,6 +butterfly 5,7 + +vpbroadcastd (_ZETAS_QINV+2)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+2)*4(%rsi),%ymm2 +butterfly 8,10 +butterfly 9,11 + +/* level 7: twiddle index 0 is shared by all four butterflies */ +vpbroadcastd (_ZETAS_QINV+0)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+0)*4(%rsi),%ymm2 + +butterfly 4,8 +butterfly 5,9 +butterfly 6,10 +butterfly 7,11 + +vmovdqa %ymm8,512+32*\off(%rdi) +vmovdqa %ymm9,640+32*\off(%rdi) +vmovdqa %ymm10,768+32*\off(%rdi) +vmovdqa %ymm11,896+32*\off(%rdi) /* ymm8..ymm11 are stored as they come out of the butterflies */ + +vmovdqa (_8XDIV_QINV)*4(%rsi),%ymm1 /* ymm4..ymm7 are first multiplied by the _8XDIV constants, then stored */ +vmovdqa (_8XDIV)*4(%rsi),%ymm2 +vpmuldq %ymm1,%ymm4,%ymm12 +vpmuldq %ymm1,%ymm5,%ymm13 +vmovshdup %ymm4,%ymm8 +vmovshdup %ymm5,%ymm9 +vpmuldq %ymm1,%ymm8,%ymm14 +vpmuldq %ymm1,%ymm9,%ymm15 +vpmuldq %ymm2,%ymm4,%ymm4 +vpmuldq %ymm2,%ymm5,%ymm5 +vpmuldq %ymm2,%ymm8,%ymm8 +vpmuldq %ymm2,%ymm9,%ymm9 +vpmuldq %ymm0,%ymm12,%ymm12 +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 +vpmuldq %ymm0,%ymm15,%ymm15 +vpsubd %ymm12,%ymm4,%ymm4 +vpsubd %ymm13,%ymm5,%ymm5 +vpsubd %ymm14,%ymm8,%ymm8 +vpsubd %ymm15,%ymm9,%ymm9 +vmovshdup %ymm4,%ymm4 +vmovshdup %ymm5,%ymm5 +vpblendd $0xAA,%ymm8,%ymm4,%ymm4 +vpblendd $0xAA,%ymm9,%ymm5,%ymm5 + +vpmuldq %ymm1,%ymm6,%ymm12 +vpmuldq %ymm1,%ymm7,%ymm13 +vmovshdup %ymm6,%ymm8 +vmovshdup %ymm7,%ymm9 +vpmuldq %ymm1,%ymm8,%ymm14 +vpmuldq %ymm1,%ymm9,%ymm15 +vpmuldq %ymm2,%ymm6,%ymm6 +vpmuldq %ymm2,%ymm7,%ymm7 +vpmuldq %ymm2,%ymm8,%ymm8 +vpmuldq %ymm2,%ymm9,%ymm9 +vpmuldq %ymm0,%ymm12,%ymm12 +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 +vpmuldq %ymm0,%ymm15,%ymm15 +vpsubd %ymm12,%ymm6,%ymm6 +vpsubd %ymm13,%ymm7,%ymm7 +vpsubd
%ymm14,%ymm8,%ymm8 +vpsubd %ymm15,%ymm9,%ymm9 +vmovshdup %ymm6,%ymm6 +vmovshdup %ymm7,%ymm7 +vpblendd $0xAA,%ymm8,%ymm6,%ymm6 +vpblendd $0xAA,%ymm9,%ymm7,%ymm7 + +vmovdqa %ymm4, 0+32*\off(%rdi) +vmovdqa %ymm5,128+32*\off(%rdi) +vmovdqa %ymm6,256+32*\off(%rdi) +vmovdqa %ymm7,384+32*\off(%rdi) +.endm + +.text +.global cdecl(invntt_avx) +cdecl(invntt_avx): +vmovdqa _8XQ*4(%rsi),%ymm0 /* ymm0 = the eight int32 at qdata offset _8XQ; every butterfly multiplies by it */ + +levels0t5 0 /* four calls, 64 coefficients (256 bytes) each */ +levels0t5 1 +levels0t5 2 +levels0t5 3 + +levels6t7 0 /* four calls, each on 32-byte slices spaced 128 bytes apart */ +levels6t7 1 +levels6t7 2 +levels6t7 3 + +ret diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/ntt.S b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/ntt.S new file mode 100644 index 0000000000..38415de893 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/ntt.S @@ -0,0 +1,197 @@ +#include "consts.h" +.include "shuffle.inc" + +.macro butterfly l,h,zl0=1,zl1=1,zh0=2,zh1=2 +vpmuldq %ymm\zl0,%ymm\h,%ymm13 +vmovshdup %ymm\h,%ymm12 +vpmuldq %ymm\zl1,%ymm12,%ymm14 + +vpmuldq %ymm\zh0,%ymm\h,%ymm\h +vpmuldq %ymm\zh1,%ymm12,%ymm12 + +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 + +vmovshdup %ymm\h,%ymm\h +vpblendd $0xAA,%ymm12,%ymm\h,%ymm\h + +vpsubd %ymm\h,%ymm\l,%ymm12 +vpaddd %ymm\h,%ymm\l,%ymm\l + +vmovshdup %ymm13,%ymm13 +vpblendd $0xAA,%ymm14,%ymm13,%ymm13 + +vpaddd %ymm13,%ymm12,%ymm\h +vpsubd %ymm13,%ymm\l,%ymm\l +.endm + +.macro levels0t1 off +/* level 0: one broadcast twiddle (index 1) for all four butterflies */ +vpbroadcastd (_ZETAS_QINV+1)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+1)*4(%rsi),%ymm2 + +vmovdqa 0+32*\off(%rdi),%ymm4 +vmovdqa 128+32*\off(%rdi),%ymm5 +vmovdqa 256+32*\off(%rdi),%ymm6 +vmovdqa 384+32*\off(%rdi),%ymm7 +vmovdqa 512+32*\off(%rdi),%ymm8 +vmovdqa 640+32*\off(%rdi),%ymm9 +vmovdqa 768+32*\off(%rdi),%ymm10 +vmovdqa 896+32*\off(%rdi),%ymm11 + +butterfly 4,8 +butterfly 5,9 +butterfly 6,10 +butterfly 7,11 + +/* level 1: twiddle index 2 for pairs (4,6),(5,7); index 3 for pairs (8,10),(9,11) */ +vpbroadcastd (_ZETAS_QINV+2)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+2)*4(%rsi),%ymm2 +butterfly 4,6 +butterfly 5,7 + +vpbroadcastd
(_ZETAS_QINV+3)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+3)*4(%rsi),%ymm2 +butterfly 8,10 +butterfly 9,11 + +vmovdqa %ymm4, 0+32*\off(%rdi) +vmovdqa %ymm5,128+32*\off(%rdi) +vmovdqa %ymm6,256+32*\off(%rdi) +vmovdqa %ymm7,384+32*\off(%rdi) +vmovdqa %ymm8,512+32*\off(%rdi) +vmovdqa %ymm9,640+32*\off(%rdi) +vmovdqa %ymm10,768+32*\off(%rdi) +vmovdqa %ymm11,896+32*\off(%rdi) +.endm + +.macro levels2t7 off +/* level 2: 64 consecutive coefficients per call; one broadcast twiddle (index 4+off) */ +vmovdqa 256*\off+ 0(%rdi),%ymm4 +vmovdqa 256*\off+ 32(%rdi),%ymm5 +vmovdqa 256*\off+ 64(%rdi),%ymm6 +vmovdqa 256*\off+ 96(%rdi),%ymm7 +vmovdqa 256*\off+128(%rdi),%ymm8 +vmovdqa 256*\off+160(%rdi),%ymm9 +vmovdqa 256*\off+192(%rdi),%ymm10 +vmovdqa 256*\off+224(%rdi),%ymm11 + +vpbroadcastd (_ZETAS_QINV+4+\off)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+4+\off)*4(%rsi),%ymm2 + +butterfly 4,8 +butterfly 5,9 +butterfly 6,10 +butterfly 7,11 + +shuffle8 4,8,3,8 +shuffle8 5,9,4,9 +shuffle8 6,10,5,10 +shuffle8 7,11,6,11 + +/* level 3: full twiddle vector at index 8+8*off */ +vmovdqa (_ZETAS_QINV+8+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+8+8*\off)*4(%rsi),%ymm2 + +butterfly 3,5 +butterfly 8,10 +butterfly 4,6 +butterfly 9,11 + +shuffle4 3,5,7,5 +shuffle4 8,10,3,10 +shuffle4 4,6,8,6 +shuffle4 9,11,4,11 + +/* level 4: full twiddle vector at index 40+8*off */ +vmovdqa (_ZETAS_QINV+40+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+40+8*\off)*4(%rsi),%ymm2 + +butterfly 7,8 +butterfly 5,6 +butterfly 3,4 +butterfly 10,11 + +shuffle2 7,8,9,8 +shuffle2 5,6,7,6 +shuffle2 3,4,5,4 +shuffle2 10,11,3,11 + +/* level 5: twiddle vector at index 72+8*off; ymm10/ymm15 hold its odd lanes for the odd-lane multiplies */ +vmovdqa (_ZETAS_QINV+72+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+72+8*\off)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 + +butterfly 9,5,1,10,2,15 +butterfly 8,4,1,10,2,15 +butterfly 7,3,1,10,2,15 +butterfly 6,11,1,10,2,15 + +/* level 6: two twiddle vectors (index 104+8*off and 32 entries later), two butterflies each */ +vmovdqa (_ZETAS_QINV+104+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+104+8*\off)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 9,7,1,10,2,15 +butterfly 8,6,1,10,2,15 + +vmovdqa (_ZETAS_QINV+104+8*\off+32)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+104+8*\off+32)*4(%rsi),%ymm2 +vpsrlq
$32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 5,3,1,10,2,15 +butterfly 4,11,1,10,2,15 + +/* level 7: four twiddle vectors (index 168+8*off, then 32, 64 and 96 entries later), one butterfly each */ +vmovdqa (_ZETAS_QINV+168+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 9,8,1,10,2,15 + +vmovdqa (_ZETAS_QINV+168+8*\off+32)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off+32)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 7,6,1,10,2,15 + +vmovdqa (_ZETAS_QINV+168+8*\off+64)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off+64)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 5,4,1,10,2,15 + +vmovdqa (_ZETAS_QINV+168+8*\off+96)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off+96)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 3,11,1,10,2,15 + +vmovdqa %ymm9,256*\off+ 0(%rdi) +vmovdqa %ymm8,256*\off+ 32(%rdi) +vmovdqa %ymm7,256*\off+ 64(%rdi) +vmovdqa %ymm6,256*\off+ 96(%rdi) +vmovdqa %ymm5,256*\off+128(%rdi) +vmovdqa %ymm4,256*\off+160(%rdi) +vmovdqa %ymm3,256*\off+192(%rdi) +vmovdqa %ymm11,256*\off+224(%rdi) +.endm + +.text +.global cdecl(ntt_avx) +cdecl(ntt_avx): +vmovdqa _8XQ*4(%rsi),%ymm0 /* ymm0 = the eight int32 at qdata offset _8XQ; every butterfly multiplies by it */ + +levels0t1 0 +levels0t1 1 +levels0t1 2 +levels0t1 3 + +levels2t7 0 +levels2t7 1 +levels2t7 2 +levels2t7 3 + +ret + diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/ntt.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/ntt.h new file mode 100644 index 0000000000..0c4fbdd342 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/ntt.h @@ -0,0 +1,19 @@ +#ifndef NTT_H +#define NTT_H + +#include /* NOTE(review): the header name is missing on this line; the __m256i prototypes below need one - confirm against upstream */ + +#define ntt_avx DILITHIUM_NAMESPACE(ntt_avx) +void ntt_avx(__m256i *a, const __m256i *qdata); /* defined in ntt.S; poly_ntt calls it with a->vec and qdata.vec */ +#define invntt_avx DILITHIUM_NAMESPACE(invntt_avx) +void invntt_avx(__m256i *a, const __m256i *qdata); /* defined in invntt.S; poly_invntt_tomont calls it the same way */ + +#define nttunpack_avx DILITHIUM_NAMESPACE(nttunpack_avx) +void nttunpack_avx(__m256i *a); + +#define pointwise_avx DILITHIUM_NAMESPACE(pointwise_avx) +void
pointwise_avx(__m256i *c, const __m256i *a, const __m256i *b, const __m256i *qdata); +#define pointwise_acc_avx DILITHIUM_NAMESPACE(pointwise_acc_avx) +void pointwise_acc_avx(__m256i *c, const __m256i *a, const __m256i *b, const __m256i *qdata); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/packing.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/packing.c new file mode 100644 index 0000000000..039a686da3 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/packing.c @@ -0,0 +1,237 @@ +#include "params.h" +#include "packing.h" +#include "polyvec.h" +#include "poly.h" + +/************************************************* +* Name: pack_pk +* +* Description: Bit-pack public key pk = (rho, t1). +* +* Arguments: - uint8_t pk[]: output byte array of CRYPTO_PUBLICKEYBYTES bytes +* - const uint8_t rho[]: byte array containing rho (SEEDBYTES bytes) +* - const polyveck *t1: pointer to vector t1 +**************************************************/ +void pack_pk(uint8_t pk[CRYPTO_PUBLICKEYBYTES], + const uint8_t rho[SEEDBYTES], + const polyveck *t1) +{ + unsigned int i; + + for(i = 0; i < SEEDBYTES; ++i) + pk[i] = rho[i]; + pk += SEEDBYTES; + + for(i = 0; i < K; ++i) + polyt1_pack(pk + i*POLYT1_PACKEDBYTES, &t1->vec[i]); +} + +/************************************************* +* Name: unpack_pk +* +* Description: Unpack public key pk = (rho, t1).
+* +* Arguments: - uint8_t rho[]: output byte array for rho +* - polyveck *t1: pointer to output vector t1 +* - const uint8_t pk[]: byte array containing bit-packed pk +**************************************************/ +void unpack_pk(uint8_t rho[SEEDBYTES], + polyveck *t1, + const uint8_t pk[CRYPTO_PUBLICKEYBYTES]) +{ + unsigned int i; + + for(i = 0; i < SEEDBYTES; ++i) + rho[i] = pk[i]; + pk += SEEDBYTES; + + for(i = 0; i < K; ++i) + polyt1_unpack(&t1->vec[i], pk + i*POLYT1_PACKEDBYTES); +} + +/************************************************* +* Name: pack_sk +* +* Description: Bit-pack secret key. Bytes are written in the order rho, key, tr, s1, s2, t0. +* +* Arguments: - uint8_t sk[]: output byte array +* - const uint8_t rho[]: byte array containing rho +* - const uint8_t tr[]: byte array containing tr +* - const uint8_t key[]: byte array containing key +* - const polyveck *t0: pointer to vector t0 +* - const polyvecl *s1: pointer to vector s1 +* - const polyveck *s2: pointer to vector s2 +**************************************************/ +void pack_sk(uint8_t sk[CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[TRBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2) +{ + unsigned int i; + + for(i = 0; i < SEEDBYTES; ++i) + sk[i] = rho[i]; + sk += SEEDBYTES; + + for(i = 0; i < SEEDBYTES; ++i) + sk[i] = key[i]; + sk += SEEDBYTES; + + for(i = 0; i < TRBYTES; ++i) + sk[i] = tr[i]; + sk += TRBYTES; + + for(i = 0; i < L; ++i) + polyeta_pack(sk + i*POLYETA_PACKEDBYTES, &s1->vec[i]); + sk += L*POLYETA_PACKEDBYTES; + + for(i = 0; i < K; ++i) + polyeta_pack(sk + i*POLYETA_PACKEDBYTES, &s2->vec[i]); + sk += K*POLYETA_PACKEDBYTES; + + for(i = 0; i < K; ++i) + polyt0_pack(sk + i*POLYT0_PACKEDBYTES, &t0->vec[i]); +} + +/************************************************* +* Name: unpack_sk +* +* Description: Unpack secret key. Bytes are read in the order rho, key, tr, s1, s2, t0.
+* +* Arguments: - uint8_t rho[]: output byte array for rho +* - uint8_t tr[]: output byte array for tr +* - uint8_t key[]: output byte array for key +* - polyveck *t0: pointer to output vector t0 +* - polyvecl *s1: pointer to output vector s1 +* - polyveck *s2: pointer to output vector s2 +* - const uint8_t sk[]: byte array containing bit-packed sk +**************************************************/ +void unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[TRBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[CRYPTO_SECRETKEYBYTES]) +{ + unsigned int i; + + for(i = 0; i < SEEDBYTES; ++i) + rho[i] = sk[i]; + sk += SEEDBYTES; + + for(i = 0; i < SEEDBYTES; ++i) + key[i] = sk[i]; + sk += SEEDBYTES; + + for(i = 0; i < TRBYTES; ++i) + tr[i] = sk[i]; + sk += TRBYTES; + + for(i=0; i < L; ++i) + polyeta_unpack(&s1->vec[i], sk + i*POLYETA_PACKEDBYTES); + sk += L*POLYETA_PACKEDBYTES; + + for(i=0; i < K; ++i) + polyeta_unpack(&s2->vec[i], sk + i*POLYETA_PACKEDBYTES); + sk += K*POLYETA_PACKEDBYTES; + + for(i=0; i < K; ++i) + polyt0_unpack(&t0->vec[i], sk + i*POLYT0_PACKEDBYTES); +} + +/************************************************* +* Name: pack_sig +* +* Description: Bit-pack signature sig = (c, z, h).
+* +* Arguments: - uint8_t sig[]: output byte array +* - const uint8_t c[]: challenge hash of length CTILDEBYTES +* - const polyvecl *z: pointer to vector z +* - const polyveck *h: pointer to hint vector h +**************************************************/ +void pack_sig(uint8_t sig[CRYPTO_BYTES], + const uint8_t c[CTILDEBYTES], + const polyvecl *z, + const polyveck *h) +{ + unsigned int i, j, k; + + for(i=0; i < CTILDEBYTES; ++i) + sig[i] = c[i]; + sig += CTILDEBYTES; + + for(i = 0; i < L; ++i) + polyz_pack(sig + i*POLYZ_PACKEDBYTES, &z->vec[i]); + sig += L*POLYZ_PACKEDBYTES; + + /* Encode h: positions of nonzero coefficients are appended per polynomial; sig[OMEGA+i] records the running count after polynomial i */ + for(i = 0; i < OMEGA + K; ++i) + sig[i] = 0; + + k = 0; + for(i = 0; i < K; ++i) { + for(j = 0; j < N; ++j) + if(h->vec[i].coeffs[j] != 0) + sig[k++] = j; /* NOTE(review): assumes h has at most OMEGA nonzero coefficients in total - not checked here */ + + sig[OMEGA + i] = k; + } +} + +/************************************************* +* Name: unpack_sig +* +* Description: Unpack signature sig = (c, z, h). +* +* Arguments: - uint8_t c[]: output challenge hash of length CTILDEBYTES +* - polyvecl *z: pointer to output vector z +* - polyveck *h: pointer to output hint vector h +* - const uint8_t sig[]: byte array containing +* bit-packed signature +* +* Returns 1 in case of malformed signature; otherwise 0.
+**************************************************/ +int unpack_sig(uint8_t c[CTILDEBYTES], + polyvecl *z, + polyveck *h, + const uint8_t sig[CRYPTO_BYTES]) +{ + unsigned int i, j, k; + + for(i = 0; i < CTILDEBYTES; ++i) + c[i] = sig[i]; + sig += CTILDEBYTES; + + for(i = 0; i < L; ++i) + polyz_unpack(&z->vec[i], sig + i*POLYZ_PACKEDBYTES); + sig += L*POLYZ_PACKEDBYTES; + + /* Decode h: sig[OMEGA+i] is the end offset of the indices of polynomial i; offsets must not decrease or exceed OMEGA */ + k = 0; + for(i = 0; i < K; ++i) { + for(j = 0; j < N; ++j) + h->vec[i].coeffs[j] = 0; + + if(sig[OMEGA + i] < k || sig[OMEGA + i] > OMEGA) + return 1; + + for(j = k; j < sig[OMEGA + i]; ++j) { + /* Coefficients are ordered for strong unforgeability */ + if(j > k && sig[j] <= sig[j-1]) return 1; + h->vec[i].coeffs[sig[j]] = 1; + } + + k = sig[OMEGA + i]; + } + + /* Extra indices are zero for strong unforgeability */ + for(j = k; j < OMEGA; ++j) + if(sig[j]) + return 1; + + return 0; +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/packing.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/packing.h new file mode 100644 index 0000000000..8e47728ce3 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/packing.h @@ -0,0 +1,38 @@ +#ifndef PACKING_H +#define PACKING_H + +#include /* NOTE(review): the header name is missing on this line; the uint8_t prototypes below need one - confirm against upstream */ +#include "params.h" +#include "polyvec.h" + +#define pack_pk DILITHIUM_NAMESPACE(pack_pk) +void pack_pk(uint8_t pk[CRYPTO_PUBLICKEYBYTES], const uint8_t rho[SEEDBYTES], const polyveck *t1); + +#define pack_sk DILITHIUM_NAMESPACE(pack_sk) +void pack_sk(uint8_t sk[CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[TRBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2); + +#define pack_sig DILITHIUM_NAMESPACE(pack_sig) +void pack_sig(uint8_t sig[CRYPTO_BYTES], const uint8_t c[CTILDEBYTES], const polyvecl *z, const polyveck *h); + +#define unpack_pk DILITHIUM_NAMESPACE(unpack_pk) +void unpack_pk(uint8_t rho[SEEDBYTES], polyveck *t1, const uint8_t
pk[CRYPTO_PUBLICKEYBYTES]); + +#define unpack_sk DILITHIUM_NAMESPACE(unpack_sk) +void unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[TRBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[CRYPTO_SECRETKEYBYTES]); + +#define unpack_sig DILITHIUM_NAMESPACE(unpack_sig) +int unpack_sig(uint8_t c[CTILDEBYTES], polyvecl *z, polyveck *h, const uint8_t sig[CRYPTO_BYTES]); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/params.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/params.h new file mode 100644 index 0000000000..1e8a7b505b --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/params.h @@ -0,0 +1,80 @@ +#ifndef PARAMS_H +#define PARAMS_H + +#include "config.h" + +#define SEEDBYTES 32 +#define CRHBYTES 64 +#define TRBYTES 64 +#define RNDBYTES 32 +#define N 256 /* coefficients per polynomial */ +#define Q 8380417 /* modulus; uniform coefficients are sampled in [0, Q-1] */ +#define D 13 /* exponent of the power of two used by poly_power2round and poly_shiftl */ +#define ROOT_OF_UNITY 1753 + +#if DILITHIUM_MODE == 2 +#define K 4 +#define L 4 +#define ETA 2 +#define TAU 39 +#define BETA 78 +#define GAMMA1 (1 << 17) +#define GAMMA2 ((Q-1)/88) +#define OMEGA 80 +#define CTILDEBYTES 32 + +#elif DILITHIUM_MODE == 3 +#define K 6 +#define L 5 +#define ETA 4 +#define TAU 49 +#define BETA 196 +#define GAMMA1 (1 << 19) +#define GAMMA2 ((Q-1)/32) +#define OMEGA 55 +#define CTILDEBYTES 48 + +#elif DILITHIUM_MODE == 5 +#define K 8 +#define L 7 +#define ETA 2 +#define TAU 60 +#define BETA 120 +#define GAMMA1 (1 << 19) +#define GAMMA2 ((Q-1)/32) +#define OMEGA 75 +#define CTILDEBYTES 64 + +#endif + +#define POLYT1_PACKEDBYTES 320 +#define POLYT0_PACKEDBYTES 416 +#define POLYVECH_PACKEDBYTES (OMEGA + K) /* OMEGA index bytes followed by K offset bytes, as written by pack_sig */ + +#if GAMMA1 == (1 << 17) +#define POLYZ_PACKEDBYTES 576 +#elif GAMMA1 == (1 << 19) +#define POLYZ_PACKEDBYTES 640 +#endif + +#if GAMMA2 == (Q-1)/88 +#define POLYW1_PACKEDBYTES 192 +#elif GAMMA2 == (Q-1)/32 +#define POLYW1_PACKEDBYTES 128 +#endif + +#if ETA == 2 +#define POLYETA_PACKEDBYTES 96 +#elif ETA == 4 +#define
POLYETA_PACKEDBYTES 128 +#endif + +#define CRYPTO_PUBLICKEYBYTES (SEEDBYTES + K*POLYT1_PACKEDBYTES) /* rho, then K packed t1 polynomials */ +#define CRYPTO_SECRETKEYBYTES (2*SEEDBYTES \ + + TRBYTES \ + + L*POLYETA_PACKEDBYTES \ + + K*POLYETA_PACKEDBYTES \ + + K*POLYT0_PACKEDBYTES) +#define CRYPTO_BYTES (CTILDEBYTES + L*POLYZ_PACKEDBYTES + POLYVECH_PACKEDBYTES) /* c, then L packed z polynomials, then the hint bytes */ + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/pointwise.S b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/pointwise.S new file mode 100644 index 0000000000..ae7ff7995c --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/pointwise.S @@ -0,0 +1,211 @@ +#include "params.h" +#include "consts.h" + +.text +.global cdecl(pointwise_avx) +cdecl(pointwise_avx): +#consts +vmovdqa _8XQINV*4(%rcx),%ymm0 /* registers follow the prototype in ntt.h: rdi=c, rsi=a, rdx=b, rcx=qdata (assumes the System V AMD64 calling convention) */ +vmovdqa _8XQ*4(%rcx),%ymm1 + +xor %eax,%eax +_looptop1: +#load +vmovdqa (%rsi),%ymm2 +vmovdqa 32(%rsi),%ymm4 +vmovdqa 64(%rsi),%ymm6 +vmovdqa (%rdx),%ymm10 +vmovdqa 32(%rdx),%ymm12 +vmovdqa 64(%rdx),%ymm14 +vpsrlq $32,%ymm2,%ymm3 +vpsrlq $32,%ymm4,%ymm5 +vmovshdup %ymm6,%ymm7 +vpsrlq $32,%ymm10,%ymm11 +vpsrlq $32,%ymm12,%ymm13 +vmovshdup %ymm14,%ymm15 + +#mul +vpmuldq %ymm2,%ymm10,%ymm2 +vpmuldq %ymm3,%ymm11,%ymm3 +vpmuldq %ymm4,%ymm12,%ymm4 +vpmuldq %ymm5,%ymm13,%ymm5 +vpmuldq %ymm6,%ymm14,%ymm6 +vpmuldq %ymm7,%ymm15,%ymm7 + +#reduce +vpmuldq %ymm0,%ymm2,%ymm10 +vpmuldq %ymm0,%ymm3,%ymm11 +vpmuldq %ymm0,%ymm4,%ymm12 +vpmuldq %ymm0,%ymm5,%ymm13 +vpmuldq %ymm0,%ymm6,%ymm14 +vpmuldq %ymm0,%ymm7,%ymm15 +vpmuldq %ymm1,%ymm10,%ymm10 +vpmuldq %ymm1,%ymm11,%ymm11 +vpmuldq %ymm1,%ymm12,%ymm12 +vpmuldq %ymm1,%ymm13,%ymm13 +vpmuldq %ymm1,%ymm14,%ymm14 +vpmuldq %ymm1,%ymm15,%ymm15 +vpsubq %ymm10,%ymm2,%ymm2 +vpsubq %ymm11,%ymm3,%ymm3 +vpsubq %ymm12,%ymm4,%ymm4 +vpsubq %ymm13,%ymm5,%ymm5 +vpsubq %ymm14,%ymm6,%ymm6 +vpsubq %ymm15,%ymm7,%ymm7 +vpsrlq $32,%ymm2,%ymm2 +vpsrlq $32,%ymm4,%ymm4 +vmovshdup %ymm6,%ymm6 + +#store +vpblendd
$0xAA,%ymm5,%ymm4,%ymm4 +vpblendd $0xAA,%ymm7,%ymm6,%ymm6 +vmovdqa %ymm2,(%rdi) +vmovdqa %ymm4,32(%rdi) +vmovdqa %ymm6,64(%rdi) + +add $96,%rdi +add $96,%rsi +add $96,%rdx +add $1,%eax +cmp $10,%eax /* 10 passes of 24 coefficients (96 bytes); the last 16 coefficients are handled after the loop */ +jb _looptop1 + +vmovdqa (%rsi),%ymm2 +vmovdqa 32(%rsi),%ymm4 +vmovdqa (%rdx),%ymm10 +vmovdqa 32(%rdx),%ymm12 +vpsrlq $32,%ymm2,%ymm3 +vpsrlq $32,%ymm4,%ymm5 +vmovshdup %ymm10,%ymm11 +vmovshdup %ymm12,%ymm13 + +#mul +vpmuldq %ymm2,%ymm10,%ymm2 +vpmuldq %ymm3,%ymm11,%ymm3 +vpmuldq %ymm4,%ymm12,%ymm4 +vpmuldq %ymm5,%ymm13,%ymm5 + +#reduce +vpmuldq %ymm0,%ymm2,%ymm10 +vpmuldq %ymm0,%ymm3,%ymm11 +vpmuldq %ymm0,%ymm4,%ymm12 +vpmuldq %ymm0,%ymm5,%ymm13 +vpmuldq %ymm1,%ymm10,%ymm10 +vpmuldq %ymm1,%ymm11,%ymm11 +vpmuldq %ymm1,%ymm12,%ymm12 +vpmuldq %ymm1,%ymm13,%ymm13 +vpsubq %ymm10,%ymm2,%ymm2 +vpsubq %ymm11,%ymm3,%ymm3 +vpsubq %ymm12,%ymm4,%ymm4 +vpsubq %ymm13,%ymm5,%ymm5 +vpsrlq $32,%ymm2,%ymm2 +vmovshdup %ymm4,%ymm4 + +#store +vpblendd $0x55,%ymm2,%ymm3,%ymm2 +vpblendd $0x55,%ymm4,%ymm5,%ymm4 +vmovdqa %ymm2,(%rdi) +vmovdqa %ymm4,32(%rdi) + +ret + +.macro pointwise off +#load +vmovdqa \off(%rsi),%ymm6 +vmovdqa \off+32(%rsi),%ymm8 +vmovdqa \off(%rdx),%ymm10 +vmovdqa \off+32(%rdx),%ymm12 +vpsrlq $32,%ymm6,%ymm7 +vpsrlq $32,%ymm8,%ymm9 +vmovshdup %ymm10,%ymm11 +vmovshdup %ymm12,%ymm13 + +#mul +vpmuldq %ymm6,%ymm10,%ymm6 +vpmuldq %ymm7,%ymm11,%ymm7 +vpmuldq %ymm8,%ymm12,%ymm8 +vpmuldq %ymm9,%ymm13,%ymm9 +.endm + +.macro acc +vpaddq %ymm6,%ymm2,%ymm2 +vpaddq %ymm7,%ymm3,%ymm3 +vpaddq %ymm8,%ymm4,%ymm4 +vpaddq %ymm9,%ymm5,%ymm5 +.endm + +.global cdecl(pointwise_acc_avx) +cdecl(pointwise_acc_avx): +#consts +vmovdqa _8XQINV*4(%rcx),%ymm0 +vmovdqa _8XQ*4(%rcx),%ymm1 + +xor %eax,%eax +_looptop2: +pointwise 0 + +#mov +vmovdqa %ymm6,%ymm2 +vmovdqa %ymm7,%ymm3 +vmovdqa %ymm8,%ymm4 +vmovdqa %ymm9,%ymm5 + +pointwise 1024 /* each further operand pair sits 1024 bytes (256 int32) after the previous one */ +acc + +#if L >= 3 +pointwise 2048 +acc +#endif + +#if L >= 4 +pointwise 3072 +acc +#endif + +#if L >= 5 +pointwise 4096 +acc +#endif + +#if L >= 6 +pointwise 5120 +acc
+#endif + +#if L >= 7 +pointwise 6144 +acc +#endif + +#reduce +vpmuldq %ymm0,%ymm2,%ymm6 +vpmuldq %ymm0,%ymm3,%ymm7 +vpmuldq %ymm0,%ymm4,%ymm8 +vpmuldq %ymm0,%ymm5,%ymm9 +vpmuldq %ymm1,%ymm6,%ymm6 +vpmuldq %ymm1,%ymm7,%ymm7 +vpmuldq %ymm1,%ymm8,%ymm8 +vpmuldq %ymm1,%ymm9,%ymm9 +vpsubq %ymm6,%ymm2,%ymm2 +vpsubq %ymm7,%ymm3,%ymm3 +vpsubq %ymm8,%ymm4,%ymm4 +vpsubq %ymm9,%ymm5,%ymm5 +vpsrlq $32,%ymm2,%ymm2 +vmovshdup %ymm4,%ymm4 + +#store +vpblendd $0xAA,%ymm3,%ymm2,%ymm2 +vpblendd $0xAA,%ymm5,%ymm4,%ymm4 + +vmovdqa %ymm2,(%rdi) +vmovdqa %ymm4,32(%rdi) + +add $64,%rsi +add $64,%rdx +add $64,%rdi +add $1,%eax +cmp $16,%eax +jb _looptop2 + +ret diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/poly.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/poly.c new file mode 100644 index 0000000000..25d36828ad --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/poly.c @@ -0,0 +1,1138 @@ +#include +#include +#include +#include "align.h" +#include "params.h" +#include "poly.h" +#include "ntt.h" +#include "rounding.h" +#include "rejsample.h" +#include "consts.h" +#include "symmetric.h" +#include "fips202x4.h" + +#ifdef DBENCH +#include "test/cpucycles.h" +extern const uint64_t timing_overhead; +extern uint64_t *tred, *tadd, *tmul, *tround, *tsample, *tpack; +#define DBENCH_START() uint64_t time = cpucycles() +#define DBENCH_STOP(t) t += cpucycles() - time - timing_overhead +#else +#define DBENCH_START() +#define DBENCH_STOP(t) +#endif + +#define _mm256_blendv_epi32(a,b,mask) \ + _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(a), \ + _mm256_castsi256_ps(b), \ + _mm256_castsi256_ps(mask))) + +/************************************************* +* Name: poly_reduce +* +* Description: Inplace reduction of all coefficients of polynomial to +* representative in [-6283009,6283007]. Assumes input +* coefficients to be at most 2^31 - 2^22 - 1 in absolute value. 
+* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_reduce(poly *a) { + unsigned int i; + __m256i f,g; + const __m256i q = _mm256_load_si256(&qdata.vec[_8XQ/8]); + const __m256i off = _mm256_set1_epi32(1<<22); + DBENCH_START(); + + for(i = 0; i < N/8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_add_epi32(f,off); + g = _mm256_srai_epi32(g,23); + g = _mm256_mullo_epi32(g,q); + f = _mm256_sub_epi32(f,g); + _mm256_store_si256(&a->vec[i],f); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: poly_addq +* +* Description: For all coefficients of in/out polynomial add Q if +* coefficient is negative. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_caddq(poly *a) { + unsigned int i; + __m256i f,g; + const __m256i q = _mm256_load_si256(&qdata.vec[_8XQ/8]); + const __m256i zero = _mm256_setzero_si256(); + DBENCH_START(); + + for(i = 0; i < N/8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_blendv_epi32(zero,q,f); + f = _mm256_add_epi32(f,g); + _mm256_store_si256(&a->vec[i],f); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: poly_add +* +* Description: Add polynomials. No modular reduction is performed. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first summand +* - const poly *b: pointer to second summand +**************************************************/ +void poly_add(poly *c, const poly *a, const poly *b) { + unsigned int i; + __m256i f,g; + DBENCH_START(); + + for(i = 0; i < N/8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_load_si256(&b->vec[i]); + f = _mm256_add_epi32(f,g); + _mm256_store_si256(&c->vec[i],f); + } + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: poly_sub +* +* Description: Subtract polynomials. 
No modular reduction is +* performed. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial to be +* subtraced from first input polynomial +**************************************************/ +void poly_sub(poly *c, const poly *a, const poly *b) { + unsigned int i; + __m256i f,g; + DBENCH_START(); + + for(i = 0; i < N/8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_load_si256(&b->vec[i]); + f = _mm256_sub_epi32(f,g); + _mm256_store_si256(&c->vec[i],f); + } + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: poly_shiftl +* +* Description: Multiply polynomial by 2^D without modular reduction. Assumes +* input coefficients to be less than 2^{31-D} in absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_shiftl(poly *a) { + unsigned int i; + __m256i f; + DBENCH_START(); + + for(i = 0; i < N/8; i++) { + f = _mm256_load_si256(&a->vec[i]); + f = _mm256_slli_epi32(f,D); + _mm256_store_si256(&a->vec[i],f); + } + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: poly_ntt +* +* Description: Inplace forward NTT. Coefficients can grow by up to +* 8*Q in absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_ntt(poly *a) { + DBENCH_START(); + + ntt_avx(a->vec, qdata.vec); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: poly_invntt_tomont +* +* Description: Inplace inverse NTT and multiplication by 2^{32}. +* Input coefficients need to be less than Q in absolute +* value and output coefficients are again bounded by Q. 
+* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_invntt_tomont(poly *a) { + DBENCH_START(); + + invntt_avx(a->vec, qdata.vec); + + DBENCH_STOP(*tmul); +} + +void poly_nttunpack(poly *a) { + DBENCH_START(); + + nttunpack_avx(a->vec); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: poly_pointwise_montgomery +* +* Description: Pointwise multiplication of polynomials in NTT domain +* representation and multiplication of resulting polynomial +* by 2^{-32}. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_pointwise_montgomery(poly *c, const poly *a, const poly *b) { + DBENCH_START(); + + pointwise_avx(c->vec, a->vec, b->vec, qdata.vec); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: poly_power2round +* +* Description: For all coefficients c of the input polynomial, +* compute c0, c1 such that c mod^+ Q = c1*2^D + c0 +* with -2^{D-1} < c0 <= 2^{D-1}. Assumes coefficients to be +* positive standard representatives. +* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void poly_power2round(poly *a1, poly *a0, const poly *a) +{ + DBENCH_START(); + + power2round_avx(a1->vec, a0->vec, a->vec); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: poly_decompose +* +* Description: For all coefficients c of the input polynomial, +* compute high and low bits c0, c1 such c mod^+ Q = c1*ALPHA + c0 +* with -ALPHA/2 < c0 <= ALPHA/2 except if c1 = (Q-1)/ALPHA where we +* set c1 = 0 and -ALPHA/2 <= c0 = c mod Q - Q < 0. 
+* Assumes coefficients to be positive standard representatives. +* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void poly_decompose(poly *a1, poly *a0, const poly *a) +{ + DBENCH_START(); + + decompose_avx(a1->vec, a0->vec, a->vec); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: poly_make_hint +* +* Description: Compute hint array. The coefficients of which are the +* indices of the coefficients of the input polynomial +* whose low bits overflow into the high bits. +* +* Arguments: - uint8_t *h: pointer to output hint array (preallocated of length N) +* - const poly *a0: pointer to low part of input polynomial +* - const poly *a1: pointer to high part of input polynomial +* +* Returns number of hints, i.e. length of hint array. +**************************************************/ +unsigned int poly_make_hint(uint8_t hint[N], const poly *a0, const poly *a1) +{ + unsigned int r; + DBENCH_START(); + + r = make_hint_avx(hint, a0->vec, a1->vec); + + DBENCH_STOP(*tround); + return r; +} + +/************************************************* +* Name: poly_use_hint +* +* Description: Use hint polynomial to correct the high bits of a polynomial. +* +* Arguments: - poly *b: pointer to output polynomial with corrected high bits +* - const poly *a: pointer to input polynomial +* - const poly *h: pointer to input hint polynomial +**************************************************/ +void poly_use_hint(poly *b, const poly *a, const poly *h) +{ + DBENCH_START(); + + use_hint_avx(b->vec, a->vec, h->vec); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: poly_chknorm +* +* Description: Check infinity norm of polynomial against given bound. +* Assumes input polynomial to be reduced by poly_reduce(). 
+*              Coefficients are processed eight at a time (one 256-bit
+*              vector per iteration); the per-lane comparison results are
+*              OR-ed together and tested once at the end, so the loop
+*              always runs over the whole polynomial.
+*
+* Arguments:   - const poly *a: pointer to polynomial
+*              - int32_t B: norm bound
+*
+* Returns 0 if norm is strictly smaller than B <= (Q-1)/8 and 1 otherwise.
+**************************************************/
+int poly_chknorm(const poly *a, int32_t B) {
+  unsigned int i;
+  int r;
+  __m256i f,t;
+  const __m256i bound = _mm256_set1_epi32(B-1); /* |c| > B-1 is the same test as |c| >= B */
+  DBENCH_START();
+
+  if(B > (Q-1)/8) /* bound outside the supported range: report failure (this path leaves before DBENCH_STOP) */
+    return 1;
+
+  t = _mm256_setzero_si256();
+  for(i = 0; i < N/8; i++) {
+    f = _mm256_load_si256(&a->vec[i]); /* aligned load of 8 coefficients */
+    f = _mm256_abs_epi32(f);
+    f = _mm256_cmpgt_epi32(f,bound); /* lane becomes all-ones where |c| > B-1 */
+    t = _mm256_or_si256(t,f); /* accumulate violations across iterations */
+  }
+
+  r = 1 - _mm256_testz_si256(t,t); /* testz yields 1 iff t is all zero, so r == 0 iff no lane violated the bound */
+  DBENCH_STOP(*tsample);
+  return r;
+}
+
+/*************************************************
+* Name:        rej_uniform
+*
+* Description: Sample uniformly random coefficients in [0, Q-1] by
+*              performing rejection sampling on array of random bytes.
+*              Each candidate is built from 3 consecutive bytes.
+*
+* Arguments:   - int32_t *a: pointer to output array (allocated)
+*              - unsigned int len: number of coefficients to be sampled
+*              - const uint8_t *buf: array of random bytes
+*              - unsigned int buflen: length of array of random bytes
+*
+* Returns number of sampled coefficients. Can be smaller than len if not enough
+* random bytes were given.
+**************************************************/
+static unsigned int rej_uniform(int32_t *a,
+                                unsigned int len,
+                                const uint8_t *buf,
+                                unsigned int buflen)
+{
+  unsigned int ctr, pos;
+  uint32_t t;
+  DBENCH_START();
+
+  ctr = pos = 0;
+  while(ctr < len && pos + 3 <= buflen) { /* stop when enough coefficients exist or fewer than 3 bytes remain */
+    t  = buf[pos++];
+    t |= (uint32_t)buf[pos++] << 8;
+    t |= (uint32_t)buf[pos++] << 16;
+    t &= 0x7FFFFF; /* keep the low 23 bits of the 3-byte little-endian value */
+
+    if(t < Q) /* accept only candidates below Q; all others are discarded */
+      a[ctr++] = t;
+  }
+
+  DBENCH_STOP(*tsample);
+  return ctr;
+}
+
+/*************************************************
+* Name:        poly_uniform_preinit / poly_uniform / poly_uniform_4x
+*
+* Description: Sample polynomial with uniformly random coefficients
+*              in [0,Q-1] by performing rejection sampling on the
+*              output of the 128-bit stream generator.
+*              - poly_uniform_preinit squeezes from a stream128 state that
+*                the caller has already initialized; it does not release it.
+*              - poly_uniform initializes a stream128 state from
+*                (seed, nonce), samples through poly_uniform_preinit and
+*                releases the state.
+*              - poly_uniform_4x fills four polynomials at once with the
+*                4-way SHAKE128 API; lane k absorbs the seed followed by
+*                nonce_k as two little-endian bytes.
+*              In every variant REJ_UNIFORM_NBLOCKS blocks are first handed
+*              to rej_uniform_avx; any shortfall is topped up one block at
+*              a time with the scalar rej_uniform.
+*              NOTE(review): stream128_* is declared outside this chunk;
+*              the earlier wording here said SHAKE256 - confirm the
+*              primitive against symmetric.h.
+*
+* Arguments:   - poly *a: pointer to output polynomial
+*              - const uint8_t seed[]: byte array with seed of length SEEDBYTES
+*              - uint16_t nonce: 2-byte nonce
+*              - stream128_state *state (preinit only): initialized stream
+**************************************************/
+void poly_uniform_preinit(poly *a, stream128_state *state)
+{
+  unsigned int ctr;
+  /* rej_uniform_avx reads up to 8 additional bytes */
+  ALIGNED_UINT8(REJ_UNIFORM_BUFLEN+8) buf;
+
+  stream128_squeezeblocks(buf.coeffs, REJ_UNIFORM_NBLOCKS, state);
+  ctr = rej_uniform_avx(a->coeffs, buf.coeffs);
+
+  while(ctr < N) {
+    /* length of buf is always divisible by 3; hence, no bytes left */
+    stream128_squeezeblocks(buf.coeffs, 1, state);
+    ctr += rej_uniform(a->coeffs + ctr, N - ctr, buf.coeffs, STREAM128_BLOCKBYTES);
+  }
+}
+
+void poly_uniform(poly *a, const uint8_t seed[SEEDBYTES], uint16_t nonce)
+{
+  stream128_state state;
+  stream128_init(&state, seed, nonce);
+  poly_uniform_preinit(a, &state);
+  stream128_release(&state); /* state is local to this call and released before returning */
+}
+
+void poly_uniform_4x(poly *a0,
+                     poly *a1,
+                     poly *a2,
+                     poly *a3,
+                     const uint8_t seed[32],
+                     uint16_t nonce0,
+                     uint16_t nonce1,
+                     uint16_t nonce2,
+                     uint16_t nonce3)
+{
+  unsigned int ctr0, ctr1, ctr2, ctr3;
+  ALIGNED_UINT8(REJ_UNIFORM_BUFLEN+8) buf[4];
+  shake128x4incctx state;
+  __m256i f;
+
+  f = _mm256_loadu_si256((__m256i *)seed); /* one unaligned 32-byte load of the seed ... */
+  _mm256_store_si256(buf[0].vec,f); /* ... copied to the front of each lane buffer */
+  _mm256_store_si256(buf[1].vec,f);
+  _mm256_store_si256(buf[2].vec,f);
+  _mm256_store_si256(buf[3].vec,f);
+
+  buf[0].coeffs[SEEDBYTES+0] = nonce0; /* low byte of the nonce (implicit truncation to uint8_t) */
+  buf[0].coeffs[SEEDBYTES+1] = nonce0 >> 8; /* high byte of the nonce */
+  buf[1].coeffs[SEEDBYTES+0] = nonce1;
+  buf[1].coeffs[SEEDBYTES+1] = nonce1 >> 8;
+  buf[2].coeffs[SEEDBYTES+0] = nonce2;
+  buf[2].coeffs[SEEDBYTES+1] = nonce2 >> 8;
+  buf[3].coeffs[SEEDBYTES+0] = nonce3;
+  buf[3].coeffs[SEEDBYTES+1] = nonce3 >> 8;
+
+  shake128x4_inc_init(&state);
+  shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, SEEDBYTES + 2);
+  shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_NBLOCKS, &state);
+
+  ctr0 = rej_uniform_avx(a0->coeffs, buf[0].coeffs);
+  ctr1 = rej_uniform_avx(a1->coeffs, buf[1].coeffs);
+  ctr2 = rej_uniform_avx(a2->coeffs, buf[2].coeffs);
+  ctr3 = rej_uniform_avx(a3->coeffs, buf[3].coeffs);
+
+  while(ctr0 < N || ctr1 < N || ctr2 < N || ctr3 < N) { /* all four lanes squeeze together until the slowest one is full */
+    shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state);
+
+    ctr0 += rej_uniform(a0->coeffs + ctr0, N - ctr0, buf[0].coeffs, SHAKE128_RATE); /* a lane that is already full passes len == 0 and adds nothing */
+    ctr1 += rej_uniform(a1->coeffs + ctr1, N - ctr1, buf[1].coeffs, SHAKE128_RATE);
+    ctr2 += rej_uniform(a2->coeffs + ctr2, N - ctr2, buf[2].coeffs, SHAKE128_RATE);
+    ctr3 += rej_uniform(a3->coeffs + ctr3, N - ctr3, buf[3].coeffs, SHAKE128_RATE);
+  }
+  shake128x4_inc_ctx_release(&state);
+}
+
+/*************************************************
+* Name:        rej_eta
+*
+* Description: Sample uniformly random coefficients in [-ETA, ETA] by
+*              performing rejection sampling on array of random bytes.
+*              Every input byte supplies two 4-bit candidates.
+*
+* Arguments:   - int32_t *a: pointer to output array (allocated)
+*              - unsigned int len: number of coefficients to be sampled
+*              - const uint8_t *buf: array of random bytes
+*              - unsigned int buflen: length of array of random bytes
+*
+* Returns number of sampled coefficients. 
Can be smaller than len if not enough
+* random bytes were given.
+**************************************************/
+static unsigned int rej_eta(int32_t *a,
+                            unsigned int len,
+                            const uint8_t *buf,
+                            unsigned int buflen)
+{
+  unsigned int ctr, pos;
+  uint32_t t0, t1;
+  DBENCH_START();
+
+  ctr = pos = 0;
+  while(ctr < len && pos < buflen) {
+    t0 = buf[pos] & 0x0F; /* low nibble */
+    t1 = buf[pos++] >> 4; /* high nibble */
+
+#if ETA == 2
+    if(t0 < 15) { /* reject 15 so the 15 remaining values split evenly into 5 residues */
+      t0 = t0 - (205*t0 >> 10)*5; /* t0 mod 5: (205*t0 >> 10) equals t0/5 for t0 < 15 */
+      a[ctr++] = 2 - t0;
+    }
+    if(t1 < 15 && ctr < len) { /* re-check ctr: the low nibble may have filled the last slot */
+      t1 = t1 - (205*t1 >> 10)*5;
+      a[ctr++] = 2 - t1;
+    }
+#elif ETA == 4
+    if(t0 < 9) /* accept 0..8, mapped to 4..-4 */
+      a[ctr++] = 4 - t0;
+    if(t1 < 9 && ctr < len)
+      a[ctr++] = 4 - t1;
+#endif
+  }
+
+  DBENCH_STOP(*tsample);
+  return ctr;
+}
+
+/*************************************************
+* Name:        poly_uniform_eta_preinit / poly_uniform_eta / poly_uniform_eta_4x
+*
+* Description: Sample polynomial with uniformly random coefficients
+*              in [-ETA,ETA] by performing rejection sampling using the
+*              output stream of SHAKE256(seed|nonce)
+*              - poly_uniform_eta_preinit squeezes from a stream256 state
+*                the caller has already initialized; it does not release it.
+*              - poly_uniform_eta initializes a stream256 state from
+*                (seed, nonce), samples and releases the state.
+*              - poly_uniform_eta_4x fills four polynomials at once with the
+*                4-way SHAKE256 API; lane k absorbs the 64-byte seed
+*                followed by nonce_k as two little-endian bytes (66 bytes).
+*              In every variant REJ_UNIFORM_ETA_NBLOCKS blocks go through
+*              rej_eta_avx first; any shortfall is topped up one block at a
+*              time with the scalar rej_eta.
+*
+* Arguments:   - poly *a: pointer to output polynomial
+*              - const uint8_t seed[]: byte array with seed of length CRHBYTES
+*              - uint16_t nonce: 2-byte nonce
+*              - stream256_state *state (preinit only): initialized stream
+**************************************************/
+void poly_uniform_eta_preinit(poly *a, stream256_state *state)
+{
+  unsigned int ctr;
+  ALIGNED_UINT8(REJ_UNIFORM_ETA_BUFLEN) buf;
+
+  stream256_squeezeblocks(buf.coeffs, REJ_UNIFORM_ETA_NBLOCKS, state);
+  ctr = rej_eta_avx(a->coeffs, buf.coeffs);
+
+  while(ctr < N) {
+    stream256_squeezeblocks(buf.coeffs, 1, state);
+    ctr += rej_eta(a->coeffs + ctr, N - ctr, buf.coeffs, STREAM256_BLOCKBYTES);
+  }
+}
+
+void poly_uniform_eta(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce)
+{
+  stream256_state state;
+  stream256_init(&state, seed, nonce);
+  poly_uniform_eta_preinit(a, &state);
+  stream256_release(&state);
+}
+
+void poly_uniform_eta_4x(poly *a0,
+                         poly *a1,
+                         poly *a2,
+                         poly *a3,
+                         const uint8_t seed[64],
+                         uint16_t nonce0,
+                         uint16_t nonce1,
+                         uint16_t nonce2,
+                         uint16_t nonce3)
+{
+  unsigned int ctr0, ctr1, ctr2, ctr3;
+  ALIGNED_UINT8(REJ_UNIFORM_ETA_BUFLEN) buf[4];
+
+  __m256i f;
+  shake256x4incctx state;
+
+  f = _mm256_loadu_si256((__m256i *)&seed[0]); /* first 32 seed bytes */
+  _mm256_store_si256(&buf[0].vec[0],f);
+  _mm256_store_si256(&buf[1].vec[0],f);
+  _mm256_store_si256(&buf[2].vec[0],f);
+  _mm256_store_si256(&buf[3].vec[0],f);
+  f = _mm256_loadu_si256((__m256i *)&seed[32]); /* second 32 seed bytes */
+  _mm256_store_si256(&buf[0].vec[1],f);
+  _mm256_store_si256(&buf[1].vec[1],f);
+  _mm256_store_si256(&buf[2].vec[1],f);
+  _mm256_store_si256(&buf[3].vec[1],f);
+
+  buf[0].coeffs[64] = nonce0; /* nonce follows the 64-byte seed, low byte first */
+  buf[0].coeffs[65] = nonce0 >> 8;
+  buf[1].coeffs[64] = nonce1;
+  buf[1].coeffs[65] = nonce1 >> 8;
+  buf[2].coeffs[64] = nonce2;
+  buf[2].coeffs[65] = nonce2 >> 8;
+  buf[3].coeffs[64] = nonce3;
+  buf[3].coeffs[65] = nonce3 >> 8;
+
+  shake256x4_inc_init(&state);
+  shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 66);
+  shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_ETA_NBLOCKS, &state);
+
+  ctr0 = rej_eta_avx(a0->coeffs, buf[0].coeffs);
+  ctr1 = rej_eta_avx(a1->coeffs, buf[1].coeffs);
+  ctr2 = rej_eta_avx(a2->coeffs, buf[2].coeffs);
+  ctr3 = rej_eta_avx(a3->coeffs, buf[3].coeffs);
+
+  while(ctr0 < N || ctr1 < N || ctr2 < N || ctr3 < N) { /* keep squeezing all lanes until the slowest one is full */
+    shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state);
+
+    ctr0 += rej_eta(a0->coeffs + ctr0, N - ctr0, buf[0].coeffs, SHAKE256_RATE);
+    ctr1 += rej_eta(a1->coeffs + ctr1, N - ctr1, buf[1].coeffs, SHAKE256_RATE);
+    ctr2 += rej_eta(a2->coeffs + ctr2, N - ctr2, buf[2].coeffs, SHAKE256_RATE);
+    ctr3 += rej_eta(a3->coeffs + ctr3, N - ctr3, buf[3].coeffs, SHAKE256_RATE);
+  }
+  shake256x4_inc_ctx_release(&state);
+}
+
+/*************************************************
+* Name:        poly_uniform_gamma1
+*
+* Description: Sample polynomial with uniformly random coefficients
+*              in [-(GAMMA1 - 1), GAMMA1] by unpacking output stream
+*              of SHAKE256(seed|nonce)
+*
+* Arguments:   - poly *a: pointer to output polynomial
+*              - const uint8_t seed[]: byte array with seed of length CRHBYTES
+*              - uint16_t nonce: 16-bit nonce
+*
+* Variants:    poly_uniform_gamma1_preinit unpacks from a stream256 state
+*              the caller has already initialized (and does not release
+*              it); poly_uniform_gamma1 creates and releases its own state;
+*              poly_uniform_gamma1_4x produces four polynomials with the
+*              4-way SHAKE256 API. No rejection is involved: the squeezed
+*              bytes are passed directly to polyz_unpack.
+*
+* POLY_UNIFORM_GAMMA1_NBLOCKS is POLYZ_PACKEDBYTES divided by
+* STREAM256_BLOCKBYTES, rounded up.
+**************************************************/
+#define POLY_UNIFORM_GAMMA1_NBLOCKS ((POLYZ_PACKEDBYTES+STREAM256_BLOCKBYTES-1)/STREAM256_BLOCKBYTES)
+void poly_uniform_gamma1_preinit(poly *a, stream256_state *state)
+{
+  /* polyz_unpack reads 14 additional bytes */
+  ALIGNED_UINT8(POLY_UNIFORM_GAMMA1_NBLOCKS*STREAM256_BLOCKBYTES+14) buf;
+  stream256_squeezeblocks(buf.coeffs, POLY_UNIFORM_GAMMA1_NBLOCKS, state);
+  polyz_unpack(a, buf.coeffs);
+}
+
+void poly_uniform_gamma1(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce)
+{
+  stream256_state state;
+  stream256_init(&state, seed, nonce);
+  poly_uniform_gamma1_preinit(a, &state);
+  stream256_release(&state);
+}
+
+void poly_uniform_gamma1_4x(poly *a0,
+                            poly *a1,
+                            poly *a2,
+                            poly *a3,
+                            const uint8_t seed[64],
+                            uint16_t nonce0,
+                            uint16_t nonce1,
+                            uint16_t nonce2,
+                            uint16_t nonce3)
+{
+  ALIGNED_UINT8(POLY_UNIFORM_GAMMA1_NBLOCKS*STREAM256_BLOCKBYTES+14) buf[4];
+  shake256x4incctx state;
+  __m256i f;
+
+  f = _mm256_loadu_si256((__m256i *)&seed[0]); /* first 32 seed bytes */
+  _mm256_store_si256(&buf[0].vec[0],f);
+  _mm256_store_si256(&buf[1].vec[0],f);
+  _mm256_store_si256(&buf[2].vec[0],f);
+  _mm256_store_si256(&buf[3].vec[0],f);
+  f = _mm256_loadu_si256((__m256i *)&seed[32]); /* second 32 seed bytes */
+  _mm256_store_si256(&buf[0].vec[1],f);
+  _mm256_store_si256(&buf[1].vec[1],f);
+  _mm256_store_si256(&buf[2].vec[1],f);
+  _mm256_store_si256(&buf[3].vec[1],f);
+
+  buf[0].coeffs[64] = nonce0; /* nonce follows the 64-byte seed, low byte first */
+  buf[0].coeffs[65] = nonce0 >> 8;
+  buf[1].coeffs[64] = nonce1;
+  buf[1].coeffs[65] = nonce1 >> 8;
+  buf[2].coeffs[64] = nonce2;
+  buf[2].coeffs[65] = nonce2 >> 8;
+  buf[3].coeffs[64] = nonce3;
+  buf[3].coeffs[65] = nonce3 >> 8;
+
+  shake256x4_inc_init(&state);
+  shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 66);
+  shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, POLY_UNIFORM_GAMMA1_NBLOCKS, &state);
+  shake256x4_inc_ctx_release(&state); /* no further squeezing is needed, so the state is released before unpacking */
+
+  polyz_unpack(a0, buf[0].coeffs);
+  polyz_unpack(a1, buf[1].coeffs);
+  polyz_unpack(a2, buf[2].coeffs);
+  polyz_unpack(a3, buf[3].coeffs);
+}
+
+/*************************************************
+* Name:        poly_challenge
+*
+* Description: Implementation of H. Samples polynomial with TAU nonzero
+*              coefficients in {-1,1} using the output stream of
+*              SHAKE256(seed).
+*              The first 8 squeezed bytes provide the sign bits; each
+*              following byte is a candidate position, accepted when it
+*              is <= i for the current index i.
+*
+* Arguments:   - poly *c: pointer to output polynomial
+*              - const uint8_t seed[]: byte array containing seed of length SEEDBYTES
+**************************************************/
+void poly_challenge(poly * restrict c, const uint8_t seed[SEEDBYTES]) {
+  unsigned int i, b, pos;
+  uint64_t signs;
+  ALIGNED_UINT8(SHAKE256_RATE) buf;
+  shake256incctx state;
+
+  shake256_inc_init(&state);
+  shake256_inc_absorb(&state, seed, SEEDBYTES);
+  shake256_inc_finalize(&state);
+  shake256_inc_squeeze(buf.coeffs, SHAKE256_RATE, &state);
+
+  memcpy(&signs, buf.coeffs, 8); /* 64 sign bits, consumed one per nonzero coefficient */
+  pos = 8;
+
+  memset(c->vec, 0, sizeof(poly)); /* start from the all-zero polynomial */
+  for(i = N-TAU; i < N; ++i) {
+    do {
+      if(pos >= SHAKE256_RATE) { /* buffer exhausted: squeeze one more block */
+        shake256_squeezeblocks(buf.coeffs, 1, &state);
+        pos = 0;
+      }
+
+      b = buf.coeffs[pos++];
+    } while(b > i); /* reject positions beyond the current index */
+
+    c->coeffs[i] = c->coeffs[b]; /* move whatever was at b up to i ... */
+    c->coeffs[b] = 1 - 2*(signs & 1); /* ... and place +1 (bit 0) or -1 (bit 1) at b */
+    signs >>= 1;
+  }
+  shake256_inc_ctx_release(&state);
+}
+
+/*************************************************
+* Name:        polyeta_pack
+*
+* Description: Bit-pack polynomial with coefficients in [-ETA,ETA].
+*              Each coefficient x is stored as ETA - x. With ETA == 2 a
+*              value takes 3 bits (8 coefficients per 3 bytes); with
+*              ETA == 4 it takes 4 bits (2 coefficients per byte).
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                            POLYETA_PACKEDBYTES bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void polyeta_pack(uint8_t r[POLYETA_PACKEDBYTES], const poly * restrict a) {
+  unsigned int i;
+  uint8_t t[8];
+  DBENCH_START();
+
+#if ETA == 2
+  for(i = 0; i < N/8; ++i) {
+    t[0] = ETA - a->coeffs[8*i+0]; /* map [-ETA,ETA] onto [0,2*ETA] */
+    t[1] = ETA - a->coeffs[8*i+1];
+    t[2] = ETA - a->coeffs[8*i+2];
+    t[3] = ETA - a->coeffs[8*i+3];
+    t[4] = ETA - a->coeffs[8*i+4];
+    t[5] = ETA - a->coeffs[8*i+5];
+    t[6] = ETA - a->coeffs[8*i+6];
+    t[7] = ETA - a->coeffs[8*i+7];
+
+    r[3*i+0]  = (t[0] >> 0) | (t[1] << 3) | (t[2] << 6); /* t[2] straddles bytes 0 and 1 */
+    r[3*i+1]  = (t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7); /* t[5] straddles bytes 1 and 2 */
+    r[3*i+2]  = (t[5] >> 1) | (t[6] << 2) | (t[7] << 5);
+  }
+#elif ETA == 4
+  for(i = 0; i < N/2; ++i) {
+    t[0] = ETA - a->coeffs[2*i+0];
+    t[1] = ETA - a->coeffs[2*i+1];
+    r[i] = t[0] | (t[1] << 4); /* even coefficient in the low nibble, odd one in the high nibble */
+  }
+#endif
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyeta_unpack
+*
+* Description: Unpack polynomial with coefficients in [-ETA,ETA].
+*              Reads 3-bit fields (ETA == 2) or 4-bit fields (ETA == 4)
+*              and maps each stored value v to ETA - v. Field values are
+*              not range-checked here.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void polyeta_unpack(poly * restrict r, const uint8_t a[POLYETA_PACKEDBYTES]) {
+  unsigned int i;
+  DBENCH_START();
+
+#if ETA == 2
+  for(i = 0; i < N/8; ++i) {
+    r->coeffs[8*i+0] =  (a[3*i+0] >> 0) & 7;
+    r->coeffs[8*i+1] =  (a[3*i+0] >> 3) & 7;
+    r->coeffs[8*i+2] = ((a[3*i+0] >> 6) | (a[3*i+1] << 2)) & 7; /* field straddles bytes 0 and 1 */
+    r->coeffs[8*i+3] =  (a[3*i+1] >> 1) & 7;
+    r->coeffs[8*i+4] =  (a[3*i+1] >> 4) & 7;
+    r->coeffs[8*i+5] = ((a[3*i+1] >> 7) | (a[3*i+2] << 1)) & 7; /* field straddles bytes 1 and 2 */
+    r->coeffs[8*i+6] =  (a[3*i+2] >> 2) & 7;
+    r->coeffs[8*i+7] =  (a[3*i+2] >> 5) & 7;
+
+    r->coeffs[8*i+0] = ETA - r->coeffs[8*i+0]; /* undo the ETA - x mapping applied when packing */
+    r->coeffs[8*i+1] = ETA - r->coeffs[8*i+1];
+    r->coeffs[8*i+2] = ETA - r->coeffs[8*i+2];
+    r->coeffs[8*i+3] = ETA - r->coeffs[8*i+3];
+    r->coeffs[8*i+4] = ETA - r->coeffs[8*i+4];
+    r->coeffs[8*i+5] = ETA - r->coeffs[8*i+5];
+    r->coeffs[8*i+6] = ETA - r->coeffs[8*i+6];
+    r->coeffs[8*i+7] = ETA - r->coeffs[8*i+7];
+  }
+#elif ETA == 4
+  for(i = 0; i < N/2; ++i) {
+    r->coeffs[2*i+0] = a[i] & 0x0F; /* low nibble */
+    r->coeffs[2*i+1] = a[i] >> 4; /* high nibble */
+    r->coeffs[2*i+0] = ETA - r->coeffs[2*i+0];
+    r->coeffs[2*i+1] = ETA - r->coeffs[2*i+1];
+  }
+#endif
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyt1_pack
+*
+* Description: Bit-pack polynomial t1 with coefficients fitting in 10 bits.
+*              Input coefficients are assumed to be positive standard representatives.
+*              Four coefficients are written into five bytes, least
+*              significant bits first; the implicit conversion to uint8_t
+*              drops the bits that belong to the following byte.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                            POLYT1_PACKEDBYTES bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void polyt1_pack(uint8_t r[POLYT1_PACKEDBYTES], const poly * restrict a) {
+  unsigned int i;
+  DBENCH_START();
+
+  for(i = 0; i < N/4; ++i) {
+    r[5*i+0] = (a->coeffs[4*i+0] >> 0); /* low 8 bits of coefficient 0 */
+    r[5*i+1] = (a->coeffs[4*i+0] >> 8) | (a->coeffs[4*i+1] << 2);
+    r[5*i+2] = (a->coeffs[4*i+1] >> 6) | (a->coeffs[4*i+2] << 4);
+    r[5*i+3] = (a->coeffs[4*i+2] >> 4) | (a->coeffs[4*i+3] << 6);
+    r[5*i+4] = (a->coeffs[4*i+3] >> 2); /* high 8 bits of coefficient 3 */
+  }
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyt1_unpack
+*
+* Description: Unpack polynomial t1 with 10-bit coefficients.
+*              Output coefficients are positive standard representatives.
+*              Inverse of the layout above: five bytes yield four
+*              coefficients, each masked with 0x3FF.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void polyt1_unpack(poly * restrict r, const uint8_t a[POLYT1_PACKEDBYTES]) {
+  unsigned int i;
+  DBENCH_START();
+
+  for(i = 0; i < N/4; ++i) {
+    r->coeffs[4*i+0] = ((a[5*i+0] >> 0) | ((uint32_t)a[5*i+1] << 8)) & 0x3FF;
+    r->coeffs[4*i+1] = ((a[5*i+1] >> 2) | ((uint32_t)a[5*i+2] << 6)) & 0x3FF;
+    r->coeffs[4*i+2] = ((a[5*i+2] >> 4) | ((uint32_t)a[5*i+3] << 4)) & 0x3FF;
+    r->coeffs[4*i+3] = ((a[5*i+3] >> 6) | ((uint32_t)a[5*i+4] << 2)) & 0x3FF;
+  }
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyt0_pack
+*
+* Description: Bit-pack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}].
+*              Each coefficient x is stored as 2^{D-1} - x. Eight values
+*              are written into 13 bytes (13 bits per value), least
+*              significant bits first; assignments to r[] truncate to the
+*              low 8 bits.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                            POLYT0_PACKEDBYTES bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void polyt0_pack(uint8_t r[POLYT0_PACKEDBYTES], const poly * restrict a) {
+  unsigned int i;
+  uint32_t t[8];
+  DBENCH_START();
+
+  for(i = 0; i < N/8; ++i) {
+    t[0] = (1 << (D-1)) - a->coeffs[8*i+0]; /* shift the range so the stored value is non-negative */
+    t[1] = (1 << (D-1)) - a->coeffs[8*i+1];
+    t[2] = (1 << (D-1)) - a->coeffs[8*i+2];
+    t[3] = (1 << (D-1)) - a->coeffs[8*i+3];
+    t[4] = (1 << (D-1)) - a->coeffs[8*i+4];
+    t[5] = (1 << (D-1)) - a->coeffs[8*i+5];
+    t[6] = (1 << (D-1)) - a->coeffs[8*i+6];
+    t[7] = (1 << (D-1)) - a->coeffs[8*i+7];
+
+    r[13*i+ 0]  =  t[0];
+    r[13*i+ 1]  =  t[0] >>  8;
+    r[13*i+ 1] |=  t[1] <<  5; /* bytes shared by two values are written with = then |= */
+    r[13*i+ 2]  =  t[1] >>  3;
+    r[13*i+ 3]  =  t[1] >> 11;
+    r[13*i+ 3] |=  t[2] <<  2;
+    r[13*i+ 4]  =  t[2] >>  6;
+    r[13*i+ 4] |=  t[3] <<  7;
+    r[13*i+ 5]  =  t[3] >>  1;
+    r[13*i+ 6]  =  t[3] >>  9;
+    r[13*i+ 6] |=  t[4] <<  4;
+    r[13*i+ 7]  =  t[4] >>  4;
+    r[13*i+ 8]  =  t[4] >> 12;
+    r[13*i+ 8] |=  t[5] <<  1;
+    r[13*i+ 9]  =  t[5] >>  7;
+    r[13*i+ 9] |=  t[6] <<  6;
+    r[13*i+10]  =  t[6] >>  2;
+    r[13*i+11]  =  t[6] >> 10;
+    r[13*i+11] |=  t[7] <<  3;
+    r[13*i+12]  =  t[7] >>  5;
+  }
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyt0_unpack
+*
+* Description: Unpack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}].
+*              Thirteen bytes yield eight 13-bit fields (mask 0x1FFF);
+*              each field v is then mapped to 2^{D-1} - v.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void polyt0_unpack(poly * restrict r, const uint8_t a[POLYT0_PACKEDBYTES]) {
+  unsigned int i;
+  DBENCH_START();
+
+  for(i = 0; i < N/8; ++i) {
+    r->coeffs[8*i+0]  = a[13*i+0];
+    r->coeffs[8*i+0] |= (uint32_t)a[13*i+1] << 8;
+    r->coeffs[8*i+0] &= 0x1FFF; /* drop bits that belong to the next field */
+
+    r->coeffs[8*i+1]  = a[13*i+1] >> 5;
+    r->coeffs[8*i+1] |= (uint32_t)a[13*i+2] << 3;
+    r->coeffs[8*i+1] |= (uint32_t)a[13*i+3] << 11;
+    r->coeffs[8*i+1] &= 0x1FFF;
+
+    r->coeffs[8*i+2]  = a[13*i+3] >> 2;
+    r->coeffs[8*i+2] |= (uint32_t)a[13*i+4] << 6;
+    r->coeffs[8*i+2] &= 0x1FFF;
+
+    r->coeffs[8*i+3]  = a[13*i+4] >> 7;
+    r->coeffs[8*i+3] |= (uint32_t)a[13*i+5] << 1;
+    r->coeffs[8*i+3] |= (uint32_t)a[13*i+6] << 9;
+    r->coeffs[8*i+3] &= 0x1FFF;
+
+    r->coeffs[8*i+4]  = a[13*i+6] >> 4;
+    r->coeffs[8*i+4] |= (uint32_t)a[13*i+7] << 4;
+    r->coeffs[8*i+4] |= (uint32_t)a[13*i+8] << 12;
+    r->coeffs[8*i+4] &= 0x1FFF;
+
+    r->coeffs[8*i+5]  = a[13*i+8] >> 1;
+    r->coeffs[8*i+5] |= (uint32_t)a[13*i+9] << 7;
+    r->coeffs[8*i+5] &= 0x1FFF;
+
+    r->coeffs[8*i+6]  = a[13*i+9] >> 6;
+    r->coeffs[8*i+6] |= (uint32_t)a[13*i+10] << 2;
+    r->coeffs[8*i+6] |= (uint32_t)a[13*i+11] << 10;
+    r->coeffs[8*i+6] &= 0x1FFF;
+
+    r->coeffs[8*i+7]  = a[13*i+11] >> 3;
+    r->coeffs[8*i+7] |= (uint32_t)a[13*i+12] << 5;
+    r->coeffs[8*i+7] &= 0x1FFF;
+
+    r->coeffs[8*i+0] = (1 << (D-1)) - r->coeffs[8*i+0]; /* undo the 2^{D-1} - x mapping applied when packing */
+    r->coeffs[8*i+1] = (1 << (D-1)) - r->coeffs[8*i+1];
+    r->coeffs[8*i+2] = (1 << (D-1)) - r->coeffs[8*i+2];
+    r->coeffs[8*i+3] = (1 << (D-1)) - r->coeffs[8*i+3];
+    r->coeffs[8*i+4] = (1 << (D-1)) - r->coeffs[8*i+4];
+    r->coeffs[8*i+5] = (1 << (D-1)) - r->coeffs[8*i+5];
+    r->coeffs[8*i+6] = (1 << (D-1)) - r->coeffs[8*i+6];
+    r->coeffs[8*i+7] = (1 << (D-1)) - r->coeffs[8*i+7];
+  }
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyz_pack
+*
+* 
Description: Bit-pack polynomial with coefficients
+*              in [-(GAMMA1 - 1), GAMMA1].
+*              Each coefficient x is stored as GAMMA1 - x. With
+*              GAMMA1 == 2^17 four values fill 9 bytes (18 bits each);
+*              with GAMMA1 == 2^19 two values fill 5 bytes (20 bits each).
+*              Assignments to r[] truncate to the low 8 bits.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                            POLYZ_PACKEDBYTES bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void polyz_pack(uint8_t r[POLYZ_PACKEDBYTES], const poly * restrict a) {
+  unsigned int i;
+  uint32_t t[4];
+  DBENCH_START();
+
+#if GAMMA1 == (1 << 17)
+  for(i = 0; i < N/4; ++i) {
+    t[0] = GAMMA1 - a->coeffs[4*i+0]; /* shift the range so the stored value is non-negative */
+    t[1] = GAMMA1 - a->coeffs[4*i+1];
+    t[2] = GAMMA1 - a->coeffs[4*i+2];
+    t[3] = GAMMA1 - a->coeffs[4*i+3];
+
+    r[9*i+0]  = t[0];
+    r[9*i+1]  = t[0] >> 8;
+    r[9*i+2]  = t[0] >> 16;
+    r[9*i+2] |= t[1] << 2; /* bytes shared by two values are written with = then |= */
+    r[9*i+3]  = t[1] >> 6;
+    r[9*i+4]  = t[1] >> 14;
+    r[9*i+4] |= t[2] << 4;
+    r[9*i+5]  = t[2] >> 4;
+    r[9*i+6]  = t[2] >> 12;
+    r[9*i+6] |= t[3] << 6;
+    r[9*i+7]  = t[3] >> 2;
+    r[9*i+8]  = t[3] >> 10;
+  }
+#elif GAMMA1 == (1 << 19)
+  for(i = 0; i < N/2; ++i) {
+    t[0] = GAMMA1 - a->coeffs[2*i+0];
+    t[1] = GAMMA1 - a->coeffs[2*i+1];
+
+    r[5*i+0]  = t[0];
+    r[5*i+1]  = t[0] >> 8;
+    r[5*i+2]  = t[0] >> 16;
+    r[5*i+2] |= t[1] << 4;
+    r[5*i+3]  = t[1] >> 4;
+    r[5*i+4]  = t[1] >> 12;
+  }
+#endif
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyz_unpack
+*
+* Description: Unpack polynomial z with coefficients
+*              in [-(GAMMA1 - 1), GAMMA1].
+*              Eight coefficients are decoded per iteration from one
+*              unaligned 32-byte load. An iteration consumes only 18 bytes
+*              (GAMMA1 == 2^17) or 20 bytes (GAMMA1 == 2^19), so the final
+*              load reads 14 resp. 12 bytes past the data it decodes; the
+*              input buffer must be padded accordingly.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+#if GAMMA1 == (1 << 17)
+void polyz_unpack(poly * restrict r, const uint8_t *a) {
+  unsigned int i;
+  __m256i f;
+  const __m256i shufbidx = _mm256_set_epi8(-1, 9, 8, 7,-1, 7, 6, 5,-1, 5, 4, 3,-1, 3, 2, 1,
+                                           -1, 8, 7, 6,-1, 6, 5, 4,-1, 4, 3, 2,-1, 2, 1, 0);
+  const __m256i srlvdidx = _mm256_set_epi32(6,4,2,0,6,4,2,0); /* per-lane right shifts 0,2,4,6 in each 128-bit half */
+  const __m256i mask  = _mm256_set1_epi32(0x3FFFF); /* 18-bit field */
+  const __m256i gamma1 = _mm256_set1_epi32(GAMMA1);
+  DBENCH_START();
+
+  for(i = 0; i < N/8; i++) {
+    f = _mm256_loadu_si256((__m256i *)&a[18*i]);
+    f = _mm256_permute4x64_epi64(f,0x94); /* 64-bit words (0,1,1,2): the upper half starts at input byte 8 */
+    f = _mm256_shuffle_epi8(f,shufbidx); /* gather the 3 bytes holding each field into its 32-bit lane (-1 zeroes the top byte) */
+    f = _mm256_srlv_epi32(f,srlvdidx); /* align each field to bit 0 */
+    f = _mm256_and_si256(f,mask);
+    f = _mm256_sub_epi32(gamma1,f); /* stored value v decodes to GAMMA1 - v */
+    _mm256_store_si256(&r->vec[i],f);
+  }
+
+  DBENCH_STOP(*tpack);
+}
+
+#elif GAMMA1 == (1 << 19)
+void polyz_unpack(poly * restrict r, const uint8_t *a) {
+  unsigned int i;
+  __m256i f;
+  const __m256i shufbidx = _mm256_set_epi8(-1,11,10, 9,-1, 9, 8, 7,-1, 6, 5, 4,-1, 4, 3, 2,
+                                           -1, 9, 8, 7,-1, 7, 6, 5,-1, 4, 3, 2,-1, 2, 1, 0);
+  const __m256i srlvdidx = _mm256_set1_epi64x((uint64_t)4 << 32); /* 32-bit lanes alternate shift 0 (even lane) and 4 (odd lane) */
+  const __m256i mask  = _mm256_set1_epi32(0xFFFFF); /* 20-bit field */
+  const __m256i gamma1 = _mm256_set1_epi32(GAMMA1);
+  DBENCH_START();
+
+  for(i = 0; i < N/8; i++) {
+    f = _mm256_loadu_si256((__m256i *)&a[20*i]);
+    f = _mm256_permute4x64_epi64(f,0x94); /* 64-bit words (0,1,1,2): the upper half starts at input byte 8 */
+    f = _mm256_shuffle_epi8(f,shufbidx);
+    f = _mm256_srlv_epi32(f,srlvdidx);
+    f = _mm256_and_si256(f,mask);
+    f = _mm256_sub_epi32(gamma1,f); /* stored value v decodes to GAMMA1 - v */
+    _mm256_store_si256(&r->vec[i],f);
+  }
+
+  DBENCH_STOP(*tpack);
+}
+#endif
+
+/*************************************************
+* Name:        polyw1_pack
+*
+* Description: Bit-pack polynomial w1 with coefficients in [0,15] or [0,43].
+*              Input coefficients are assumed to be positive standard representatives.
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYW1_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +#if GAMMA2 == (Q-1)/88 +void polyw1_pack(uint8_t *r, const poly * restrict a) { + unsigned int i; + __m256i f0,f1,f2,f3; + const __m256i shift1 = _mm256_set1_epi16((64 << 8) + 1); + const __m256i shift2 = _mm256_set1_epi32((4096 << 16) + 1); + const __m256i shufdidx1 = _mm256_set_epi32(7,3,6,2,5,1,4,0); + const __m256i shufdidx2 = _mm256_set_epi32(-1,-1,6,5,4,2,1,0); + const __m256i shufbidx = _mm256_set_epi8(-1,-1,-1,-1,14,13,12,10, 9, 8, 6, 5, 4, 2, 1, 0, + -1,-1,-1,-1,14,13,12,10, 9, 8, 6, 5, 4, 2, 1, 0); + DBENCH_START(); + + for(i = 0; i < N/32; i++) { + f0 = _mm256_load_si256(&a->vec[4*i+0]); + f1 = _mm256_load_si256(&a->vec[4*i+1]); + f2 = _mm256_load_si256(&a->vec[4*i+2]); + f3 = _mm256_load_si256(&a->vec[4*i+3]); + f0 = _mm256_packus_epi32(f0,f1); + f1 = _mm256_packus_epi32(f2,f3); + f0 = _mm256_packus_epi16(f0,f1); + f0 = _mm256_maddubs_epi16(f0,shift1); + f0 = _mm256_madd_epi16(f0,shift2); + f0 = _mm256_permutevar8x32_epi32(f0,shufdidx1); + f0 = _mm256_shuffle_epi8(f0,shufbidx); + f0 = _mm256_permutevar8x32_epi32(f0,shufdidx2); + _mm256_storeu_si256((__m256i *)&r[24*i],f0); + } + + DBENCH_STOP(*tpack); +} + +#elif GAMMA2 == (Q-1)/32 +void polyw1_pack(uint8_t *r, const poly * restrict a) { + unsigned int i; + __m256i f0, f1, f2, f3, f4, f5, f6, f7; + const __m256i shift = _mm256_set1_epi16((16 << 8) + 1); + const __m256i shufbidx = _mm256_set_epi8(15,14, 7, 6,13,12, 5, 4,11,10, 3, 2, 9, 8, 1, 0, + 15,14, 7, 6,13,12, 5, 4,11,10, 3, 2, 9, 8, 1, 0); + DBENCH_START(); + + for(i = 0; i < N/64; ++i) { + f0 = _mm256_load_si256(&a->vec[8*i+0]); + f1 = _mm256_load_si256(&a->vec[8*i+1]); + f2 = _mm256_load_si256(&a->vec[8*i+2]); + f3 = _mm256_load_si256(&a->vec[8*i+3]); + f4 = _mm256_load_si256(&a->vec[8*i+4]); + f5 = _mm256_load_si256(&a->vec[8*i+5]); + f6 = 
_mm256_load_si256(&a->vec[8*i+6]); + f7 = _mm256_load_si256(&a->vec[8*i+7]); + f0 = _mm256_packus_epi32(f0,f1); + f1 = _mm256_packus_epi32(f2,f3); + f2 = _mm256_packus_epi32(f4,f5); + f3 = _mm256_packus_epi32(f6,f7); + f0 = _mm256_packus_epi16(f0,f1); + f1 = _mm256_packus_epi16(f2,f3); + f0 = _mm256_maddubs_epi16(f0,shift); + f1 = _mm256_maddubs_epi16(f1,shift); + f0 = _mm256_packus_epi16(f0,f1); + f0 = _mm256_permute4x64_epi64(f0,0xD8); + f0 = _mm256_shuffle_epi8(f0,shufbidx); + _mm256_storeu_si256((__m256i *)&r[32*i], f0); + } + + DBENCH_STOP(*tpack); +} +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/poly.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/poly.h new file mode 100644 index 0000000000..7bcd8e5e03 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/poly.h @@ -0,0 +1,112 @@ +#ifndef POLY_H +#define POLY_H + +#include +#include "align.h" +#include "params.h" +#include "symmetric.h" + +typedef ALIGNED_INT32(N) poly; + +#define poly_reduce DILITHIUM_NAMESPACE(poly_reduce) +void poly_reduce(poly *a); +#define poly_caddq DILITHIUM_NAMESPACE(poly_caddq) +void poly_caddq(poly *a); + +#define poly_add DILITHIUM_NAMESPACE(poly_add) +void poly_add(poly *c, const poly *a, const poly *b); +#define poly_sub DILITHIUM_NAMESPACE(poly_sub) +void poly_sub(poly *c, const poly *a, const poly *b); +#define poly_shiftl DILITHIUM_NAMESPACE(poly_shiftl) +void poly_shiftl(poly *a); + +#define poly_ntt DILITHIUM_NAMESPACE(poly_ntt) +void poly_ntt(poly *a); +#define poly_invntt_tomont DILITHIUM_NAMESPACE(poly_invntt_tomont) +void poly_invntt_tomont(poly *a); +#define poly_nttunpack DILITHIUM_NAMESPACE(poly_nttunpack) +void poly_nttunpack(poly *a); +#define poly_pointwise_montgomery DILITHIUM_NAMESPACE(poly_pointwise_montgomery) +void poly_pointwise_montgomery(poly *c, const poly *a, const poly *b); + +#define poly_power2round DILITHIUM_NAMESPACE(poly_power2round) +void poly_power2round(poly 
*a1, poly *a0, const poly *a); +#define poly_decompose DILITHIUM_NAMESPACE(poly_decompose) +void poly_decompose(poly *a1, poly *a0, const poly *a); +#define poly_make_hint DILITHIUM_NAMESPACE(poly_make_hint) +unsigned int poly_make_hint(uint8_t hint[N], const poly *a0, const poly *a1); +#define poly_use_hint DILITHIUM_NAMESPACE(poly_use_hint) +void poly_use_hint(poly *b, const poly *a, const poly *h); + +#define poly_chknorm DILITHIUM_NAMESPACE(poly_chknorm) +int poly_chknorm(const poly *a, int32_t B); +#define poly_uniform_preinit DILITHIUM_NAMESPACE(poly_uniform_preinit) +void poly_uniform_preinit(poly *a, stream128_state *state); +#define poly_uniform DILITHIUM_NAMESPACE(poly_uniform) +void poly_uniform(poly *a, const uint8_t seed[SEEDBYTES], uint16_t nonce); +#define poly_uniform_eta_preinit DILITHIUM_NAMESPACE(poly_uniform_eta_preinit) +void poly_uniform_eta_preinit(poly *a, stream256_state *state); +#define poly_uniform_eta DILITHIUM_NAMESPACE(poly_uniform_eta) +void poly_uniform_eta(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce); +#define poly_uniform_gamma1_preinit DILITHIUM_NAMESPACE(poly_uniform_gamma1_preinit) +void poly_uniform_gamma1_preinit(poly *a, stream256_state *state); +#define poly_uniform_gamma1 DILITHIUM_NAMESPACE(poly_uniform_gamma1) +void poly_uniform_gamma1(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce); +#define poly_challenge DILITHIUM_NAMESPACE(poly_challenge) +void poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]); + +#define poly_uniform_4x DILITHIUM_NAMESPACE(poly_uniform_4x) +void poly_uniform_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[SEEDBYTES], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3); +#define poly_uniform_eta_4x DILITHIUM_NAMESPACE(poly_uniform_eta_4x) +void poly_uniform_eta_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[CRHBYTES], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3); +#define 
poly_uniform_gamma1_4x DILITHIUM_NAMESPACE(poly_uniform_gamma1_4x) +void poly_uniform_gamma1_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[CRHBYTES], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3); + +#define polyeta_pack DILITHIUM_NAMESPACE(polyeta_pack) +void polyeta_pack(uint8_t r[POLYETA_PACKEDBYTES], const poly *a); +#define polyeta_unpack DILITHIUM_NAMESPACE(polyeta_unpack) +void polyeta_unpack(poly *r, const uint8_t a[POLYETA_PACKEDBYTES]); + +#define polyt1_pack DILITHIUM_NAMESPACE(polyt1_pack) +void polyt1_pack(uint8_t r[POLYT1_PACKEDBYTES], const poly *a); +#define polyt1_unpack DILITHIUM_NAMESPACE(polyt1_unpack) +void polyt1_unpack(poly *r, const uint8_t a[POLYT1_PACKEDBYTES]); + +#define polyt0_pack DILITHIUM_NAMESPACE(polyt0_pack) +void polyt0_pack(uint8_t r[POLYT0_PACKEDBYTES], const poly *a); +#define polyt0_unpack DILITHIUM_NAMESPACE(polyt0_unpack) +void polyt0_unpack(poly *r, const uint8_t a[POLYT0_PACKEDBYTES]); + +#define polyz_pack DILITHIUM_NAMESPACE(polyz_pack) +void polyz_pack(uint8_t r[POLYZ_PACKEDBYTES], const poly *a); +#define polyz_unpack DILITHIUM_NAMESPACE(polyz_unpack) +void polyz_unpack(poly *r, const uint8_t *a); + +#define polyw1_pack DILITHIUM_NAMESPACE(polyw1_pack) +void polyw1_pack(uint8_t *r, const poly *a); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/polyvec.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/polyvec.c new file mode 100644 index 0000000000..6e2302168e --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/polyvec.c @@ -0,0 +1,588 @@ +#include +#include "params.h" +#include "polyvec.h" +#include "poly.h" +#include "ntt.h" +#include "consts.h" + +/************************************************* +* Name: expand_mat +* +* Description: Implementation of ExpandA. 
Generates matrix A with uniformly +* random coefficients a_{i,j} by performing rejection +* sampling on the output stream of SHAKE128(rho|j|i) +* +* Arguments: - polyvecl mat[K]: output matrix +* - const uint8_t rho[]: byte array containing seed rho +**************************************************/ + +#if K == 4 && L == 4 +void polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]) { + polyvec_matrix_expand_row0(&mat[0], NULL, rho); + polyvec_matrix_expand_row1(&mat[1], NULL, rho); + polyvec_matrix_expand_row2(&mat[2], NULL, rho); + polyvec_matrix_expand_row3(&mat[3], NULL, rho); +} + +void polyvec_matrix_expand_row0(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 0, 1, 2, 3); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); +} + +void polyvec_matrix_expand_row1(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 256, 257, 258, 259); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); +} + +void polyvec_matrix_expand_row2(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 512, 513, 514, 515); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); +} + +void polyvec_matrix_expand_row3(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 768, 769, 770, 771); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + 
poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); +} + +#elif K == 6 && L == 5 +void polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]) { + polyvecl tmp; + polyvec_matrix_expand_row0(&mat[0], &mat[1], rho); + polyvec_matrix_expand_row1(&mat[1], &mat[2], rho); + polyvec_matrix_expand_row2(&mat[2], &mat[3], rho); + polyvec_matrix_expand_row3(&mat[3], NULL, rho); + polyvec_matrix_expand_row4(&mat[4], &mat[5], rho); + polyvec_matrix_expand_row5(&mat[5], &tmp, rho); +} + +void polyvec_matrix_expand_row0(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 0, 1, 2, 3); + poly_uniform_4x(&rowa->vec[4], &rowb->vec[0], &rowb->vec[1], &rowb->vec[2], rho, 4, 256, 257, 258); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); + poly_nttunpack(&rowb->vec[2]); +} + +void polyvec_matrix_expand_row1(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[3], &rowa->vec[4], &rowb->vec[0], &rowb->vec[1], rho, 259, 260, 512, 513); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); +} + +void polyvec_matrix_expand_row2(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[2], &rowa->vec[3], &rowa->vec[4], &rowb->vec[0], rho, 514, 515, 516, 768); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowb->vec[0]); +} + +void polyvec_matrix_expand_row3(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[1], &rowa->vec[2], &rowa->vec[3], &rowa->vec[4], rho, 769, 770, 771, 772); + 
poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); +} + +void polyvec_matrix_expand_row4(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 1024, 1025, 1026, 1027); + poly_uniform_4x(&rowa->vec[4], &rowb->vec[0], &rowb->vec[1], &rowb->vec[2], rho, 1028, 1280, 1281, 1282); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); + poly_nttunpack(&rowb->vec[2]); +} + +void polyvec_matrix_expand_row5(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[3], &rowa->vec[4], &rowb->vec[0], &rowb->vec[1], rho, 1283, 1284, 1536, 1537); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); +} + +#elif K == 8 && L == 7 +void polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]) { + polyvec_matrix_expand_row0(&mat[0], &mat[1], rho); + polyvec_matrix_expand_row1(&mat[1], &mat[2], rho); + polyvec_matrix_expand_row2(&mat[2], &mat[3], rho); + polyvec_matrix_expand_row3(&mat[3], NULL, rho); + polyvec_matrix_expand_row4(&mat[4], &mat[5], rho); + polyvec_matrix_expand_row5(&mat[5], &mat[6], rho); + polyvec_matrix_expand_row6(&mat[6], &mat[7], rho); + polyvec_matrix_expand_row7(&mat[7], NULL, rho); +} + +void polyvec_matrix_expand_row0(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 0, 1, 2, 3); + poly_uniform_4x(&rowa->vec[4], &rowa->vec[5], &rowa->vec[6], &rowb->vec[0], rho, 4, 5, 6, 256); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + 
poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); + poly_nttunpack(&rowb->vec[0]); +} + +void polyvec_matrix_expand_row1(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[1], &rowa->vec[2], &rowa->vec[3], &rowa->vec[4], rho, 257, 258, 259, 260); + poly_uniform_4x(&rowa->vec[5], &rowa->vec[6], &rowb->vec[0], &rowb->vec[1], rho, 261, 262, 512, 513); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); +} + +void polyvec_matrix_expand_row2(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[2], &rowa->vec[3], &rowa->vec[4], &rowa->vec[5], rho, 514, 515, 516, 517); + poly_uniform_4x(&rowa->vec[6], &rowb->vec[0], &rowb->vec[1], &rowb->vec[2], rho, 518, 768, 769, 770); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); + poly_nttunpack(&rowb->vec[2]); +} + +void polyvec_matrix_expand_row3(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[3], &rowa->vec[4], &rowa->vec[5], &rowa->vec[6], rho, 771, 772, 773, 774); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); +} + +void polyvec_matrix_expand_row4(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 1024, 1025, 1026, 1027); + poly_uniform_4x(&rowa->vec[4], &rowa->vec[5], &rowa->vec[6], &rowb->vec[0], rho, 1028, 1029, 1030, 1280); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + 
poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); + poly_nttunpack(&rowb->vec[0]); +} + +void polyvec_matrix_expand_row5(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[1], &rowa->vec[2], &rowa->vec[3], &rowa->vec[4], rho, 1281, 1282, 1283, 1284); + poly_uniform_4x(&rowa->vec[5], &rowa->vec[6], &rowb->vec[0], &rowb->vec[1], rho, 1285, 1286, 1536, 1537); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); +} + +void polyvec_matrix_expand_row6(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[2], &rowa->vec[3], &rowa->vec[4], &rowa->vec[5], rho, 1538, 1539, 1540, 1541); + poly_uniform_4x(&rowa->vec[6], &rowb->vec[0], &rowb->vec[1], &rowb->vec[2], rho, 1542, 1792, 1793, 1794); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); + poly_nttunpack(&rowb->vec[2]); +} + +void polyvec_matrix_expand_row7(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[3], &rowa->vec[4], &rowa->vec[5], &rowa->vec[6], rho, 1795, 1796, 1797, 1798); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); +} + +#else +#error +#endif + +void polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + polyvecl_pointwise_acc_montgomery(&t->vec[i], &mat[i], v); +} + 
+/**************************************************************/ +/************ Vectors of polynomials of length L **************/ +/**************************************************************/ + +void polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_uniform_eta(&v->vec[i], seed, nonce++); +} + +void polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_uniform_gamma1(&v->vec[i], seed, L*nonce + i); +} + +void polyvecl_reduce(polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_reduce(&v->vec[i]); +} + +/************************************************* +* Name: polyvecl_add +* +* Description: Add vectors of polynomials of length L. +* No modular reduction is performed. +* +* Arguments: - polyvecl *w: pointer to output vector +* - const polyvecl *u: pointer to first summand +* - const polyvecl *v: pointer to second summand +**************************************************/ +void polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyvecl_ntt +* +* Description: Forward NTT of all polynomials in vector of length L. Output +* coefficients can be up to 16*Q larger than input coefficients. 
+* +* Arguments: - polyvecl *v: pointer to input/output vector +**************************************************/ +void polyvecl_ntt(polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_ntt(&v->vec[i]); +} + +void polyvecl_invntt_tomont(polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_invntt_tomont(&v->vec[i]); +} + +void polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]); +} + +/************************************************* +* Name: polyvecl_pointwise_acc_montgomery +* +* Description: Pointwise multiply vectors of polynomials of length L, multiply +* resulting vector by 2^{-32} and add (accumulate) polynomials +* in it. Input/output vectors are in NTT domain representation. +* +* Arguments: - poly *w: output polynomial +* - const polyvecl *u: pointer to first input vector +* - const polyvecl *v: pointer to second input vector +**************************************************/ +void polyvecl_pointwise_acc_montgomery(poly *w, const polyvecl *u, const polyvecl *v) { + pointwise_acc_avx(w->vec, u->vec->vec, v->vec->vec, qdata.vec); +} + +/************************************************* +* Name: polyvecl_chknorm +* +* Description: Check infinity norm of polynomials in vector of length L. +* Assumes input polyvecl to be reduced by polyvecl_reduce(). +* +* Arguments: - const polyvecl *v: pointer to vector +* - int32_t B: norm bound +* +* Returns 0 if norm of all polynomials is strictly smaller than B <= (Q-1)/8 +* and 1 otherwise. 
+**************************************************/ +int polyvecl_chknorm(const polyvecl *v, int32_t bound) { + unsigned int i; + + for(i = 0; i < L; ++i) + if(poly_chknorm(&v->vec[i], bound)) + return 1; + + return 0; +} + +/**************************************************************/ +/************ Vectors of polynomials of length K **************/ +/**************************************************************/ + +void polyveck_uniform_eta(polyveck *v, const uint8_t seed[CRHBYTES], uint16_t nonce) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_uniform_eta(&v->vec[i], seed, nonce++); +} + +/************************************************* +* Name: polyveck_reduce +* +* Description: Reduce coefficients of polynomials in vector of length K +* to representatives in [-6283009,6283007]. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_reduce(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_reduce(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_caddq +* +* Description: For all coefficients of polynomials in vector of length K +* add Q if coefficient is negative. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_caddq(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_caddq(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_add +* +* Description: Add vectors of polynomials of length K. +* No modular reduction is performed. 
+* +* Arguments: - polyveck *w: pointer to output vector +* - const polyveck *u: pointer to first summand +* - const polyveck *v: pointer to second summand +**************************************************/ +void polyveck_add(polyveck *w, const polyveck *u, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_sub +* +* Description: Subtract vectors of polynomials of length K. +* No modular reduction is performed. +* +* Arguments: - polyveck *w: pointer to output vector +* - const polyveck *u: pointer to first input vector +* - const polyveck *v: pointer to second input vector to be +* subtracted from first input vector +**************************************************/ +void polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_sub(&w->vec[i], &u->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_shiftl +* +* Description: Multiply vector of polynomials of Length K by 2^D without modular +* reduction. Assumes input coefficients to be less than 2^{31-D}. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_shiftl(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_shiftl(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_ntt +* +* Description: Forward NTT of all polynomials in vector of length K. Output +* coefficients can be up to 16*Q larger than input coefficients. 
+* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_ntt(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_ntt(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_invntt_tomont +* +* Description: Inverse NTT and multiplication by 2^{32} of polynomials +* in vector of length K. Input coefficients need to be less +* than 2*Q. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_invntt_tomont(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_invntt_tomont(&v->vec[i]); +} + +void polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]); +} + +/************************************************* +* Name: polyveck_chknorm +* +* Description: Check infinity norm of polynomials in vector of length K. +* Assumes input polyveck to be reduced by polyveck_reduce(). +* +* Arguments: - const polyveck *v: pointer to vector +* - int32_t B: norm bound +* +* Returns 0 if norm of all polynomials are strictly smaller than B <= (Q-1)/8 +* and 1 otherwise. +**************************************************/ +int polyveck_chknorm(const polyveck *v, int32_t bound) { + unsigned int i; + + for(i = 0; i < K; ++i) + if(poly_chknorm(&v->vec[i], bound)) + return 1; + + return 0; +} + +/************************************************* +* Name: polyveck_power2round +* +* Description: For all coefficients a of polynomials in vector of length K, +* compute a0, a1 such that a mod^+ Q = a1*2^D + a0 +* with -2^{D-1} < a0 <= 2^{D-1}. Assumes coefficients to be +* standard representatives. 
+* +* Arguments: - polyveck *v1: pointer to output vector of polynomials with +* coefficients a1 +* - polyveck *v0: pointer to output vector of polynomials with +* coefficients a0 +* - const polyveck *v: pointer to input vector +**************************************************/ +void polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_power2round(&v1->vec[i], &v0->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_decompose +* +* Description: For all coefficients a of polynomials in vector of length K, +* compute high and low bits a0, a1 such a mod^+ Q = a1*ALPHA + a0 +* with -ALPHA/2 < a0 <= ALPHA/2 except a1 = (Q-1)/ALPHA where we +* set a1 = 0 and -ALPHA/2 <= a0 = a mod Q - Q < 0. +* Assumes coefficients to be standard representatives. +* +* Arguments: - polyveck *v1: pointer to output vector of polynomials with +* coefficients a1 +* - polyveck *v0: pointer to output vector of polynomials with +* coefficients a0 +* - const polyveck *v: pointer to input vector +**************************************************/ +void polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_decompose(&v1->vec[i], &v0->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_make_hint +* +* Description: Compute hint vector. +* +* Arguments: - uint8_t *hint: pointer to output hint array +* - const polyveck *v0: pointer to low part of input vector +* - const polyveck *v1: pointer to high part of input vector +* +* Returns number of 1 bits. 
+**************************************************/ +unsigned int polyveck_make_hint(uint8_t *hint, const polyveck *v0, const polyveck *v1) +{ + unsigned int i, n = 0; + + for(i = 0; i < K; ++i) + n += poly_make_hint(&hint[n], &v0->vec[i], &v1->vec[i]); + + return n; +} + +/************************************************* +* Name: polyveck_use_hint +* +* Description: Use hint vector to correct the high bits of input vector. +* +* Arguments: - polyveck *w: pointer to output vector of polynomials with +* corrected high bits +* - const polyveck *u: pointer to input vector +* - const polyveck *h: pointer to input hint vector +**************************************************/ +void polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_use_hint(&w->vec[i], &u->vec[i], &h->vec[i]); +} + +void polyveck_pack_w1(uint8_t r[K*POLYW1_PACKEDBYTES], const polyveck *w1) { + unsigned int i; + + for(i = 0; i < K; ++i) + polyw1_pack(&r[i*POLYW1_PACKEDBYTES], &w1->vec[i]); +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/polyvec.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/polyvec.h new file mode 100644 index 0000000000..1b6dc87ac6 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/polyvec.h @@ -0,0 +1,105 @@ +#ifndef POLYVEC_H +#define POLYVEC_H + +#include +#include "params.h" +#include "poly.h" + +/* Vectors of polynomials of length L */ +typedef struct { + poly vec[L]; +} polyvecl; + +#define polyvecl_uniform_eta DILITHIUM_NAMESPACE(polyvecl_uniform_eta) +void polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define polyvecl_uniform_gamma1 DILITHIUM_NAMESPACE(polyvecl_uniform_gamma1) +void polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define polyvecl_reduce DILITHIUM_NAMESPACE(polyvecl_reduce) +void polyvecl_reduce(polyvecl *v); + +#define 
polyvecl_add DILITHIUM_NAMESPACE(polyvecl_add) +void polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v); + +#define polyvecl_ntt DILITHIUM_NAMESPACE(polyvecl_ntt) +void polyvecl_ntt(polyvecl *v); +#define polyvecl_invntt_tomont DILITHIUM_NAMESPACE(polyvecl_invntt_tomont) +void polyvecl_invntt_tomont(polyvecl *v); +#define polyvecl_pointwise_poly_montgomery DILITHIUM_NAMESPACE(polyvecl_pointwise_poly_montgomery) +void polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v); +#define polyvecl_pointwise_acc_montgomery \ + DILITHIUM_NAMESPACE(polyvecl_pointwise_acc_montgomery) +void polyvecl_pointwise_acc_montgomery(poly *w, + const polyvecl *u, + const polyvecl *v); + +#define polyvecl_chknorm DILITHIUM_NAMESPACE(polyvecl_chknorm) +int polyvecl_chknorm(const polyvecl *v, int32_t B); + +/* Vectors of polynomials of length K */ +typedef struct { + poly vec[K]; +} polyveck; + +#define polyveck_uniform_eta DILITHIUM_NAMESPACE(polyveck_uniform_eta) +void polyveck_uniform_eta(polyveck *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define polyveck_reduce DILITHIUM_NAMESPACE(polyveck_reduce) +void polyveck_reduce(polyveck *v); +#define polyveck_caddq DILITHIUM_NAMESPACE(polyveck_caddq) +void polyveck_caddq(polyveck *v); + +#define polyveck_add DILITHIUM_NAMESPACE(polyveck_add) +void polyveck_add(polyveck *w, const polyveck *u, const polyveck *v); +#define polyveck_sub DILITHIUM_NAMESPACE(polyveck_sub) +void polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v); +#define polyveck_shiftl DILITHIUM_NAMESPACE(polyveck_shiftl) +void polyveck_shiftl(polyveck *v); + +#define polyveck_ntt DILITHIUM_NAMESPACE(polyveck_ntt) +void polyveck_ntt(polyveck *v); +#define polyveck_invntt_tomont DILITHIUM_NAMESPACE(polyveck_invntt_tomont) +void polyveck_invntt_tomont(polyveck *v); +#define polyveck_pointwise_poly_montgomery DILITHIUM_NAMESPACE(polyveck_pointwise_poly_montgomery) +void polyveck_pointwise_poly_montgomery(polyveck *r, const 
poly *a, const polyveck *v); + +#define polyveck_chknorm DILITHIUM_NAMESPACE(polyveck_chknorm) +int polyveck_chknorm(const polyveck *v, int32_t B); + +#define polyveck_power2round DILITHIUM_NAMESPACE(polyveck_power2round) +void polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v); +#define polyveck_decompose DILITHIUM_NAMESPACE(polyveck_decompose) +void polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v); +#define polyveck_make_hint DILITHIUM_NAMESPACE(polyveck_make_hint) +unsigned int polyveck_make_hint(uint8_t *hint, const polyveck *v0, const polyveck *v1); +#define polyveck_use_hint DILITHIUM_NAMESPACE(polyveck_use_hint) +void polyveck_use_hint(polyveck *w, const polyveck *v, const polyveck *h); + +#define polyveck_pack_w1 DILITHIUM_NAMESPACE(polyveck_pack_w1) +void polyveck_pack_w1(uint8_t r[K*POLYW1_PACKEDBYTES], const polyveck *w1); + +#define polyvec_matrix_expand DILITHIUM_NAMESPACE(polyvec_matrix_expand) +void polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]); + +#define polyvec_matrix_expand_row0 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row0) +void polyvec_matrix_expand_row0(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row1 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row1) +void polyvec_matrix_expand_row1(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row2 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row2) +void polyvec_matrix_expand_row2(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row3 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row3) +void polyvec_matrix_expand_row3(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row4 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row4) +void polyvec_matrix_expand_row4(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row5 
DILITHIUM_NAMESPACE(polyvec_matrix_expand_row5) +void polyvec_matrix_expand_row5(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row6 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row6) +void polyvec_matrix_expand_row6(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row7 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row7) +void polyvec_matrix_expand_row7(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); + +#define polyvec_matrix_pointwise_montgomery DILITHIUM_NAMESPACE(polyvec_matrix_pointwise_montgomery) +void polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/rejsample.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/rejsample.c new file mode 100644 index 0000000000..8b1dde4440 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/rejsample.c @@ -0,0 +1,476 @@ +#include +#include +#include "params.h" +#include "rejsample.h" +#include "symmetric.h" + +const uint8_t idxlut[256][8] = { + { 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0}, + { 1, 0, 0, 0, 0, 0, 0, 0}, + { 0, 1, 0, 0, 0, 0, 0, 0}, + { 2, 0, 0, 0, 0, 0, 0, 0}, + { 0, 2, 0, 0, 0, 0, 0, 0}, + { 1, 2, 0, 0, 0, 0, 0, 0}, + { 0, 1, 2, 0, 0, 0, 0, 0}, + { 3, 0, 0, 0, 0, 0, 0, 0}, + { 0, 3, 0, 0, 0, 0, 0, 0}, + { 1, 3, 0, 0, 0, 0, 0, 0}, + { 0, 1, 3, 0, 0, 0, 0, 0}, + { 2, 3, 0, 0, 0, 0, 0, 0}, + { 0, 2, 3, 0, 0, 0, 0, 0}, + { 1, 2, 3, 0, 0, 0, 0, 0}, + { 0, 1, 2, 3, 0, 0, 0, 0}, + { 4, 0, 0, 0, 0, 0, 0, 0}, + { 0, 4, 0, 0, 0, 0, 0, 0}, + { 1, 4, 0, 0, 0, 0, 0, 0}, + { 0, 1, 4, 0, 0, 0, 0, 0}, + { 2, 4, 0, 0, 0, 0, 0, 0}, + { 0, 2, 4, 0, 0, 0, 0, 0}, + { 1, 2, 4, 0, 0, 0, 0, 0}, + { 0, 1, 2, 4, 0, 0, 0, 0}, + { 3, 4, 0, 0, 0, 0, 0, 0}, + { 0, 3, 4, 0, 0, 0, 0, 0}, + { 1, 3, 4, 0, 0, 0, 0, 0}, + { 0, 1, 3, 4, 0, 0, 0, 0}, + { 2, 3, 4, 0, 
0, 0, 0, 0}, + { 0, 2, 3, 4, 0, 0, 0, 0}, + { 1, 2, 3, 4, 0, 0, 0, 0}, + { 0, 1, 2, 3, 4, 0, 0, 0}, + { 5, 0, 0, 0, 0, 0, 0, 0}, + { 0, 5, 0, 0, 0, 0, 0, 0}, + { 1, 5, 0, 0, 0, 0, 0, 0}, + { 0, 1, 5, 0, 0, 0, 0, 0}, + { 2, 5, 0, 0, 0, 0, 0, 0}, + { 0, 2, 5, 0, 0, 0, 0, 0}, + { 1, 2, 5, 0, 0, 0, 0, 0}, + { 0, 1, 2, 5, 0, 0, 0, 0}, + { 3, 5, 0, 0, 0, 0, 0, 0}, + { 0, 3, 5, 0, 0, 0, 0, 0}, + { 1, 3, 5, 0, 0, 0, 0, 0}, + { 0, 1, 3, 5, 0, 0, 0, 0}, + { 2, 3, 5, 0, 0, 0, 0, 0}, + { 0, 2, 3, 5, 0, 0, 0, 0}, + { 1, 2, 3, 5, 0, 0, 0, 0}, + { 0, 1, 2, 3, 5, 0, 0, 0}, + { 4, 5, 0, 0, 0, 0, 0, 0}, + { 0, 4, 5, 0, 0, 0, 0, 0}, + { 1, 4, 5, 0, 0, 0, 0, 0}, + { 0, 1, 4, 5, 0, 0, 0, 0}, + { 2, 4, 5, 0, 0, 0, 0, 0}, + { 0, 2, 4, 5, 0, 0, 0, 0}, + { 1, 2, 4, 5, 0, 0, 0, 0}, + { 0, 1, 2, 4, 5, 0, 0, 0}, + { 3, 4, 5, 0, 0, 0, 0, 0}, + { 0, 3, 4, 5, 0, 0, 0, 0}, + { 1, 3, 4, 5, 0, 0, 0, 0}, + { 0, 1, 3, 4, 5, 0, 0, 0}, + { 2, 3, 4, 5, 0, 0, 0, 0}, + { 0, 2, 3, 4, 5, 0, 0, 0}, + { 1, 2, 3, 4, 5, 0, 0, 0}, + { 0, 1, 2, 3, 4, 5, 0, 0}, + { 6, 0, 0, 0, 0, 0, 0, 0}, + { 0, 6, 0, 0, 0, 0, 0, 0}, + { 1, 6, 0, 0, 0, 0, 0, 0}, + { 0, 1, 6, 0, 0, 0, 0, 0}, + { 2, 6, 0, 0, 0, 0, 0, 0}, + { 0, 2, 6, 0, 0, 0, 0, 0}, + { 1, 2, 6, 0, 0, 0, 0, 0}, + { 0, 1, 2, 6, 0, 0, 0, 0}, + { 3, 6, 0, 0, 0, 0, 0, 0}, + { 0, 3, 6, 0, 0, 0, 0, 0}, + { 1, 3, 6, 0, 0, 0, 0, 0}, + { 0, 1, 3, 6, 0, 0, 0, 0}, + { 2, 3, 6, 0, 0, 0, 0, 0}, + { 0, 2, 3, 6, 0, 0, 0, 0}, + { 1, 2, 3, 6, 0, 0, 0, 0}, + { 0, 1, 2, 3, 6, 0, 0, 0}, + { 4, 6, 0, 0, 0, 0, 0, 0}, + { 0, 4, 6, 0, 0, 0, 0, 0}, + { 1, 4, 6, 0, 0, 0, 0, 0}, + { 0, 1, 4, 6, 0, 0, 0, 0}, + { 2, 4, 6, 0, 0, 0, 0, 0}, + { 0, 2, 4, 6, 0, 0, 0, 0}, + { 1, 2, 4, 6, 0, 0, 0, 0}, + { 0, 1, 2, 4, 6, 0, 0, 0}, + { 3, 4, 6, 0, 0, 0, 0, 0}, + { 0, 3, 4, 6, 0, 0, 0, 0}, + { 1, 3, 4, 6, 0, 0, 0, 0}, + { 0, 1, 3, 4, 6, 0, 0, 0}, + { 2, 3, 4, 6, 0, 0, 0, 0}, + { 0, 2, 3, 4, 6, 0, 0, 0}, + { 1, 2, 3, 4, 6, 0, 0, 0}, + { 0, 1, 2, 3, 4, 6, 0, 0}, + { 5, 6, 0, 0, 0, 0, 0, 0}, + { 0, 5, 6, 
0, 0, 0, 0, 0}, + { 1, 5, 6, 0, 0, 0, 0, 0}, + { 0, 1, 5, 6, 0, 0, 0, 0}, + { 2, 5, 6, 0, 0, 0, 0, 0}, + { 0, 2, 5, 6, 0, 0, 0, 0}, + { 1, 2, 5, 6, 0, 0, 0, 0}, + { 0, 1, 2, 5, 6, 0, 0, 0}, + { 3, 5, 6, 0, 0, 0, 0, 0}, + { 0, 3, 5, 6, 0, 0, 0, 0}, + { 1, 3, 5, 6, 0, 0, 0, 0}, + { 0, 1, 3, 5, 6, 0, 0, 0}, + { 2, 3, 5, 6, 0, 0, 0, 0}, + { 0, 2, 3, 5, 6, 0, 0, 0}, + { 1, 2, 3, 5, 6, 0, 0, 0}, + { 0, 1, 2, 3, 5, 6, 0, 0}, + { 4, 5, 6, 0, 0, 0, 0, 0}, + { 0, 4, 5, 6, 0, 0, 0, 0}, + { 1, 4, 5, 6, 0, 0, 0, 0}, + { 0, 1, 4, 5, 6, 0, 0, 0}, + { 2, 4, 5, 6, 0, 0, 0, 0}, + { 0, 2, 4, 5, 6, 0, 0, 0}, + { 1, 2, 4, 5, 6, 0, 0, 0}, + { 0, 1, 2, 4, 5, 6, 0, 0}, + { 3, 4, 5, 6, 0, 0, 0, 0}, + { 0, 3, 4, 5, 6, 0, 0, 0}, + { 1, 3, 4, 5, 6, 0, 0, 0}, + { 0, 1, 3, 4, 5, 6, 0, 0}, + { 2, 3, 4, 5, 6, 0, 0, 0}, + { 0, 2, 3, 4, 5, 6, 0, 0}, + { 1, 2, 3, 4, 5, 6, 0, 0}, + { 0, 1, 2, 3, 4, 5, 6, 0}, + { 7, 0, 0, 0, 0, 0, 0, 0}, + { 0, 7, 0, 0, 0, 0, 0, 0}, + { 1, 7, 0, 0, 0, 0, 0, 0}, + { 0, 1, 7, 0, 0, 0, 0, 0}, + { 2, 7, 0, 0, 0, 0, 0, 0}, + { 0, 2, 7, 0, 0, 0, 0, 0}, + { 1, 2, 7, 0, 0, 0, 0, 0}, + { 0, 1, 2, 7, 0, 0, 0, 0}, + { 3, 7, 0, 0, 0, 0, 0, 0}, + { 0, 3, 7, 0, 0, 0, 0, 0}, + { 1, 3, 7, 0, 0, 0, 0, 0}, + { 0, 1, 3, 7, 0, 0, 0, 0}, + { 2, 3, 7, 0, 0, 0, 0, 0}, + { 0, 2, 3, 7, 0, 0, 0, 0}, + { 1, 2, 3, 7, 0, 0, 0, 0}, + { 0, 1, 2, 3, 7, 0, 0, 0}, + { 4, 7, 0, 0, 0, 0, 0, 0}, + { 0, 4, 7, 0, 0, 0, 0, 0}, + { 1, 4, 7, 0, 0, 0, 0, 0}, + { 0, 1, 4, 7, 0, 0, 0, 0}, + { 2, 4, 7, 0, 0, 0, 0, 0}, + { 0, 2, 4, 7, 0, 0, 0, 0}, + { 1, 2, 4, 7, 0, 0, 0, 0}, + { 0, 1, 2, 4, 7, 0, 0, 0}, + { 3, 4, 7, 0, 0, 0, 0, 0}, + { 0, 3, 4, 7, 0, 0, 0, 0}, + { 1, 3, 4, 7, 0, 0, 0, 0}, + { 0, 1, 3, 4, 7, 0, 0, 0}, + { 2, 3, 4, 7, 0, 0, 0, 0}, + { 0, 2, 3, 4, 7, 0, 0, 0}, + { 1, 2, 3, 4, 7, 0, 0, 0}, + { 0, 1, 2, 3, 4, 7, 0, 0}, + { 5, 7, 0, 0, 0, 0, 0, 0}, + { 0, 5, 7, 0, 0, 0, 0, 0}, + { 1, 5, 7, 0, 0, 0, 0, 0}, + { 0, 1, 5, 7, 0, 0, 0, 0}, + { 2, 5, 7, 0, 0, 0, 0, 0}, + { 0, 2, 5, 7, 0, 0, 0, 0}, + { 1, 2, 
5, 7, 0, 0, 0, 0}, + { 0, 1, 2, 5, 7, 0, 0, 0}, + { 3, 5, 7, 0, 0, 0, 0, 0}, + { 0, 3, 5, 7, 0, 0, 0, 0}, + { 1, 3, 5, 7, 0, 0, 0, 0}, + { 0, 1, 3, 5, 7, 0, 0, 0}, + { 2, 3, 5, 7, 0, 0, 0, 0}, + { 0, 2, 3, 5, 7, 0, 0, 0}, + { 1, 2, 3, 5, 7, 0, 0, 0}, + { 0, 1, 2, 3, 5, 7, 0, 0}, + { 4, 5, 7, 0, 0, 0, 0, 0}, + { 0, 4, 5, 7, 0, 0, 0, 0}, + { 1, 4, 5, 7, 0, 0, 0, 0}, + { 0, 1, 4, 5, 7, 0, 0, 0}, + { 2, 4, 5, 7, 0, 0, 0, 0}, + { 0, 2, 4, 5, 7, 0, 0, 0}, + { 1, 2, 4, 5, 7, 0, 0, 0}, + { 0, 1, 2, 4, 5, 7, 0, 0}, + { 3, 4, 5, 7, 0, 0, 0, 0}, + { 0, 3, 4, 5, 7, 0, 0, 0}, + { 1, 3, 4, 5, 7, 0, 0, 0}, + { 0, 1, 3, 4, 5, 7, 0, 0}, + { 2, 3, 4, 5, 7, 0, 0, 0}, + { 0, 2, 3, 4, 5, 7, 0, 0}, + { 1, 2, 3, 4, 5, 7, 0, 0}, + { 0, 1, 2, 3, 4, 5, 7, 0}, + { 6, 7, 0, 0, 0, 0, 0, 0}, + { 0, 6, 7, 0, 0, 0, 0, 0}, + { 1, 6, 7, 0, 0, 0, 0, 0}, + { 0, 1, 6, 7, 0, 0, 0, 0}, + { 2, 6, 7, 0, 0, 0, 0, 0}, + { 0, 2, 6, 7, 0, 0, 0, 0}, + { 1, 2, 6, 7, 0, 0, 0, 0}, + { 0, 1, 2, 6, 7, 0, 0, 0}, + { 3, 6, 7, 0, 0, 0, 0, 0}, + { 0, 3, 6, 7, 0, 0, 0, 0}, + { 1, 3, 6, 7, 0, 0, 0, 0}, + { 0, 1, 3, 6, 7, 0, 0, 0}, + { 2, 3, 6, 7, 0, 0, 0, 0}, + { 0, 2, 3, 6, 7, 0, 0, 0}, + { 1, 2, 3, 6, 7, 0, 0, 0}, + { 0, 1, 2, 3, 6, 7, 0, 0}, + { 4, 6, 7, 0, 0, 0, 0, 0}, + { 0, 4, 6, 7, 0, 0, 0, 0}, + { 1, 4, 6, 7, 0, 0, 0, 0}, + { 0, 1, 4, 6, 7, 0, 0, 0}, + { 2, 4, 6, 7, 0, 0, 0, 0}, + { 0, 2, 4, 6, 7, 0, 0, 0}, + { 1, 2, 4, 6, 7, 0, 0, 0}, + { 0, 1, 2, 4, 6, 7, 0, 0}, + { 3, 4, 6, 7, 0, 0, 0, 0}, + { 0, 3, 4, 6, 7, 0, 0, 0}, + { 1, 3, 4, 6, 7, 0, 0, 0}, + { 0, 1, 3, 4, 6, 7, 0, 0}, + { 2, 3, 4, 6, 7, 0, 0, 0}, + { 0, 2, 3, 4, 6, 7, 0, 0}, + { 1, 2, 3, 4, 6, 7, 0, 0}, + { 0, 1, 2, 3, 4, 6, 7, 0}, + { 5, 6, 7, 0, 0, 0, 0, 0}, + { 0, 5, 6, 7, 0, 0, 0, 0}, + { 1, 5, 6, 7, 0, 0, 0, 0}, + { 0, 1, 5, 6, 7, 0, 0, 0}, + { 2, 5, 6, 7, 0, 0, 0, 0}, + { 0, 2, 5, 6, 7, 0, 0, 0}, + { 1, 2, 5, 6, 7, 0, 0, 0}, + { 0, 1, 2, 5, 6, 7, 0, 0}, + { 3, 5, 6, 7, 0, 0, 0, 0}, + { 0, 3, 5, 6, 7, 0, 0, 0}, + { 1, 3, 5, 6, 7, 0, 0, 0}, + { 0, 
1, 3, 5, 6, 7, 0, 0}, + { 2, 3, 5, 6, 7, 0, 0, 0}, + { 0, 2, 3, 5, 6, 7, 0, 0}, + { 1, 2, 3, 5, 6, 7, 0, 0}, + { 0, 1, 2, 3, 5, 6, 7, 0}, + { 4, 5, 6, 7, 0, 0, 0, 0}, + { 0, 4, 5, 6, 7, 0, 0, 0}, + { 1, 4, 5, 6, 7, 0, 0, 0}, + { 0, 1, 4, 5, 6, 7, 0, 0}, + { 2, 4, 5, 6, 7, 0, 0, 0}, + { 0, 2, 4, 5, 6, 7, 0, 0}, + { 1, 2, 4, 5, 6, 7, 0, 0}, + { 0, 1, 2, 4, 5, 6, 7, 0}, + { 3, 4, 5, 6, 7, 0, 0, 0}, + { 0, 3, 4, 5, 6, 7, 0, 0}, + { 1, 3, 4, 5, 6, 7, 0, 0}, + { 0, 1, 3, 4, 5, 6, 7, 0}, + { 2, 3, 4, 5, 6, 7, 0, 0}, + { 0, 2, 3, 4, 5, 6, 7, 0}, + { 1, 2, 3, 4, 5, 6, 7, 0}, + { 0, 1, 2, 3, 4, 5, 6, 7} +}; + +unsigned int rej_uniform_avx(int32_t * restrict r, const uint8_t buf[REJ_UNIFORM_BUFLEN+8]) +{ + unsigned int ctr, pos; + uint32_t good; + __m256i d, tmp; + const __m256i bound = _mm256_set1_epi32(Q); + const __m256i mask = _mm256_set1_epi32(0x7FFFFF); + const __m256i idx8 = _mm256_set_epi8(-1,15,14,13,-1,12,11,10, + -1, 9, 8, 7,-1, 6, 5, 4, + -1,11,10, 9,-1, 8, 7, 6, + -1, 5, 4, 3,-1, 2, 1, 0); + + ctr = pos = 0; + while(pos <= REJ_UNIFORM_BUFLEN - 24) { + d = _mm256_loadu_si256((__m256i *)&buf[pos]); + d = _mm256_permute4x64_epi64(d, 0x94); + d = _mm256_shuffle_epi8(d, idx8); + d = _mm256_and_si256(d, mask); + pos += 24; + + tmp = _mm256_sub_epi32(d, bound); + good = _mm256_movemask_ps((__m256)tmp); + tmp = _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i *)&idxlut[good])); + d = _mm256_permutevar8x32_epi32(d, tmp); + + _mm256_storeu_si256((__m256i *)&r[ctr], d); + ctr += _mm_popcnt_u32(good); + + if(ctr > N - 8) break; + } + + uint32_t t; + while(ctr < N && pos <= REJ_UNIFORM_BUFLEN - 3) { + t = buf[pos++]; + t |= (uint32_t)buf[pos++] << 8; + t |= (uint32_t)buf[pos++] << 16; + t &= 0x7FFFFF; + + if(t < Q) + r[ctr++] = t; + } + + return ctr; +} + +#if ETA == 2 +unsigned int rej_eta_avx(int32_t * restrict r, const uint8_t buf[REJ_UNIFORM_ETA_BUFLEN]) { + unsigned int ctr, pos; + uint32_t good; + __m256i f0, f1, f2; + __m128i g0, g1; + const __m256i mask = 
_mm256_set1_epi8(15); + const __m256i eta = _mm256_set1_epi8(ETA); + const __m256i bound = mask; + const __m256i v = _mm256_set1_epi32(-6560); + const __m256i p = _mm256_set1_epi32(5); + + ctr = pos = 0; + while(ctr <= N - 8 && pos <= REJ_UNIFORM_ETA_BUFLEN - 16) { + f0 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i *)&buf[pos])); + f1 = _mm256_slli_epi16(f0,4); + f0 = _mm256_or_si256(f0,f1); + f0 = _mm256_and_si256(f0,mask); + + f1 = _mm256_sub_epi8(f0,bound); + f0 = _mm256_sub_epi8(eta,f0); + good = _mm256_movemask_epi8(f1); + + g0 = _mm256_castsi256_si128(f0); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + f2 = _mm256_mulhrs_epi16(f1,v); + f2 = _mm256_mullo_epi16(f2,p); + f1 = _mm256_add_epi32(f1,f2); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if(ctr > N - 8) break; + g0 = _mm_bsrli_si128(g0,8); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + f2 = _mm256_mulhrs_epi16(f1,v); + f2 = _mm256_mullo_epi16(f2,p); + f1 = _mm256_add_epi32(f1,f2); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if(ctr > N - 8) break; + g0 = _mm256_extracti128_si256(f0,1); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + f2 = _mm256_mulhrs_epi16(f1,v); + f2 = _mm256_mullo_epi16(f2,p); + f1 = _mm256_add_epi32(f1,f2); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if(ctr > N - 8) break; + g0 = _mm_bsrli_si128(g0,8); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + f2 = _mm256_mulhrs_epi16(f1,v); + f2 = _mm256_mullo_epi16(f2,p); + f1 = _mm256_add_epi32(f1,f2); + _mm256_storeu_si256((__m256i 
*)&r[ctr],f1); + ctr += _mm_popcnt_u32(good); + pos += 4; + } + + uint32_t t0, t1; + while(ctr < N && pos < REJ_UNIFORM_ETA_BUFLEN) { + t0 = buf[pos] & 0x0F; + t1 = buf[pos++] >> 4; + + if(t0 < 15) { + t0 = t0 - (205*t0 >> 10)*5; + r[ctr++] = 2 - t0; + } + if(t1 < 15 && ctr < N) { + t1 = t1 - (205*t1 >> 10)*5; + r[ctr++] = 2 - t1; + } + } + + return ctr; +} + +#elif ETA == 4 +unsigned int rej_eta_avx(int32_t * restrict r, const uint8_t buf[REJ_UNIFORM_ETA_BUFLEN]) { + unsigned int ctr, pos; + uint32_t good; + __m256i f0, f1; + __m128i g0, g1; + const __m256i mask = _mm256_set1_epi8(15); + const __m256i eta = _mm256_set1_epi8(4); + const __m256i bound = _mm256_set1_epi8(9); + + ctr = pos = 0; + while(ctr <= N - 8 && pos <= REJ_UNIFORM_ETA_BUFLEN - 16) { + f0 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i *)&buf[pos])); + f1 = _mm256_slli_epi16(f0,4); + f0 = _mm256_or_si256(f0,f1); + f0 = _mm256_and_si256(f0,mask); + + f1 = _mm256_sub_epi8(f0,bound); + f0 = _mm256_sub_epi8(eta,f0); + good = _mm256_movemask_epi8(f1); + + g0 = _mm256_castsi256_si128(f0); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if(ctr > N - 8) break; + g0 = _mm_bsrli_si128(g0,8); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if(ctr > N - 8) break; + g0 = _mm256_extracti128_si256(f0,1); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if(ctr > N - 8) break; + g0 = _mm_bsrli_si128(g0,8); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good]); + g1 
= _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good); + pos += 4; + } + + uint32_t t0, t1; + while(ctr < N && pos < REJ_UNIFORM_ETA_BUFLEN) { + t0 = buf[pos] & 0x0F; + t1 = buf[pos++] >> 4; + + if(t0 < 9) + r[ctr++] = 4 - t0; + if(t1 < 9 && ctr < N) + r[ctr++] = 4 - t1; + } + + return ctr; +} +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/rejsample.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/rejsample.h new file mode 100644 index 0000000000..61f3f357a5 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/rejsample.h @@ -0,0 +1,28 @@ +#ifndef REJSAMPLE_H +#define REJSAMPLE_H + +#include +#include "params.h" +#include "symmetric.h" + +#define REJ_UNIFORM_NBLOCKS ((768+STREAM128_BLOCKBYTES-1)/STREAM128_BLOCKBYTES) +#define REJ_UNIFORM_BUFLEN (REJ_UNIFORM_NBLOCKS*STREAM128_BLOCKBYTES) + +#if ETA == 2 +#define REJ_UNIFORM_ETA_NBLOCKS ((136+STREAM256_BLOCKBYTES-1)/STREAM256_BLOCKBYTES) +#elif ETA == 4 +#define REJ_UNIFORM_ETA_NBLOCKS ((227+STREAM256_BLOCKBYTES-1)/STREAM256_BLOCKBYTES) +#endif +#define REJ_UNIFORM_ETA_BUFLEN (REJ_UNIFORM_ETA_NBLOCKS*STREAM256_BLOCKBYTES) + +#define idxlut DILITHIUM_NAMESPACE(idxlut) +extern const uint8_t idxlut[256][8]; + +#define rej_uniform_avx DILITHIUM_NAMESPACE(rej_uniform_avx) +unsigned int rej_uniform_avx(int32_t *r, const uint8_t buf[REJ_UNIFORM_BUFLEN+8]); + +#define rej_eta_avx DILITHIUM_NAMESPACE(rej_eta_avx) +unsigned int rej_eta_avx(int32_t *r, const uint8_t buf[REJ_UNIFORM_ETA_BUFLEN]); + +#endif + diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/rounding.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/rounding.c new file mode 100644 index 0000000000..3ada656776 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/rounding.c @@ -0,0 +1,200 @@ +#include +#include +#include 
+#include "params.h" +#include "rounding.h" +#include "rejsample.h" +#include "consts.h" + +#define _mm256_blendv_epi32(a,b,mask) \ + _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(a), \ + _mm256_castsi256_ps(b), \ + _mm256_castsi256_ps(mask))) + +/************************************************* +* Name: power2round +* +* Description: For finite field elements a, compute a0, a1 such that +* a mod^+ Q = a1*2^D + a0 with -2^{D-1} < a0 <= 2^{D-1}. +* Assumes a to be positive standard representative. +* +* Arguments: - __m256i *a1: output array of length N/8 with high bits +* - __m256i *a0: output array of length N/8 with low bits a0 +* - const __m256i *a: input array of length N/8 +* +**************************************************/ +void power2round_avx(__m256i *a1, __m256i *a0, const __m256i *a) +{ + unsigned int i; + __m256i f,f0,f1; + const __m256i mask = _mm256_set1_epi32(-(1 << D)); + const __m256i half = _mm256_set1_epi32((1 << (D-1)) - 1); + + for(i = 0; i < N/8; ++i) { + f = _mm256_load_si256(&a[i]); + f1 = _mm256_add_epi32(f,half); + f0 = _mm256_and_si256(f1,mask); + f1 = _mm256_srli_epi32(f1,D); + f0 = _mm256_sub_epi32(f,f0); + _mm256_store_si256(&a1[i],f1); + _mm256_store_si256(&a0[i],f0); + } +} + +/************************************************* +* Name: decompose +* +* Description: For finite field element a, compute high and low parts a0, a1 such +* that a mod^+ Q = a1*ALPHA + a0 with -ALPHA/2 < a0 <= ALPHA/2 except +* if a1 = (Q-1)/ALPHA where we set a1 = 0 and +* -ALPHA/2 <= a0 = a mod Q - Q < 0. Assumes a to be positive standard +* representative. 
+* +* Arguments: - __m256i *a1: output array of length N/8 with high parts +* - __m256i *a0: output array of length N/8 with low parts a0 +* - const __m256i *a: input array of length N/8 +* +**************************************************/ +#if GAMMA2 == (Q-1)/32 +void decompose_avx(__m256i *a1, __m256i *a0, const __m256i *a) +{ + unsigned int i; + __m256i f,f0,f1; + const __m256i q = _mm256_load_si256(&qdata.vec[_8XQ/8]); + const __m256i hq = _mm256_srli_epi32(q,1); + const __m256i v = _mm256_set1_epi32(1025); + const __m256i alpha = _mm256_set1_epi32(2*GAMMA2); + const __m256i off = _mm256_set1_epi32(127); + const __m256i shift = _mm256_set1_epi32(512); + const __m256i mask = _mm256_set1_epi32(15); + + for(i=0;i +#include +#include "params.h" + +#define power2round_avx DILITHIUM_NAMESPACE(power2round_avx) +void power2round_avx(__m256i *a1, __m256i *a0, const __m256i *a); +#define decompose_avx DILITHIUM_NAMESPACE(decompose_avx) +void decompose_avx(__m256i *a1, __m256i *a0, const __m256i *a); +#define make_hint_avx DILITHIUM_NAMESPACE(make_hint_avx) +unsigned int make_hint_avx(uint8_t hint[N], const __m256i *a0, const __m256i *a1); +#define use_hint_avx DILITHIUM_NAMESPACE(use_hint_avx) +void use_hint_avx(__m256i *b, const __m256i *a, const __m256i *hint); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/shuffle.S b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/shuffle.S new file mode 100644 index 0000000000..133e05132b --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/shuffle.S @@ -0,0 +1,52 @@ +#include "consts.h" +.include "shuffle.inc" + +.text +nttunpack128_avx: +#load +vmovdqa (%rdi),%ymm4 +vmovdqa 32(%rdi),%ymm5 +vmovdqa 64(%rdi),%ymm6 +vmovdqa 96(%rdi),%ymm7 +vmovdqa 128(%rdi),%ymm8 +vmovdqa 160(%rdi),%ymm9 +vmovdqa 192(%rdi),%ymm10 +vmovdqa 224(%rdi),%ymm11 + +shuffle8 4,8,3,8 +shuffle8 5,9,4,9 +shuffle8 6,10,5,10 +shuffle8 7,11,6,11 + +shuffle4 3,5,7,5 +shuffle4 
8,10,3,10 +shuffle4 4,6,8,6 +shuffle4 9,11,4,11 + +shuffle2 7,8,9,8 +shuffle2 5,6,7,6 +shuffle2 3,4,5,4 +shuffle2 10,11,3,11 + +#store +vmovdqa %ymm9,(%rdi) +vmovdqa %ymm8,32(%rdi) +vmovdqa %ymm7,64(%rdi) +vmovdqa %ymm6,96(%rdi) +vmovdqa %ymm5,128(%rdi) +vmovdqa %ymm4,160(%rdi) +vmovdqa %ymm3,192(%rdi) +vmovdqa %ymm11,224(%rdi) + +ret + +.global cdecl(nttunpack_avx) +cdecl(nttunpack_avx): +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +ret diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/shuffle.inc b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/shuffle.inc new file mode 100644 index 0000000000..73e9ffe03c --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/shuffle.inc @@ -0,0 +1,25 @@ +.macro shuffle8 r0,r1,r2,r3 +vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2 +vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle4 r0,r1,r2,r3 +vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2 +vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle2 r0,r1,r2,r3 +#vpsllq $32,%ymm\r1,%ymm\r2 +vmovsldup %ymm\r1,%ymm\r2 +vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 +vpsrlq $32,%ymm\r0,%ymm\r0 +#vmovshdup %ymm\r0,%ymm\r0 +vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle1 r0,r1,r2,r3 +vpslld $16,%ymm\r1,%ymm\r2 +vpblendw $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 +vpsrld $16,%ymm\r0,%ymm\r0 +vpblendw $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 +.endm diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/sign.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/sign.c new file mode 100644 index 0000000000..a39f8515c4 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/sign.c @@ -0,0 +1,445 @@ +#include +#include +#include "align.h" +#include "params.h" +#include "sign.h" +#include "packing.h" +#include "polyvec.h" +#include "poly.h" +#include "randombytes.h" +#include 
"symmetric.h" +#include "fips202.h" + +static inline void polyvec_matrix_expand_row(polyvecl **row, polyvecl buf[2], const uint8_t rho[SEEDBYTES], unsigned int i) { + switch(i) { + case 0: + polyvec_matrix_expand_row0(buf, buf + 1, rho); + *row = buf; + break; + case 1: + polyvec_matrix_expand_row1(buf + 1, buf, rho); + *row = buf + 1; + break; + case 2: + polyvec_matrix_expand_row2(buf, buf + 1, rho); + *row = buf; + break; + case 3: + polyvec_matrix_expand_row3(buf + 1, buf, rho); + *row = buf + 1; + break; +#if K > 4 + case 4: + polyvec_matrix_expand_row4(buf, buf + 1, rho); + *row = buf; + break; + case 5: + polyvec_matrix_expand_row5(buf + 1, buf, rho); + *row = buf + 1; + break; +#endif +#if K > 6 + case 6: + polyvec_matrix_expand_row6(buf, buf + 1, rho); + *row = buf; + break; + case 7: + polyvec_matrix_expand_row7(buf + 1, buf, rho); + *row = buf + 1; + break; +#endif + } +} + +/************************************************* +* Name: crypto_sign_keypair +* +* Description: Generates public and private key. 
+* +* Arguments: - uint8_t *pk: pointer to output public key (allocated +* array of CRYPTO_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key (allocated +* array of CRYPTO_SECRETKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { + unsigned int i; + uint8_t seedbuf[2*SEEDBYTES + CRHBYTES]; + const uint8_t *rho, *rhoprime, *key; + polyvecl rowbuf[2]; + polyvecl s1, *row = rowbuf; + polyveck s2; + poly t1, t0; + + /* Get randomness for rho, rhoprime and key */ + randombytes(seedbuf, SEEDBYTES); + shake256(seedbuf, 2*SEEDBYTES + CRHBYTES, seedbuf, SEEDBYTES); + rho = seedbuf; + rhoprime = rho + SEEDBYTES; + key = rhoprime + CRHBYTES; + + /* Store rho, key */ + memcpy(pk, rho, SEEDBYTES); + memcpy(sk, rho, SEEDBYTES); + memcpy(sk + SEEDBYTES, key, SEEDBYTES); + + /* Sample short vectors s1 and s2 */ +#if K == 4 && L == 4 + poly_uniform_eta_4x(&s1.vec[0], &s1.vec[1], &s1.vec[2], &s1.vec[3], rhoprime, 0, 1, 2, 3); + poly_uniform_eta_4x(&s2.vec[0], &s2.vec[1], &s2.vec[2], &s2.vec[3], rhoprime, 4, 5, 6, 7); +#elif K == 6 && L == 5 + poly_uniform_eta_4x(&s1.vec[0], &s1.vec[1], &s1.vec[2], &s1.vec[3], rhoprime, 0, 1, 2, 3); + poly_uniform_eta_4x(&s1.vec[4], &s2.vec[0], &s2.vec[1], &s2.vec[2], rhoprime, 4, 5, 6, 7); + poly_uniform_eta_4x(&s2.vec[3], &s2.vec[4], &s2.vec[5], &t0, rhoprime, 8, 9, 10, 11); +#elif K == 8 && L == 7 + poly_uniform_eta_4x(&s1.vec[0], &s1.vec[1], &s1.vec[2], &s1.vec[3], rhoprime, 0, 1, 2, 3); + poly_uniform_eta_4x(&s1.vec[4], &s1.vec[5], &s1.vec[6], &s2.vec[0], rhoprime, 4, 5, 6, 7); + poly_uniform_eta_4x(&s2.vec[1], &s2.vec[2], &s2.vec[3], &s2.vec[4], rhoprime, 8, 9, 10, 11); + poly_uniform_eta_4x(&s2.vec[5], &s2.vec[6], &s2.vec[7], &t0, rhoprime, 12, 13, 14, 15); +#else +#error +#endif + + /* Pack secret vectors */ + for(i = 0; i < L; i++) + polyeta_pack(sk + 2*SEEDBYTES + TRBYTES + i*POLYETA_PACKEDBYTES, &s1.vec[i]); + for(i = 0; i < 
K; i++) + polyeta_pack(sk + 2*SEEDBYTES + TRBYTES + (L + i)*POLYETA_PACKEDBYTES, &s2.vec[i]); + + /* Transform s1 */ + polyvecl_ntt(&s1); + + for(i = 0; i < K; i++) { + /* Expand matrix row */ + polyvec_matrix_expand_row(&row, rowbuf, rho, i); + + /* Compute inner-product */ + polyvecl_pointwise_acc_montgomery(&t1, row, &s1); + poly_invntt_tomont(&t1); + + /* Add error polynomial */ + poly_add(&t1, &t1, &s2.vec[i]); + + /* Round t and pack t1, t0 */ + poly_caddq(&t1); + poly_power2round(&t1, &t0, &t1); + polyt1_pack(pk + SEEDBYTES + i*POLYT1_PACKEDBYTES, &t1); + polyt0_pack(sk + 2*SEEDBYTES + TRBYTES + (L+K)*POLYETA_PACKEDBYTES + i*POLYT0_PACKEDBYTES, &t0); + } + + /* Compute H(rho, t1) and store in secret key */ + shake256(sk + 2*SEEDBYTES, TRBYTES, pk, CRYPTO_PUBLICKEYBYTES); + + return 0; +} + +/************************************************* +* Name: crypto_sign_signature +* +* Description: Computes signature. +* +* Arguments: - uint8_t *sig: pointer to output signature (of length CRYPTO_BYTES) +* - size_t *siglen: pointer to output length of signature +* - uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int crypto_sign_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t mlen, const uint8_t *sk) { + unsigned int i, n, pos; + uint8_t seedbuf[2*SEEDBYTES + TRBYTES + RNDBYTES + 2*CRHBYTES]; + uint8_t *rho, *tr, *key, *rnd, *mu, *rhoprime; + uint8_t hintbuf[N]; + uint8_t *hint = sig + CTILDEBYTES + L*POLYZ_PACKEDBYTES; + uint64_t nonce = 0; + polyvecl mat[K], s1, z; + polyveck t0, s2, w1; + poly c, tmp; + union { + polyvecl y; + polyveck w0; + } tmpv; + shake256incctx state; + + rho = seedbuf; + tr = rho + SEEDBYTES; + key = tr + TRBYTES; + rnd = key + SEEDBYTES; + mu = rnd + RNDBYTES; + rhoprime = mu + CRHBYTES; + unpack_sk(rho, tr, key, &t0, &s1, &s2, sk); + + /* Compute CRH(tr, msg) */ + 
shake256_inc_init(&state); + shake256_inc_absorb(&state, tr, TRBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(mu, CRHBYTES, &state); + +#ifdef DILITHIUM_RANDOMIZED_SIGNING + randombytes(rnd, RNDBYTES); +#else + memset(rnd, 0, RNDBYTES); +#endif + shake256(rhoprime, CRHBYTES, key, SEEDBYTES + RNDBYTES + CRHBYTES); + + /* Expand matrix and transform vectors */ + polyvec_matrix_expand(mat, rho); + polyvecl_ntt(&s1); + polyveck_ntt(&s2); + polyveck_ntt(&t0); + +rej: + /* Sample intermediate vector y */ +#if L == 4 + poly_uniform_gamma1_4x(&z.vec[0], &z.vec[1], &z.vec[2], &z.vec[3], + rhoprime, nonce, nonce + 1, nonce + 2, nonce + 3); + nonce += 4; +#elif L == 5 + poly_uniform_gamma1_4x(&z.vec[0], &z.vec[1], &z.vec[2], &z.vec[3], + rhoprime, nonce, nonce + 1, nonce + 2, nonce + 3); + poly_uniform_gamma1(&z.vec[4], rhoprime, nonce + 4); + nonce += 5; +#elif L == 7 + poly_uniform_gamma1_4x(&z.vec[0], &z.vec[1], &z.vec[2], &z.vec[3], + rhoprime, nonce, nonce + 1, nonce + 2, nonce + 3); + poly_uniform_gamma1_4x(&z.vec[4], &z.vec[5], &z.vec[6], &tmp, + rhoprime, nonce + 4, nonce + 5, nonce + 6, 0); + nonce += 7; +#else +#error +#endif + + /* Matrix-vector product */ + tmpv.y = z; + polyvecl_ntt(&tmpv.y); + polyvec_matrix_pointwise_montgomery(&w1, mat, &tmpv.y); + polyveck_invntt_tomont(&w1); + + /* Decompose w and call the random oracle */ + polyveck_caddq(&w1); + polyveck_decompose(&w1, &tmpv.w0, &w1); + polyveck_pack_w1(sig, &w1); + + shake256_inc_ctx_reset(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, sig, K*POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(sig, CTILDEBYTES, &state); + poly_challenge(&c, sig); + poly_ntt(&c); + + /* Compute z, reject if it reveals secret */ + for(i = 0; i < L; i++) { + poly_pointwise_montgomery(&tmp, &c, &s1.vec[i]); + poly_invntt_tomont(&tmp); + poly_add(&z.vec[i], &z.vec[i], &tmp); + poly_reduce(&z.vec[i]); + 
if(poly_chknorm(&z.vec[i], GAMMA1 - BETA)) + goto rej; + } + + /* Zero hint vector in signature */ + pos = 0; + memset(hint, 0, OMEGA); + + for(i = 0; i < K; i++) { + /* Check that subtracting cs2 does not change high bits of w and low bits + * do not reveal secret information */ + poly_pointwise_montgomery(&tmp, &c, &s2.vec[i]); + poly_invntt_tomont(&tmp); + poly_sub(&tmpv.w0.vec[i], &tmpv.w0.vec[i], &tmp); + poly_reduce(&tmpv.w0.vec[i]); + if(poly_chknorm(&tmpv.w0.vec[i], GAMMA2 - BETA)) + goto rej; + + /* Compute hints */ + poly_pointwise_montgomery(&tmp, &c, &t0.vec[i]); + poly_invntt_tomont(&tmp); + poly_reduce(&tmp); + if(poly_chknorm(&tmp, GAMMA2)) + goto rej; + + poly_add(&tmpv.w0.vec[i], &tmpv.w0.vec[i], &tmp); + n = poly_make_hint(hintbuf, &tmpv.w0.vec[i], &w1.vec[i]); + if(pos + n > OMEGA) + goto rej; + + /* Store hints in signature */ + memcpy(&hint[pos], hintbuf, n); + hint[OMEGA + i] = pos = pos + n; + } + + shake256_inc_ctx_release(&state); + /* Pack z into signature */ + for(i = 0; i < L; i++) + polyz_pack(sig + CTILDEBYTES + i*POLYZ_PACKEDBYTES, &z.vec[i]); + + *siglen = CRYPTO_BYTES; + return 0; +} + +/************************************************* +* Name: crypto_sign +* +* Description: Compute signed message. 
+* +* Arguments: - uint8_t *sm: pointer to output signed message (allocated +* array with CRYPTO_BYTES + mlen bytes), +* can be equal to m +* - size_t *smlen: pointer to output length of signed +* message +* - const uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - const uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int crypto_sign(uint8_t *sm, size_t *smlen, const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t i; + + for(i = 0; i < mlen; ++i) + sm[CRYPTO_BYTES + mlen - 1 - i] = m[mlen - 1 - i]; + crypto_sign_signature(sm, smlen, sm + CRYPTO_BYTES, mlen, sk); + *smlen += mlen; + return 0; +} + +/************************************************* +* Name: crypto_sign_verify +* +* Description: Verifies signature. +* +* Arguments: - uint8_t *m: pointer to input signature +* - size_t siglen: length of signature +* - const uint8_t *m: pointer to message +* - size_t mlen: length of message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signature could be verified correctly and -1 otherwise +**************************************************/ +int crypto_sign_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size_t mlen, const uint8_t *pk) { + unsigned int i, j, pos = 0; + /* polyw1_pack writes additional 14 bytes */ + ALIGNED_UINT8(K*POLYW1_PACKEDBYTES+14) buf; + uint8_t mu[CRHBYTES]; + const uint8_t *hint = sig + CTILDEBYTES + L*POLYZ_PACKEDBYTES; + polyvecl rowbuf[2]; + polyvecl *row = rowbuf; + polyvecl z; + poly c, w1, h; + shake256incctx state; + + if(siglen != CRYPTO_BYTES) + return -1; + + /* Compute CRH(H(rho, t1), msg) */ + shake256(mu, CRHBYTES, pk, CRYPTO_PUBLICKEYBYTES); + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(mu, CRHBYTES, &state); + shake256_inc_ctx_release(&state); + + 
/* Expand challenge */ + poly_challenge(&c, sig); + poly_ntt(&c); + + /* Unpack z; shortness follows from unpacking */ + for(i = 0; i < L; i++) { + polyz_unpack(&z.vec[i], sig + CTILDEBYTES + i*POLYZ_PACKEDBYTES); + poly_ntt(&z.vec[i]); + } + + for(i = 0; i < K; i++) { + /* Expand matrix row */ + polyvec_matrix_expand_row(&row, rowbuf, pk, i); + + /* Compute i-th row of Az - c2^Dt1 */ + polyvecl_pointwise_acc_montgomery(&w1, row, &z); + + polyt1_unpack(&h, pk + SEEDBYTES + i*POLYT1_PACKEDBYTES); + poly_shiftl(&h); + poly_ntt(&h); + poly_pointwise_montgomery(&h, &c, &h); + + poly_sub(&w1, &w1, &h); + poly_reduce(&w1); + poly_invntt_tomont(&w1); + + /* Get hint polynomial and reconstruct w1 */ + memset(h.vec, 0, sizeof(poly)); + if(hint[OMEGA + i] < pos || hint[OMEGA + i] > OMEGA) + return -1; + + for(j = pos; j < hint[OMEGA + i]; ++j) { + /* Coefficients are ordered for strong unforgeability */ + if(j > pos && hint[j] <= hint[j-1]) return -1; + h.coeffs[hint[j]] = 1; + } + pos = hint[OMEGA + i]; + + poly_caddq(&w1); + poly_use_hint(&w1, &w1, &h); + polyw1_pack(buf.coeffs + i*POLYW1_PACKEDBYTES, &w1); + } + + /* Extra indices are zero for strong unforgeability */ + for(j = pos; j < OMEGA; ++j) + if(hint[j]) return -1; + + /* Call random oracle and verify challenge */ + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, buf.coeffs, K*POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf.coeffs, CTILDEBYTES, &state); + shake256_inc_ctx_release(&state); + for(i = 0; i < CTILDEBYTES; ++i) + if(buf.coeffs[i] != sig[i]) + return -1; + + return 0; +} + +/************************************************* +* Name: crypto_sign_open +* +* Description: Verify signed message. 
+* +* Arguments: - uint8_t *m: pointer to output message (allocated +* array with smlen bytes), can be equal to sm +* - size_t *mlen: pointer to output length of message +* - const uint8_t *sm: pointer to signed message +* - size_t smlen: length of signed message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signed message could be verified correctly and -1 otherwise +**************************************************/ +int crypto_sign_open(uint8_t *m, size_t *mlen, const uint8_t *sm, size_t smlen, const uint8_t *pk) { + size_t i; + + if(smlen < CRYPTO_BYTES) + goto badsig; + + *mlen = smlen - CRYPTO_BYTES; + if(crypto_sign_verify(sm, CRYPTO_BYTES, sm + CRYPTO_BYTES, *mlen, pk)) + goto badsig; + else { + /* All good, copy msg, return 0 */ + for(i = 0; i < *mlen; ++i) + m[i] = sm[CRYPTO_BYTES + i]; + return 0; + } + +badsig: + /* Signature verification failed */ + *mlen = -1; + for(i = 0; i < smlen; ++i) + m[i] = 0; + + return -1; +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/sign.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/sign.h new file mode 100644 index 0000000000..295f378c00 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/sign.h @@ -0,0 +1,36 @@ +#ifndef SIGN_H +#define SIGN_H + +#include +#include +#include "params.h" +#include "polyvec.h" +#include "poly.h" + +#define challenge DILITHIUM_NAMESPACE(challenge) +void challenge(poly *c, const uint8_t seed[SEEDBYTES]); + +#define crypto_sign_keypair DILITHIUM_NAMESPACE(keypair) +int crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + +#define crypto_sign_signature DILITHIUM_NAMESPACE(signature) +int crypto_sign_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +#define crypto_sign DILITHIUM_NAMESPACETOP +int crypto_sign(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +#define crypto_sign_verify 
DILITHIUM_NAMESPACE(verify) +int crypto_sign_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +#define crypto_sign_open DILITHIUM_NAMESPACE(open) +int crypto_sign_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/symmetric-shake.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/symmetric-shake.c new file mode 100644 index 0000000000..963f649817 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/symmetric-shake.c @@ -0,0 +1,28 @@ +#include +#include "params.h" +#include "symmetric.h" +#include "fips202.h" + +void dilithium_shake128_stream_init(shake128incctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce) +{ + uint8_t t[2]; + t[0] = nonce; + t[1] = nonce >> 8; + + shake128_inc_init(state); + shake128_inc_absorb(state, seed, SEEDBYTES); + shake128_inc_absorb(state, t, 2); + shake128_inc_finalize(state); +} + +void dilithium_shake256_stream_init(shake256incctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce) +{ + uint8_t t[2]; + t[0] = nonce; + t[1] = nonce >> 8; + + shake256_inc_init(state); + shake256_inc_absorb(state, seed, CRHBYTES); + shake256_inc_absorb(state, t, 2); + shake256_inc_finalize(state); +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/symmetric.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/symmetric.h new file mode 100644 index 0000000000..fa49963ae3 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_avx2/symmetric.h @@ -0,0 +1,28 @@ +#ifndef SYMMETRIC_H +#define SYMMETRIC_H + +#include +#include "params.h" + +#include "fips202.h" + +typedef shake128incctx stream128_state; +typedef shake256incctx stream256_state; + +#define dilithium_shake128_stream_init DILITHIUM_NAMESPACE(dilithium_shake128_stream_init) +void 
dilithium_shake128_stream_init(shake128incctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce); + +#define dilithium_shake256_stream_init DILITHIUM_NAMESPACE(dilithium_shake256_stream_init) +void dilithium_shake256_stream_init(shake256incctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define STREAM128_BLOCKBYTES SHAKE128_RATE +#define STREAM256_BLOCKBYTES SHAKE256_RATE + +#define stream128_init(STATE, SEED, NONCE) dilithium_shake128_stream_init(STATE, SEED, NONCE) +#define stream128_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) +#define stream128_release(STATE) shake128_inc_ctx_release(STATE) +#define stream256_init(STATE, SEED, NONCE) dilithium_shake256_stream_init(STATE, SEED, NONCE) +#define stream256_squeezeblocks(OUT, OUTBLOCKS, STATE) shake256_squeezeblocks(OUT, OUTBLOCKS, STATE) +#define stream256_release(STATE) shake256_inc_ctx_release(STATE) + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/LICENSE b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/LICENSE new file mode 100644 index 0000000000..cddfe615c6 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/LICENSE @@ -0,0 +1,7 @@ +Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/); +or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html). + +For Keccak and the random number generator +we are using public-domain code from sources +and by authors listed in comments on top of +the respective files. 
diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/api.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/api.h new file mode 100644 index 0000000000..78caa5c728 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/api.h @@ -0,0 +1,88 @@ +#ifndef API_H +#define API_H + +#include +#include + +#define pqcrystals_dilithium2_PUBLICKEYBYTES 1312 +#define pqcrystals_dilithium2_SECRETKEYBYTES 2560 +#define pqcrystals_dilithium2_BYTES 2420 + +#define pqcrystals_dilithium2_ref_PUBLICKEYBYTES pqcrystals_dilithium2_PUBLICKEYBYTES +#define pqcrystals_dilithium2_ref_SECRETKEYBYTES pqcrystals_dilithium2_SECRETKEYBYTES +#define pqcrystals_dilithium2_ref_BYTES pqcrystals_dilithium2_BYTES + +int pqcrystals_dilithium2_ref_keypair(uint8_t *pk, uint8_t *sk); + +int pqcrystals_dilithium2_ref_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium2_ref(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium2_ref_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int pqcrystals_dilithium2_ref_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#define pqcrystals_dilithium3_PUBLICKEYBYTES 1952 +#define pqcrystals_dilithium3_SECRETKEYBYTES 4032 +#define pqcrystals_dilithium3_BYTES 3309 + +#define pqcrystals_dilithium3_ref_PUBLICKEYBYTES pqcrystals_dilithium3_PUBLICKEYBYTES +#define pqcrystals_dilithium3_ref_SECRETKEYBYTES pqcrystals_dilithium3_SECRETKEYBYTES +#define pqcrystals_dilithium3_ref_BYTES pqcrystals_dilithium3_BYTES + +int pqcrystals_dilithium3_ref_keypair(uint8_t *pk, uint8_t *sk); + +int pqcrystals_dilithium3_ref_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium3_ref(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + 
const uint8_t *sk); + +int pqcrystals_dilithium3_ref_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int pqcrystals_dilithium3_ref_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#define pqcrystals_dilithium5_PUBLICKEYBYTES 2592 +#define pqcrystals_dilithium5_SECRETKEYBYTES 4896 +#define pqcrystals_dilithium5_BYTES 4627 + +#define pqcrystals_dilithium5_ref_PUBLICKEYBYTES pqcrystals_dilithium5_PUBLICKEYBYTES +#define pqcrystals_dilithium5_ref_SECRETKEYBYTES pqcrystals_dilithium5_SECRETKEYBYTES +#define pqcrystals_dilithium5_ref_BYTES pqcrystals_dilithium5_BYTES + +int pqcrystals_dilithium5_ref_keypair(uint8_t *pk, uint8_t *sk); + +int pqcrystals_dilithium5_ref_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium5_ref(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium5_ref_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int pqcrystals_dilithium5_ref_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/config.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/config.h new file mode 100644 index 0000000000..eddf13f5ea --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/config.h @@ -0,0 +1,27 @@ +#ifndef CONFIG_H +#define CONFIG_H + +//#define DILITHIUM_MODE 2 +#define DILITHIUM_RANDOMIZED_SIGNING +//#define USE_RDPMC +//#define DBENCH + +#ifndef DILITHIUM_MODE +#define DILITHIUM_MODE 2 +#endif + +#if DILITHIUM_MODE == 2 +#define CRYPTO_ALGNAME "ML-DSA-44-ipd" +#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_44_ipd_ref +#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_44_ipd_ref_##s +#elif DILITHIUM_MODE == 3 +#define 
CRYPTO_ALGNAME "ML-DSA-65-ipd" +#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_65_ipd_ref +#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_65_ipd_ref_##s +#elif DILITHIUM_MODE == 5 +#define CRYPTO_ALGNAME "ML-DSA-87-ipd" +#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_87_ipd_ref +#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_87_ipd_ref_##s +#endif + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/ntt.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/ntt.c new file mode 100644 index 0000000000..5ea8b530e1 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/ntt.c @@ -0,0 +1,98 @@ +#include +#include "params.h" +#include "ntt.h" +#include "reduce.h" + +static const int32_t zetas[N] = { + 0, 25847, -2608894, -518909, 237124, -777960, -876248, 466468, + 1826347, 2353451, -359251, -2091905, 3119733, -2884855, 3111497, 2680103, + 2725464, 1024112, -1079900, 3585928, -549488, -1119584, 2619752, -2108549, + -2118186, -3859737, -1399561, -3277672, 1757237, -19422, 4010497, 280005, + 2706023, 95776, 3077325, 3530437, -1661693, -3592148, -2537516, 3915439, + -3861115, -3043716, 3574422, -2867647, 3539968, -300467, 2348700, -539299, + -1699267, -1643818, 3505694, -3821735, 3507263, -2140649, -1600420, 3699596, + 811944, 531354, 954230, 3881043, 3900724, -2556880, 2071892, -2797779, + -3930395, -1528703, -3677745, -3041255, -1452451, 3475950, 2176455, -1585221, + -1257611, 1939314, -4083598, -1000202, -3190144, -3157330, -3632928, 126922, + 3412210, -983419, 2147896, 2715295, -2967645, -3693493, -411027, -2477047, + -671102, -1228525, -22981, -1308169, -381987, 1349076, 1852771, -1430430, + -3343383, 264944, 508951, 3097992, 44288, -1100098, 904516, 3958618, + -3724342, -8578, 1653064, -3249728, 2389356, -210977, 759969, -1316856, + 189548, -3553272, 3159746, -1851402, -2409325, -177440, 1315589, 1341330, + 1285669, -1584928, -812732, -1439742, -3019102, -3881060, -3628969, 3839961, + 
2091667, 3407706, 2316500, 3817976, -3342478, 2244091, -2446433, -3562462, + 266997, 2434439, -1235728, 3513181, -3520352, -3759364, -1197226, -3193378, + 900702, 1859098, 909542, 819034, 495491, -1613174, -43260, -522500, + -655327, -3122442, 2031748, 3207046, -3556995, -525098, -768622, -3595838, + 342297, 286988, -2437823, 4108315, 3437287, -3342277, 1735879, 203044, + 2842341, 2691481, -2590150, 1265009, 4055324, 1247620, 2486353, 1595974, + -3767016, 1250494, 2635921, -3548272, -2994039, 1869119, 1903435, -1050970, + -1333058, 1237275, -3318210, -1430225, -451100, 1312455, 3306115, -1962642, + -1279661, 1917081, -2546312, -1374803, 1500165, 777191, 2235880, 3406031, + -542412, -2831860, -1671176, -1846953, -2584293, -3724270, 594136, -3776993, + -2013608, 2432395, 2454455, -164721, 1957272, 3369112, 185531, -1207385, + -3183426, 162844, 1616392, 3014001, 810149, 1652634, -3694233, -1799107, + -3038916, 3523897, 3866901, 269760, 2213111, -975884, 1717735, 472078, + -426683, 1723600, -1803090, 1910376, -1667432, -1104333, -260646, -3833893, + -2939036, -2235985, -420899, -2286327, 183443, -976891, 1612842, -3545687, + -554416, 3919660, -48306, -1362209, 3937738, 1400424, -846154, 1976782 +}; + +/************************************************* +* Name: ntt +* +* Description: Forward NTT, in-place. No modular reduction is performed after +* additions or subtractions. Output vector is in bitreversed order. 
+* +* Arguments: - uint32_t p[N]: input/output coefficient array +**************************************************/ +void ntt(int32_t a[N]) { + unsigned int len, start, j, k; + int32_t zeta, t; + + k = 0; + for(len = 128; len > 0; len >>= 1) { + for(start = 0; start < N; start = j + len) { + zeta = zetas[++k]; + for(j = start; j < start + len; ++j) { + t = montgomery_reduce((int64_t)zeta * a[j + len]); + a[j + len] = a[j] - t; + a[j] = a[j] + t; + } + } + } +} + +/************************************************* +* Name: invntt_tomont +* +* Description: Inverse NTT and multiplication by Montgomery factor 2^32. +* In-place. No modular reductions after additions or +* subtractions; input coefficients need to be smaller than +* Q in absolute value. Output coefficient are smaller than Q in +* absolute value. +* +* Arguments: - uint32_t p[N]: input/output coefficient array +**************************************************/ +void invntt_tomont(int32_t a[N]) { + unsigned int start, len, j, k; + int32_t t, zeta; + const int32_t f = 41978; // mont^2/256 + + k = 256; + for(len = 1; len < N; len <<= 1) { + for(start = 0; start < N; start = j + len) { + zeta = -zetas[--k]; + for(j = start; j < start + len; ++j) { + t = a[j]; + a[j] = t + a[j + len]; + a[j + len] = t - a[j + len]; + a[j + len] = montgomery_reduce((int64_t)zeta * a[j + len]); + } + } + } + + for(j = 0; j < N; ++j) { + a[j] = montgomery_reduce((int64_t)f * a[j]); + } +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/ntt.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/ntt.h new file mode 100644 index 0000000000..731132d5cd --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/ntt.h @@ -0,0 +1,13 @@ +#ifndef NTT_H +#define NTT_H + +#include +#include "params.h" + +#define ntt DILITHIUM_NAMESPACE(ntt) +void ntt(int32_t a[N]); + +#define invntt_tomont DILITHIUM_NAMESPACE(invntt_tomont) +void invntt_tomont(int32_t a[N]); + +#endif diff 
--git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/packing.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/packing.c
new file mode 100644
index 0000000000..039a686da3
--- /dev/null
+++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/packing.c
@@ -0,0 +1,237 @@
+#include "params.h"
+#include "packing.h"
+#include "polyvec.h"
+#include "poly.h"
+
+/*************************************************
+* Name:        pack_pk
+*
+* Description: Bit-pack public key pk = (rho, t1).
+*
+* Arguments:   - uint8_t pk[]: output byte array
+*              - const uint8_t rho[]: byte array containing rho
+*              - const polyveck *t1: pointer to vector t1
+**************************************************/
+void pack_pk(uint8_t pk[CRYPTO_PUBLICKEYBYTES],
+             const uint8_t rho[SEEDBYTES],
+             const polyveck *t1)
+{
+  unsigned int i;
+
+  for(i = 0; i < SEEDBYTES; ++i)
+    pk[i] = rho[i];
+  pk += SEEDBYTES; /* t1 is written immediately after the SEEDBYTES of rho */
+
+  for(i = 0; i < K; ++i)
+    polyt1_pack(pk + i*POLYT1_PACKEDBYTES, &t1->vec[i]);
+}
+
+/*************************************************
+* Name:        unpack_pk
+*
+* Description: Unpack public key pk = (rho, t1).
+*
+* Arguments:   - uint8_t rho[]: output byte array for rho
+*              - polyveck *t1: pointer to output vector t1
+*              - const uint8_t pk[]: byte array containing bit-packed pk
+**************************************************/
+void unpack_pk(uint8_t rho[SEEDBYTES],
+               polyveck *t1,
+               const uint8_t pk[CRYPTO_PUBLICKEYBYTES])
+{
+  unsigned int i;
+
+  for(i = 0; i < SEEDBYTES; ++i)
+    rho[i] = pk[i];
+  pk += SEEDBYTES; /* t1 is read from immediately after the SEEDBYTES of rho */
+
+  for(i = 0; i < K; ++i)
+    polyt1_unpack(&t1->vec[i], pk + i*POLYT1_PACKEDBYTES);
+}
+
+/*************************************************
+* Name:        pack_sk
+*
+* Description: Bit-pack secret key sk = (rho, key, tr, s1, s2, t0).
+*
+* Arguments:   - uint8_t sk[]: output byte array
+*              - const uint8_t rho[]: byte array containing rho
+*              - const uint8_t tr[]: byte array containing tr
+*              - const uint8_t key[]: byte array containing key
+*              - const polyveck *t0: pointer to vector t0
+*              - const polyvecl *s1: pointer to vector s1
+*              - const polyveck *s2: pointer to vector s2
+**************************************************/
+void pack_sk(uint8_t sk[CRYPTO_SECRETKEYBYTES],
+             const uint8_t rho[SEEDBYTES],
+             const uint8_t tr[TRBYTES],
+             const uint8_t key[SEEDBYTES],
+             const polyveck *t0,
+             const polyvecl *s1,
+             const polyveck *s2)
+{
+  unsigned int i;
+
+  for(i = 0; i < SEEDBYTES; ++i) /* field 1: rho */
+    sk[i] = rho[i];
+  sk += SEEDBYTES;
+
+  for(i = 0; i < SEEDBYTES; ++i) /* field 2: key */
+    sk[i] = key[i];
+  sk += SEEDBYTES;
+
+  for(i = 0; i < TRBYTES; ++i) /* field 3: tr */
+    sk[i] = tr[i];
+  sk += TRBYTES;
+
+  for(i = 0; i < L; ++i) /* field 4: s1, L polynomials */
+    polyeta_pack(sk + i*POLYETA_PACKEDBYTES, &s1->vec[i]);
+  sk += L*POLYETA_PACKEDBYTES;
+
+  for(i = 0; i < K; ++i) /* field 5: s2, K polynomials */
+    polyeta_pack(sk + i*POLYETA_PACKEDBYTES, &s2->vec[i]);
+  sk += K*POLYETA_PACKEDBYTES;
+
+  for(i = 0; i < K; ++i) /* field 6: t0, K polynomials */
+    polyt0_pack(sk + i*POLYT0_PACKEDBYTES, &t0->vec[i]);
+}
+
+/*************************************************
+* Name:        unpack_sk
+*
+* Description: Unpack secret key sk = (rho, key, tr, s1, s2, t0).
+*
+* Arguments:   - uint8_t rho[]: output byte array for rho
+*              - uint8_t tr[]: output byte array for tr
+*              - uint8_t key[]: output byte array for key
+*              - polyveck *t0: pointer to output vector t0
+*              - polyvecl *s1: pointer to output vector s1
+*              - polyveck *s2: pointer to output vector s2
+*              - const uint8_t sk[]: byte array containing bit-packed sk
+**************************************************/
+void unpack_sk(uint8_t rho[SEEDBYTES],
+               uint8_t tr[TRBYTES],
+               uint8_t key[SEEDBYTES],
+               polyveck *t0,
+               polyvecl *s1,
+               polyveck *s2,
+               const uint8_t sk[CRYPTO_SECRETKEYBYTES])
+{
+  unsigned int i;
+
+  for(i = 0; i < SEEDBYTES; ++i) /* field 1: rho */
+    rho[i] = sk[i];
+  sk += SEEDBYTES;
+
+  for(i = 0; i < SEEDBYTES; ++i) /* field 2: key */
+    key[i] = sk[i];
+  sk += SEEDBYTES;
+
+  for(i = 0; i < TRBYTES; ++i) /* field 3: tr */
+    tr[i] = sk[i];
+  sk += TRBYTES;
+
+  for(i=0; i < L; ++i) /* field 4: s1, L polynomials */
+    polyeta_unpack(&s1->vec[i], sk + i*POLYETA_PACKEDBYTES);
+  sk += L*POLYETA_PACKEDBYTES;
+
+  for(i=0; i < K; ++i) /* field 5: s2, K polynomials */
+    polyeta_unpack(&s2->vec[i], sk + i*POLYETA_PACKEDBYTES);
+  sk += K*POLYETA_PACKEDBYTES;
+
+  for(i=0; i < K; ++i) /* field 6: t0, K polynomials */
+    polyt0_unpack(&t0->vec[i], sk + i*POLYT0_PACKEDBYTES);
+}
+
+/*************************************************
+* Name:        pack_sig
+*
+* Description: Bit-pack signature sig = (c, z, h).
+*
+* Arguments:   - uint8_t sig[]: output byte array
+*              - const uint8_t *c: pointer to challenge hash length CTILDEBYTES
+*              - const polyvecl *z: pointer to vector z
+*              - const polyveck *h: pointer to hint vector h
+**************************************************/
+void pack_sig(uint8_t sig[CRYPTO_BYTES],
+              const uint8_t c[CTILDEBYTES],
+              const polyvecl *z,
+              const polyveck *h)
+{
+  unsigned int i, j, k;
+
+  for(i=0; i < CTILDEBYTES; ++i)
+    sig[i] = c[i];
+  sig += CTILDEBYTES;
+
+  for(i = 0; i < L; ++i)
+    polyz_pack(sig + i*POLYZ_PACKEDBYTES, &z->vec[i]);
+  sig += L*POLYZ_PACKEDBYTES;
+
+  /* Encode h */
+  for(i = 0; i < OMEGA + K; ++i)
+    sig[i] = 0;
+
+  k = 0;
+  for(i = 0; i < K; ++i) {
+    for(j = 0; j < N; ++j)
+      if(h->vec[i].coeffs[j] != 0)
+        sig[k++] = j; /* position of a nonzero hint coefficient; assumes at most OMEGA nonzero hints in total -- TODO confirm callers enforce this */
+
+    sig[OMEGA + i] = k; /* running count: one past the last index slot used by polynomial i */
+  }
+}
+
+/*************************************************
+* Name:        unpack_sig
+*
+* Description: Unpack signature sig = (c, z, h).
+*
+* Arguments:   - uint8_t *c: pointer to output challenge hash
+*              - polyvecl *z: pointer to output vector z
+*              - polyveck *h: pointer to output hint vector h
+*              - const uint8_t sig[]: byte array containing
+*                bit-packed signature
+*
+* Returns 1 in case of malformed signature; otherwise 0.
+**************************************************/ +int unpack_sig(uint8_t c[CTILDEBYTES], + polyvecl *z, + polyveck *h, + const uint8_t sig[CRYPTO_BYTES]) +{ + unsigned int i, j, k; + + for(i = 0; i < CTILDEBYTES; ++i) + c[i] = sig[i]; + sig += CTILDEBYTES; + + for(i = 0; i < L; ++i) + polyz_unpack(&z->vec[i], sig + i*POLYZ_PACKEDBYTES); + sig += L*POLYZ_PACKEDBYTES; + + /* Decode h */ + k = 0; + for(i = 0; i < K; ++i) { + for(j = 0; j < N; ++j) + h->vec[i].coeffs[j] = 0; + + if(sig[OMEGA + i] < k || sig[OMEGA + i] > OMEGA) + return 1; + + for(j = k; j < sig[OMEGA + i]; ++j) { + /* Coefficients are ordered for strong unforgeability */ + if(j > k && sig[j] <= sig[j-1]) return 1; + h->vec[i].coeffs[sig[j]] = 1; + } + + k = sig[OMEGA + i]; + } + + /* Extra indices are zero for strong unforgeability */ + for(j = k; j < OMEGA; ++j) + if(sig[j]) + return 1; + + return 0; +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/packing.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/packing.h new file mode 100644 index 0000000000..8e47728ce3 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/packing.h @@ -0,0 +1,38 @@ +#ifndef PACKING_H +#define PACKING_H + +#include +#include "params.h" +#include "polyvec.h" + +#define pack_pk DILITHIUM_NAMESPACE(pack_pk) +void pack_pk(uint8_t pk[CRYPTO_PUBLICKEYBYTES], const uint8_t rho[SEEDBYTES], const polyveck *t1); + +#define pack_sk DILITHIUM_NAMESPACE(pack_sk) +void pack_sk(uint8_t sk[CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[TRBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2); + +#define pack_sig DILITHIUM_NAMESPACE(pack_sig) +void pack_sig(uint8_t sig[CRYPTO_BYTES], const uint8_t c[CTILDEBYTES], const polyvecl *z, const polyveck *h); + +#define unpack_pk DILITHIUM_NAMESPACE(unpack_pk) +void unpack_pk(uint8_t rho[SEEDBYTES], polyveck *t1, const uint8_t 
pk[CRYPTO_PUBLICKEYBYTES]);
+
+#define unpack_sk DILITHIUM_NAMESPACE(unpack_sk)
+void unpack_sk(uint8_t rho[SEEDBYTES],
+               uint8_t tr[TRBYTES],
+               uint8_t key[SEEDBYTES],
+               polyveck *t0,
+               polyvecl *s1,
+               polyveck *s2,
+               const uint8_t sk[CRYPTO_SECRETKEYBYTES]);
+
+#define unpack_sig DILITHIUM_NAMESPACE(unpack_sig)
+int unpack_sig(uint8_t c[CTILDEBYTES], polyvecl *z, polyveck *h, const uint8_t sig[CRYPTO_BYTES]);
+
+#endif
diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/params.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/params.h
new file mode 100644
index 0000000000..1e8a7b505b
--- /dev/null
+++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/params.h
@@ -0,0 +1,80 @@
+#ifndef PARAMS_H
+#define PARAMS_H
+
+#include "config.h"
+
+#define SEEDBYTES 32
+#define CRHBYTES 64
+#define TRBYTES 64
+#define RNDBYTES 32
+#define N 256 /* coefficients per polynomial */
+#define Q 8380417 /* modulus */
+#define D 13 /* power2round splits a coefficient as c1*2^D + c0 */
+#define ROOT_OF_UNITY 1753
+
+#if DILITHIUM_MODE == 2
+#define K 4
+#define L 4
+#define ETA 2
+#define TAU 39
+#define BETA 78
+#define GAMMA1 (1 << 17)
+#define GAMMA2 ((Q-1)/88)
+#define OMEGA 80
+#define CTILDEBYTES 32
+
+#elif DILITHIUM_MODE == 3
+#define K 6
+#define L 5
+#define ETA 4
+#define TAU 49
+#define BETA 196
+#define GAMMA1 (1 << 19)
+#define GAMMA2 ((Q-1)/32)
+#define OMEGA 55
+#define CTILDEBYTES 48
+
+#elif DILITHIUM_MODE == 5
+#define K 8
+#define L 7
+#define ETA 2
+#define TAU 60
+#define BETA 120
+#define GAMMA1 (1 << 19)
+#define GAMMA2 ((Q-1)/32)
+#define OMEGA 75
+#define CTILDEBYTES 64
+
+#endif /* NOTE(review): no #else/#error branch, so any other DILITHIUM_MODE leaves K, L, ETA, ... undefined */
+
+#define POLYT1_PACKEDBYTES 320
+#define POLYT0_PACKEDBYTES 416
+#define POLYVECH_PACKEDBYTES (OMEGA + K) /* OMEGA index bytes plus K end-offset bytes, the region pack_sig() zeroes and fills */
+
+#if GAMMA1 == (1 << 17)
+#define POLYZ_PACKEDBYTES 576
+#elif GAMMA1 == (1 << 19)
+#define POLYZ_PACKEDBYTES 640
+#endif
+
+#if GAMMA2 == (Q-1)/88
+#define POLYW1_PACKEDBYTES 192
+#elif GAMMA2 == (Q-1)/32
+#define POLYW1_PACKEDBYTES 128
+#endif
+
+#if ETA == 2
+#define POLYETA_PACKEDBYTES 96
+#elif ETA == 4
+#define
POLYETA_PACKEDBYTES 128 +#endif + +#define CRYPTO_PUBLICKEYBYTES (SEEDBYTES + K*POLYT1_PACKEDBYTES) +#define CRYPTO_SECRETKEYBYTES (2*SEEDBYTES \ + + TRBYTES \ + + L*POLYETA_PACKEDBYTES \ + + K*POLYETA_PACKEDBYTES \ + + K*POLYT0_PACKEDBYTES) +#define CRYPTO_BYTES (CTILDEBYTES + L*POLYZ_PACKEDBYTES + POLYVECH_PACKEDBYTES) + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/poly.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/poly.c new file mode 100644 index 0000000000..7983aacdd1 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/poly.c @@ -0,0 +1,911 @@ +#include +#include "params.h" +#include "poly.h" +#include "ntt.h" +#include "reduce.h" +#include "rounding.h" +#include "symmetric.h" + +#ifdef DBENCH +#include "test/cpucycles.h" +extern const uint64_t timing_overhead; +extern uint64_t *tred, *tadd, *tmul, *tround, *tsample, *tpack; +#define DBENCH_START() uint64_t time = cpucycles() +#define DBENCH_STOP(t) t += cpucycles() - time - timing_overhead +#else +#define DBENCH_START() +#define DBENCH_STOP(t) +#endif + +/************************************************* +* Name: poly_reduce +* +* Description: Inplace reduction of all coefficients of polynomial to +* representative in [-6283009,6283007]. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_reduce(poly *a) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N; ++i) + a->coeffs[i] = reduce32(a->coeffs[i]); + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: poly_caddq +* +* Description: For all coefficients of in/out polynomial add Q if +* coefficient is negative. 
+*
+* Arguments:   - poly *a: pointer to input/output polynomial
+**************************************************/
+void poly_caddq(poly *a) {
+  unsigned int i;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i)
+    a->coeffs[i] = caddq(a->coeffs[i]);
+
+  DBENCH_STOP(*tred);
+}
+
+/*************************************************
+* Name:        poly_add
+*
+* Description: Add polynomials. No modular reduction is performed.
+*
+* Arguments:   - poly *c: pointer to output polynomial
+*              - const poly *a: pointer to first summand
+*              - const poly *b: pointer to second summand
+**************************************************/
+void poly_add(poly *c, const poly *a, const poly *b)  {
+  unsigned int i;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i)
+    c->coeffs[i] = a->coeffs[i] + b->coeffs[i];
+
+  DBENCH_STOP(*tadd);
+}
+
+/*************************************************
+* Name:        poly_sub
+*
+* Description: Subtract polynomials. No modular reduction is
+*              performed.
+*
+* Arguments:   - poly *c: pointer to output polynomial
+*              - const poly *a: pointer to first input polynomial
+*              - const poly *b: pointer to second input polynomial to be
+*                               subtracted from first input polynomial
+**************************************************/
+void poly_sub(poly *c, const poly *a, const poly *b) {
+  unsigned int i;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i)
+    c->coeffs[i] = a->coeffs[i] - b->coeffs[i];
+
+  DBENCH_STOP(*tadd);
+}
+
+/*************************************************
+* Name:        poly_shiftl
+*
+* Description: Multiply polynomial by 2^D without modular reduction. Assumes
+*              input coefficients to be less than 2^{31-D} in absolute value.
+*
+* Arguments:   - poly *a: pointer to input/output polynomial
+**************************************************/
+void poly_shiftl(poly *a) {
+  unsigned int i;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i)
+    a->coeffs[i] <<= D; /* NOTE(review): ISO C leaves a left shift of a negative signed value undefined -- confirm callers only pass non-negative coefficients */
+
+  DBENCH_STOP(*tmul);
+}
+
+/*************************************************
+* Name:        poly_ntt
+*
+* Description: Inplace forward NTT. Coefficients can grow by
+*              8*Q in absolute value.
+*
+* Arguments:   - poly *a: pointer to input/output polynomial
+**************************************************/
+void poly_ntt(poly *a) {
+  DBENCH_START();
+
+  ntt(a->coeffs);
+
+  DBENCH_STOP(*tmul);
+}
+
+/*************************************************
+* Name:        poly_invntt_tomont
+*
+* Description: Inplace inverse NTT and multiplication by 2^{32}.
+*              Input coefficients need to be less than Q in absolute
+*              value and output coefficients are again bounded by Q.
+*
+* Arguments:   - poly *a: pointer to input/output polynomial
+**************************************************/
+void poly_invntt_tomont(poly *a) {
+  DBENCH_START();
+
+  invntt_tomont(a->coeffs);
+
+  DBENCH_STOP(*tmul);
+}
+
+/*************************************************
+* Name:        poly_pointwise_montgomery
+*
+* Description: Pointwise multiplication of polynomials in NTT domain
+*              representation and multiplication of resulting polynomial
+*              by 2^{-32}.
+*
+* Arguments:   - poly *c: pointer to output polynomial
+*              - const poly *a: pointer to first input polynomial
+*              - const poly *b: pointer to second input polynomial
+**************************************************/
+void poly_pointwise_montgomery(poly *c, const poly *a, const poly *b) {
+  unsigned int i;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i)
+    c->coeffs[i] = montgomery_reduce((int64_t)a->coeffs[i] * b->coeffs[i]); /* widen to 64 bits before multiplying so the product is not truncated */
+
+  DBENCH_STOP(*tmul);
+}
+
+/*************************************************
+* Name:        poly_power2round
+*
+* Description: For all coefficients c of the input polynomial,
+*              compute c0, c1 such that c mod Q = c1*2^D + c0
+*              with -2^{D-1} < c0 <= 2^{D-1}. Assumes coefficients to be
+*              standard representatives.
+*
+* Arguments:   - poly *a1: pointer to output polynomial with coefficients c1
+*              - poly *a0: pointer to output polynomial with coefficients c0
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void poly_power2round(poly *a1, poly *a0, const poly *a) {
+  unsigned int i;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i)
+    a1->coeffs[i] = power2round(&a0->coeffs[i], a->coeffs[i]);
+
+  DBENCH_STOP(*tround);
+}
+
+/*************************************************
+* Name:        poly_decompose
+*
+* Description: For all coefficients c of the input polynomial,
+*              compute high and low bits c0, c1 such that c mod Q = c1*ALPHA + c0
+*              with -ALPHA/2 < c0 <= ALPHA/2 except c1 = (Q-1)/ALPHA where we
+*              set c1 = 0 and -ALPHA/2 <= c0 = c mod Q - Q < 0.
+*              Assumes coefficients to be standard representatives.
+*
+* Arguments:   - poly *a1: pointer to output polynomial with coefficients c1
+*              - poly *a0: pointer to output polynomial with coefficients c0
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void poly_decompose(poly *a1, poly *a0, const poly *a) {
+  unsigned int i;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i)
+    a1->coeffs[i] = decompose(&a0->coeffs[i], a->coeffs[i]); /* return value is stored in a1; a0 is written through the pointer */
+
+  DBENCH_STOP(*tround);
+}
+
+/*************************************************
+* Name:        poly_make_hint
+*
+* Description: Compute hint polynomial. The coefficients of which indicate
+*              whether the low bits of the corresponding coefficient of
+*              the input polynomial overflow into the high bits.
+*
+* Arguments:   - poly *h: pointer to output hint polynomial
+*              - const poly *a0: pointer to low part of input polynomial
+*              - const poly *a1: pointer to high part of input polynomial
+*
+* Returns number of 1 bits.
+**************************************************/
+unsigned int poly_make_hint(poly *h, const poly *a0, const poly *a1) {
+  unsigned int i, s = 0;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i) {
+    h->coeffs[i] = make_hint(a0->coeffs[i], a1->coeffs[i]);
+    s += h->coeffs[i]; /* running total of hint values; this sum is the function's return value */
+  }
+
+  DBENCH_STOP(*tround);
+  return s;
+}
+
+/*************************************************
+* Name:        poly_use_hint
+*
+* Description: Use hint polynomial to correct the high bits of a polynomial.
+*
+* Arguments:   - poly *b: pointer to output polynomial with corrected high bits
+*              - const poly *a: pointer to input polynomial
+*              - const poly *h: pointer to input hint polynomial
+**************************************************/
+void poly_use_hint(poly *b, const poly *a, const poly *h) {
+  unsigned int i;
+  DBENCH_START();
+
+  for(i = 0; i < N; ++i)
+    b->coeffs[i] = use_hint(a->coeffs[i], h->coeffs[i]);
+
+  DBENCH_STOP(*tround);
+}
+
+/*************************************************
+* Name:        poly_chknorm
+*
+* Description: Check infinity norm of polynomial against given bound.
+*              Assumes input coefficients were reduced by reduce32().
+*
+* Arguments:   - const poly *a: pointer to polynomial
+*              - int32_t B: norm bound
+*
+* Returns 0 if norm is strictly smaller than B <= (Q-1)/8 and 1 otherwise.
+**************************************************/
+int poly_chknorm(const poly *a, int32_t B) {
+  unsigned int i;
+  int32_t t;
+  DBENCH_START();
+
+  if(B > (Q-1)/8)
+    return 1;
+
+  /* It is ok to leak which coefficient violates the bound since
+     the probability for each coefficient is independent of secret
+     data but we must not leak the sign of the centralized representative. */
+  for(i = 0; i < N; ++i) {
+    /* Absolute value */
+    t = a->coeffs[i] >> 31; /* 0 for non-negative x; presumably all-ones for negative x (right shift of a negative value is implementation-defined in ISO C) */
+    t = a->coeffs[i] - (t & 2*a->coeffs[i]); /* x - 2x = -x under an all-ones mask, x unchanged under a zero mask; no branch on the sign */
+
+    if(t >= B) {
+      DBENCH_STOP(*tsample);
+      return 1;
+    }
+  }
+
+  DBENCH_STOP(*tsample);
+  return 0;
+}
+
+/*************************************************
+* Name:        rej_uniform
+*
+* Description: Sample uniformly random coefficients in [0, Q-1] by
+*              performing rejection sampling on array of random bytes.
+*
+* Arguments:   - int32_t *a: pointer to output array (allocated)
+*              - unsigned int len: number of coefficients to be sampled
+*              - const uint8_t *buf: array of random bytes
+*              - unsigned int buflen: length of array of random bytes
+*
+* Returns number of sampled coefficients. Can be smaller than len if not enough
+* random bytes were given.
+**************************************************/
+static unsigned int rej_uniform(int32_t *a,
+                                unsigned int len,
+                                const uint8_t *buf,
+                                unsigned int buflen)
+{
+  unsigned int ctr, pos;
+  uint32_t t;
+  DBENCH_START();
+
+  ctr = pos = 0;
+  while(ctr < len && pos + 3 <= buflen) {
+    t = buf[pos++]; /* assemble a little-endian 24-bit value from three bytes */
+    t |= (uint32_t)buf[pos++] << 8;
+    t |= (uint32_t)buf[pos++] << 16;
+    t &= 0x7FFFFF; /* keep only the low 23 bits */
+
+    if(t < Q)
+      a[ctr++] = t;
+  }
+
+  DBENCH_STOP(*tsample);
+  return ctr;
+}
+
+/*************************************************
+* Name:        poly_uniform
+*
+* Description: Sample polynomial with uniformly random coefficients
+*              in [0,Q-1] by performing rejection sampling on the
+*              output stream of stream128 initialised with (seed, nonce)
+*
+* Arguments:   - poly *a: pointer to output polynomial
+*              - const uint8_t seed[]: byte array with seed of length SEEDBYTES
+*              - uint16_t nonce: 2-byte nonce
+**************************************************/
+#define POLY_UNIFORM_NBLOCKS ((768 + STREAM128_BLOCKBYTES - 1)/STREAM128_BLOCKBYTES)
+void poly_uniform(poly *a,
+                  const uint8_t seed[SEEDBYTES],
+                  uint16_t nonce)
+{
+  unsigned int i, ctr, off;
+  unsigned int buflen = POLY_UNIFORM_NBLOCKS*STREAM128_BLOCKBYTES;
+  uint8_t buf[POLY_UNIFORM_NBLOCKS*STREAM128_BLOCKBYTES + 2]; /* +2: room for up to two leftover bytes carried in front of a refill block */
+  stream128_state state;
+
+  stream128_init(&state, seed, nonce);
+  stream128_squeezeblocks(buf, POLY_UNIFORM_NBLOCKS, &state);
+
+  ctr = rej_uniform(a->coeffs, N, buf, buflen);
+
+  while(ctr < N) {
+    off = buflen % 3; /* bytes left over after the last whole 3-byte group */
+    for(i = 0; i < off; ++i)
+      buf[i] = buf[buflen - off + i];
+
+    stream128_squeezeblocks(buf + off, 1, &state);
+    buflen = STREAM128_BLOCKBYTES + off;
+    ctr += rej_uniform(a->coeffs + ctr, N - ctr, buf, buflen);
+  }
+  stream128_release(&state);
+}
+
+/*************************************************
+* Name:        rej_eta
+*
+* Description: Sample uniformly random coefficients in [-ETA, ETA] by
+*              performing rejection sampling on array of random bytes.
+*
+* Arguments:   - int32_t *a: pointer to output array (allocated)
+*              - unsigned int len: number of coefficients to be sampled
+*              - const uint8_t *buf: array of random bytes
+*              - unsigned int buflen: length of array of random bytes
+*
+* Returns number of sampled coefficients. Can be smaller than len if not enough
+* random bytes were given.
+**************************************************/
+static unsigned int rej_eta(int32_t *a,
+                            unsigned int len,
+                            const uint8_t *buf,
+                            unsigned int buflen)
+{
+  unsigned int ctr, pos;
+  uint32_t t0, t1;
+  DBENCH_START();
+
+  ctr = pos = 0;
+  while(ctr < len && pos < buflen) {
+    t0 = buf[pos] & 0x0F; /* low nibble */
+    t1 = buf[pos++] >> 4; /* high nibble */
+
+#if ETA == 2
+    if(t0 < 15) {
+      t0 = t0 - (205*t0 >> 10)*5; /* t0 mod 5: (205*t0 >> 10) equals t0/5 for t0 in 0..14 */
+      a[ctr++] = 2 - t0;
+    }
+    if(t1 < 15 && ctr < len) {
+      t1 = t1 - (205*t1 >> 10)*5; /* t1 mod 5, same trick as above */
+      a[ctr++] = 2 - t1;
+    }
+#elif ETA == 4
+    if(t0 < 9)
+      a[ctr++] = 4 - t0;
+    if(t1 < 9 && ctr < len)
+      a[ctr++] = 4 - t1;
+#endif
+  }
+
+  DBENCH_STOP(*tsample);
+  return ctr;
+}
+
+/*************************************************
+* Name:        poly_uniform_eta
+*
+* Description: Sample polynomial with uniformly random coefficients
+*              in [-ETA,ETA] by performing rejection sampling on the
+*              output stream from SHAKE256(seed|nonce)
+*
+* Arguments:   - poly *a: pointer to output polynomial
+*              - const uint8_t seed[]: byte array with seed of length CRHBYTES
+*              - uint16_t nonce: 2-byte nonce
+**************************************************/
+#if ETA == 2
+#define POLY_UNIFORM_ETA_NBLOCKS ((136 + STREAM256_BLOCKBYTES - 1)/STREAM256_BLOCKBYTES)
+#elif ETA == 4
+#define POLY_UNIFORM_ETA_NBLOCKS ((227 + STREAM256_BLOCKBYTES - 1)/STREAM256_BLOCKBYTES)
+#endif
+void poly_uniform_eta(poly *a,
+                      const uint8_t seed[CRHBYTES],
+                      uint16_t nonce)
+{
+  unsigned int ctr;
+  unsigned int buflen = POLY_UNIFORM_ETA_NBLOCKS*STREAM256_BLOCKBYTES;
+  uint8_t buf[POLY_UNIFORM_ETA_NBLOCKS*STREAM256_BLOCKBYTES];
+  stream256_state state;
+
+  stream256_init(&state, seed, nonce);
+
stream256_squeezeblocks(buf, POLY_UNIFORM_ETA_NBLOCKS, &state);
+
+  ctr = rej_eta(a->coeffs, N, buf, buflen);
+
+  while(ctr < N) { /* not enough accepted samples yet: squeeze one more block and continue */
+    stream256_squeezeblocks(buf, 1, &state);
+    ctr += rej_eta(a->coeffs + ctr, N - ctr, buf, STREAM256_BLOCKBYTES);
+  }
+  stream256_release(&state);
+}
+
+/*************************************************
+* Name:        poly_uniform_gamma1
+*
+* Description: Sample polynomial with uniformly random coefficients
+*              in [-(GAMMA1 - 1), GAMMA1] by unpacking output stream
+*              of SHAKE256(seed|nonce)
+*
+* Arguments:   - poly *a: pointer to output polynomial
+*              - const uint8_t seed[]: byte array with seed of length CRHBYTES
+*              - uint16_t nonce: 16-bit nonce
+**************************************************/
+#define POLY_UNIFORM_GAMMA1_NBLOCKS ((POLYZ_PACKEDBYTES + STREAM256_BLOCKBYTES - 1)/STREAM256_BLOCKBYTES)
+void poly_uniform_gamma1(poly *a,
+                         const uint8_t seed[CRHBYTES],
+                         uint16_t nonce)
+{
+  uint8_t buf[POLY_UNIFORM_GAMMA1_NBLOCKS*STREAM256_BLOCKBYTES];
+  stream256_state state;
+
+  stream256_init(&state, seed, nonce);
+  stream256_squeezeblocks(buf, POLY_UNIFORM_GAMMA1_NBLOCKS, &state);
+  stream256_release(&state);
+  polyz_unpack(a, buf); /* no rejection step here: the squeezed bytes are decoded directly */
+}
+
+/*************************************************
+* Name:        poly_challenge
+*
+* Description: Implementation of H. Samples polynomial with TAU nonzero
+*              coefficients in {-1,1} using the output stream of
+*              SHAKE256(seed).
+* +* Arguments: - poly *c: pointer to output polynomial +* - const uint8_t seed[]: byte array containing seed of length SEEDBYTES +**************************************************/ +void poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]) { + unsigned int i, b, pos; + uint64_t signs; + uint8_t buf[SHAKE256_RATE]; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, seed, SEEDBYTES); + shake256_inc_finalize(&state); + shake256_squeezeblocks(buf, 1, &state); + + signs = 0; + for(i = 0; i < 8; ++i) /* first 8 output bytes, little-endian, hold the sign bits */ + signs |= (uint64_t)buf[i] << 8*i; + pos = 8; /* index sampling starts after the sign bytes */ + + for(i = 0; i < N; ++i) + c->coeffs[i] = 0; + for(i = N-TAU; i < N; ++i) { + do { + if(pos >= SHAKE256_RATE) { /* buffer exhausted: squeeze another block */ + shake256_squeezeblocks(buf, 1, &state); + pos = 0; + } + + b = buf[pos++]; + } while(b > i); /* rejection-sample a byte b <= i */ + + c->coeffs[i] = c->coeffs[b]; /* move the old entry at b to slot i ... */ + c->coeffs[b] = 1 - 2*(signs & 1); /* ... and place +1 (sign bit 0) or -1 (sign bit 1) at b */ + signs >>= 1; + } + shake256_inc_ctx_release(&state); +} + +/************************************************* +* Name: polyeta_pack +* +* Description: Bit-pack polynomial with coefficients in [-ETA,ETA]. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYETA_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void polyeta_pack(uint8_t *r, const poly *a) { + unsigned int i; + uint8_t t[8]; + DBENCH_START(); + +#if ETA == 2 /* 3 bits per coefficient: 8 coefficients -> 3 bytes */ + for(i = 0; i < N/8; ++i) { + t[0] = ETA - a->coeffs[8*i+0]; /* stored value is ETA - coeff, i.e. 0..2*ETA for coeff in [-ETA,ETA] */ + t[1] = ETA - a->coeffs[8*i+1]; + t[2] = ETA - a->coeffs[8*i+2]; + t[3] = ETA - a->coeffs[8*i+3]; + t[4] = ETA - a->coeffs[8*i+4]; + t[5] = ETA - a->coeffs[8*i+5]; + t[6] = ETA - a->coeffs[8*i+6]; + t[7] = ETA - a->coeffs[8*i+7]; + + r[3*i+0] = (t[0] >> 0) | (t[1] << 3) | (t[2] << 6); + r[3*i+1] = (t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7); /* t[2] and t[5] straddle byte boundaries */ + r[3*i+2] = (t[5] >> 1) | (t[6] << 2) | (t[7] << 5); + } +#elif ETA == 4 /* 4 bits per coefficient: 2 coefficients -> 1 byte */ + for(i = 0; i < N/2; ++i) { + t[0] = ETA - a->coeffs[2*i+0]; + t[1] = ETA - a->coeffs[2*i+1]; + r[i] = t[0] | (t[1] << 4); + } +#endif + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyeta_unpack +* +* Description: Unpack polynomial with coefficients in [-ETA,ETA]. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void polyeta_unpack(poly *r, const uint8_t *a) { + unsigned int i; + DBENCH_START(); + +#if ETA == 2 /* 3 bytes -> 8 coefficients of 3 bits each */ + for(i = 0; i < N/8; ++i) { + r->coeffs[8*i+0] = (a[3*i+0] >> 0) & 7; + r->coeffs[8*i+1] = (a[3*i+0] >> 3) & 7; + r->coeffs[8*i+2] = ((a[3*i+0] >> 6) | (a[3*i+1] << 2)) & 7; /* field spans bytes 0 and 1 */ + r->coeffs[8*i+3] = (a[3*i+1] >> 1) & 7; + r->coeffs[8*i+4] = (a[3*i+1] >> 4) & 7; + r->coeffs[8*i+5] = ((a[3*i+1] >> 7) | (a[3*i+2] << 1)) & 7; /* field spans bytes 1 and 2 */ + r->coeffs[8*i+6] = (a[3*i+2] >> 2) & 7; + r->coeffs[8*i+7] = (a[3*i+2] >> 5) & 7; + + r->coeffs[8*i+0] = ETA - r->coeffs[8*i+0]; /* undo the ETA - coeff encoding; the 3-bit field is not range-checked here */ + r->coeffs[8*i+1] = ETA - r->coeffs[8*i+1]; + r->coeffs[8*i+2] = ETA - r->coeffs[8*i+2]; + r->coeffs[8*i+3] = ETA - r->coeffs[8*i+3]; + r->coeffs[8*i+4] = ETA - r->coeffs[8*i+4]; + r->coeffs[8*i+5] = ETA - r->coeffs[8*i+5]; + r->coeffs[8*i+6] = ETA - r->coeffs[8*i+6]; + r->coeffs[8*i+7] = ETA - r->coeffs[8*i+7]; + } +#elif ETA == 4 /* 1 byte -> 2 coefficients of 4 bits each */ + for(i = 0; i < N/2; ++i) { + r->coeffs[2*i+0] = a[i] & 0x0F; + r->coeffs[2*i+1] = a[i] >> 4; + r->coeffs[2*i+0] = ETA - r->coeffs[2*i+0]; + r->coeffs[2*i+1] = ETA - r->coeffs[2*i+1]; + } +#endif + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyt1_pack +* +* Description: Bit-pack polynomial t1 with coefficients fitting in 10 bits. +* Input coefficients are assumed to be standard representatives. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYT1_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void polyt1_pack(uint8_t *r, const poly *a) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N/4; ++i) { /* 4 coefficients x 10 bits -> 5 bytes */ + r[5*i+0] = (a->coeffs[4*i+0] >> 0); + r[5*i+1] = (a->coeffs[4*i+0] >> 8) | (a->coeffs[4*i+1] << 2); /* assignment to uint8_t keeps only the low 8 bits */ + r[5*i+2] = (a->coeffs[4*i+1] >> 6) | (a->coeffs[4*i+2] << 4); + r[5*i+3] = (a->coeffs[4*i+2] >> 4) | (a->coeffs[4*i+3] << 6); + r[5*i+4] = (a->coeffs[4*i+3] >> 2); + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyt1_unpack +* +* Description: Unpack polynomial t1 with 10-bit coefficients. +* Output coefficients are standard representatives. +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void polyt1_unpack(poly *r, const uint8_t *a) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N/4; ++i) { /* 5 bytes -> 4 coefficients; 0x3FF keeps 10 bits */ + r->coeffs[4*i+0] = ((a[5*i+0] >> 0) | ((uint32_t)a[5*i+1] << 8)) & 0x3FF; + r->coeffs[4*i+1] = ((a[5*i+1] >> 2) | ((uint32_t)a[5*i+2] << 6)) & 0x3FF; + r->coeffs[4*i+2] = ((a[5*i+2] >> 4) | ((uint32_t)a[5*i+3] << 4)) & 0x3FF; + r->coeffs[4*i+3] = ((a[5*i+3] >> 6) | ((uint32_t)a[5*i+4] << 2)) & 0x3FF; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyt0_pack +* +* Description: Bit-pack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}]. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYT0_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void polyt0_pack(uint8_t *r, const poly *a) { + unsigned int i; + uint32_t t[8]; + DBENCH_START(); + + for(i = 0; i < N/8; ++i) { /* 8 coefficients x 13 bits -> 13 bytes; NOTE(review): the layout presumes D == 13, confirm in params.h */ + t[0] = (1 << (D-1)) - a->coeffs[8*i+0]; /* stored value is 2^(D-1) - coeff, non-negative for coeff <= 2^(D-1) */ + t[1] = (1 << (D-1)) - a->coeffs[8*i+1]; + t[2] = (1 << (D-1)) - a->coeffs[8*i+2]; + t[3] = (1 << (D-1)) - a->coeffs[8*i+3]; + t[4] = (1 << (D-1)) - a->coeffs[8*i+4]; + t[5] = (1 << (D-1)) - a->coeffs[8*i+5]; + t[6] = (1 << (D-1)) - a->coeffs[8*i+6]; + t[7] = (1 << (D-1)) - a->coeffs[8*i+7]; + + r[13*i+ 0] = t[0]; + r[13*i+ 1] = t[0] >> 8; + r[13*i+ 1] |= t[1] << 5; /* each |= merges the next coefficient into the partly filled byte */ + r[13*i+ 2] = t[1] >> 3; + r[13*i+ 3] = t[1] >> 11; + r[13*i+ 3] |= t[2] << 2; + r[13*i+ 4] = t[2] >> 6; + r[13*i+ 4] |= t[3] << 7; + r[13*i+ 5] = t[3] >> 1; + r[13*i+ 6] = t[3] >> 9; + r[13*i+ 6] |= t[4] << 4; + r[13*i+ 7] = t[4] >> 4; + r[13*i+ 8] = t[4] >> 12; + r[13*i+ 8] |= t[5] << 1; + r[13*i+ 9] = t[5] >> 7; + r[13*i+ 9] |= t[6] << 6; + r[13*i+10] = t[6] >> 2; + r[13*i+11] = t[6] >> 10; + r[13*i+11] |= t[7] << 3; + r[13*i+12] = t[7] >> 5; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyt0_unpack +* +* Description: Unpack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}]. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void polyt0_unpack(poly *r, const uint8_t *a) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N/8; ++i) { /* 13 bytes -> 8 coefficients; 0x1FFF keeps 13 bits per field */ + r->coeffs[8*i+0] = a[13*i+0]; + r->coeffs[8*i+0] |= (uint32_t)a[13*i+1] << 8; + r->coeffs[8*i+0] &= 0x1FFF; + + r->coeffs[8*i+1] = a[13*i+1] >> 5; + r->coeffs[8*i+1] |= (uint32_t)a[13*i+2] << 3; + r->coeffs[8*i+1] |= (uint32_t)a[13*i+3] << 11; + r->coeffs[8*i+1] &= 0x1FFF; + + r->coeffs[8*i+2] = a[13*i+3] >> 2; + r->coeffs[8*i+2] |= (uint32_t)a[13*i+4] << 6; + r->coeffs[8*i+2] &= 0x1FFF; + + r->coeffs[8*i+3] = a[13*i+4] >> 7; + r->coeffs[8*i+3] |= (uint32_t)a[13*i+5] << 1; + r->coeffs[8*i+3] |= (uint32_t)a[13*i+6] << 9; + r->coeffs[8*i+3] &= 0x1FFF; + + r->coeffs[8*i+4] = a[13*i+6] >> 4; + r->coeffs[8*i+4] |= (uint32_t)a[13*i+7] << 4; + r->coeffs[8*i+4] |= (uint32_t)a[13*i+8] << 12; + r->coeffs[8*i+4] &= 0x1FFF; + + r->coeffs[8*i+5] = a[13*i+8] >> 1; + r->coeffs[8*i+5] |= (uint32_t)a[13*i+9] << 7; + r->coeffs[8*i+5] &= 0x1FFF; + + r->coeffs[8*i+6] = a[13*i+9] >> 6; + r->coeffs[8*i+6] |= (uint32_t)a[13*i+10] << 2; + r->coeffs[8*i+6] |= (uint32_t)a[13*i+11] << 10; + r->coeffs[8*i+6] &= 0x1FFF; + + r->coeffs[8*i+7] = a[13*i+11] >> 3; + r->coeffs[8*i+7] |= (uint32_t)a[13*i+12] << 5; + r->coeffs[8*i+7] &= 0x1FFF; + + r->coeffs[8*i+0] = (1 << (D-1)) - r->coeffs[8*i+0]; /* undo the 2^(D-1) - coeff encoding used by polyt0_pack */ + r->coeffs[8*i+1] = (1 << (D-1)) - r->coeffs[8*i+1]; + r->coeffs[8*i+2] = (1 << (D-1)) - r->coeffs[8*i+2]; + r->coeffs[8*i+3] = (1 << (D-1)) - r->coeffs[8*i+3]; + r->coeffs[8*i+4] = (1 << (D-1)) - r->coeffs[8*i+4]; + r->coeffs[8*i+5] = (1 << (D-1)) - r->coeffs[8*i+5]; + r->coeffs[8*i+6] = (1 << (D-1)) - r->coeffs[8*i+6]; + r->coeffs[8*i+7] = (1 << (D-1)) - r->coeffs[8*i+7]; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyz_pack +* +* Description: Bit-pack polynomial with 
coefficients +* in [-(GAMMA1 - 1), GAMMA1]. +* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYZ_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void polyz_pack(uint8_t *r, const poly *a) { + unsigned int i; + uint32_t t[4]; + DBENCH_START(); + +#if GAMMA1 == (1 << 17) /* 18 bits per coefficient: 4 coefficients -> 9 bytes */ + for(i = 0; i < N/4; ++i) { + t[0] = GAMMA1 - a->coeffs[4*i+0]; /* stored value is GAMMA1 - coeff, non-negative for coeff <= GAMMA1 */ + t[1] = GAMMA1 - a->coeffs[4*i+1]; + t[2] = GAMMA1 - a->coeffs[4*i+2]; + t[3] = GAMMA1 - a->coeffs[4*i+3]; + + r[9*i+0] = t[0]; + r[9*i+1] = t[0] >> 8; + r[9*i+2] = t[0] >> 16; + r[9*i+2] |= t[1] << 2; + r[9*i+3] = t[1] >> 6; + r[9*i+4] = t[1] >> 14; + r[9*i+4] |= t[2] << 4; + r[9*i+5] = t[2] >> 4; + r[9*i+6] = t[2] >> 12; + r[9*i+6] |= t[3] << 6; + r[9*i+7] = t[3] >> 2; + r[9*i+8] = t[3] >> 10; + } +#elif GAMMA1 == (1 << 19) /* 20 bits per coefficient: 2 coefficients -> 5 bytes */ + for(i = 0; i < N/2; ++i) { + t[0] = GAMMA1 - a->coeffs[2*i+0]; + t[1] = GAMMA1 - a->coeffs[2*i+1]; + + r[5*i+0] = t[0]; + r[5*i+1] = t[0] >> 8; + r[5*i+2] = t[0] >> 16; + r[5*i+2] |= t[1] << 4; + r[5*i+3] = t[1] >> 4; + r[5*i+4] = t[1] >> 12; + } +#endif + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyz_unpack +* +* Description: Unpack polynomial z with coefficients +* in [-(GAMMA1 - 1), GAMMA1]. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void polyz_unpack(poly *r, const uint8_t *a) { + unsigned int i; + DBENCH_START(); + +#if GAMMA1 == (1 << 17) /* 9 bytes -> 4 coefficients; 0x3FFFF keeps 18 bits per field */ + for(i = 0; i < N/4; ++i) { + r->coeffs[4*i+0] = a[9*i+0]; + r->coeffs[4*i+0] |= (uint32_t)a[9*i+1] << 8; + r->coeffs[4*i+0] |= (uint32_t)a[9*i+2] << 16; + r->coeffs[4*i+0] &= 0x3FFFF; + + r->coeffs[4*i+1] = a[9*i+2] >> 2; + r->coeffs[4*i+1] |= (uint32_t)a[9*i+3] << 6; + r->coeffs[4*i+1] |= (uint32_t)a[9*i+4] << 14; + r->coeffs[4*i+1] &= 0x3FFFF; + + r->coeffs[4*i+2] = a[9*i+4] >> 4; + r->coeffs[4*i+2] |= (uint32_t)a[9*i+5] << 4; + r->coeffs[4*i+2] |= (uint32_t)a[9*i+6] << 12; + r->coeffs[4*i+2] &= 0x3FFFF; + + r->coeffs[4*i+3] = a[9*i+6] >> 6; + r->coeffs[4*i+3] |= (uint32_t)a[9*i+7] << 2; + r->coeffs[4*i+3] |= (uint32_t)a[9*i+8] << 10; + r->coeffs[4*i+3] &= 0x3FFFF; + + r->coeffs[4*i+0] = GAMMA1 - r->coeffs[4*i+0]; /* undo the GAMMA1 - coeff encoding used by polyz_pack */ + r->coeffs[4*i+1] = GAMMA1 - r->coeffs[4*i+1]; + r->coeffs[4*i+2] = GAMMA1 - r->coeffs[4*i+2]; + r->coeffs[4*i+3] = GAMMA1 - r->coeffs[4*i+3]; + } +#elif GAMMA1 == (1 << 19) /* 5 bytes -> 2 coefficients of 20 bits each */ + for(i = 0; i < N/2; ++i) { + r->coeffs[2*i+0] = a[5*i+0]; + r->coeffs[2*i+0] |= (uint32_t)a[5*i+1] << 8; + r->coeffs[2*i+0] |= (uint32_t)a[5*i+2] << 16; + r->coeffs[2*i+0] &= 0xFFFFF; + + r->coeffs[2*i+1] = a[5*i+2] >> 4; + r->coeffs[2*i+1] |= (uint32_t)a[5*i+3] << 4; + r->coeffs[2*i+1] |= (uint32_t)a[5*i+4] << 12; + /* r->coeffs[2*i+1] &= 0xFFFFF; */ /* No effect, since we're anyway at 20 bits */ + + r->coeffs[2*i+0] = GAMMA1 - r->coeffs[2*i+0]; + r->coeffs[2*i+1] = GAMMA1 - r->coeffs[2*i+1]; + } +#endif + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyw1_pack +* +* Description: Bit-pack polynomial w1 with coefficients in [0,15] or [0,43]. +* Input coefficients are assumed to be standard representatives. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYW1_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void polyw1_pack(uint8_t *r, const poly *a) { + unsigned int i; + DBENCH_START(); + +#if GAMMA2 == (Q-1)/88 /* 6 bits per coefficient: 4 coefficients -> 3 bytes */ + for(i = 0; i < N/4; ++i) { + r[3*i+0] = a->coeffs[4*i+0]; + r[3*i+0] |= a->coeffs[4*i+1] << 6; + r[3*i+1] = a->coeffs[4*i+1] >> 2; + r[3*i+1] |= a->coeffs[4*i+2] << 4; + r[3*i+2] = a->coeffs[4*i+2] >> 4; + r[3*i+2] |= a->coeffs[4*i+3] << 2; + } +#elif GAMMA2 == (Q-1)/32 /* 4 bits per coefficient: 2 coefficients -> 1 byte */ + for(i = 0; i < N/2; ++i) + r[i] = a->coeffs[2*i+0] | (a->coeffs[2*i+1] << 4); +#endif + + DBENCH_STOP(*tpack); +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/poly.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/poly.h new file mode 100644 index 0000000000..d2fd989b6a --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/poly.h @@ -0,0 +1,79 @@ +#ifndef POLY_H +#define POLY_H + +#include <stdint.h> +#include "params.h" + +typedef struct { + int32_t coeffs[N]; /* one polynomial: N signed 32-bit coefficients */ +} poly; + +#define poly_reduce DILITHIUM_NAMESPACE(poly_reduce) +void poly_reduce(poly *a); +#define poly_caddq DILITHIUM_NAMESPACE(poly_caddq) +void poly_caddq(poly *a); + +#define poly_add DILITHIUM_NAMESPACE(poly_add) +void poly_add(poly *c, const poly *a, const poly *b); +#define poly_sub DILITHIUM_NAMESPACE(poly_sub) +void poly_sub(poly *c, const poly *a, const poly *b); +#define poly_shiftl DILITHIUM_NAMESPACE(poly_shiftl) +void poly_shiftl(poly *a); + +#define poly_ntt DILITHIUM_NAMESPACE(poly_ntt) +void poly_ntt(poly *a); +#define poly_invntt_tomont DILITHIUM_NAMESPACE(poly_invntt_tomont) +void poly_invntt_tomont(poly *a); +#define poly_pointwise_montgomery DILITHIUM_NAMESPACE(poly_pointwise_montgomery) +void poly_pointwise_montgomery(poly *c, const poly *a, const poly *b); + +#define poly_power2round DILITHIUM_NAMESPACE(poly_power2round) +void 
poly_power2round(poly *a1, poly *a0, const poly *a); +#define poly_decompose DILITHIUM_NAMESPACE(poly_decompose) +void poly_decompose(poly *a1, poly *a0, const poly *a); +#define poly_make_hint DILITHIUM_NAMESPACE(poly_make_hint) +unsigned int poly_make_hint(poly *h, const poly *a0, const poly *a1); +#define poly_use_hint DILITHIUM_NAMESPACE(poly_use_hint) +void poly_use_hint(poly *b, const poly *a, const poly *h); + +#define poly_chknorm DILITHIUM_NAMESPACE(poly_chknorm) +int poly_chknorm(const poly *a, int32_t B); +#define poly_uniform DILITHIUM_NAMESPACE(poly_uniform) +void poly_uniform(poly *a, + const uint8_t seed[SEEDBYTES], + uint16_t nonce); /* seed is SEEDBYTES long here; the eta and gamma1 samplers below take CRHBYTES */ +#define poly_uniform_eta DILITHIUM_NAMESPACE(poly_uniform_eta) +void poly_uniform_eta(poly *a, + const uint8_t seed[CRHBYTES], + uint16_t nonce); +#define poly_uniform_gamma1 DILITHIUM_NAMESPACE(poly_uniform_gamma1) +void poly_uniform_gamma1(poly *a, + const uint8_t seed[CRHBYTES], + uint16_t nonce); +#define poly_challenge DILITHIUM_NAMESPACE(poly_challenge) +void poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]); + +#define polyeta_pack DILITHIUM_NAMESPACE(polyeta_pack) +void polyeta_pack(uint8_t *r, const poly *a); /* *_pack: poly a -> byte array r; *_unpack: byte array a -> poly r */ +#define polyeta_unpack DILITHIUM_NAMESPACE(polyeta_unpack) +void polyeta_unpack(poly *r, const uint8_t *a); + +#define polyt1_pack DILITHIUM_NAMESPACE(polyt1_pack) +void polyt1_pack(uint8_t *r, const poly *a); +#define polyt1_unpack DILITHIUM_NAMESPACE(polyt1_unpack) +void polyt1_unpack(poly *r, const uint8_t *a); + +#define polyt0_pack DILITHIUM_NAMESPACE(polyt0_pack) +void polyt0_pack(uint8_t *r, const poly *a); +#define polyt0_unpack DILITHIUM_NAMESPACE(polyt0_unpack) +void polyt0_unpack(poly *r, const uint8_t *a); + +#define polyz_pack DILITHIUM_NAMESPACE(polyz_pack) +void polyz_pack(uint8_t *r, const poly *a); +#define polyz_unpack DILITHIUM_NAMESPACE(polyz_unpack) +void polyz_unpack(poly *r, const uint8_t *a); + +#define polyw1_pack DILITHIUM_NAMESPACE(polyw1_pack) /* w1 has a pack routine only; no polyw1_unpack is declared in this header */ +void polyw1_pack(uint8_t 
*r, const poly *a); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/polyvec.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/polyvec.c new file mode 100644 index 0000000000..40032b656b --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/polyvec.c @@ -0,0 +1,389 @@ +#include <stdint.h> +#include "params.h" +#include "polyvec.h" +#include "poly.h" + +/************************************************* +* Name: polyvec_matrix_expand +* +* Description: Implementation of ExpandA. Generates matrix A with uniformly +* random coefficients a_{i,j} by performing rejection +* sampling on the output stream of SHAKE128(rho|j|i) +* +* Arguments: - polyvecl mat[K]: output matrix +* - const uint8_t rho[]: byte array containing seed rho +**************************************************/ +void polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]) { + unsigned int i, j; + + for(i = 0; i < K; ++i) + for(j = 0; j < L; ++j) + poly_uniform(&mat[i].vec[j], rho, (i << 8) + j); /* nonce = 256*i + j: row index above the column index */ +} + +void polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v) { /* t[i] = accumulated pointwise product of matrix row i with v, for each of the K rows */ + unsigned int i; + + for(i = 0; i < K; ++i) + polyvecl_pointwise_acc_montgomery(&t->vec[i], &mat[i], v); +} + +/**************************************************************/ +/************ Vectors of polynomials of length L **************/ +/**************************************************************/ + +void polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce) { /* fills all L polynomials via poly_uniform_eta */ + unsigned int i; + + for(i = 0; i < L; ++i) + poly_uniform_eta(&v->vec[i], seed, nonce++); /* consecutive nonces, one per polynomial */ +} + +void polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce) { /* fills all L polynomials via poly_uniform_gamma1 */ + unsigned int i; + + for(i = 0; i < L; ++i) + poly_uniform_gamma1(&v->vec[i], seed, L*nonce + i); /* each call uses the nonce block L*nonce .. L*nonce + L-1 */ +} + +void polyvecl_reduce(polyvecl *v) { /* applies poly_reduce to each of the L polynomials in place */ + unsigned int i; + + for(i = 0; i < L; ++i) + poly_reduce(&v->vec[i]); +} + 
+/************************************************* +* Name: polyvecl_add +* +* Description: Add vectors of polynomials of length L. +* No modular reduction is performed. +* +* Arguments: - polyvecl *w: pointer to output vector +* - const polyvecl *u: pointer to first summand +* - const polyvecl *v: pointer to second summand +**************************************************/ +void polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyvecl_ntt +* +* Description: Forward NTT of all polynomials in vector of length L. Output +* coefficients can be up to 16*Q larger than input coefficients. +* +* Arguments: - polyvecl *v: pointer to input/output vector +**************************************************/ +void polyvecl_ntt(polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_ntt(&v->vec[i]); +} + +void polyvecl_invntt_tomont(polyvecl *v) { /* applies poly_invntt_tomont to each of the L polynomials in place */ + unsigned int i; + + for(i = 0; i < L; ++i) + poly_invntt_tomont(&v->vec[i]); +} + +void polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v) { /* r[i] = poly_pointwise_montgomery(a, v[i]): the single polynomial a multiplies every entry of v */ + unsigned int i; + + for(i = 0; i < L; ++i) + poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]); +} + +/************************************************* +* Name: polyvecl_pointwise_acc_montgomery +* +* Description: Pointwise multiply vectors of polynomials of length L, multiply +* resulting vector by 2^{-32} and add (accumulate) polynomials +* in it. Input/output vectors are in NTT domain representation. 
+* +* Arguments: - poly *w: output polynomial +* - const polyvecl *u: pointer to first input vector +* - const polyvecl *v: pointer to second input vector +**************************************************/ +void polyvecl_pointwise_acc_montgomery(poly *w, + const polyvecl *u, + const polyvecl *v) +{ + unsigned int i; + poly t; + + poly_pointwise_montgomery(w, &u->vec[0], &v->vec[0]); /* first product initialises the accumulator w */ + for(i = 1; i < L; ++i) { + poly_pointwise_montgomery(&t, &u->vec[i], &v->vec[i]); + poly_add(w, w, &t); /* accumulate the remaining L-1 products through the temporary t */ + } +} + +/************************************************* +* Name: polyvecl_chknorm +* +* Description: Check infinity norm of polynomials in vector of length L. +* Assumes input polyvecl to be reduced by polyvecl_reduce(). +* +* Arguments: - const polyvecl *v: pointer to vector +* - int32_t bound: norm bound +* +* Returns 0 if norm of all polynomials is strictly smaller than bound <= (Q-1)/8 +* and 1 otherwise. +**************************************************/ +int polyvecl_chknorm(const polyvecl *v, int32_t bound) { + unsigned int i; + + for(i = 0; i < L; ++i) + if(poly_chknorm(&v->vec[i], bound)) + return 1; /* stops at the first polynomial that fails the check */ + + return 0; +} + +/**************************************************************/ +/************ Vectors of polynomials of length K **************/ +/**************************************************************/ + +void polyveck_uniform_eta(polyveck *v, const uint8_t seed[CRHBYTES], uint16_t nonce) { /* fills all K polynomials via poly_uniform_eta with consecutive nonces */ + unsigned int i; + + for(i = 0; i < K; ++i) + poly_uniform_eta(&v->vec[i], seed, nonce++); +} + +/************************************************* +* Name: polyveck_reduce +* +* Description: Reduce coefficients of polynomials in vector of length K +* to representatives in [-6283009,6283007]. 
+* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_reduce(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_reduce(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_caddq +* +* Description: For all coefficients of polynomials in vector of length K +* add Q if coefficient is negative. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_caddq(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_caddq(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_add +* +* Description: Add vectors of polynomials of length K. +* No modular reduction is performed. +* +* Arguments: - polyveck *w: pointer to output vector +* - const polyveck *u: pointer to first summand +* - const polyveck *v: pointer to second summand +**************************************************/ +void polyveck_add(polyveck *w, const polyveck *u, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); /* element-wise: w[i] = u[i] + v[i] */ +} + +/************************************************* +* Name: polyveck_sub +* +* Description: Subtract vectors of polynomials of length K. +* No modular reduction is performed. +* +* Arguments: - polyveck *w: pointer to output vector +* - const polyveck *u: pointer to first input vector +* - const polyveck *v: pointer to second input vector to be +* subtracted from first input vector +**************************************************/ +void polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_sub(&w->vec[i], &u->vec[i], &v->vec[i]); /* element-wise: w[i] = u[i] - v[i] */ +} + +/************************************************* +* Name: polyveck_shiftl +* +* Description: Multiply vector of polynomials of length K by 2^D without modular +* reduction. 
Assumes input coefficients to be less than 2^{31-D}. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_shiftl(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_shiftl(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_ntt +* +* Description: Forward NTT of all polynomials in vector of length K. Output +* coefficients can be up to 16*Q larger than input coefficients. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_ntt(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_ntt(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_invntt_tomont +* +* Description: Inverse NTT and multiplication by 2^{32} of polynomials +* in vector of length K. Input coefficients need to be less +* than 2*Q. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_invntt_tomont(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_invntt_tomont(&v->vec[i]); +} + +void polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v) { /* r[i] = poly_pointwise_montgomery(a, v[i]): the single polynomial a multiplies every entry of v */ + unsigned int i; + + for(i = 0; i < K; ++i) + poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]); +} + + +/************************************************* +* Name: polyveck_chknorm +* +* Description: Check infinity norm of polynomials in vector of length K. +* Assumes input polyveck to be reduced by polyveck_reduce(). +* +* Arguments: - const polyveck *v: pointer to vector +* - int32_t bound: norm bound +* +* Returns 0 if norm of all polynomials is strictly smaller than bound <= (Q-1)/8 +* and 1 otherwise. 
+**************************************************/ +int polyveck_chknorm(const polyveck *v, int32_t bound) { + unsigned int i; + + for(i = 0; i < K; ++i) + if(poly_chknorm(&v->vec[i], bound)) + return 1; /* stops at the first polynomial that fails the check */ + + return 0; +} + +/************************************************* +* Name: polyveck_power2round +* +* Description: For all coefficients a of polynomials in vector of length K, +* compute a0, a1 such that a mod^+ Q = a1*2^D + a0 +* with -2^{D-1} < a0 <= 2^{D-1}. Assumes coefficients to be +* standard representatives. +* +* Arguments: - polyveck *v1: pointer to output vector of polynomials with +* coefficients a1 +* - polyveck *v0: pointer to output vector of polynomials with +* coefficients a0 +* - const polyveck *v: pointer to input vector +**************************************************/ +void polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_power2round(&v1->vec[i], &v0->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_decompose +* +* Description: For all coefficients a of polynomials in vector of length K, +* compute high and low bits a0, a1 such that a mod^+ Q = a1*ALPHA + a0 +* with -ALPHA/2 < a0 <= ALPHA/2 except a1 = (Q-1)/ALPHA where we +* set a1 = 0 and -ALPHA/2 <= a0 = a mod Q - Q < 0. +* Assumes coefficients to be standard representatives. +* +* Arguments: - polyveck *v1: pointer to output vector of polynomials with +* coefficients a1 +* - polyveck *v0: pointer to output vector of polynomials with +* coefficients a0 +* - const polyveck *v: pointer to input vector +**************************************************/ +void polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_decompose(&v1->vec[i], &v0->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_make_hint +* +* Description: Compute hint vector. 
+* +* Arguments: - polyveck *h: pointer to output vector +* - const polyveck *v0: pointer to low part of input vector +* - const polyveck *v1: pointer to high part of input vector +* +* Returns number of 1 bits. +**************************************************/ +unsigned int polyveck_make_hint(polyveck *h, + const polyveck *v0, + const polyveck *v1) +{ + unsigned int i, s = 0; + + for(i = 0; i < K; ++i) + s += poly_make_hint(&h->vec[i], &v0->vec[i], &v1->vec[i]); /* sum the per-polynomial counts returned by poly_make_hint */ + + return s; +} + +/************************************************* +* Name: polyveck_use_hint +* +* Description: Use hint vector to correct the high bits of input vector. +* +* Arguments: - polyveck *w: pointer to output vector of polynomials with +* corrected high bits +* - const polyveck *u: pointer to input vector +* - const polyveck *h: pointer to input hint vector +**************************************************/ +void polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_use_hint(&w->vec[i], &u->vec[i], &h->vec[i]); +} + +void polyveck_pack_w1(uint8_t r[K*POLYW1_PACKEDBYTES], const polyveck *w1) { /* packs the K polynomials of w1 back to back, POLYW1_PACKEDBYTES bytes each */ + unsigned int i; + + for(i = 0; i < K; ++i) + polyw1_pack(&r[i*POLYW1_PACKEDBYTES], &w1->vec[i]); +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/polyvec.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/polyvec.h new file mode 100644 index 0000000000..615ac52990 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/polyvec.h @@ -0,0 +1,93 @@ +#ifndef POLYVEC_H +#define POLYVEC_H + +#include <stdint.h> +#include "params.h" +#include "poly.h" + +/* Vectors of polynomials of length L */ +typedef struct { + poly vec[L]; +} polyvecl; + +#define polyvecl_uniform_eta DILITHIUM_NAMESPACE(polyvecl_uniform_eta) +void polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define polyvecl_uniform_gamma1 
DILITHIUM_NAMESPACE(polyvecl_uniform_gamma1) +void polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define polyvecl_reduce DILITHIUM_NAMESPACE(polyvecl_reduce) +void polyvecl_reduce(polyvecl *v); + +#define polyvecl_add DILITHIUM_NAMESPACE(polyvecl_add) +void polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v); + +#define polyvecl_ntt DILITHIUM_NAMESPACE(polyvecl_ntt) +void polyvecl_ntt(polyvecl *v); +#define polyvecl_invntt_tomont DILITHIUM_NAMESPACE(polyvecl_invntt_tomont) +void polyvecl_invntt_tomont(polyvecl *v); +#define polyvecl_pointwise_poly_montgomery DILITHIUM_NAMESPACE(polyvecl_pointwise_poly_montgomery) +void polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v); +#define polyvecl_pointwise_acc_montgomery \ + DILITHIUM_NAMESPACE(polyvecl_pointwise_acc_montgomery) +void polyvecl_pointwise_acc_montgomery(poly *w, + const polyvecl *u, + const polyvecl *v); /* the result is a single poly, not a vector */ + + +#define polyvecl_chknorm DILITHIUM_NAMESPACE(polyvecl_chknorm) +int polyvecl_chknorm(const polyvecl *v, int32_t B); /* B is the norm bound; the definition in polyvec.c names this parameter bound */ + + + +/* Vectors of polynomials of length K */ +typedef struct { + poly vec[K]; +} polyveck; + +#define polyveck_uniform_eta DILITHIUM_NAMESPACE(polyveck_uniform_eta) +void polyveck_uniform_eta(polyveck *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define polyveck_reduce DILITHIUM_NAMESPACE(polyveck_reduce) +void polyveck_reduce(polyveck *v); +#define polyveck_caddq DILITHIUM_NAMESPACE(polyveck_caddq) +void polyveck_caddq(polyveck *v); + +#define polyveck_add DILITHIUM_NAMESPACE(polyveck_add) +void polyveck_add(polyveck *w, const polyveck *u, const polyveck *v); +#define polyveck_sub DILITHIUM_NAMESPACE(polyveck_sub) +void polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v); +#define polyveck_shiftl DILITHIUM_NAMESPACE(polyveck_shiftl) +void polyveck_shiftl(polyveck *v); + +#define polyveck_ntt DILITHIUM_NAMESPACE(polyveck_ntt) +void polyveck_ntt(polyveck *v); +#define 
polyveck_invntt_tomont DILITHIUM_NAMESPACE(polyveck_invntt_tomont) +void polyveck_invntt_tomont(polyveck *v); +#define polyveck_pointwise_poly_montgomery DILITHIUM_NAMESPACE(polyveck_pointwise_poly_montgomery) +void polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v); + +#define polyveck_chknorm DILITHIUM_NAMESPACE(polyveck_chknorm) +int polyveck_chknorm(const polyveck *v, int32_t B); /* B is the norm bound; the definition in polyvec.c names this parameter bound */ + +#define polyveck_power2round DILITHIUM_NAMESPACE(polyveck_power2round) +void polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v); +#define polyveck_decompose DILITHIUM_NAMESPACE(polyveck_decompose) +void polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v); +#define polyveck_make_hint DILITHIUM_NAMESPACE(polyveck_make_hint) +unsigned int polyveck_make_hint(polyveck *h, + const polyveck *v0, + const polyveck *v1); +#define polyveck_use_hint DILITHIUM_NAMESPACE(polyveck_use_hint) +void polyveck_use_hint(polyveck *w, const polyveck *v, const polyveck *h); /* v is the input vector; the definition in polyvec.c names it u */ + +#define polyveck_pack_w1 DILITHIUM_NAMESPACE(polyveck_pack_w1) +void polyveck_pack_w1(uint8_t r[K*POLYW1_PACKEDBYTES], const polyveck *w1); + +#define polyvec_matrix_expand DILITHIUM_NAMESPACE(polyvec_matrix_expand) +void polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]); + +#define polyvec_matrix_pointwise_montgomery DILITHIUM_NAMESPACE(polyvec_matrix_pointwise_montgomery) +void polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/reduce.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/reduce.c new file mode 100644 index 0000000000..75feff8bc5 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/reduce.c @@ -0,0 +1,69 @@ +#include <stdint.h> +#include "params.h" +#include "reduce.h" + +/************************************************* +* Name: montgomery_reduce +* +* Description: For finite 
field element a with -2^{31}Q <= a <= Q*2^31, +* compute r \equiv a*2^{-32} (mod Q) such that -Q < r < Q. +* +* Arguments: - int64_t: finite field element a +* +* Returns r. +**************************************************/ +int32_t montgomery_reduce(int64_t a) { + int32_t t; + + t = (int64_t)(int32_t)a*QINV; + t = (a - (int64_t)t*Q) >> 32; + return t; +} + +/************************************************* +* Name: reduce32 +* +* Description: For finite field element a with a <= 2^{31} - 2^{22} - 1, +* compute r \equiv a (mod Q) such that -6283009 <= r <= 6283007. +* +* Arguments: - int32_t: finite field element a +* +* Returns r. +**************************************************/ +int32_t reduce32(int32_t a) { + int32_t t; + + t = (a + (1 << 22)) >> 23; + t = a - t*Q; + return t; +} + +/************************************************* +* Name: caddq +* +* Description: Add Q if input coefficient is negative. +* +* Arguments: - int32_t: finite field element a +* +* Returns r. +**************************************************/ +int32_t caddq(int32_t a) { + a += (a >> 31) & Q; + return a; +} + +/************************************************* +* Name: freeze +* +* Description: For finite field element a, compute standard +* representative r = a mod^+ Q. +* +* Arguments: - int32_t: finite field element a +* +* Returns r. 
+**************************************************/ +int32_t freeze(int32_t a) { + a = reduce32(a); + a = caddq(a); + return a; +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/reduce.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/reduce.h new file mode 100644 index 0000000000..26d9b4ee2e --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/reduce.h @@ -0,0 +1,22 @@ +#ifndef REDUCE_H +#define REDUCE_H + +#include +#include "params.h" + +#define MONT -4186625 // 2^32 % Q +#define QINV 58728449 // q^(-1) mod 2^32 + +#define montgomery_reduce DILITHIUM_NAMESPACE(montgomery_reduce) +int32_t montgomery_reduce(int64_t a); + +#define reduce32 DILITHIUM_NAMESPACE(reduce32) +int32_t reduce32(int32_t a); + +#define caddq DILITHIUM_NAMESPACE(caddq) +int32_t caddq(int32_t a); + +#define freeze DILITHIUM_NAMESPACE(freeze) +int32_t freeze(int32_t a); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/rounding.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/rounding.c new file mode 100644 index 0000000000..889f0a296b --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/rounding.c @@ -0,0 +1,102 @@ +#include +#include "params.h" +#include "rounding.h" + +/************************************************* +* Name: power2round +* +* Description: For finite field element a, compute a0, a1 such that +* a mod^+ Q = a1*2^D + a0 with -2^{D-1} < a0 <= 2^{D-1}. +* Assumes a to be standard representative. +* +* Arguments: - int32_t a: input element +* - int32_t *a0: pointer to output element a0 +* +* Returns a1. 
+**************************************************/ +int32_t power2round(int32_t *a0, int32_t a) { + int32_t a1; + + a1 = (a + (1 << (D-1)) - 1) >> D; + *a0 = a - (a1 << D); + return a1; +} + +/************************************************* +* Name: decompose +* +* Description: For finite field element a, compute high and low bits a0, a1 such +* that a mod^+ Q = a1*ALPHA + a0 with -ALPHA/2 < a0 <= ALPHA/2 except +* if a1 = (Q-1)/ALPHA where we set a1 = 0 and +* -ALPHA/2 <= a0 = a mod^+ Q - Q < 0. Assumes a to be standard +* representative. +* +* Arguments: - int32_t a: input element +* - int32_t *a0: pointer to output element a0 +* +* Returns a1. +**************************************************/ +int32_t decompose(int32_t *a0, int32_t a) { + int32_t a1; + + a1 = (a + 127) >> 7; +#if GAMMA2 == (Q-1)/32 + a1 = (a1*1025 + (1 << 21)) >> 22; + a1 &= 15; +#elif GAMMA2 == (Q-1)/88 + a1 = (a1*11275 + (1 << 23)) >> 24; + a1 ^= ((43 - a1) >> 31) & a1; +#endif + + *a0 = a - a1*2*GAMMA2; + *a0 -= (((Q-1)/2 - *a0) >> 31) & Q; + return a1; +} + +/************************************************* +* Name: make_hint +* +* Description: Compute hint bit indicating whether the low bits of the +* input element overflow into the high bits. +* +* Arguments: - int32_t a0: low bits of input element +* - int32_t a1: high bits of input element +* +* Returns 1 if overflow. +**************************************************/ +unsigned int make_hint(int32_t a0, int32_t a1) { + if(a0 > GAMMA2 || a0 < -GAMMA2 || (a0 == -GAMMA2 && a1 != 0)) + return 1; + + return 0; +} + +/************************************************* +* Name: use_hint +* +* Description: Correct high bits according to hint. +* +* Arguments: - int32_t a: input element +* - unsigned int hint: hint bit +* +* Returns corrected high bits. 
+**************************************************/ +int32_t use_hint(int32_t a, unsigned int hint) { + int32_t a0, a1; + + a1 = decompose(&a0, a); + if(hint == 0) + return a1; + +#if GAMMA2 == (Q-1)/32 + if(a0 > 0) + return (a1 + 1) & 15; + else + return (a1 - 1) & 15; +#elif GAMMA2 == (Q-1)/88 + if(a0 > 0) + return (a1 == 43) ? 0 : a1 + 1; + else + return (a1 == 0) ? 43 : a1 - 1; +#endif +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/rounding.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/rounding.h new file mode 100644 index 0000000000..b72e8e8d66 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/rounding.h @@ -0,0 +1,19 @@ +#ifndef ROUNDING_H +#define ROUNDING_H + +#include +#include "params.h" + +#define power2round DILITHIUM_NAMESPACE(power2round) +int32_t power2round(int32_t *a0, int32_t a); + +#define decompose DILITHIUM_NAMESPACE(decompose) +int32_t decompose(int32_t *a0, int32_t a); + +#define make_hint DILITHIUM_NAMESPACE(make_hint) +unsigned int make_hint(int32_t a0, int32_t a1); + +#define use_hint DILITHIUM_NAMESPACE(use_hint) +int32_t use_hint(int32_t a, unsigned int hint); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/sign.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/sign.c new file mode 100644 index 0000000000..9298ad2177 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/sign.c @@ -0,0 +1,341 @@ +#include +#include "params.h" +#include "sign.h" +#include "packing.h" +#include "polyvec.h" +#include "poly.h" +#include "randombytes.h" +#include "symmetric.h" +#include "fips202.h" + +/************************************************* +* Name: crypto_sign_keypair +* +* Description: Generates public and private key. 
+* +* Arguments: - uint8_t *pk: pointer to output public key (allocated +* array of CRYPTO_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key (allocated +* array of CRYPTO_SECRETKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { + uint8_t seedbuf[2*SEEDBYTES + CRHBYTES]; + uint8_t tr[TRBYTES]; + const uint8_t *rho, *rhoprime, *key; + polyvecl mat[K]; + polyvecl s1, s1hat; + polyveck s2, t1, t0; + + /* Get randomness for rho, rhoprime and key */ + randombytes(seedbuf, SEEDBYTES); + shake256(seedbuf, 2*SEEDBYTES + CRHBYTES, seedbuf, SEEDBYTES); + rho = seedbuf; + rhoprime = rho + SEEDBYTES; + key = rhoprime + CRHBYTES; + + /* Expand matrix */ + polyvec_matrix_expand(mat, rho); + + /* Sample short vectors s1 and s2 */ + polyvecl_uniform_eta(&s1, rhoprime, 0); + polyveck_uniform_eta(&s2, rhoprime, L); + + /* Matrix-vector multiplication */ + s1hat = s1; + polyvecl_ntt(&s1hat); + polyvec_matrix_pointwise_montgomery(&t1, mat, &s1hat); + polyveck_reduce(&t1); + polyveck_invntt_tomont(&t1); + + /* Add error vector s2 */ + polyveck_add(&t1, &t1, &s2); + + /* Extract t1 and write public key */ + polyveck_caddq(&t1); + polyveck_power2round(&t1, &t0, &t1); + pack_pk(pk, rho, &t1); + + /* Compute H(rho, t1) and write secret key */ + shake256(tr, TRBYTES, pk, CRYPTO_PUBLICKEYBYTES); + pack_sk(sk, rho, tr, key, &t0, &s1, &s2); + + return 0; +} + +/************************************************* +* Name: crypto_sign_signature +* +* Description: Computes signature. 
+* +* Arguments: - uint8_t *sig: pointer to output signature (of length CRYPTO_BYTES) +* - size_t *siglen: pointer to output length of signature +* - uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int crypto_sign_signature(uint8_t *sig, + size_t *siglen, + const uint8_t *m, + size_t mlen, + const uint8_t *sk) +{ + unsigned int n; + uint8_t seedbuf[2*SEEDBYTES + TRBYTES + RNDBYTES + 2*CRHBYTES]; + uint8_t *rho, *tr, *key, *mu, *rhoprime, *rnd; + uint16_t nonce = 0; + polyvecl mat[K], s1, y, z; + polyveck t0, s2, w1, w0, h; + poly cp; + shake256incctx state; + + rho = seedbuf; + tr = rho + SEEDBYTES; + key = tr + TRBYTES; + rnd = key + SEEDBYTES; + mu = rnd + RNDBYTES; + rhoprime = mu + CRHBYTES; + unpack_sk(rho, tr, key, &t0, &s1, &s2, sk); + + + /* Compute mu = CRH(tr, msg) */ + shake256_inc_init(&state); + shake256_inc_absorb(&state, tr, TRBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(mu, CRHBYTES, &state); + +#ifdef DILITHIUM_RANDOMIZED_SIGNING + randombytes(rnd, RNDBYTES); +#else + for(n=0;n OMEGA) + goto rej; + + shake256_inc_ctx_release(&state); + + /* Write signature */ + pack_sig(sig, sig, &z, &h); + *siglen = CRYPTO_BYTES; + return 0; +} + +/************************************************* +* Name: crypto_sign +* +* Description: Compute signed message. 
+* +* Arguments: - uint8_t *sm: pointer to output signed message (allocated +* array with CRYPTO_BYTES + mlen bytes), +* can be equal to m +* - size_t *smlen: pointer to output length of signed +* message +* - const uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - const uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int crypto_sign(uint8_t *sm, + size_t *smlen, + const uint8_t *m, + size_t mlen, + const uint8_t *sk) +{ + size_t i; + + for(i = 0; i < mlen; ++i) + sm[CRYPTO_BYTES + mlen - 1 - i] = m[mlen - 1 - i]; + crypto_sign_signature(sm, smlen, sm + CRYPTO_BYTES, mlen, sk); + *smlen += mlen; + return 0; +} + +/************************************************* +* Name: crypto_sign_verify +* +* Description: Verifies signature. +* +* Arguments: - const uint8_t *sig: pointer to input signature +* - size_t siglen: length of signature (must equal CRYPTO_BYTES) +* - const uint8_t *m: pointer to message +* - size_t mlen: length of message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signature could be verified correctly and -1 otherwise +**************************************************/ +int crypto_sign_verify(const uint8_t *sig, + size_t siglen, + const uint8_t *m, + size_t mlen, + const uint8_t *pk) +{ + unsigned int i; + uint8_t buf[K*POLYW1_PACKEDBYTES]; + uint8_t rho[SEEDBYTES]; + uint8_t mu[CRHBYTES]; + uint8_t c[CTILDEBYTES]; + uint8_t c2[CTILDEBYTES]; + poly cp; + polyvecl mat[K], z; + polyveck t1, w1, h; + shake256incctx state; + + if(siglen != CRYPTO_BYTES) + return -1; + + unpack_pk(rho, &t1, pk); + if(unpack_sig(c, &z, &h, sig)) + return -1; + if(polyvecl_chknorm(&z, GAMMA1 - BETA)) + return -1; + + /* Compute CRH(H(rho, t1), msg) */ + shake256(mu, CRHBYTES, pk, CRYPTO_PUBLICKEYBYTES); + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); +
shake256_inc_squeeze(mu, CRHBYTES, &state); + + /* Matrix-vector multiplication; compute Az - c2^dt1 */ + poly_challenge(&cp, c); /* uses only the first SEEDBYTES bytes of c */ + polyvec_matrix_expand(mat, rho); + + polyvecl_ntt(&z); + polyvec_matrix_pointwise_montgomery(&w1, mat, &z); + + poly_ntt(&cp); + polyveck_shiftl(&t1); + polyveck_ntt(&t1); + polyveck_pointwise_poly_montgomery(&t1, &cp, &t1); + + polyveck_sub(&w1, &w1, &t1); + polyveck_reduce(&w1); + polyveck_invntt_tomont(&w1); + + /* Reconstruct w1 */ + polyveck_caddq(&w1); + polyveck_use_hint(&w1, &w1, &h); + polyveck_pack_w1(buf, &w1); + + /* Call random oracle and verify challenge */ + shake256_inc_ctx_reset(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, buf, K*POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(c2, CTILDEBYTES, &state); + shake256_inc_ctx_release(&state); + for(i = 0; i < CTILDEBYTES; ++i) + if(c[i] != c2[i]) + return -1; + + return 0; +} + +/************************************************* +* Name: crypto_sign_open +* +* Description: Verify signed message. 
+* +* Arguments: - uint8_t *m: pointer to output message (allocated +* array with smlen bytes), can be equal to sm +* - size_t *mlen: pointer to output length of message +* - const uint8_t *sm: pointer to signed message +* - size_t smlen: length of signed message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signed message could be verified correctly and -1 otherwise +**************************************************/ +int crypto_sign_open(uint8_t *m, + size_t *mlen, + const uint8_t *sm, + size_t smlen, + const uint8_t *pk) +{ + size_t i; + + if(smlen < CRYPTO_BYTES) + goto badsig; + + *mlen = smlen - CRYPTO_BYTES; + if(crypto_sign_verify(sm, CRYPTO_BYTES, sm + CRYPTO_BYTES, *mlen, pk)) + goto badsig; + else { + /* All good: the message follows the CRYPTO_BYTES-long signature in sm; copy it to m and return 0 */ + for(i = 0; i < *mlen; ++i) + m[i] = sm[CRYPTO_BYTES + i]; + return 0; + } + +badsig: + /* Signature verification failed (or smlen < CRYPTO_BYTES): -1 is stored through the size_t pointer mlen, all smlen bytes of m are zeroed, and -1 is returned */ + *mlen = -1; + for(i = 0; i < smlen; ++i) + m[i] = 0; + + return -1; +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/sign.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/sign.h new file mode 100644 index 0000000000..295f378c00 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/sign.h @@ -0,0 +1,36 @@ +#ifndef SIGN_H +#define SIGN_H + +#include +#include +#include "params.h" +#include "polyvec.h" +#include "poly.h" + +#define challenge DILITHIUM_NAMESPACE(challenge) +void challenge(poly *c, const uint8_t seed[SEEDBYTES]); + +#define crypto_sign_keypair DILITHIUM_NAMESPACE(keypair) +int crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + +#define crypto_sign_signature DILITHIUM_NAMESPACE(signature) +int crypto_sign_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +#define crypto_sign DILITHIUM_NAMESPACETOP +int crypto_sign(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +#define crypto_sign_verify
DILITHIUM_NAMESPACE(verify) +int crypto_sign_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +#define crypto_sign_open DILITHIUM_NAMESPACE(open) +int crypto_sign_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/symmetric-shake.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/symmetric-shake.c new file mode 100644 index 0000000000..963f649817 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/symmetric-shake.c @@ -0,0 +1,28 @@ +#include +#include "params.h" +#include "symmetric.h" +#include "fips202.h" + +void dilithium_shake128_stream_init(shake128incctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce) +{ + uint8_t t[2]; + t[0] = nonce; + t[1] = nonce >> 8; + + shake128_inc_init(state); + shake128_inc_absorb(state, seed, SEEDBYTES); + shake128_inc_absorb(state, t, 2); + shake128_inc_finalize(state); +} + +void dilithium_shake256_stream_init(shake256incctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce) +{ + uint8_t t[2]; + t[0] = nonce; + t[1] = nonce >> 8; + + shake256_inc_init(state); + shake256_inc_absorb(state, seed, CRHBYTES); + shake256_inc_absorb(state, t, 2); + shake256_inc_finalize(state); +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/symmetric.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/symmetric.h new file mode 100644 index 0000000000..211de3b860 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-65-ipd_ref/symmetric.h @@ -0,0 +1,36 @@ +#ifndef SYMMETRIC_H +#define SYMMETRIC_H + +#include +#include "params.h" + +#include "fips202.h" + +typedef shake128incctx stream128_state; +typedef shake256incctx stream256_state; + +#define dilithium_shake128_stream_init DILITHIUM_NAMESPACE(dilithium_shake128_stream_init) +void 
dilithium_shake128_stream_init(shake128incctx *state, + const uint8_t seed[SEEDBYTES], + uint16_t nonce); + +#define dilithium_shake256_stream_init DILITHIUM_NAMESPACE(dilithium_shake256_stream_init) +void dilithium_shake256_stream_init(shake256incctx *state, + const uint8_t seed[CRHBYTES], + uint16_t nonce); + +#define STREAM128_BLOCKBYTES SHAKE128_RATE +#define STREAM256_BLOCKBYTES SHAKE256_RATE + +#define stream128_init(STATE, SEED, NONCE) \ + dilithium_shake128_stream_init(STATE, SEED, NONCE) +#define stream128_squeezeblocks(OUT, OUTBLOCKS, STATE) \ + shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) +#define stream128_release(STATE) shake128_inc_ctx_release(STATE) +#define stream256_init(STATE, SEED, NONCE) \ + dilithium_shake256_stream_init(STATE, SEED, NONCE) +#define stream256_squeezeblocks(OUT, OUTBLOCKS, STATE) \ + shake256_squeezeblocks(OUT, OUTBLOCKS, STATE) +#define stream256_release(STATE) shake256_inc_ctx_release(STATE) + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/LICENSE b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/LICENSE new file mode 100644 index 0000000000..cddfe615c6 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/LICENSE @@ -0,0 +1,7 @@ +Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/); +or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html). + +For Keccak and the random number generator +we are using public-domain code from sources +and by authors listed in comments on top of +the respective files. 
diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/align.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/align.h new file mode 100644 index 0000000000..33fac1d968 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/align.h @@ -0,0 +1,19 @@ +#ifndef ALIGN_H +#define ALIGN_H + +#include +#include + +#define ALIGNED_UINT8(N) \ + union { \ + uint8_t coeffs[N]; \ + __m256i vec[(N+31)/32]; \ + } + +#define ALIGNED_INT32(N) \ + union { \ + int32_t coeffs[N]; \ + __m256i vec[(N+7)/8]; \ + } + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/api.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/api.h new file mode 100644 index 0000000000..55b637669d --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/api.h @@ -0,0 +1,88 @@ +#ifndef API_H +#define API_H + +#include +#include + +#define pqcrystals_dilithium2_PUBLICKEYBYTES 1312 +#define pqcrystals_dilithium2_SECRETKEYBYTES 2560 +#define pqcrystals_dilithium2_BYTES 2420 + +#define pqcrystals_dilithium2_avx2_PUBLICKEYBYTES pqcrystals_dilithium2_PUBLICKEYBYTES +#define pqcrystals_dilithium2_avx2_SECRETKEYBYTES pqcrystals_dilithium2_SECRETKEYBYTES +#define pqcrystals_dilithium2_avx2_BYTES pqcrystals_dilithium2_BYTES + +int pqcrystals_dilithium2_avx2_keypair(uint8_t *pk, uint8_t *sk); + +int pqcrystals_dilithium2_avx2_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium2_avx2(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium2_avx2_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int pqcrystals_dilithium2_avx2_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#define pqcrystals_dilithium3_PUBLICKEYBYTES 1952 +#define 
pqcrystals_dilithium3_SECRETKEYBYTES 4032 +#define pqcrystals_dilithium3_BYTES 3309 + +#define pqcrystals_dilithium3_avx2_PUBLICKEYBYTES pqcrystals_dilithium3_PUBLICKEYBYTES +#define pqcrystals_dilithium3_avx2_SECRETKEYBYTES pqcrystals_dilithium3_SECRETKEYBYTES +#define pqcrystals_dilithium3_avx2_BYTES pqcrystals_dilithium3_BYTES + +int pqcrystals_dilithium3_avx2_keypair(uint8_t *pk, uint8_t *sk); + +int pqcrystals_dilithium3_avx2_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium3_avx2(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium3_avx2_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int pqcrystals_dilithium3_avx2_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#define pqcrystals_dilithium5_PUBLICKEYBYTES 2592 +#define pqcrystals_dilithium5_SECRETKEYBYTES 4896 +#define pqcrystals_dilithium5_BYTES 4627 + +#define pqcrystals_dilithium5_avx2_PUBLICKEYBYTES pqcrystals_dilithium5_PUBLICKEYBYTES +#define pqcrystals_dilithium5_avx2_SECRETKEYBYTES pqcrystals_dilithium5_SECRETKEYBYTES +#define pqcrystals_dilithium5_avx2_BYTES pqcrystals_dilithium5_BYTES + +int pqcrystals_dilithium5_avx2_keypair(uint8_t *pk, uint8_t *sk); + +int pqcrystals_dilithium5_avx2_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium5_avx2(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium5_avx2_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int pqcrystals_dilithium5_avx2_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/config.h 
b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/config.h new file mode 100644 index 0000000000..e59f81a5e8 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/config.h @@ -0,0 +1,27 @@ +#ifndef CONFIG_H +#define CONFIG_H + +//#define DILITHIUM_MODE 2 +#define DILITHIUM_RANDOMIZED_SIGNING +//#define USE_RDPMC +//#define DBENCH + +#ifndef DILITHIUM_MODE +#define DILITHIUM_MODE 2 +#endif + +#if DILITHIUM_MODE == 2 +#define CRYPTO_ALGNAME "ML-DSA-44-ipd" +#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_44_ipd_avx2 +#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_44_ipd_avx2_##s +#elif DILITHIUM_MODE == 3 +#define CRYPTO_ALGNAME "ML-DSA-65-ipd" +#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_65_ipd_avx2 +#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_65_ipd_avx2_##s +#elif DILITHIUM_MODE == 5 +#define CRYPTO_ALGNAME "ML-DSA-87-ipd" +#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_87_ipd_avx2 +#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_87_ipd_avx2_##s +#endif + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/consts.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/consts.c new file mode 100644 index 0000000000..414d99eceb --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/consts.c @@ -0,0 +1,100 @@ +#include +#include "params.h" +#include "consts.h" + +#define QINV 58728449 // q^(-1) mod 2^32 +#define MONT -4186625 // 2^32 mod q +#define DIV 41978 // mont^2/256 +#define DIV_QINV -8395782 + +const qdata_t qdata = {{ +#define _8XQ 0 + Q, Q, Q, Q, Q, Q, Q, Q, + +#define _8XQINV 8 + QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, + +#define _8XDIV_QINV 16 + DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, + +#define _8XDIV 24 + DIV, DIV, DIV, DIV, DIV, DIV, DIV, DIV, + +#define _ZETAS_QINV 32 + -151046689, 1830765815, -1929875198, -1927777021, 1640767044, 1477910808, 1612161320, 1640734244, + 
308362795, 308362795, 308362795, 308362795, -1815525077, -1815525077, -1815525077, -1815525077, + -1374673747, -1374673747, -1374673747, -1374673747, -1091570561, -1091570561, -1091570561, -1091570561, + -1929495947, -1929495947, -1929495947, -1929495947, 515185417, 515185417, 515185417, 515185417, + -285697463, -285697463, -285697463, -285697463, 625853735, 625853735, 625853735, 625853735, + 1727305304, 1727305304, 2082316400, 2082316400, -1364982364, -1364982364, 858240904, 858240904, + 1806278032, 1806278032, 222489248, 222489248, -346752664, -346752664, 684667771, 684667771, + 1654287830, 1654287830, -878576921, -878576921, -1257667337, -1257667337, -748618600, -748618600, + 329347125, 329347125, 1837364258, 1837364258, -1443016191, -1443016191, -1170414139, -1170414139, + -1846138265, -1631226336, -1404529459, 1838055109, 1594295555, -1076973524, -1898723372, -594436433, + -202001019, -475984260, -561427818, 1797021249, -1061813248, 2059733581, -1661512036, -1104976547, + -1750224323, -901666090, 418987550, 1831915353, -1925356481, 992097815, 879957084, 2024403852, + 1484874664, -1636082790, -285388938, -1983539117, -1495136972, -950076368, -1714807468, -952438995, + -1574918427, 1350681039, -2143979939, 1599739335, -1285853323, -993005454, -1440787840, 568627424, + -783134478, -588790216, 289871779, -1262003603, 2135294594, -1018755525, -889861155, 1665705315, + 1321868265, 1225434135, -1784632064, 666258756, 675310538, -1555941048, -1999506068, -1499481951, + -695180180, -1375177022, 1777179795, 334803717, -178766299, -518252220, 1957047970, 1146323031, + -654783359, -1974159335, 1651689966, 140455867, -1039411342, 1955560694, 1529189038, -2131021878, + -247357819, 1518161567, -86965173, 1708872713, 1787797779, 1638590967, -120646188, -1669960606, + -916321552, 1155548552, 2143745726, 1210558298, -1261461890, -318346816, 628664287, -1729304568, + 1422575624, 1424130038, -1185330464, 235321234, 168022240, 1206536194, 985155484, -894060583, + -898413, 
-1363460238, -605900043, 2027833504, 14253662, 1014493059, 863641633, 1819892093, + 2124962073, -1223601433, -1920467227, -1637785316, -1536588520, 694382729, 235104446, -1045062172, + 831969619, -300448763, 756955444, -260312805, 1554794072, 1339088280, -2040058690, -853476187, + -2047270596, -1723816713, -1591599803, -440824168, 1119856484, 1544891539, 155290192, -973777462, + 991903578, 912367099, -44694137, 1176904444, -421552614, -818371958, 1747917558, -325927722, + 908452108, 1851023419, -1176751719, -1354528380, -72690498, -314284737, 985022747, 963438279, + -1078959975, 604552167, -1021949428, 608791570, 173440395, -2126092136, -1316619236, -1039370342, + 6087993, -110126092, 565464272, -1758099917, -1600929361, 879867909, -1809756372, 400711272, + 1363007700, 30313375, -326425360, 1683520342, -517299994, 2027935492, -1372618620, 128353682, + -1123881663, 137583815, -635454918, -642772911, 45766801, 671509323, -2070602178, 419615363, + 1216882040, -270590488, -1276805128, 371462360, -1357098057, -384158533, 827959816, -596344473, + 702390549, -279505433, -260424530, -71875110, -1208667171, -1499603926, 2036925262, -540420426, + 746144248, -1420958686, 2032221021, 1904936414, 1257750362, 1926727420, 1931587462, 1258381762, + 885133339, 1629985060, 1967222129, 6363718, -1287922800, 1136965286, 1779436847, 1116720494, + 1042326957, 1405999311, 713994583, 940195359, -1542497137, 2061661095, -883155599, 1726753853, + -1547952704, 394851342, 283780712, 776003547, 1123958025, 201262505, 1934038751, 374860238, + +#define _ZETAS 328 + -3975713, 25847, -2608894, -518909, 237124, -777960, -876248, 466468, + 1826347, 1826347, 1826347, 1826347, 2353451, 2353451, 2353451, 2353451, + -359251, -359251, -359251, -359251, -2091905, -2091905, -2091905, -2091905, + 3119733, 3119733, 3119733, 3119733, -2884855, -2884855, -2884855, -2884855, + 3111497, 3111497, 3111497, 3111497, 2680103, 2680103, 2680103, 2680103, + 2725464, 2725464, 1024112, 1024112, -1079900, -1079900, 
3585928, 3585928, + -549488, -549488, -1119584, -1119584, 2619752, 2619752, -2108549, -2108549, + -2118186, -2118186, -3859737, -3859737, -1399561, -1399561, -3277672, -3277672, + 1757237, 1757237, -19422, -19422, 4010497, 4010497, 280005, 280005, + 2706023, 95776, 3077325, 3530437, -1661693, -3592148, -2537516, 3915439, + -3861115, -3043716, 3574422, -2867647, 3539968, -300467, 2348700, -539299, + -1699267, -1643818, 3505694, -3821735, 3507263, -2140649, -1600420, 3699596, + 811944, 531354, 954230, 3881043, 3900724, -2556880, 2071892, -2797779, + -3930395, -3677745, -1452451, 2176455, -1257611, -4083598, -3190144, -3632928, + 3412210, 2147896, -2967645, -411027, -671102, -22981, -381987, 1852771, + -3343383, 508951, 44288, 904516, -3724342, 1653064, 2389356, 759969, + 189548, 3159746, -2409325, 1315589, 1285669, -812732, -3019102, -3628969, + -1528703, -3041255, 3475950, -1585221, 1939314, -1000202, -3157330, 126922, + -983419, 2715295, -3693493, -2477047, -1228525, -1308169, 1349076, -1430430, + 264944, 3097992, -1100098, 3958618, -8578, -3249728, -210977, -1316856, + -3553272, -1851402, -177440, 1341330, -1584928, -1439742, -3881060, 3839961, + 2091667, -3342478, 266997, -3520352, 900702, 495491, -655327, -3556995, + 342297, 3437287, 2842341, 4055324, -3767016, -2994039, -1333058, -451100, + -1279661, 1500165, -542412, -2584293, -2013608, 1957272, -3183426, 810149, + -3038916, 2213111, -426683, -1667432, -2939036, 183443, -554416, 3937738, + 3407706, 2244091, 2434439, -3759364, 1859098, -1613174, -3122442, -525098, + 286988, -3342277, 2691481, 1247620, 1250494, 1869119, 1237275, 1312455, + 1917081, 777191, -2831860, -3724270, 2432395, 3369112, 162844, 1652634, + 3523897, -975884, 1723600, -1104333, -2235985, -976891, 3919660, 1400424, + 2316500, -2446433, -1235728, -1197226, 909542, -43260, 2031748, -768622, + -2437823, 1735879, -2590150, 2486353, 2635921, 1903435, -3318210, 3306115, + -2546312, 2235880, -1671176, 594136, 2454455, 185531, 1616392, -3694233, + 
3866901, 1717735, -1803090, -260646, -420899, 1612842, -48306, -846154, + 3817976, -3562462, 3513181, -3193378, 819034, -522500, 3207046, -3595838, + 4108315, 203044, 1265009, 1595974, -3548272, -1050970, -1430225, -1962642, + -1374803, 3406031, -1846953, -3776993, -164721, -1207385, 3014001, -1799107, + 269760, 472078, 1910376, -3833893, -2286327, -3545687, -1362209, 1976782, +}}; diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/consts.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/consts.h new file mode 100644 index 0000000000..930d2f09b3 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/consts.h @@ -0,0 +1,38 @@ +#ifndef CONSTS_H +#define CONSTS_H + +#include "params.h" + +#define _8XQ 0 +#define _8XQINV 8 +#define _8XDIV_QINV 16 +#define _8XDIV 24 +#define _ZETAS_QINV 32 +#define _ZETAS 328 + +/* The C ABI on MacOS exports all symbols with a leading + * underscore. This means that any symbols we refer to from + * C files (functions) can't be found, and all symbols we + * refer to from ASM also can't be found. 
+ * + * This define helps us get around this + */ +#if defined(__WIN32__) || defined(__APPLE__) +#define decorate(s) _##s +#define _cdecl(s) decorate(s) +#define cdecl(s) _cdecl(DILITHIUM_NAMESPACE(##s)) +#else +#define cdecl(s) DILITHIUM_NAMESPACE(##s) +#endif + +#ifndef __ASSEMBLER__ + +#include "align.h" + +typedef ALIGNED_INT32(624) qdata_t; + +#define qdata DILITHIUM_NAMESPACE(qdata) +extern const qdata_t qdata; + +#endif +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/invntt.S b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/invntt.S new file mode 100644 index 0000000000..3e9864c994 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/invntt.S @@ -0,0 +1,238 @@ +#include "consts.h" +.include "shuffle.inc" + +.macro butterfly l,h,zl0=1,zl1=1,zh0=2,zh1=2 +vpsubd %ymm\l,%ymm\h,%ymm12 +vpaddd %ymm\h,%ymm\l,%ymm\l + +vpmuldq %ymm\zl0,%ymm12,%ymm13 +vmovshdup %ymm12,%ymm\h +vpmuldq %ymm\zl1,%ymm\h,%ymm14 + +vpmuldq %ymm\zh0,%ymm12,%ymm12 +vpmuldq %ymm\zh1,%ymm\h,%ymm\h + +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 + +vpsubd %ymm13,%ymm12,%ymm12 +vpsubd %ymm14,%ymm\h,%ymm\h + +vmovshdup %ymm12,%ymm12 +vpblendd $0xAA,%ymm\h,%ymm12,%ymm\h +.endm + +.macro levels0t5 off +vmovdqa 256*\off+ 0(%rdi),%ymm4 +vmovdqa 256*\off+ 32(%rdi),%ymm5 +vmovdqa 256*\off+ 64(%rdi),%ymm6 +vmovdqa 256*\off+ 96(%rdi),%ymm7 +vmovdqa 256*\off+128(%rdi),%ymm8 +vmovdqa 256*\off+160(%rdi),%ymm9 +vmovdqa 256*\off+192(%rdi),%ymm10 +vmovdqa 256*\off+224(%rdi),%ymm11 + +/* level 0 */ +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-8)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-8)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 4,5,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-40)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-40)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 6,7,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-72)*4(%rsi),%ymm3 +vpermq 
$0x1B,(_ZETAS+296-8*\off-72)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 8,9,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-104)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-104)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 10,11,1,3,2,15 + +/* level 1 */ +vpermq $0x1B,(_ZETAS_QINV+168-8*\off-8)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+168-8*\off-8)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 4,6,1,3,2,15 +butterfly 5,7,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+168-8*\off-40)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+168-8*\off-40)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 8,10,1,3,2,15 +butterfly 9,11,1,3,2,15 + +/* level 2 */ +vpermq $0x1B,(_ZETAS_QINV+104-8*\off-8)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+104-8*\off-8)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 4,8,1,3,2,15 +butterfly 5,9,1,3,2,15 +butterfly 6,10,1,3,2,15 +butterfly 7,11,1,3,2,15 + +/* level 3 */ +shuffle2 4,5,3,5 +shuffle2 6,7,4,7 +shuffle2 8,9,6,9 +shuffle2 10,11,8,11 + +vpermq $0x1B,(_ZETAS_QINV+72-8*\off-8)*4(%rsi),%ymm1 +vpermq $0x1B,(_ZETAS+72-8*\off-8)*4(%rsi),%ymm2 +butterfly 3,5 +butterfly 4,7 +butterfly 6,9 +butterfly 8,11 + +/* level 4 */ +shuffle4 3,4,10,4 +shuffle4 6,8,3,8 +shuffle4 5,7,6,7 +shuffle4 9,11,5,11 + +vpermq $0x1B,(_ZETAS_QINV+40-8*\off-8)*4(%rsi),%ymm1 +vpermq $0x1B,(_ZETAS+40-8*\off-8)*4(%rsi),%ymm2 +butterfly 10,4 +butterfly 3,8 +butterfly 6,7 +butterfly 5,11 + +/* level 5 */ +shuffle8 10,3,9,3 +shuffle8 6,5,10,5 +shuffle8 4,8,6,8 +shuffle8 7,11,4,11 + +vpbroadcastd (_ZETAS_QINV+7-\off)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+7-\off)*4(%rsi),%ymm2 +butterfly 9,3 +butterfly 10,5 +butterfly 6,8 +butterfly 4,11 + +vmovdqa %ymm9,256*\off+ 0(%rdi) +vmovdqa %ymm10,256*\off+ 32(%rdi) +vmovdqa %ymm6,256*\off+ 64(%rdi) +vmovdqa %ymm4,256*\off+ 96(%rdi) +vmovdqa %ymm3,256*\off+128(%rdi) +vmovdqa %ymm5,256*\off+160(%rdi) +vmovdqa %ymm8,256*\off+192(%rdi) 
+vmovdqa %ymm11,256*\off+224(%rdi) +.endm + +.macro levels6t7 off +vmovdqa 0+32*\off(%rdi),%ymm4 +vmovdqa 128+32*\off(%rdi),%ymm5 +vmovdqa 256+32*\off(%rdi),%ymm6 +vmovdqa 384+32*\off(%rdi),%ymm7 +vmovdqa 512+32*\off(%rdi),%ymm8 +vmovdqa 640+32*\off(%rdi),%ymm9 +vmovdqa 768+32*\off(%rdi),%ymm10 +vmovdqa 896+32*\off(%rdi),%ymm11 + +/* level 6 */ +vpbroadcastd (_ZETAS_QINV+3)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+3)*4(%rsi),%ymm2 +butterfly 4,6 +butterfly 5,7 + +vpbroadcastd (_ZETAS_QINV+2)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+2)*4(%rsi),%ymm2 +butterfly 8,10 +butterfly 9,11 + +/* level 7 */ +vpbroadcastd (_ZETAS_QINV+0)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+0)*4(%rsi),%ymm2 + +butterfly 4,8 +butterfly 5,9 +butterfly 6,10 +butterfly 7,11 + +vmovdqa %ymm8,512+32*\off(%rdi) +vmovdqa %ymm9,640+32*\off(%rdi) +vmovdqa %ymm10,768+32*\off(%rdi) +vmovdqa %ymm11,896+32*\off(%rdi) + +vmovdqa (_8XDIV_QINV)*4(%rsi),%ymm1 +vmovdqa (_8XDIV)*4(%rsi),%ymm2 +vpmuldq %ymm1,%ymm4,%ymm12 +vpmuldq %ymm1,%ymm5,%ymm13 +vmovshdup %ymm4,%ymm8 +vmovshdup %ymm5,%ymm9 +vpmuldq %ymm1,%ymm8,%ymm14 +vpmuldq %ymm1,%ymm9,%ymm15 +vpmuldq %ymm2,%ymm4,%ymm4 +vpmuldq %ymm2,%ymm5,%ymm5 +vpmuldq %ymm2,%ymm8,%ymm8 +vpmuldq %ymm2,%ymm9,%ymm9 +vpmuldq %ymm0,%ymm12,%ymm12 +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 +vpmuldq %ymm0,%ymm15,%ymm15 +vpsubd %ymm12,%ymm4,%ymm4 +vpsubd %ymm13,%ymm5,%ymm5 +vpsubd %ymm14,%ymm8,%ymm8 +vpsubd %ymm15,%ymm9,%ymm9 +vmovshdup %ymm4,%ymm4 +vmovshdup %ymm5,%ymm5 +vpblendd $0xAA,%ymm8,%ymm4,%ymm4 +vpblendd $0xAA,%ymm9,%ymm5,%ymm5 + +vpmuldq %ymm1,%ymm6,%ymm12 +vpmuldq %ymm1,%ymm7,%ymm13 +vmovshdup %ymm6,%ymm8 +vmovshdup %ymm7,%ymm9 +vpmuldq %ymm1,%ymm8,%ymm14 +vpmuldq %ymm1,%ymm9,%ymm15 +vpmuldq %ymm2,%ymm6,%ymm6 +vpmuldq %ymm2,%ymm7,%ymm7 +vpmuldq %ymm2,%ymm8,%ymm8 +vpmuldq %ymm2,%ymm9,%ymm9 +vpmuldq %ymm0,%ymm12,%ymm12 +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 +vpmuldq %ymm0,%ymm15,%ymm15 +vpsubd %ymm12,%ymm6,%ymm6 +vpsubd %ymm13,%ymm7,%ymm7 +vpsubd 
%ymm14,%ymm8,%ymm8 +vpsubd %ymm15,%ymm9,%ymm9 +vmovshdup %ymm6,%ymm6 +vmovshdup %ymm7,%ymm7 +vpblendd $0xAA,%ymm8,%ymm6,%ymm6 +vpblendd $0xAA,%ymm9,%ymm7,%ymm7 + +vmovdqa %ymm4, 0+32*\off(%rdi) +vmovdqa %ymm5,128+32*\off(%rdi) +vmovdqa %ymm6,256+32*\off(%rdi) +vmovdqa %ymm7,384+32*\off(%rdi) +.endm + +.text +.global cdecl(invntt_avx) +cdecl(invntt_avx): +vmovdqa _8XQ*4(%rsi),%ymm0 + +levels0t5 0 +levels0t5 1 +levels0t5 2 +levels0t5 3 + +levels6t7 0 +levels6t7 1 +levels6t7 2 +levels6t7 3 + +ret diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/ntt.S b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/ntt.S new file mode 100644 index 0000000000..38415de893 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/ntt.S @@ -0,0 +1,197 @@ +#include "consts.h" +.include "shuffle.inc" + +.macro butterfly l,h,zl0=1,zl1=1,zh0=2,zh1=2 +vpmuldq %ymm\zl0,%ymm\h,%ymm13 +vmovshdup %ymm\h,%ymm12 +vpmuldq %ymm\zl1,%ymm12,%ymm14 + +vpmuldq %ymm\zh0,%ymm\h,%ymm\h +vpmuldq %ymm\zh1,%ymm12,%ymm12 + +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 + +vmovshdup %ymm\h,%ymm\h +vpblendd $0xAA,%ymm12,%ymm\h,%ymm\h + +vpsubd %ymm\h,%ymm\l,%ymm12 +vpaddd %ymm\h,%ymm\l,%ymm\l + +vmovshdup %ymm13,%ymm13 +vpblendd $0xAA,%ymm14,%ymm13,%ymm13 + +vpaddd %ymm13,%ymm12,%ymm\h +vpsubd %ymm13,%ymm\l,%ymm\l +.endm + +.macro levels0t1 off +/* level 0 */ +vpbroadcastd (_ZETAS_QINV+1)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+1)*4(%rsi),%ymm2 + +vmovdqa 0+32*\off(%rdi),%ymm4 +vmovdqa 128+32*\off(%rdi),%ymm5 +vmovdqa 256+32*\off(%rdi),%ymm6 +vmovdqa 384+32*\off(%rdi),%ymm7 +vmovdqa 512+32*\off(%rdi),%ymm8 +vmovdqa 640+32*\off(%rdi),%ymm9 +vmovdqa 768+32*\off(%rdi),%ymm10 +vmovdqa 896+32*\off(%rdi),%ymm11 + +butterfly 4,8 +butterfly 5,9 +butterfly 6,10 +butterfly 7,11 + +/* level 1 */ +vpbroadcastd (_ZETAS_QINV+2)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+2)*4(%rsi),%ymm2 +butterfly 4,6 +butterfly 5,7 + +vpbroadcastd 
(_ZETAS_QINV+3)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+3)*4(%rsi),%ymm2 +butterfly 8,10 +butterfly 9,11 + +vmovdqa %ymm4, 0+32*\off(%rdi) +vmovdqa %ymm5,128+32*\off(%rdi) +vmovdqa %ymm6,256+32*\off(%rdi) +vmovdqa %ymm7,384+32*\off(%rdi) +vmovdqa %ymm8,512+32*\off(%rdi) +vmovdqa %ymm9,640+32*\off(%rdi) +vmovdqa %ymm10,768+32*\off(%rdi) +vmovdqa %ymm11,896+32*\off(%rdi) +.endm + +.macro levels2t7 off +/* level 2 */ +vmovdqa 256*\off+ 0(%rdi),%ymm4 +vmovdqa 256*\off+ 32(%rdi),%ymm5 +vmovdqa 256*\off+ 64(%rdi),%ymm6 +vmovdqa 256*\off+ 96(%rdi),%ymm7 +vmovdqa 256*\off+128(%rdi),%ymm8 +vmovdqa 256*\off+160(%rdi),%ymm9 +vmovdqa 256*\off+192(%rdi),%ymm10 +vmovdqa 256*\off+224(%rdi),%ymm11 + +vpbroadcastd (_ZETAS_QINV+4+\off)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+4+\off)*4(%rsi),%ymm2 + +butterfly 4,8 +butterfly 5,9 +butterfly 6,10 +butterfly 7,11 + +shuffle8 4,8,3,8 +shuffle8 5,9,4,9 +shuffle8 6,10,5,10 +shuffle8 7,11,6,11 + +/* level 3 */ +vmovdqa (_ZETAS_QINV+8+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+8+8*\off)*4(%rsi),%ymm2 + +butterfly 3,5 +butterfly 8,10 +butterfly 4,6 +butterfly 9,11 + +shuffle4 3,5,7,5 +shuffle4 8,10,3,10 +shuffle4 4,6,8,6 +shuffle4 9,11,4,11 + +/* level 4 */ +vmovdqa (_ZETAS_QINV+40+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+40+8*\off)*4(%rsi),%ymm2 + +butterfly 7,8 +butterfly 5,6 +butterfly 3,4 +butterfly 10,11 + +shuffle2 7,8,9,8 +shuffle2 5,6,7,6 +shuffle2 3,4,5,4 +shuffle2 10,11,3,11 + +/* level 5 */ +vmovdqa (_ZETAS_QINV+72+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+72+8*\off)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 + +butterfly 9,5,1,10,2,15 +butterfly 8,4,1,10,2,15 +butterfly 7,3,1,10,2,15 +butterfly 6,11,1,10,2,15 + +/* level 6 */ +vmovdqa (_ZETAS_QINV+104+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+104+8*\off)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 9,7,1,10,2,15 +butterfly 8,6,1,10,2,15 + +vmovdqa (_ZETAS_QINV+104+8*\off+32)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+104+8*\off+32)*4(%rsi),%ymm2 +vpsrlq 
$32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 5,3,1,10,2,15 +butterfly 4,11,1,10,2,15 + +/* level 7 */ +vmovdqa (_ZETAS_QINV+168+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 9,8,1,10,2,15 + +vmovdqa (_ZETAS_QINV+168+8*\off+32)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off+32)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 7,6,1,10,2,15 + +vmovdqa (_ZETAS_QINV+168+8*\off+64)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off+64)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 5,4,1,10,2,15 + +vmovdqa (_ZETAS_QINV+168+8*\off+96)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off+96)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 3,11,1,10,2,15 + +vmovdqa %ymm9,256*\off+ 0(%rdi) +vmovdqa %ymm8,256*\off+ 32(%rdi) +vmovdqa %ymm7,256*\off+ 64(%rdi) +vmovdqa %ymm6,256*\off+ 96(%rdi) +vmovdqa %ymm5,256*\off+128(%rdi) +vmovdqa %ymm4,256*\off+160(%rdi) +vmovdqa %ymm3,256*\off+192(%rdi) +vmovdqa %ymm11,256*\off+224(%rdi) +.endm + +.text +.global cdecl(ntt_avx) +cdecl(ntt_avx): +vmovdqa _8XQ*4(%rsi),%ymm0 + +levels0t1 0 +levels0t1 1 +levels0t1 2 +levels0t1 3 + +levels2t7 0 +levels2t7 1 +levels2t7 2 +levels2t7 3 + +ret + diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/ntt.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/ntt.h new file mode 100644 index 0000000000..0c4fbdd342 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/ntt.h @@ -0,0 +1,19 @@ +#ifndef NTT_H +#define NTT_H + +#include <immintrin.h> + +#define ntt_avx DILITHIUM_NAMESPACE(ntt_avx) +void ntt_avx(__m256i *a, const __m256i *qdata); +#define invntt_avx DILITHIUM_NAMESPACE(invntt_avx) +void invntt_avx(__m256i *a, const __m256i *qdata); + +#define nttunpack_avx DILITHIUM_NAMESPACE(nttunpack_avx) +void nttunpack_avx(__m256i *a); + +#define pointwise_avx DILITHIUM_NAMESPACE(pointwise_avx) +void 
pointwise_avx(__m256i *c, const __m256i *a, const __m256i *b, const __m256i *qdata); +#define pointwise_acc_avx DILITHIUM_NAMESPACE(pointwise_acc_avx) +void pointwise_acc_avx(__m256i *c, const __m256i *a, const __m256i *b, const __m256i *qdata); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/packing.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/packing.c new file mode 100644 index 0000000000..039a686da3 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/packing.c @@ -0,0 +1,237 @@ +#include "params.h" +#include "packing.h" +#include "polyvec.h" +#include "poly.h" + +/************************************************* +* Name: pack_pk +* +* Description: Bit-pack public key pk = (rho, t1). +* +* Arguments: - uint8_t pk[]: output byte array +* - const uint8_t rho[]: byte array containing rho +* - const polyveck *t1: pointer to vector t1 +**************************************************/ +void pack_pk(uint8_t pk[CRYPTO_PUBLICKEYBYTES], + const uint8_t rho[SEEDBYTES], + const polyveck *t1) +{ + unsigned int i; + + for(i = 0; i < SEEDBYTES; ++i) + pk[i] = rho[i]; + pk += SEEDBYTES; + + for(i = 0; i < K; ++i) + polyt1_pack(pk + i*POLYT1_PACKEDBYTES, &t1->vec[i]); +} + +/************************************************* +* Name: unpack_pk +* +* Description: Unpack public key pk = (rho, t1). 
+* +* Arguments: - uint8_t rho[]: output byte array for rho +* - polyveck *t1: pointer to output vector t1 +* - const uint8_t pk[]: byte array containing bit-packed pk +**************************************************/ +void unpack_pk(uint8_t rho[SEEDBYTES], + polyveck *t1, + const uint8_t pk[CRYPTO_PUBLICKEYBYTES]) +{ + unsigned int i; + + for(i = 0; i < SEEDBYTES; ++i) + rho[i] = pk[i]; + pk += SEEDBYTES; + + for(i = 0; i < K; ++i) + polyt1_unpack(&t1->vec[i], pk + i*POLYT1_PACKEDBYTES); +} + +/************************************************* +* Name: pack_sk +* +* Description: Bit-pack secret key sk = (rho, tr, key, t0, s1, s2). +* +* Arguments: - uint8_t sk[]: output byte array +* - const uint8_t rho[]: byte array containing rho +* - const uint8_t tr[]: byte array containing tr +* - const uint8_t key[]: byte array containing key +* - const polyveck *t0: pointer to vector t0 +* - const polyvecl *s1: pointer to vector s1 +* - const polyveck *s2: pointer to vector s2 +**************************************************/ +void pack_sk(uint8_t sk[CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[TRBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2) +{ + unsigned int i; + + for(i = 0; i < SEEDBYTES; ++i) + sk[i] = rho[i]; + sk += SEEDBYTES; + + for(i = 0; i < SEEDBYTES; ++i) + sk[i] = key[i]; + sk += SEEDBYTES; + + for(i = 0; i < TRBYTES; ++i) + sk[i] = tr[i]; + sk += TRBYTES; + + for(i = 0; i < L; ++i) + polyeta_pack(sk + i*POLYETA_PACKEDBYTES, &s1->vec[i]); + sk += L*POLYETA_PACKEDBYTES; + + for(i = 0; i < K; ++i) + polyeta_pack(sk + i*POLYETA_PACKEDBYTES, &s2->vec[i]); + sk += K*POLYETA_PACKEDBYTES; + + for(i = 0; i < K; ++i) + polyt0_pack(sk + i*POLYT0_PACKEDBYTES, &t0->vec[i]); +} + +/************************************************* +* Name: unpack_sk +* +* Description: Unpack secret key sk = (rho, tr, key, t0, s1, s2). 
+* +* Arguments: - uint8_t rho[]: output byte array for rho +* - uint8_t tr[]: output byte array for tr +* - uint8_t key[]: output byte array for key +* - polyveck *t0: pointer to output vector t0 +* - polyvecl *s1: pointer to output vector s1 +* - polyveck *s2: pointer to output vector s2 +* - const uint8_t sk[]: byte array containing bit-packed sk +**************************************************/ +void unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[TRBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[CRYPTO_SECRETKEYBYTES]) +{ + unsigned int i; + + for(i = 0; i < SEEDBYTES; ++i) + rho[i] = sk[i]; + sk += SEEDBYTES; + + for(i = 0; i < SEEDBYTES; ++i) + key[i] = sk[i]; + sk += SEEDBYTES; + + for(i = 0; i < TRBYTES; ++i) + tr[i] = sk[i]; + sk += TRBYTES; + + for(i=0; i < L; ++i) + polyeta_unpack(&s1->vec[i], sk + i*POLYETA_PACKEDBYTES); + sk += L*POLYETA_PACKEDBYTES; + + for(i=0; i < K; ++i) + polyeta_unpack(&s2->vec[i], sk + i*POLYETA_PACKEDBYTES); + sk += K*POLYETA_PACKEDBYTES; + + for(i=0; i < K; ++i) + polyt0_unpack(&t0->vec[i], sk + i*POLYT0_PACKEDBYTES); +} + +/************************************************* +* Name: pack_sig +* +* Description: Bit-pack signature sig = (c, z, h). 
+* +* Arguments: - uint8_t sig[]: output byte array +* - const uint8_t *c: pointer to challenge hash of length CTILDEBYTES +* - const polyvecl *z: pointer to vector z +* - const polyveck *h: pointer to hint vector h +**************************************************/ +void pack_sig(uint8_t sig[CRYPTO_BYTES], + const uint8_t c[CTILDEBYTES], + const polyvecl *z, + const polyveck *h) +{ + unsigned int i, j, k; + + for(i=0; i < CTILDEBYTES; ++i) + sig[i] = c[i]; + sig += CTILDEBYTES; + + for(i = 0; i < L; ++i) + polyz_pack(sig + i*POLYZ_PACKEDBYTES, &z->vec[i]); + sig += L*POLYZ_PACKEDBYTES; + + /* Encode h */ + for(i = 0; i < OMEGA + K; ++i) + sig[i] = 0; + + k = 0; + for(i = 0; i < K; ++i) { + for(j = 0; j < N; ++j) + if(h->vec[i].coeffs[j] != 0) + sig[k++] = j; + + sig[OMEGA + i] = k; + } +} + +/************************************************* +* Name: unpack_sig +* +* Description: Unpack signature sig = (c, z, h). +* +* Arguments: - uint8_t *c: pointer to output challenge hash +* - polyvecl *z: pointer to output vector z +* - polyveck *h: pointer to output hint vector h +* - const uint8_t sig[]: byte array containing +* bit-packed signature +* +* Returns 1 in case of malformed signature; otherwise 0. 
+**************************************************/ +int unpack_sig(uint8_t c[CTILDEBYTES], + polyvecl *z, + polyveck *h, + const uint8_t sig[CRYPTO_BYTES]) +{ + unsigned int i, j, k; + + for(i = 0; i < CTILDEBYTES; ++i) + c[i] = sig[i]; + sig += CTILDEBYTES; + + for(i = 0; i < L; ++i) + polyz_unpack(&z->vec[i], sig + i*POLYZ_PACKEDBYTES); + sig += L*POLYZ_PACKEDBYTES; + + /* Decode h */ + k = 0; + for(i = 0; i < K; ++i) { + for(j = 0; j < N; ++j) + h->vec[i].coeffs[j] = 0; + + if(sig[OMEGA + i] < k || sig[OMEGA + i] > OMEGA) + return 1; + + for(j = k; j < sig[OMEGA + i]; ++j) { + /* Coefficients are ordered for strong unforgeability */ + if(j > k && sig[j] <= sig[j-1]) return 1; + h->vec[i].coeffs[sig[j]] = 1; + } + + k = sig[OMEGA + i]; + } + + /* Extra indices are zero for strong unforgeability */ + for(j = k; j < OMEGA; ++j) + if(sig[j]) + return 1; + + return 0; +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/packing.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/packing.h new file mode 100644 index 0000000000..8e47728ce3 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/packing.h @@ -0,0 +1,38 @@ +#ifndef PACKING_H +#define PACKING_H + +#include <stdint.h> +#include "params.h" +#include "polyvec.h" + +#define pack_pk DILITHIUM_NAMESPACE(pack_pk) +void pack_pk(uint8_t pk[CRYPTO_PUBLICKEYBYTES], const uint8_t rho[SEEDBYTES], const polyveck *t1); + +#define pack_sk DILITHIUM_NAMESPACE(pack_sk) +void pack_sk(uint8_t sk[CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[TRBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2); + +#define pack_sig DILITHIUM_NAMESPACE(pack_sig) +void pack_sig(uint8_t sig[CRYPTO_BYTES], const uint8_t c[CTILDEBYTES], const polyvecl *z, const polyveck *h); + +#define unpack_pk DILITHIUM_NAMESPACE(unpack_pk) +void unpack_pk(uint8_t rho[SEEDBYTES], polyveck *t1, const uint8_t 
pk[CRYPTO_PUBLICKEYBYTES]); + +#define unpack_sk DILITHIUM_NAMESPACE(unpack_sk) +void unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[TRBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[CRYPTO_SECRETKEYBYTES]); + +#define unpack_sig DILITHIUM_NAMESPACE(unpack_sig) +int unpack_sig(uint8_t c[CTILDEBYTES], polyvecl *z, polyveck *h, const uint8_t sig[CRYPTO_BYTES]); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/params.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/params.h new file mode 100644 index 0000000000..1e8a7b505b --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/params.h @@ -0,0 +1,80 @@ +#ifndef PARAMS_H +#define PARAMS_H + +#include "config.h" + +#define SEEDBYTES 32 +#define CRHBYTES 64 +#define TRBYTES 64 +#define RNDBYTES 32 +#define N 256 +#define Q 8380417 +#define D 13 +#define ROOT_OF_UNITY 1753 + +#if DILITHIUM_MODE == 2 +#define K 4 +#define L 4 +#define ETA 2 +#define TAU 39 +#define BETA 78 +#define GAMMA1 (1 << 17) +#define GAMMA2 ((Q-1)/88) +#define OMEGA 80 +#define CTILDEBYTES 32 + +#elif DILITHIUM_MODE == 3 +#define K 6 +#define L 5 +#define ETA 4 +#define TAU 49 +#define BETA 196 +#define GAMMA1 (1 << 19) +#define GAMMA2 ((Q-1)/32) +#define OMEGA 55 +#define CTILDEBYTES 48 + +#elif DILITHIUM_MODE == 5 +#define K 8 +#define L 7 +#define ETA 2 +#define TAU 60 +#define BETA 120 +#define GAMMA1 (1 << 19) +#define GAMMA2 ((Q-1)/32) +#define OMEGA 75 +#define CTILDEBYTES 64 + +#endif + +#define POLYT1_PACKEDBYTES 320 +#define POLYT0_PACKEDBYTES 416 +#define POLYVECH_PACKEDBYTES (OMEGA + K) + +#if GAMMA1 == (1 << 17) +#define POLYZ_PACKEDBYTES 576 +#elif GAMMA1 == (1 << 19) +#define POLYZ_PACKEDBYTES 640 +#endif + +#if GAMMA2 == (Q-1)/88 +#define POLYW1_PACKEDBYTES 192 +#elif GAMMA2 == (Q-1)/32 +#define POLYW1_PACKEDBYTES 128 +#endif + +#if ETA == 2 +#define POLYETA_PACKEDBYTES 96 +#elif ETA == 4 +#define 
POLYETA_PACKEDBYTES 128 +#endif + +#define CRYPTO_PUBLICKEYBYTES (SEEDBYTES + K*POLYT1_PACKEDBYTES) +#define CRYPTO_SECRETKEYBYTES (2*SEEDBYTES \ + + TRBYTES \ + + L*POLYETA_PACKEDBYTES \ + + K*POLYETA_PACKEDBYTES \ + + K*POLYT0_PACKEDBYTES) +#define CRYPTO_BYTES (CTILDEBYTES + L*POLYZ_PACKEDBYTES + POLYVECH_PACKEDBYTES) + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/pointwise.S b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/pointwise.S new file mode 100644 index 0000000000..ae7ff7995c --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/pointwise.S @@ -0,0 +1,211 @@ +#include "params.h" +#include "consts.h" + +.text +.global cdecl(pointwise_avx) +cdecl(pointwise_avx): +#consts +vmovdqa _8XQINV*4(%rcx),%ymm0 +vmovdqa _8XQ*4(%rcx),%ymm1 + +xor %eax,%eax +_looptop1: +#load +vmovdqa (%rsi),%ymm2 +vmovdqa 32(%rsi),%ymm4 +vmovdqa 64(%rsi),%ymm6 +vmovdqa (%rdx),%ymm10 +vmovdqa 32(%rdx),%ymm12 +vmovdqa 64(%rdx),%ymm14 +vpsrlq $32,%ymm2,%ymm3 +vpsrlq $32,%ymm4,%ymm5 +vmovshdup %ymm6,%ymm7 +vpsrlq $32,%ymm10,%ymm11 +vpsrlq $32,%ymm12,%ymm13 +vmovshdup %ymm14,%ymm15 + +#mul +vpmuldq %ymm2,%ymm10,%ymm2 +vpmuldq %ymm3,%ymm11,%ymm3 +vpmuldq %ymm4,%ymm12,%ymm4 +vpmuldq %ymm5,%ymm13,%ymm5 +vpmuldq %ymm6,%ymm14,%ymm6 +vpmuldq %ymm7,%ymm15,%ymm7 + +#reduce +vpmuldq %ymm0,%ymm2,%ymm10 +vpmuldq %ymm0,%ymm3,%ymm11 +vpmuldq %ymm0,%ymm4,%ymm12 +vpmuldq %ymm0,%ymm5,%ymm13 +vpmuldq %ymm0,%ymm6,%ymm14 +vpmuldq %ymm0,%ymm7,%ymm15 +vpmuldq %ymm1,%ymm10,%ymm10 +vpmuldq %ymm1,%ymm11,%ymm11 +vpmuldq %ymm1,%ymm12,%ymm12 +vpmuldq %ymm1,%ymm13,%ymm13 +vpmuldq %ymm1,%ymm14,%ymm14 +vpmuldq %ymm1,%ymm15,%ymm15 +vpsubq %ymm10,%ymm2,%ymm2 +vpsubq %ymm11,%ymm3,%ymm3 +vpsubq %ymm12,%ymm4,%ymm4 +vpsubq %ymm13,%ymm5,%ymm5 +vpsubq %ymm14,%ymm6,%ymm6 +vpsubq %ymm15,%ymm7,%ymm7 +vpsrlq $32,%ymm2,%ymm2 +vpsrlq $32,%ymm4,%ymm4 +vmovshdup %ymm6,%ymm6 + +#store +vpblendd $0xAA,%ymm3,%ymm2,%ymm2 +vpblendd 
$0xAA,%ymm5,%ymm4,%ymm4 +vpblendd $0xAA,%ymm7,%ymm6,%ymm6 +vmovdqa %ymm2,(%rdi) +vmovdqa %ymm4,32(%rdi) +vmovdqa %ymm6,64(%rdi) + +add $96,%rdi +add $96,%rsi +add $96,%rdx +add $1,%eax +cmp $10,%eax +jb _looptop1 + +vmovdqa (%rsi),%ymm2 +vmovdqa 32(%rsi),%ymm4 +vmovdqa (%rdx),%ymm10 +vmovdqa 32(%rdx),%ymm12 +vpsrlq $32,%ymm2,%ymm3 +vpsrlq $32,%ymm4,%ymm5 +vmovshdup %ymm10,%ymm11 +vmovshdup %ymm12,%ymm13 + +#mul +vpmuldq %ymm2,%ymm10,%ymm2 +vpmuldq %ymm3,%ymm11,%ymm3 +vpmuldq %ymm4,%ymm12,%ymm4 +vpmuldq %ymm5,%ymm13,%ymm5 + +#reduce +vpmuldq %ymm0,%ymm2,%ymm10 +vpmuldq %ymm0,%ymm3,%ymm11 +vpmuldq %ymm0,%ymm4,%ymm12 +vpmuldq %ymm0,%ymm5,%ymm13 +vpmuldq %ymm1,%ymm10,%ymm10 +vpmuldq %ymm1,%ymm11,%ymm11 +vpmuldq %ymm1,%ymm12,%ymm12 +vpmuldq %ymm1,%ymm13,%ymm13 +vpsubq %ymm10,%ymm2,%ymm2 +vpsubq %ymm11,%ymm3,%ymm3 +vpsubq %ymm12,%ymm4,%ymm4 +vpsubq %ymm13,%ymm5,%ymm5 +vpsrlq $32,%ymm2,%ymm2 +vmovshdup %ymm4,%ymm4 + +#store +vpblendd $0x55,%ymm2,%ymm3,%ymm2 +vpblendd $0x55,%ymm4,%ymm5,%ymm4 +vmovdqa %ymm2,(%rdi) +vmovdqa %ymm4,32(%rdi) + +ret + +.macro pointwise off +#load +vmovdqa \off(%rsi),%ymm6 +vmovdqa \off+32(%rsi),%ymm8 +vmovdqa \off(%rdx),%ymm10 +vmovdqa \off+32(%rdx),%ymm12 +vpsrlq $32,%ymm6,%ymm7 +vpsrlq $32,%ymm8,%ymm9 +vmovshdup %ymm10,%ymm11 +vmovshdup %ymm12,%ymm13 + +#mul +vpmuldq %ymm6,%ymm10,%ymm6 +vpmuldq %ymm7,%ymm11,%ymm7 +vpmuldq %ymm8,%ymm12,%ymm8 +vpmuldq %ymm9,%ymm13,%ymm9 +.endm + +.macro acc +vpaddq %ymm6,%ymm2,%ymm2 +vpaddq %ymm7,%ymm3,%ymm3 +vpaddq %ymm8,%ymm4,%ymm4 +vpaddq %ymm9,%ymm5,%ymm5 +.endm + +.global cdecl(pointwise_acc_avx) +cdecl(pointwise_acc_avx): +#consts +vmovdqa _8XQINV*4(%rcx),%ymm0 +vmovdqa _8XQ*4(%rcx),%ymm1 + +xor %eax,%eax +_looptop2: +pointwise 0 + +#mov +vmovdqa %ymm6,%ymm2 +vmovdqa %ymm7,%ymm3 +vmovdqa %ymm8,%ymm4 +vmovdqa %ymm9,%ymm5 + +pointwise 1024 +acc + +#if L >= 3 +pointwise 2048 +acc +#endif + +#if L >= 4 +pointwise 3072 +acc +#endif + +#if L >= 5 +pointwise 4096 +acc +#endif + +#if L >= 6 +pointwise 5120 +acc 
+#endif + +#if L >= 7 +pointwise 6144 +acc +#endif + +#reduce +vpmuldq %ymm0,%ymm2,%ymm6 +vpmuldq %ymm0,%ymm3,%ymm7 +vpmuldq %ymm0,%ymm4,%ymm8 +vpmuldq %ymm0,%ymm5,%ymm9 +vpmuldq %ymm1,%ymm6,%ymm6 +vpmuldq %ymm1,%ymm7,%ymm7 +vpmuldq %ymm1,%ymm8,%ymm8 +vpmuldq %ymm1,%ymm9,%ymm9 +vpsubq %ymm6,%ymm2,%ymm2 +vpsubq %ymm7,%ymm3,%ymm3 +vpsubq %ymm8,%ymm4,%ymm4 +vpsubq %ymm9,%ymm5,%ymm5 +vpsrlq $32,%ymm2,%ymm2 +vmovshdup %ymm4,%ymm4 + +#store +vpblendd $0xAA,%ymm3,%ymm2,%ymm2 +vpblendd $0xAA,%ymm5,%ymm4,%ymm4 + +vmovdqa %ymm2,(%rdi) +vmovdqa %ymm4,32(%rdi) + +add $64,%rsi +add $64,%rdx +add $64,%rdi +add $1,%eax +cmp $16,%eax +jb _looptop2 + +ret diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/poly.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/poly.c new file mode 100644 index 0000000000..25d36828ad --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/poly.c @@ -0,0 +1,1138 @@ +#include <stdint.h> +#include <string.h> +#include <immintrin.h> +#include "align.h" +#include "params.h" +#include "poly.h" +#include "ntt.h" +#include "rounding.h" +#include "rejsample.h" +#include "consts.h" +#include "symmetric.h" +#include "fips202x4.h" + +#ifdef DBENCH +#include "test/cpucycles.h" +extern const uint64_t timing_overhead; +extern uint64_t *tred, *tadd, *tmul, *tround, *tsample, *tpack; +#define DBENCH_START() uint64_t time = cpucycles() +#define DBENCH_STOP(t) t += cpucycles() - time - timing_overhead +#else +#define DBENCH_START() +#define DBENCH_STOP(t) +#endif + +#define _mm256_blendv_epi32(a,b,mask) \ + _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(a), \ + _mm256_castsi256_ps(b), \ + _mm256_castsi256_ps(mask))) + +/************************************************* +* Name: poly_reduce +* +* Description: Inplace reduction of all coefficients of polynomial to +* representative in [-6283009,6283007]. Assumes input +* coefficients to be at most 2^31 - 2^22 - 1 in absolute value. 
+* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_reduce(poly *a) { + unsigned int i; + __m256i f,g; + const __m256i q = _mm256_load_si256(&qdata.vec[_8XQ/8]); + const __m256i off = _mm256_set1_epi32(1<<22); + DBENCH_START(); + + for(i = 0; i < N/8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_add_epi32(f,off); + g = _mm256_srai_epi32(g,23); + g = _mm256_mullo_epi32(g,q); + f = _mm256_sub_epi32(f,g); + _mm256_store_si256(&a->vec[i],f); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: poly_caddq +* +* Description: For all coefficients of in/out polynomial add Q if +* coefficient is negative. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_caddq(poly *a) { + unsigned int i; + __m256i f,g; + const __m256i q = _mm256_load_si256(&qdata.vec[_8XQ/8]); + const __m256i zero = _mm256_setzero_si256(); + DBENCH_START(); + + for(i = 0; i < N/8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_blendv_epi32(zero,q,f); + f = _mm256_add_epi32(f,g); + _mm256_store_si256(&a->vec[i],f); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: poly_add +* +* Description: Add polynomials. No modular reduction is performed. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first summand +* - const poly *b: pointer to second summand +**************************************************/ +void poly_add(poly *c, const poly *a, const poly *b) { + unsigned int i; + __m256i f,g; + DBENCH_START(); + + for(i = 0; i < N/8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_load_si256(&b->vec[i]); + f = _mm256_add_epi32(f,g); + _mm256_store_si256(&c->vec[i],f); + } + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: poly_sub +* +* Description: Subtract polynomials. 
No modular reduction is +* performed. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial to be +* subtracted from first input polynomial +**************************************************/ +void poly_sub(poly *c, const poly *a, const poly *b) { + unsigned int i; + __m256i f,g; + DBENCH_START(); + + for(i = 0; i < N/8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_load_si256(&b->vec[i]); + f = _mm256_sub_epi32(f,g); + _mm256_store_si256(&c->vec[i],f); + } + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: poly_shiftl +* +* Description: Multiply polynomial by 2^D without modular reduction. Assumes +* input coefficients to be less than 2^{31-D} in absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_shiftl(poly *a) { + unsigned int i; + __m256i f; + DBENCH_START(); + + for(i = 0; i < N/8; i++) { + f = _mm256_load_si256(&a->vec[i]); + f = _mm256_slli_epi32(f,D); + _mm256_store_si256(&a->vec[i],f); + } + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: poly_ntt +* +* Description: Inplace forward NTT. Coefficients can grow by up to +* 8*Q in absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_ntt(poly *a) { + DBENCH_START(); + + ntt_avx(a->vec, qdata.vec); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: poly_invntt_tomont +* +* Description: Inplace inverse NTT and multiplication by 2^{32}. +* Input coefficients need to be less than Q in absolute +* value and output coefficients are again bounded by Q. 
+* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_invntt_tomont(poly *a) { + DBENCH_START(); + + invntt_avx(a->vec, qdata.vec); + + DBENCH_STOP(*tmul); +} + +void poly_nttunpack(poly *a) { + DBENCH_START(); + + nttunpack_avx(a->vec); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: poly_pointwise_montgomery +* +* Description: Pointwise multiplication of polynomials in NTT domain +* representation and multiplication of resulting polynomial +* by 2^{-32}. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_pointwise_montgomery(poly *c, const poly *a, const poly *b) { + DBENCH_START(); + + pointwise_avx(c->vec, a->vec, b->vec, qdata.vec); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: poly_power2round +* +* Description: For all coefficients c of the input polynomial, +* compute c0, c1 such that c mod^+ Q = c1*2^D + c0 +* with -2^{D-1} < c0 <= 2^{D-1}. Assumes coefficients to be +* positive standard representatives. +* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void poly_power2round(poly *a1, poly *a0, const poly *a) +{ + DBENCH_START(); + + power2round_avx(a1->vec, a0->vec, a->vec); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: poly_decompose +* +* Description: For all coefficients c of the input polynomial, +* compute high and low bits c0, c1 such c mod^+ Q = c1*ALPHA + c0 +* with -ALPHA/2 < c0 <= ALPHA/2 except if c1 = (Q-1)/ALPHA where we +* set c1 = 0 and -ALPHA/2 <= c0 = c mod Q - Q < 0. 
+* Assumes coefficients to be positive standard representatives. +* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void poly_decompose(poly *a1, poly *a0, const poly *a) +{ + DBENCH_START(); + + decompose_avx(a1->vec, a0->vec, a->vec); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: poly_make_hint +* +* Description: Compute hint array. The coefficients of which are the +* indices of the coefficients of the input polynomial +* whose low bits overflow into the high bits. +* +* Arguments: - uint8_t *h: pointer to output hint array (preallocated of length N) +* - const poly *a0: pointer to low part of input polynomial +* - const poly *a1: pointer to high part of input polynomial +* +* Returns number of hints, i.e. length of hint array. +**************************************************/ +unsigned int poly_make_hint(uint8_t hint[N], const poly *a0, const poly *a1) +{ + unsigned int r; + DBENCH_START(); + + r = make_hint_avx(hint, a0->vec, a1->vec); + + DBENCH_STOP(*tround); + return r; +} + +/************************************************* +* Name: poly_use_hint +* +* Description: Use hint polynomial to correct the high bits of a polynomial. +* +* Arguments: - poly *b: pointer to output polynomial with corrected high bits +* - const poly *a: pointer to input polynomial +* - const poly *h: pointer to input hint polynomial +**************************************************/ +void poly_use_hint(poly *b, const poly *a, const poly *h) +{ + DBENCH_START(); + + use_hint_avx(b->vec, a->vec, h->vec); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: poly_chknorm +* +* Description: Check infinity norm of polynomial against given bound. +* Assumes input polynomial to be reduced by poly_reduce(). 
+* +* Arguments: - const poly *a: pointer to polynomial +* - int32_t B: norm bound +* +* Returns 0 if norm is strictly smaller than B <= (Q-1)/8 and 1 otherwise. +**************************************************/ +int poly_chknorm(const poly *a, int32_t B) { + unsigned int i; + int r; + __m256i f,t; + const __m256i bound = _mm256_set1_epi32(B-1); + DBENCH_START(); + + if(B > (Q-1)/8) + return 1; + + t = _mm256_setzero_si256(); + for(i = 0; i < N/8; i++) { + f = _mm256_load_si256(&a->vec[i]); + f = _mm256_abs_epi32(f); + f = _mm256_cmpgt_epi32(f,bound); + t = _mm256_or_si256(t,f); + } + + r = 1 - _mm256_testz_si256(t,t); + DBENCH_STOP(*tsample); + return r; +} + +/************************************************* +* Name: rej_uniform +* +* Description: Sample uniformly random coefficients in [0, Q-1] by +* performing rejection sampling on array of random bytes. +* +* Arguments: - int32_t *a: pointer to output array (allocated) +* - unsigned int len: number of coefficients to be sampled +* - const uint8_t *buf: array of random bytes +* - unsigned int buflen: length of array of random bytes +* +* Returns number of sampled coefficients. Can be smaller than len if not enough +* random bytes were given. 
+**************************************************/ +static unsigned int rej_uniform(int32_t *a, + unsigned int len, + const uint8_t *buf, + unsigned int buflen) +{ + unsigned int ctr, pos; + uint32_t t; + DBENCH_START(); + + ctr = pos = 0; + while(ctr < len && pos + 3 <= buflen) { + t = buf[pos++]; + t |= (uint32_t)buf[pos++] << 8; + t |= (uint32_t)buf[pos++] << 16; + t &= 0x7FFFFF; + + if(t < Q) + a[ctr++] = t; + } + + DBENCH_STOP(*tsample); + return ctr; +} + +/************************************************* +* Name: poly_uniform +* +* Description: Sample polynomial with uniformly random coefficients +* in [0,Q-1] by performing rejection sampling on the +* output stream of SHAKE256(seed|nonce) +* +* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length SEEDBYTES +* - uint16_t nonce: 2-byte nonce +**************************************************/ +void poly_uniform_preinit(poly *a, stream128_state *state) +{ + unsigned int ctr; + /* rej_uniform_avx reads up to 8 additional bytes */ + ALIGNED_UINT8(REJ_UNIFORM_BUFLEN+8) buf; + + stream128_squeezeblocks(buf.coeffs, REJ_UNIFORM_NBLOCKS, state); + ctr = rej_uniform_avx(a->coeffs, buf.coeffs); + + while(ctr < N) { + /* length of buf is always divisible by 3; hence, no bytes left */ + stream128_squeezeblocks(buf.coeffs, 1, state); + ctr += rej_uniform(a->coeffs + ctr, N - ctr, buf.coeffs, STREAM128_BLOCKBYTES); + } +} + +void poly_uniform(poly *a, const uint8_t seed[SEEDBYTES], uint16_t nonce) +{ + stream128_state state; + stream128_init(&state, seed, nonce); + poly_uniform_preinit(a, &state); + stream128_release(&state); +} + +void poly_uniform_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[32], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3) +{ + unsigned int ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_BUFLEN+8) buf[4]; + shake128x4incctx state; + __m256i f; + + f = _mm256_loadu_si256((__m256i 
*)seed); + _mm256_store_si256(buf[0].vec,f); + _mm256_store_si256(buf[1].vec,f); + _mm256_store_si256(buf[2].vec,f); + _mm256_store_si256(buf[3].vec,f); + + buf[0].coeffs[SEEDBYTES+0] = nonce0; + buf[0].coeffs[SEEDBYTES+1] = nonce0 >> 8; + buf[1].coeffs[SEEDBYTES+0] = nonce1; + buf[1].coeffs[SEEDBYTES+1] = nonce1 >> 8; + buf[2].coeffs[SEEDBYTES+0] = nonce2; + buf[2].coeffs[SEEDBYTES+1] = nonce2 >> 8; + buf[3].coeffs[SEEDBYTES+0] = nonce3; + buf[3].coeffs[SEEDBYTES+1] = nonce3 >> 8; + + shake128x4_inc_init(&state); + shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, SEEDBYTES + 2); + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_NBLOCKS, &state); + + ctr0 = rej_uniform_avx(a0->coeffs, buf[0].coeffs); + ctr1 = rej_uniform_avx(a1->coeffs, buf[1].coeffs); + ctr2 = rej_uniform_avx(a2->coeffs, buf[2].coeffs); + ctr3 = rej_uniform_avx(a3->coeffs, buf[3].coeffs); + + while(ctr0 < N || ctr1 < N || ctr2 < N || ctr3 < N) { + shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_uniform(a0->coeffs + ctr0, N - ctr0, buf[0].coeffs, SHAKE128_RATE); + ctr1 += rej_uniform(a1->coeffs + ctr1, N - ctr1, buf[1].coeffs, SHAKE128_RATE); + ctr2 += rej_uniform(a2->coeffs + ctr2, N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_uniform(a3->coeffs + ctr3, N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } + shake128x4_inc_ctx_release(&state); +} + +/************************************************* +* Name: rej_eta +* +* Description: Sample uniformly random coefficients in [-ETA, ETA] by +* performing rejection sampling on array of random bytes. +* +* Arguments: - int32_t *a: pointer to output array (allocated) +* - unsigned int len: number of coefficients to be sampled +* - const uint8_t *buf: array of random bytes +* - unsigned int buflen: length of array of random bytes +* +* Returns number of sampled coefficients. 
Can be smaller than len if not enough +* random bytes were given. +**************************************************/ +static unsigned int rej_eta(int32_t *a, + unsigned int len, + const uint8_t *buf, + unsigned int buflen) +{ + unsigned int ctr, pos; + uint32_t t0, t1; + DBENCH_START(); + + ctr = pos = 0; + while(ctr < len && pos < buflen) { + t0 = buf[pos] & 0x0F; + t1 = buf[pos++] >> 4; + +#if ETA == 2 + if(t0 < 15) { + t0 = t0 - (205*t0 >> 10)*5; + a[ctr++] = 2 - t0; + } + if(t1 < 15 && ctr < len) { + t1 = t1 - (205*t1 >> 10)*5; + a[ctr++] = 2 - t1; + } +#elif ETA == 4 + if(t0 < 9) + a[ctr++] = 4 - t0; + if(t1 < 9 && ctr < len) + a[ctr++] = 4 - t1; +#endif + } + + DBENCH_STOP(*tsample); + return ctr; +} + +/************************************************* +* Name: poly_uniform_eta +* +* Description: Sample polynomial with uniformly random coefficients +* in [-ETA,ETA] by performing rejection sampling using the +* output stream of SHAKE256(seed|nonce) +* +* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length CRHBYTES +* - uint16_t nonce: 2-byte nonce +**************************************************/ +void poly_uniform_eta_preinit(poly *a, stream256_state *state) +{ + unsigned int ctr; + ALIGNED_UINT8(REJ_UNIFORM_ETA_BUFLEN) buf; + + stream256_squeezeblocks(buf.coeffs, REJ_UNIFORM_ETA_NBLOCKS, state); + ctr = rej_eta_avx(a->coeffs, buf.coeffs); + + while(ctr < N) { + stream256_squeezeblocks(buf.coeffs, 1, state); + ctr += rej_eta(a->coeffs + ctr, N - ctr, buf.coeffs, STREAM256_BLOCKBYTES); + } +} + +void poly_uniform_eta(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce) +{ + stream256_state state; + stream256_init(&state, seed, nonce); + poly_uniform_eta_preinit(a, &state); + stream256_release(&state); +} + +void poly_uniform_eta_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[64], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3) +{ + unsigned int 
ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_ETA_BUFLEN) buf[4]; + + __m256i f; + shake256x4incctx state; + + f = _mm256_loadu_si256((__m256i *)&seed[0]); + _mm256_store_si256(&buf[0].vec[0],f); + _mm256_store_si256(&buf[1].vec[0],f); + _mm256_store_si256(&buf[2].vec[0],f); + _mm256_store_si256(&buf[3].vec[0],f); + f = _mm256_loadu_si256((__m256i *)&seed[32]); + _mm256_store_si256(&buf[0].vec[1],f); + _mm256_store_si256(&buf[1].vec[1],f); + _mm256_store_si256(&buf[2].vec[1],f); + _mm256_store_si256(&buf[3].vec[1],f); + + buf[0].coeffs[64] = nonce0; + buf[0].coeffs[65] = nonce0 >> 8; + buf[1].coeffs[64] = nonce1; + buf[1].coeffs[65] = nonce1 >> 8; + buf[2].coeffs[64] = nonce2; + buf[2].coeffs[65] = nonce2 >> 8; + buf[3].coeffs[64] = nonce3; + buf[3].coeffs[65] = nonce3 >> 8; + + shake256x4_inc_init(&state); + shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 66); + shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_ETA_NBLOCKS, &state); + + ctr0 = rej_eta_avx(a0->coeffs, buf[0].coeffs); + ctr1 = rej_eta_avx(a1->coeffs, buf[1].coeffs); + ctr2 = rej_eta_avx(a2->coeffs, buf[2].coeffs); + ctr3 = rej_eta_avx(a3->coeffs, buf[3].coeffs); + + while(ctr0 < N || ctr1 < N || ctr2 < N || ctr3 < N) { + shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_eta(a0->coeffs + ctr0, N - ctr0, buf[0].coeffs, SHAKE256_RATE); + ctr1 += rej_eta(a1->coeffs + ctr1, N - ctr1, buf[1].coeffs, SHAKE256_RATE); + ctr2 += rej_eta(a2->coeffs + ctr2, N - ctr2, buf[2].coeffs, SHAKE256_RATE); + ctr3 += rej_eta(a3->coeffs + ctr3, N - ctr3, buf[3].coeffs, SHAKE256_RATE); + } + shake256x4_inc_ctx_release(&state); +} + +/************************************************* +* Name: poly_uniform_gamma1 +* +* Description: Sample polynomial with uniformly random coefficients +* in [-(GAMMA1 - 1), GAMMA1] by unpacking output stream +* of SHAKE256(seed|nonce) +* 
+* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length CRHBYTES +* - uint16_t nonce: 16-bit nonce +**************************************************/ +#define POLY_UNIFORM_GAMMA1_NBLOCKS ((POLYZ_PACKEDBYTES+STREAM256_BLOCKBYTES-1)/STREAM256_BLOCKBYTES) +void poly_uniform_gamma1_preinit(poly *a, stream256_state *state) +{ + /* polyz_unpack reads 14 additional bytes */ + ALIGNED_UINT8(POLY_UNIFORM_GAMMA1_NBLOCKS*STREAM256_BLOCKBYTES+14) buf; + stream256_squeezeblocks(buf.coeffs, POLY_UNIFORM_GAMMA1_NBLOCKS, state); + polyz_unpack(a, buf.coeffs); +} + +void poly_uniform_gamma1(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce) +{ + stream256_state state; + stream256_init(&state, seed, nonce); + poly_uniform_gamma1_preinit(a, &state); + stream256_release(&state); +} + +void poly_uniform_gamma1_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[64], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3) +{ + ALIGNED_UINT8(POLY_UNIFORM_GAMMA1_NBLOCKS*STREAM256_BLOCKBYTES+14) buf[4]; + shake256x4incctx state; + __m256i f; + + f = _mm256_loadu_si256((__m256i *)&seed[0]); + _mm256_store_si256(&buf[0].vec[0],f); + _mm256_store_si256(&buf[1].vec[0],f); + _mm256_store_si256(&buf[2].vec[0],f); + _mm256_store_si256(&buf[3].vec[0],f); + f = _mm256_loadu_si256((__m256i *)&seed[32]); + _mm256_store_si256(&buf[0].vec[1],f); + _mm256_store_si256(&buf[1].vec[1],f); + _mm256_store_si256(&buf[2].vec[1],f); + _mm256_store_si256(&buf[3].vec[1],f); + + buf[0].coeffs[64] = nonce0; + buf[0].coeffs[65] = nonce0 >> 8; + buf[1].coeffs[64] = nonce1; + buf[1].coeffs[65] = nonce1 >> 8; + buf[2].coeffs[64] = nonce2; + buf[2].coeffs[65] = nonce2 >> 8; + buf[3].coeffs[64] = nonce3; + buf[3].coeffs[65] = nonce3 >> 8; + + shake256x4_inc_init(&state); + shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 66); + shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, 
buf[2].coeffs, buf[3].coeffs, POLY_UNIFORM_GAMMA1_NBLOCKS, &state); + shake256x4_inc_ctx_release(&state); + + polyz_unpack(a0, buf[0].coeffs); + polyz_unpack(a1, buf[1].coeffs); + polyz_unpack(a2, buf[2].coeffs); + polyz_unpack(a3, buf[3].coeffs); +} + +/************************************************* +* Name: challenge +* +* Description: Implementation of H. Samples polynomial with TAU nonzero +* coefficients in {-1,1} using the output stream of +* SHAKE256(seed). +* +* Arguments: - poly *c: pointer to output polynomial +* - const uint8_t mu[]: byte array containing seed of length SEEDBYTES +**************************************************/ +void poly_challenge(poly * restrict c, const uint8_t seed[SEEDBYTES]) { + unsigned int i, b, pos; + uint64_t signs; + ALIGNED_UINT8(SHAKE256_RATE) buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, seed, SEEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf.coeffs, SHAKE256_RATE, &state); + + memcpy(&signs, buf.coeffs, 8); + pos = 8; + + memset(c->vec, 0, sizeof(poly)); + for(i = N-TAU; i < N; ++i) { + do { + if(pos >= SHAKE256_RATE) { + shake256_squeezeblocks(buf.coeffs, 1, &state); + pos = 0; + } + + b = buf.coeffs[pos++]; + } while(b > i); + + c->coeffs[i] = c->coeffs[b]; + c->coeffs[b] = 1 - 2*(signs & 1); + signs >>= 1; + } + shake256_inc_ctx_release(&state); +} + +/************************************************* +* Name: polyeta_pack +* +* Description: Bit-pack polynomial with coefficients in [-ETA,ETA]. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYETA_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void polyeta_pack(uint8_t r[POLYETA_PACKEDBYTES], const poly * restrict a) { + unsigned int i; + uint8_t t[8]; + DBENCH_START(); + +#if ETA == 2 + for(i = 0; i < N/8; ++i) { + t[0] = ETA - a->coeffs[8*i+0]; + t[1] = ETA - a->coeffs[8*i+1]; + t[2] = ETA - a->coeffs[8*i+2]; + t[3] = ETA - a->coeffs[8*i+3]; + t[4] = ETA - a->coeffs[8*i+4]; + t[5] = ETA - a->coeffs[8*i+5]; + t[6] = ETA - a->coeffs[8*i+6]; + t[7] = ETA - a->coeffs[8*i+7]; + + r[3*i+0] = (t[0] >> 0) | (t[1] << 3) | (t[2] << 6); + r[3*i+1] = (t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7); + r[3*i+2] = (t[5] >> 1) | (t[6] << 2) | (t[7] << 5); + } +#elif ETA == 4 + for(i = 0; i < N/2; ++i) { + t[0] = ETA - a->coeffs[2*i+0]; + t[1] = ETA - a->coeffs[2*i+1]; + r[i] = t[0] | (t[1] << 4); + } +#endif + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyeta_unpack +* +* Description: Unpack polynomial with coefficients in [-ETA,ETA]. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void polyeta_unpack(poly * restrict r, const uint8_t a[POLYETA_PACKEDBYTES]) { + unsigned int i; + DBENCH_START(); + +#if ETA == 2 + for(i = 0; i < N/8; ++i) { + r->coeffs[8*i+0] = (a[3*i+0] >> 0) & 7; + r->coeffs[8*i+1] = (a[3*i+0] >> 3) & 7; + r->coeffs[8*i+2] = ((a[3*i+0] >> 6) | (a[3*i+1] << 2)) & 7; + r->coeffs[8*i+3] = (a[3*i+1] >> 1) & 7; + r->coeffs[8*i+4] = (a[3*i+1] >> 4) & 7; + r->coeffs[8*i+5] = ((a[3*i+1] >> 7) | (a[3*i+2] << 1)) & 7; + r->coeffs[8*i+6] = (a[3*i+2] >> 2) & 7; + r->coeffs[8*i+7] = (a[3*i+2] >> 5) & 7; + + r->coeffs[8*i+0] = ETA - r->coeffs[8*i+0]; + r->coeffs[8*i+1] = ETA - r->coeffs[8*i+1]; + r->coeffs[8*i+2] = ETA - r->coeffs[8*i+2]; + r->coeffs[8*i+3] = ETA - r->coeffs[8*i+3]; + r->coeffs[8*i+4] = ETA - r->coeffs[8*i+4]; + r->coeffs[8*i+5] = ETA - r->coeffs[8*i+5]; + r->coeffs[8*i+6] = ETA - r->coeffs[8*i+6]; + r->coeffs[8*i+7] = ETA - r->coeffs[8*i+7]; + } +#elif ETA == 4 + for(i = 0; i < N/2; ++i) { + r->coeffs[2*i+0] = a[i] & 0x0F; + r->coeffs[2*i+1] = a[i] >> 4; + r->coeffs[2*i+0] = ETA - r->coeffs[2*i+0]; + r->coeffs[2*i+1] = ETA - r->coeffs[2*i+1]; + } +#endif + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyt1_pack +* +* Description: Bit-pack polynomial t1 with coefficients fitting in 10 bits. +* Input coefficients are assumed to be positive standard representatives. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYT1_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void polyt1_pack(uint8_t r[POLYT1_PACKEDBYTES], const poly * restrict a) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N/4; ++i) { + r[5*i+0] = (a->coeffs[4*i+0] >> 0); + r[5*i+1] = (a->coeffs[4*i+0] >> 8) | (a->coeffs[4*i+1] << 2); + r[5*i+2] = (a->coeffs[4*i+1] >> 6) | (a->coeffs[4*i+2] << 4); + r[5*i+3] = (a->coeffs[4*i+2] >> 4) | (a->coeffs[4*i+3] << 6); + r[5*i+4] = (a->coeffs[4*i+3] >> 2); + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyt1_unpack +* +* Description: Unpack polynomial t1 with 10-bit coefficients. +* Output coefficients are positive standard representatives. +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void polyt1_unpack(poly * restrict r, const uint8_t a[POLYT1_PACKEDBYTES]) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N/4; ++i) { + r->coeffs[4*i+0] = ((a[5*i+0] >> 0) | ((uint32_t)a[5*i+1] << 8)) & 0x3FF; + r->coeffs[4*i+1] = ((a[5*i+1] >> 2) | ((uint32_t)a[5*i+2] << 6)) & 0x3FF; + r->coeffs[4*i+2] = ((a[5*i+2] >> 4) | ((uint32_t)a[5*i+3] << 4)) & 0x3FF; + r->coeffs[4*i+3] = ((a[5*i+3] >> 6) | ((uint32_t)a[5*i+4] << 2)) & 0x3FF; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyt0_pack +* +* Description: Bit-pack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}]. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYT0_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void polyt0_pack(uint8_t r[POLYT0_PACKEDBYTES], const poly * restrict a) { + unsigned int i; + uint32_t t[8]; + DBENCH_START(); + + for(i = 0; i < N/8; ++i) { + t[0] = (1 << (D-1)) - a->coeffs[8*i+0]; + t[1] = (1 << (D-1)) - a->coeffs[8*i+1]; + t[2] = (1 << (D-1)) - a->coeffs[8*i+2]; + t[3] = (1 << (D-1)) - a->coeffs[8*i+3]; + t[4] = (1 << (D-1)) - a->coeffs[8*i+4]; + t[5] = (1 << (D-1)) - a->coeffs[8*i+5]; + t[6] = (1 << (D-1)) - a->coeffs[8*i+6]; + t[7] = (1 << (D-1)) - a->coeffs[8*i+7]; + + r[13*i+ 0] = t[0]; + r[13*i+ 1] = t[0] >> 8; + r[13*i+ 1] |= t[1] << 5; + r[13*i+ 2] = t[1] >> 3; + r[13*i+ 3] = t[1] >> 11; + r[13*i+ 3] |= t[2] << 2; + r[13*i+ 4] = t[2] >> 6; + r[13*i+ 4] |= t[3] << 7; + r[13*i+ 5] = t[3] >> 1; + r[13*i+ 6] = t[3] >> 9; + r[13*i+ 6] |= t[4] << 4; + r[13*i+ 7] = t[4] >> 4; + r[13*i+ 8] = t[4] >> 12; + r[13*i+ 8] |= t[5] << 1; + r[13*i+ 9] = t[5] >> 7; + r[13*i+ 9] |= t[6] << 6; + r[13*i+10] = t[6] >> 2; + r[13*i+11] = t[6] >> 10; + r[13*i+11] |= t[7] << 3; + r[13*i+12] = t[7] >> 5; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyt0_unpack +* +* Description: Unpack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}]. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void polyt0_unpack(poly * restrict r, const uint8_t a[POLYT0_PACKEDBYTES]) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N/8; ++i) { + r->coeffs[8*i+0] = a[13*i+0]; + r->coeffs[8*i+0] |= (uint32_t)a[13*i+1] << 8; + r->coeffs[8*i+0] &= 0x1FFF; + + r->coeffs[8*i+1] = a[13*i+1] >> 5; + r->coeffs[8*i+1] |= (uint32_t)a[13*i+2] << 3; + r->coeffs[8*i+1] |= (uint32_t)a[13*i+3] << 11; + r->coeffs[8*i+1] &= 0x1FFF; + + r->coeffs[8*i+2] = a[13*i+3] >> 2; + r->coeffs[8*i+2] |= (uint32_t)a[13*i+4] << 6; + r->coeffs[8*i+2] &= 0x1FFF; + + r->coeffs[8*i+3] = a[13*i+4] >> 7; + r->coeffs[8*i+3] |= (uint32_t)a[13*i+5] << 1; + r->coeffs[8*i+3] |= (uint32_t)a[13*i+6] << 9; + r->coeffs[8*i+3] &= 0x1FFF; + + r->coeffs[8*i+4] = a[13*i+6] >> 4; + r->coeffs[8*i+4] |= (uint32_t)a[13*i+7] << 4; + r->coeffs[8*i+4] |= (uint32_t)a[13*i+8] << 12; + r->coeffs[8*i+4] &= 0x1FFF; + + r->coeffs[8*i+5] = a[13*i+8] >> 1; + r->coeffs[8*i+5] |= (uint32_t)a[13*i+9] << 7; + r->coeffs[8*i+5] &= 0x1FFF; + + r->coeffs[8*i+6] = a[13*i+9] >> 6; + r->coeffs[8*i+6] |= (uint32_t)a[13*i+10] << 2; + r->coeffs[8*i+6] |= (uint32_t)a[13*i+11] << 10; + r->coeffs[8*i+6] &= 0x1FFF; + + r->coeffs[8*i+7] = a[13*i+11] >> 3; + r->coeffs[8*i+7] |= (uint32_t)a[13*i+12] << 5; + r->coeffs[8*i+7] &= 0x1FFF; + + r->coeffs[8*i+0] = (1 << (D-1)) - r->coeffs[8*i+0]; + r->coeffs[8*i+1] = (1 << (D-1)) - r->coeffs[8*i+1]; + r->coeffs[8*i+2] = (1 << (D-1)) - r->coeffs[8*i+2]; + r->coeffs[8*i+3] = (1 << (D-1)) - r->coeffs[8*i+3]; + r->coeffs[8*i+4] = (1 << (D-1)) - r->coeffs[8*i+4]; + r->coeffs[8*i+5] = (1 << (D-1)) - r->coeffs[8*i+5]; + r->coeffs[8*i+6] = (1 << (D-1)) - r->coeffs[8*i+6]; + r->coeffs[8*i+7] = (1 << (D-1)) - r->coeffs[8*i+7]; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyz_pack +* +* 
Description: Bit-pack polynomial with coefficients +* in [-(GAMMA1 - 1), GAMMA1]. +* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYZ_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void polyz_pack(uint8_t r[POLYZ_PACKEDBYTES], const poly * restrict a) { + unsigned int i; + uint32_t t[4]; + DBENCH_START(); + +#if GAMMA1 == (1 << 17) + for(i = 0; i < N/4; ++i) { + t[0] = GAMMA1 - a->coeffs[4*i+0]; + t[1] = GAMMA1 - a->coeffs[4*i+1]; + t[2] = GAMMA1 - a->coeffs[4*i+2]; + t[3] = GAMMA1 - a->coeffs[4*i+3]; + + r[9*i+0] = t[0]; + r[9*i+1] = t[0] >> 8; + r[9*i+2] = t[0] >> 16; + r[9*i+2] |= t[1] << 2; + r[9*i+3] = t[1] >> 6; + r[9*i+4] = t[1] >> 14; + r[9*i+4] |= t[2] << 4; + r[9*i+5] = t[2] >> 4; + r[9*i+6] = t[2] >> 12; + r[9*i+6] |= t[3] << 6; + r[9*i+7] = t[3] >> 2; + r[9*i+8] = t[3] >> 10; + } +#elif GAMMA1 == (1 << 19) + for(i = 0; i < N/2; ++i) { + t[0] = GAMMA1 - a->coeffs[2*i+0]; + t[1] = GAMMA1 - a->coeffs[2*i+1]; + + r[5*i+0] = t[0]; + r[5*i+1] = t[0] >> 8; + r[5*i+2] = t[0] >> 16; + r[5*i+2] |= t[1] << 4; + r[5*i+3] = t[1] >> 4; + r[5*i+4] = t[1] >> 12; + } +#endif + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: polyz_unpack +* +* Description: Unpack polynomial z with coefficients +* in [-(GAMMA1 - 1), GAMMA1]. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +#if GAMMA1 == (1 << 17) +void polyz_unpack(poly * restrict r, const uint8_t *a) { + unsigned int i; + __m256i f; + const __m256i shufbidx = _mm256_set_epi8(-1, 9, 8, 7,-1, 7, 6, 5,-1, 5, 4, 3,-1, 3, 2, 1, + -1, 8, 7, 6,-1, 6, 5, 4,-1, 4, 3, 2,-1, 2, 1, 0); + const __m256i srlvdidx = _mm256_set_epi32(6,4,2,0,6,4,2,0); + const __m256i mask = _mm256_set1_epi32(0x3FFFF); + const __m256i gamma1 = _mm256_set1_epi32(GAMMA1); + DBENCH_START(); + + for(i = 0; i < N/8; i++) { + f = _mm256_loadu_si256((__m256i *)&a[18*i]); + f = _mm256_permute4x64_epi64(f,0x94); + f = _mm256_shuffle_epi8(f,shufbidx); + f = _mm256_srlv_epi32(f,srlvdidx); + f = _mm256_and_si256(f,mask); + f = _mm256_sub_epi32(gamma1,f); + _mm256_store_si256(&r->vec[i],f); + } + + DBENCH_STOP(*tpack); +} + +#elif GAMMA1 == (1 << 19) +void polyz_unpack(poly * restrict r, const uint8_t *a) { + unsigned int i; + __m256i f; + const __m256i shufbidx = _mm256_set_epi8(-1,11,10, 9,-1, 9, 8, 7,-1, 6, 5, 4,-1, 4, 3, 2, + -1, 9, 8, 7,-1, 7, 6, 5,-1, 4, 3, 2,-1, 2, 1, 0); + const __m256i srlvdidx = _mm256_set1_epi64x((uint64_t)4 << 32); + const __m256i mask = _mm256_set1_epi32(0xFFFFF); + const __m256i gamma1 = _mm256_set1_epi32(GAMMA1); + DBENCH_START(); + + for(i = 0; i < N/8; i++) { + f = _mm256_loadu_si256((__m256i *)&a[20*i]); + f = _mm256_permute4x64_epi64(f,0x94); + f = _mm256_shuffle_epi8(f,shufbidx); + f = _mm256_srlv_epi32(f,srlvdidx); + f = _mm256_and_si256(f,mask); + f = _mm256_sub_epi32(gamma1,f); + _mm256_store_si256(&r->vec[i],f); + } + + DBENCH_STOP(*tpack); +} +#endif + +/************************************************* +* Name: polyw1_pack +* +* Description: Bit-pack polynomial w1 with coefficients in [0,15] or [0,43]. +* Input coefficients are assumed to be positive standard representatives. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYW1_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +#if GAMMA2 == (Q-1)/88 +void polyw1_pack(uint8_t *r, const poly * restrict a) { + unsigned int i; + __m256i f0,f1,f2,f3; + const __m256i shift1 = _mm256_set1_epi16((64 << 8) + 1); + const __m256i shift2 = _mm256_set1_epi32((4096 << 16) + 1); + const __m256i shufdidx1 = _mm256_set_epi32(7,3,6,2,5,1,4,0); + const __m256i shufdidx2 = _mm256_set_epi32(-1,-1,6,5,4,2,1,0); + const __m256i shufbidx = _mm256_set_epi8(-1,-1,-1,-1,14,13,12,10, 9, 8, 6, 5, 4, 2, 1, 0, + -1,-1,-1,-1,14,13,12,10, 9, 8, 6, 5, 4, 2, 1, 0); + DBENCH_START(); + + for(i = 0; i < N/32; i++) { + f0 = _mm256_load_si256(&a->vec[4*i+0]); + f1 = _mm256_load_si256(&a->vec[4*i+1]); + f2 = _mm256_load_si256(&a->vec[4*i+2]); + f3 = _mm256_load_si256(&a->vec[4*i+3]); + f0 = _mm256_packus_epi32(f0,f1); + f1 = _mm256_packus_epi32(f2,f3); + f0 = _mm256_packus_epi16(f0,f1); + f0 = _mm256_maddubs_epi16(f0,shift1); + f0 = _mm256_madd_epi16(f0,shift2); + f0 = _mm256_permutevar8x32_epi32(f0,shufdidx1); + f0 = _mm256_shuffle_epi8(f0,shufbidx); + f0 = _mm256_permutevar8x32_epi32(f0,shufdidx2); + _mm256_storeu_si256((__m256i *)&r[24*i],f0); + } + + DBENCH_STOP(*tpack); +} + +#elif GAMMA2 == (Q-1)/32 +void polyw1_pack(uint8_t *r, const poly * restrict a) { + unsigned int i; + __m256i f0, f1, f2, f3, f4, f5, f6, f7; + const __m256i shift = _mm256_set1_epi16((16 << 8) + 1); + const __m256i shufbidx = _mm256_set_epi8(15,14, 7, 6,13,12, 5, 4,11,10, 3, 2, 9, 8, 1, 0, + 15,14, 7, 6,13,12, 5, 4,11,10, 3, 2, 9, 8, 1, 0); + DBENCH_START(); + + for(i = 0; i < N/64; ++i) { + f0 = _mm256_load_si256(&a->vec[8*i+0]); + f1 = _mm256_load_si256(&a->vec[8*i+1]); + f2 = _mm256_load_si256(&a->vec[8*i+2]); + f3 = _mm256_load_si256(&a->vec[8*i+3]); + f4 = _mm256_load_si256(&a->vec[8*i+4]); + f5 = _mm256_load_si256(&a->vec[8*i+5]); + f6 = 
_mm256_load_si256(&a->vec[8*i+6]); + f7 = _mm256_load_si256(&a->vec[8*i+7]); + f0 = _mm256_packus_epi32(f0,f1); + f1 = _mm256_packus_epi32(f2,f3); + f2 = _mm256_packus_epi32(f4,f5); + f3 = _mm256_packus_epi32(f6,f7); + f0 = _mm256_packus_epi16(f0,f1); + f1 = _mm256_packus_epi16(f2,f3); + f0 = _mm256_maddubs_epi16(f0,shift); + f1 = _mm256_maddubs_epi16(f1,shift); + f0 = _mm256_packus_epi16(f0,f1); + f0 = _mm256_permute4x64_epi64(f0,0xD8); + f0 = _mm256_shuffle_epi8(f0,shufbidx); + _mm256_storeu_si256((__m256i *)&r[32*i], f0); + } + + DBENCH_STOP(*tpack); +} +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/poly.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/poly.h new file mode 100644 index 0000000000..7bcd8e5e03 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/poly.h @@ -0,0 +1,112 @@ +#ifndef POLY_H +#define POLY_H + +#include +#include "align.h" +#include "params.h" +#include "symmetric.h" + +typedef ALIGNED_INT32(N) poly; + +#define poly_reduce DILITHIUM_NAMESPACE(poly_reduce) +void poly_reduce(poly *a); +#define poly_caddq DILITHIUM_NAMESPACE(poly_caddq) +void poly_caddq(poly *a); + +#define poly_add DILITHIUM_NAMESPACE(poly_add) +void poly_add(poly *c, const poly *a, const poly *b); +#define poly_sub DILITHIUM_NAMESPACE(poly_sub) +void poly_sub(poly *c, const poly *a, const poly *b); +#define poly_shiftl DILITHIUM_NAMESPACE(poly_shiftl) +void poly_shiftl(poly *a); + +#define poly_ntt DILITHIUM_NAMESPACE(poly_ntt) +void poly_ntt(poly *a); +#define poly_invntt_tomont DILITHIUM_NAMESPACE(poly_invntt_tomont) +void poly_invntt_tomont(poly *a); +#define poly_nttunpack DILITHIUM_NAMESPACE(poly_nttunpack) +void poly_nttunpack(poly *a); +#define poly_pointwise_montgomery DILITHIUM_NAMESPACE(poly_pointwise_montgomery) +void poly_pointwise_montgomery(poly *c, const poly *a, const poly *b); + +#define poly_power2round DILITHIUM_NAMESPACE(poly_power2round) +void poly_power2round(poly 
*a1, poly *a0, const poly *a); +#define poly_decompose DILITHIUM_NAMESPACE(poly_decompose) +void poly_decompose(poly *a1, poly *a0, const poly *a); +#define poly_make_hint DILITHIUM_NAMESPACE(poly_make_hint) +unsigned int poly_make_hint(uint8_t hint[N], const poly *a0, const poly *a1); +#define poly_use_hint DILITHIUM_NAMESPACE(poly_use_hint) +void poly_use_hint(poly *b, const poly *a, const poly *h); + +#define poly_chknorm DILITHIUM_NAMESPACE(poly_chknorm) +int poly_chknorm(const poly *a, int32_t B); +#define poly_uniform_preinit DILITHIUM_NAMESPACE(poly_uniform_preinit) +void poly_uniform_preinit(poly *a, stream128_state *state); +#define poly_uniform DILITHIUM_NAMESPACE(poly_uniform) +void poly_uniform(poly *a, const uint8_t seed[SEEDBYTES], uint16_t nonce); +#define poly_uniform_eta_preinit DILITHIUM_NAMESPACE(poly_uniform_eta_preinit) +void poly_uniform_eta_preinit(poly *a, stream256_state *state); +#define poly_uniform_eta DILITHIUM_NAMESPACE(poly_uniform_eta) +void poly_uniform_eta(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce); +#define poly_uniform_gamma1_preinit DILITHIUM_NAMESPACE(poly_uniform_gamma1_preinit) +void poly_uniform_gamma1_preinit(poly *a, stream256_state *state); +#define poly_uniform_gamma1 DILITHIUM_NAMESPACE(poly_uniform_gamma1) +void poly_uniform_gamma1(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce); +#define poly_challenge DILITHIUM_NAMESPACE(poly_challenge) +void poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]); + +#define poly_uniform_4x DILITHIUM_NAMESPACE(poly_uniform_4x) +void poly_uniform_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[SEEDBYTES], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3); +#define poly_uniform_eta_4x DILITHIUM_NAMESPACE(poly_uniform_eta_4x) +void poly_uniform_eta_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[CRHBYTES], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3); +#define 
poly_uniform_gamma1_4x DILITHIUM_NAMESPACE(poly_uniform_gamma1_4x) +void poly_uniform_gamma1_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[CRHBYTES], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3); + +#define polyeta_pack DILITHIUM_NAMESPACE(polyeta_pack) +void polyeta_pack(uint8_t r[POLYETA_PACKEDBYTES], const poly *a); +#define polyeta_unpack DILITHIUM_NAMESPACE(polyeta_unpack) +void polyeta_unpack(poly *r, const uint8_t a[POLYETA_PACKEDBYTES]); + +#define polyt1_pack DILITHIUM_NAMESPACE(polyt1_pack) +void polyt1_pack(uint8_t r[POLYT1_PACKEDBYTES], const poly *a); +#define polyt1_unpack DILITHIUM_NAMESPACE(polyt1_unpack) +void polyt1_unpack(poly *r, const uint8_t a[POLYT1_PACKEDBYTES]); + +#define polyt0_pack DILITHIUM_NAMESPACE(polyt0_pack) +void polyt0_pack(uint8_t r[POLYT0_PACKEDBYTES], const poly *a); +#define polyt0_unpack DILITHIUM_NAMESPACE(polyt0_unpack) +void polyt0_unpack(poly *r, const uint8_t a[POLYT0_PACKEDBYTES]); + +#define polyz_pack DILITHIUM_NAMESPACE(polyz_pack) +void polyz_pack(uint8_t r[POLYZ_PACKEDBYTES], const poly *a); +#define polyz_unpack DILITHIUM_NAMESPACE(polyz_unpack) +void polyz_unpack(poly *r, const uint8_t *a); + +#define polyw1_pack DILITHIUM_NAMESPACE(polyw1_pack) +void polyw1_pack(uint8_t *r, const poly *a); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/polyvec.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/polyvec.c new file mode 100644 index 0000000000..6e2302168e --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/polyvec.c @@ -0,0 +1,588 @@ +#include +#include "params.h" +#include "polyvec.h" +#include "poly.h" +#include "ntt.h" +#include "consts.h" + +/************************************************* +* Name: expand_mat +* +* Description: Implementation of ExpandA. 
Generates matrix A with uniformly +* random coefficients a_{i,j} by performing rejection +* sampling on the output stream of SHAKE128(rho|j|i) +* +* Arguments: - polyvecl mat[K]: output matrix +* - const uint8_t rho[]: byte array containing seed rho +**************************************************/ + +#if K == 4 && L == 4 +void polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]) { + polyvec_matrix_expand_row0(&mat[0], NULL, rho); + polyvec_matrix_expand_row1(&mat[1], NULL, rho); + polyvec_matrix_expand_row2(&mat[2], NULL, rho); + polyvec_matrix_expand_row3(&mat[3], NULL, rho); +} + +void polyvec_matrix_expand_row0(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 0, 1, 2, 3); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); +} + +void polyvec_matrix_expand_row1(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 256, 257, 258, 259); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); +} + +void polyvec_matrix_expand_row2(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 512, 513, 514, 515); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); +} + +void polyvec_matrix_expand_row3(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 768, 769, 770, 771); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + 
poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); +} +/* 6x5 case: poly_uniform_4x always yields four polynomials, so one call may finish rowa and start rowb (nonces stay 256*row + column). The last call over-produces by two; polyvec_matrix_expand hands it a scratch polyvecl for those. */ +#elif K == 6 && L == 5 +void polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]) { + polyvecl tmp; + polyvec_matrix_expand_row0(&mat[0], &mat[1], rho); + polyvec_matrix_expand_row1(&mat[1], &mat[2], rho); + polyvec_matrix_expand_row2(&mat[2], &mat[3], rho); + polyvec_matrix_expand_row3(&mat[3], NULL, rho); + polyvec_matrix_expand_row4(&mat[4], &mat[5], rho); + polyvec_matrix_expand_row5(&mat[5], &tmp, rho); +} + +void polyvec_matrix_expand_row0(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 0, 1, 2, 3); + poly_uniform_4x(&rowa->vec[4], &rowb->vec[0], &rowb->vec[1], &rowb->vec[2], rho, 4, 256, 257, 258); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); + poly_nttunpack(&rowb->vec[2]); +} + +void polyvec_matrix_expand_row1(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[3], &rowa->vec[4], &rowb->vec[0], &rowb->vec[1], rho, 259, 260, 512, 513); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); +} + +void polyvec_matrix_expand_row2(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[2], &rowa->vec[3], &rowa->vec[4], &rowb->vec[0], rho, 514, 515, 516, 768); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowb->vec[0]); +} + +void polyvec_matrix_expand_row3(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[1], &rowa->vec[2], &rowa->vec[3], &rowa->vec[4], rho, 769, 770, 771, 772); + 
poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); +} + +void polyvec_matrix_expand_row4(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 1024, 1025, 1026, 1027); + poly_uniform_4x(&rowa->vec[4], &rowb->vec[0], &rowb->vec[1], &rowb->vec[2], rho, 1028, 1280, 1281, 1282); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); + poly_nttunpack(&rowb->vec[2]); +} +/* Here rowb only absorbs the two surplus outputs of the 4-way call; they are not passed through poly_nttunpack. */ +void polyvec_matrix_expand_row5(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[3], &rowa->vec[4], &rowb->vec[0], &rowb->vec[1], rho, 1283, 1284, 1536, 1537); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); +} +/* 8x7 case: the 14 poly_uniform_4x calls below produce exactly the 56 matrix entries; rows 3 and 7 are completed without a rowb (callers pass NULL). */ +#elif K == 8 && L == 7 +void polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]) { + polyvec_matrix_expand_row0(&mat[0], &mat[1], rho); + polyvec_matrix_expand_row1(&mat[1], &mat[2], rho); + polyvec_matrix_expand_row2(&mat[2], &mat[3], rho); + polyvec_matrix_expand_row3(&mat[3], NULL, rho); + polyvec_matrix_expand_row4(&mat[4], &mat[5], rho); + polyvec_matrix_expand_row5(&mat[5], &mat[6], rho); + polyvec_matrix_expand_row6(&mat[6], &mat[7], rho); + polyvec_matrix_expand_row7(&mat[7], NULL, rho); +} + +void polyvec_matrix_expand_row0(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 0, 1, 2, 3); + poly_uniform_4x(&rowa->vec[4], &rowa->vec[5], &rowa->vec[6], &rowb->vec[0], rho, 4, 5, 6, 256); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + 
poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); + poly_nttunpack(&rowb->vec[0]); +} + +void polyvec_matrix_expand_row1(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[1], &rowa->vec[2], &rowa->vec[3], &rowa->vec[4], rho, 257, 258, 259, 260); + poly_uniform_4x(&rowa->vec[5], &rowa->vec[6], &rowb->vec[0], &rowb->vec[1], rho, 261, 262, 512, 513); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); +} + +void polyvec_matrix_expand_row2(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[2], &rowa->vec[3], &rowa->vec[4], &rowa->vec[5], rho, 514, 515, 516, 517); + poly_uniform_4x(&rowa->vec[6], &rowb->vec[0], &rowb->vec[1], &rowb->vec[2], rho, 518, 768, 769, 770); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); + poly_nttunpack(&rowb->vec[2]); +} +/* Completes row 3 (entries 3..6); entries 0..2 were already written by polyvec_matrix_expand_row2 through its rowb argument. */ +void polyvec_matrix_expand_row3(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[3], &rowa->vec[4], &rowa->vec[5], &rowa->vec[6], rho, 771, 772, 773, 774); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); +} + +void polyvec_matrix_expand_row4(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 1024, 1025, 1026, 1027); + poly_uniform_4x(&rowa->vec[4], &rowa->vec[5], &rowa->vec[6], &rowb->vec[0], rho, 1028, 1029, 1030, 1280); + poly_nttunpack(&rowa->vec[0]); + poly_nttunpack(&rowa->vec[1]); + 
poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); + poly_nttunpack(&rowb->vec[0]); +} + +void polyvec_matrix_expand_row5(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[1], &rowa->vec[2], &rowa->vec[3], &rowa->vec[4], rho, 1281, 1282, 1283, 1284); + poly_uniform_4x(&rowa->vec[5], &rowa->vec[6], &rowb->vec[0], &rowb->vec[1], rho, 1285, 1286, 1536, 1537); + poly_nttunpack(&rowa->vec[1]); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); +} + +void polyvec_matrix_expand_row6(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[2], &rowa->vec[3], &rowa->vec[4], &rowa->vec[5], rho, 1538, 1539, 1540, 1541); + poly_uniform_4x(&rowa->vec[6], &rowb->vec[0], &rowb->vec[1], &rowb->vec[2], rho, 1542, 1792, 1793, 1794); + poly_nttunpack(&rowa->vec[2]); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); + poly_nttunpack(&rowb->vec[0]); + poly_nttunpack(&rowb->vec[1]); + poly_nttunpack(&rowb->vec[2]); +} +/* Completes row 7 (entries 3..6); entries 0..2 were already written by polyvec_matrix_expand_row6 through its rowb argument. */ +void polyvec_matrix_expand_row7(polyvecl *rowa, __attribute__((unused)) polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + poly_uniform_4x(&rowa->vec[3], &rowa->vec[4], &rowa->vec[5], &rowa->vec[6], rho, 1795, 1796, 1797, 1798); + poly_nttunpack(&rowa->vec[3]); + poly_nttunpack(&rowa->vec[4]); + poly_nttunpack(&rowa->vec[5]); + poly_nttunpack(&rowa->vec[6]); +} +/* Any other (K, L) combination is rejected at compile time. */ +#else +#error +#endif +/* Matrix-vector product: t->vec[i] is computed from row mat[i] and v by polyvecl_pointwise_acc_montgomery, for i = 0..K-1. */ +void polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + polyvecl_pointwise_acc_montgomery(&t->vec[i], &mat[i], v); +} + 
+/**************************************************************/ +/************ Vectors of polynomials of length L **************/ +/**************************************************************/ +/* Fills the L polynomials of v with poly_uniform_eta, using the consecutive nonces nonce, nonce+1, ..., nonce+L-1. */ +void polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_uniform_eta(&v->vec[i], seed, nonce++); +} +/* Fills the L polynomials of v with poly_uniform_gamma1; polynomial i uses nonce L*nonce + i. */ +void polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_uniform_gamma1(&v->vec[i], seed, L*nonce + i); +} +/* Applies poly_reduce to each of the L polynomials of v, in place. */ +void polyvecl_reduce(polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_reduce(&v->vec[i]); +} + +/************************************************* +* Name: polyvecl_add +* +* Description: Add vectors of polynomials of length L. +* No modular reduction is performed. +* +* Arguments: - polyvecl *w: pointer to output vector +* - const polyvecl *u: pointer to first summand +* - const polyvecl *v: pointer to second summand +**************************************************/ +void polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyvecl_ntt +* +* Description: Forward NTT of all polynomials in vector of length L. Output +* coefficients can be up to 16*Q larger than input coefficients. 
+* +* Arguments: - polyvecl *v: pointer to input/output vector +**************************************************/ +void polyvecl_ntt(polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_ntt(&v->vec[i]); +} +/* Applies poly_invntt_tomont to each of the L polynomials of v, in place. */ +void polyvecl_invntt_tomont(polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_invntt_tomont(&v->vec[i]); +} +/* For i = 0..L-1, stores poly_pointwise_montgomery of a and v->vec[i] into r->vec[i]. */ +void polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]); +} + +/************************************************* +* Name: polyvecl_pointwise_acc_montgomery +* +* Description: Pointwise multiply vectors of polynomials of length L, multiply +* resulting vector by 2^{-32} and add (accumulate) polynomials +* in it. Input/output vectors are in NTT domain representation. +* +* Arguments: - poly *w: output polynomial +* - const polyvecl *u: pointer to first input vector +* - const polyvecl *v: pointer to second input vector +**************************************************/ +void polyvecl_pointwise_acc_montgomery(poly *w, const polyvecl *u, const polyvecl *v) { + pointwise_acc_avx(w->vec, u->vec->vec, v->vec->vec, qdata.vec); +} + +/************************************************* +* Name: polyvecl_chknorm +* +* Description: Check infinity norm of polynomials in vector of length L. +* Assumes input polyvecl to be reduced by polyvecl_reduce(). +* +* Arguments: - const polyvecl *v: pointer to vector +* - int32_t B: norm bound +* +* Returns 0 if norm of all polynomials is strictly smaller than B <= (Q-1)/8 +* and 1 otherwise. 
+**************************************************/ +int polyvecl_chknorm(const polyvecl *v, int32_t bound) { + unsigned int i; + + for(i = 0; i < L; ++i) + if(poly_chknorm(&v->vec[i], bound)) + return 1; + + return 0; +} + +/**************************************************************/ +/************ Vectors of polynomials of length K **************/ +/**************************************************************/ +/* Fills the K polynomials of v with poly_uniform_eta, using the consecutive nonces nonce, nonce+1, ..., nonce+K-1. */ +void polyveck_uniform_eta(polyveck *v, const uint8_t seed[CRHBYTES], uint16_t nonce) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_uniform_eta(&v->vec[i], seed, nonce++); +} + +/************************************************* +* Name: polyveck_reduce +* +* Description: Reduce coefficients of polynomials in vector of length K +* to representatives in [-6283009,6283007]. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_reduce(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_reduce(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_caddq +* +* Description: For all coefficients of polynomials in vector of length K +* add Q if coefficient is negative. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_caddq(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_caddq(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_add +* +* Description: Add vectors of polynomials of length K. +* No modular reduction is performed. 
+* +* Arguments: - polyveck *w: pointer to output vector +* - const polyveck *u: pointer to first summand +* - const polyveck *v: pointer to second summand +**************************************************/ +void polyveck_add(polyveck *w, const polyveck *u, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_sub +* +* Description: Subtract vectors of polynomials of length K. +* No modular reduction is performed. +* +* Arguments: - polyveck *w: pointer to output vector +* - const polyveck *u: pointer to first input vector +* - const polyveck *v: pointer to second input vector to be +* subtracted from first input vector +**************************************************/ +void polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_sub(&w->vec[i], &u->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_shiftl +* +* Description: Multiply vector of polynomials of length K by 2^D without modular +* reduction. Assumes input coefficients to be less than 2^{31-D}. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_shiftl(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_shiftl(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_ntt +* +* Description: Forward NTT of all polynomials in vector of length K. Output +* coefficients can be up to 16*Q larger than input coefficients. 
+* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_ntt(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_ntt(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_invntt_tomont +* +* Description: Inverse NTT and multiplication by 2^{32} of polynomials +* in vector of length K. Input coefficients need to be less +* than 2*Q. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_invntt_tomont(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_invntt_tomont(&v->vec[i]); +} +/* For i = 0..K-1, stores poly_pointwise_montgomery of a and v->vec[i] into r->vec[i]. */ +void polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]); +} + +/************************************************* +* Name: polyveck_chknorm +* +* Description: Check infinity norm of polynomials in vector of length K. +* Assumes input polyveck to be reduced by polyveck_reduce(). +* +* Arguments: - const polyveck *v: pointer to vector +* - int32_t bound: norm bound +* +* Returns 0 if the norm of every polynomial is strictly smaller than +* bound <= (Q-1)/8 and 1 otherwise. +**************************************************/ +int polyveck_chknorm(const polyveck *v, int32_t bound) { + unsigned int i; + + for(i = 0; i < K; ++i) + if(poly_chknorm(&v->vec[i], bound)) + return 1; + + return 0; +} + +/************************************************* +* Name: polyveck_power2round +* +* Description: For all coefficients a of polynomials in vector of length K, +* compute a0, a1 such that a mod^+ Q = a1*2^D + a0 +* with -2^{D-1} < a0 <= 2^{D-1}. Assumes coefficients to be +* standard representatives. 
+* +* Arguments: - polyveck *v1: pointer to output vector of polynomials with +* coefficients a1 +* - polyveck *v0: pointer to output vector of polynomials with +* coefficients a0 +* - const polyveck *v: pointer to input vector +**************************************************/ +void polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_power2round(&v1->vec[i], &v0->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_decompose +* +* Description: For all coefficients a of polynomials in vector of length K, +* compute high bits a1 and low bits a0 such that a mod^+ Q = a1*ALPHA + a0 +* with -ALPHA/2 < a0 <= ALPHA/2 except a1 = (Q-1)/ALPHA where we +* set a1 = 0 and -ALPHA/2 <= a0 = a mod Q - Q < 0. +* Assumes coefficients to be standard representatives. +* +* Arguments: - polyveck *v1: pointer to output vector of polynomials with +* coefficients a1 +* - polyveck *v0: pointer to output vector of polynomials with +* coefficients a0 +* - const polyveck *v: pointer to input vector +**************************************************/ +void polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_decompose(&v1->vec[i], &v0->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_make_hint +* +* Description: Compute hint vector. +* +* Arguments: - uint8_t *hint: pointer to output hint array +* - const polyveck *v0: pointer to low part of input vector +* - const polyveck *v1: pointer to high part of input vector +* +* Returns number of 1 bits. 
+**************************************************/ +unsigned int polyveck_make_hint(uint8_t *hint, const polyveck *v0, const polyveck *v1) +{ + unsigned int i, n = 0; + + for(i = 0; i < K; ++i) + n += poly_make_hint(&hint[n], &v0->vec[i], &v1->vec[i]); + + return n; +} + +/************************************************* +* Name: polyveck_use_hint +* +* Description: Use hint vector to correct the high bits of input vector. +* +* Arguments: - polyveck *w: pointer to output vector of polynomials with +* corrected high bits +* - const polyveck *u: pointer to input vector +* - const polyveck *h: pointer to input hint vector +**************************************************/ +void polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_use_hint(&w->vec[i], &u->vec[i], &h->vec[i]); +} +/* Packs the K polynomials of w1 back to back with polyw1_pack, POLYW1_PACKEDBYTES bytes per polynomial. */ +void polyveck_pack_w1(uint8_t r[K*POLYW1_PACKEDBYTES], const polyveck *w1) { + unsigned int i; + + for(i = 0; i < K; ++i) + polyw1_pack(&r[i*POLYW1_PACKEDBYTES], &w1->vec[i]); +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/polyvec.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/polyvec.h new file mode 100644 index 0000000000..1b6dc87ac6 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/polyvec.h @@ -0,0 +1,105 @@ +#ifndef POLYVEC_H +#define POLYVEC_H + +#include <stdint.h> +#include "params.h" +#include "poly.h" + +/* Vectors of polynomials of length L */ +typedef struct { + poly vec[L]; +} polyvecl; + +#define polyvecl_uniform_eta DILITHIUM_NAMESPACE(polyvecl_uniform_eta) +void polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define polyvecl_uniform_gamma1 DILITHIUM_NAMESPACE(polyvecl_uniform_gamma1) +void polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define polyvecl_reduce DILITHIUM_NAMESPACE(polyvecl_reduce) +void polyvecl_reduce(polyvecl *v); + +#define 
polyvecl_add DILITHIUM_NAMESPACE(polyvecl_add) +void polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v); + +#define polyvecl_ntt DILITHIUM_NAMESPACE(polyvecl_ntt) +void polyvecl_ntt(polyvecl *v); +#define polyvecl_invntt_tomont DILITHIUM_NAMESPACE(polyvecl_invntt_tomont) +void polyvecl_invntt_tomont(polyvecl *v); +#define polyvecl_pointwise_poly_montgomery DILITHIUM_NAMESPACE(polyvecl_pointwise_poly_montgomery) +void polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v); +#define polyvecl_pointwise_acc_montgomery \ + DILITHIUM_NAMESPACE(polyvecl_pointwise_acc_montgomery) +void polyvecl_pointwise_acc_montgomery(poly *w, + const polyvecl *u, + const polyvecl *v); +/* Returns 1 as soon as poly_chknorm flags one polynomial of v for bound B, and 0 otherwise (definition in polyvec.c). */ +#define polyvecl_chknorm DILITHIUM_NAMESPACE(polyvecl_chknorm) +int polyvecl_chknorm(const polyvecl *v, int32_t B); + +/* Vectors of polynomials of length K */ +typedef struct { + poly vec[K]; +} polyveck; + +#define polyveck_uniform_eta DILITHIUM_NAMESPACE(polyveck_uniform_eta) +void polyveck_uniform_eta(polyveck *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define polyveck_reduce DILITHIUM_NAMESPACE(polyveck_reduce) +void polyveck_reduce(polyveck *v); +#define polyveck_caddq DILITHIUM_NAMESPACE(polyveck_caddq) +void polyveck_caddq(polyveck *v); + +#define polyveck_add DILITHIUM_NAMESPACE(polyveck_add) +void polyveck_add(polyveck *w, const polyveck *u, const polyveck *v); +#define polyveck_sub DILITHIUM_NAMESPACE(polyveck_sub) +void polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v); +#define polyveck_shiftl DILITHIUM_NAMESPACE(polyveck_shiftl) +void polyveck_shiftl(polyveck *v); + +#define polyveck_ntt DILITHIUM_NAMESPACE(polyveck_ntt) +void polyveck_ntt(polyveck *v); +#define polyveck_invntt_tomont DILITHIUM_NAMESPACE(polyveck_invntt_tomont) +void polyveck_invntt_tomont(polyveck *v); +#define polyveck_pointwise_poly_montgomery DILITHIUM_NAMESPACE(polyveck_pointwise_poly_montgomery) +void polyveck_pointwise_poly_montgomery(polyveck *r, const 
poly *a, const polyveck *v); + +#define polyveck_chknorm DILITHIUM_NAMESPACE(polyveck_chknorm) +int polyveck_chknorm(const polyveck *v, int32_t B); + +#define polyveck_power2round DILITHIUM_NAMESPACE(polyveck_power2round) +void polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v); +#define polyveck_decompose DILITHIUM_NAMESPACE(polyveck_decompose) +void polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v); +#define polyveck_make_hint DILITHIUM_NAMESPACE(polyveck_make_hint) +unsigned int polyveck_make_hint(uint8_t *hint, const polyveck *v0, const polyveck *v1); +#define polyveck_use_hint DILITHIUM_NAMESPACE(polyveck_use_hint) +void polyveck_use_hint(polyveck *w, const polyveck *v, const polyveck *h); + +#define polyveck_pack_w1 DILITHIUM_NAMESPACE(polyveck_pack_w1) +void polyveck_pack_w1(uint8_t r[K*POLYW1_PACKEDBYTES], const polyveck *w1); + +#define polyvec_matrix_expand DILITHIUM_NAMESPACE(polyvec_matrix_expand) +void polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]); +/* Per-row helpers used by polyvec_matrix_expand: rowa is the row being completed; rowb, where a helper uses it, receives the remaining outputs of the same 4-way sampling call (the first entries of the next row, or a scratch vector). */ +#define polyvec_matrix_expand_row0 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row0) +void polyvec_matrix_expand_row0(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row1 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row1) +void polyvec_matrix_expand_row1(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row2 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row2) +void polyvec_matrix_expand_row2(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row3 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row3) +void polyvec_matrix_expand_row3(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row4 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row4) +void polyvec_matrix_expand_row4(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row5 
DILITHIUM_NAMESPACE(polyvec_matrix_expand_row5) +void polyvec_matrix_expand_row5(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row6 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row6) +void polyvec_matrix_expand_row6(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +#define polyvec_matrix_expand_row7 DILITHIUM_NAMESPACE(polyvec_matrix_expand_row7) +void polyvec_matrix_expand_row7(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); + +#define polyvec_matrix_pointwise_montgomery DILITHIUM_NAMESPACE(polyvec_matrix_pointwise_montgomery) +void polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/rejsample.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/rejsample.c new file mode 100644 index 0000000000..8b1dde4440 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/rejsample.c @@ -0,0 +1,476 @@ +#include <immintrin.h> +#include <stdint.h> +#include "params.h" +#include "rejsample.h" +#include "symmetric.h" +/* idxlut[m] lists, in ascending order, the positions of the bits set in the 8-bit mask m; unused trailing entries are 0. The samplers use a row as a permutation that moves accepted lanes to the front. */ +const uint8_t idxlut[256][8] = { + { 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0}, + { 1, 0, 0, 0, 0, 0, 0, 0}, + { 0, 1, 0, 0, 0, 0, 0, 0}, + { 2, 0, 0, 0, 0, 0, 0, 0}, + { 0, 2, 0, 0, 0, 0, 0, 0}, + { 1, 2, 0, 0, 0, 0, 0, 0}, + { 0, 1, 2, 0, 0, 0, 0, 0}, + { 3, 0, 0, 0, 0, 0, 0, 0}, + { 0, 3, 0, 0, 0, 0, 0, 0}, + { 1, 3, 0, 0, 0, 0, 0, 0}, + { 0, 1, 3, 0, 0, 0, 0, 0}, + { 2, 3, 0, 0, 0, 0, 0, 0}, + { 0, 2, 3, 0, 0, 0, 0, 0}, + { 1, 2, 3, 0, 0, 0, 0, 0}, + { 0, 1, 2, 3, 0, 0, 0, 0}, + { 4, 0, 0, 0, 0, 0, 0, 0}, + { 0, 4, 0, 0, 0, 0, 0, 0}, + { 1, 4, 0, 0, 0, 0, 0, 0}, + { 0, 1, 4, 0, 0, 0, 0, 0}, + { 2, 4, 0, 0, 0, 0, 0, 0}, + { 0, 2, 4, 0, 0, 0, 0, 0}, + { 1, 2, 4, 0, 0, 0, 0, 0}, + { 0, 1, 2, 4, 0, 0, 0, 0}, + { 3, 4, 0, 0, 0, 0, 0, 0}, + { 0, 3, 4, 0, 0, 0, 0, 0}, + { 1, 3, 4, 0, 0, 0, 0, 0}, + { 0, 1, 3, 4, 0, 0, 0, 0}, + { 2, 3, 4, 0, 
0, 0, 0, 0}, + { 0, 2, 3, 4, 0, 0, 0, 0}, + { 1, 2, 3, 4, 0, 0, 0, 0}, + { 0, 1, 2, 3, 4, 0, 0, 0}, + { 5, 0, 0, 0, 0, 0, 0, 0}, + { 0, 5, 0, 0, 0, 0, 0, 0}, + { 1, 5, 0, 0, 0, 0, 0, 0}, + { 0, 1, 5, 0, 0, 0, 0, 0}, + { 2, 5, 0, 0, 0, 0, 0, 0}, + { 0, 2, 5, 0, 0, 0, 0, 0}, + { 1, 2, 5, 0, 0, 0, 0, 0}, + { 0, 1, 2, 5, 0, 0, 0, 0}, + { 3, 5, 0, 0, 0, 0, 0, 0}, + { 0, 3, 5, 0, 0, 0, 0, 0}, + { 1, 3, 5, 0, 0, 0, 0, 0}, + { 0, 1, 3, 5, 0, 0, 0, 0}, + { 2, 3, 5, 0, 0, 0, 0, 0}, + { 0, 2, 3, 5, 0, 0, 0, 0}, + { 1, 2, 3, 5, 0, 0, 0, 0}, + { 0, 1, 2, 3, 5, 0, 0, 0}, + { 4, 5, 0, 0, 0, 0, 0, 0}, + { 0, 4, 5, 0, 0, 0, 0, 0}, + { 1, 4, 5, 0, 0, 0, 0, 0}, + { 0, 1, 4, 5, 0, 0, 0, 0}, + { 2, 4, 5, 0, 0, 0, 0, 0}, + { 0, 2, 4, 5, 0, 0, 0, 0}, + { 1, 2, 4, 5, 0, 0, 0, 0}, + { 0, 1, 2, 4, 5, 0, 0, 0}, + { 3, 4, 5, 0, 0, 0, 0, 0}, + { 0, 3, 4, 5, 0, 0, 0, 0}, + { 1, 3, 4, 5, 0, 0, 0, 0}, + { 0, 1, 3, 4, 5, 0, 0, 0}, + { 2, 3, 4, 5, 0, 0, 0, 0}, + { 0, 2, 3, 4, 5, 0, 0, 0}, + { 1, 2, 3, 4, 5, 0, 0, 0}, + { 0, 1, 2, 3, 4, 5, 0, 0}, + { 6, 0, 0, 0, 0, 0, 0, 0}, + { 0, 6, 0, 0, 0, 0, 0, 0}, + { 1, 6, 0, 0, 0, 0, 0, 0}, + { 0, 1, 6, 0, 0, 0, 0, 0}, + { 2, 6, 0, 0, 0, 0, 0, 0}, + { 0, 2, 6, 0, 0, 0, 0, 0}, + { 1, 2, 6, 0, 0, 0, 0, 0}, + { 0, 1, 2, 6, 0, 0, 0, 0}, + { 3, 6, 0, 0, 0, 0, 0, 0}, + { 0, 3, 6, 0, 0, 0, 0, 0}, + { 1, 3, 6, 0, 0, 0, 0, 0}, + { 0, 1, 3, 6, 0, 0, 0, 0}, + { 2, 3, 6, 0, 0, 0, 0, 0}, + { 0, 2, 3, 6, 0, 0, 0, 0}, + { 1, 2, 3, 6, 0, 0, 0, 0}, + { 0, 1, 2, 3, 6, 0, 0, 0}, + { 4, 6, 0, 0, 0, 0, 0, 0}, + { 0, 4, 6, 0, 0, 0, 0, 0}, + { 1, 4, 6, 0, 0, 0, 0, 0}, + { 0, 1, 4, 6, 0, 0, 0, 0}, + { 2, 4, 6, 0, 0, 0, 0, 0}, + { 0, 2, 4, 6, 0, 0, 0, 0}, + { 1, 2, 4, 6, 0, 0, 0, 0}, + { 0, 1, 2, 4, 6, 0, 0, 0}, + { 3, 4, 6, 0, 0, 0, 0, 0}, + { 0, 3, 4, 6, 0, 0, 0, 0}, + { 1, 3, 4, 6, 0, 0, 0, 0}, + { 0, 1, 3, 4, 6, 0, 0, 0}, + { 2, 3, 4, 6, 0, 0, 0, 0}, + { 0, 2, 3, 4, 6, 0, 0, 0}, + { 1, 2, 3, 4, 6, 0, 0, 0}, + { 0, 1, 2, 3, 4, 6, 0, 0}, + { 5, 6, 0, 0, 0, 0, 0, 0}, + { 0, 5, 6, 
0, 0, 0, 0, 0}, + { 1, 5, 6, 0, 0, 0, 0, 0}, + { 0, 1, 5, 6, 0, 0, 0, 0}, + { 2, 5, 6, 0, 0, 0, 0, 0}, + { 0, 2, 5, 6, 0, 0, 0, 0}, + { 1, 2, 5, 6, 0, 0, 0, 0}, + { 0, 1, 2, 5, 6, 0, 0, 0}, + { 3, 5, 6, 0, 0, 0, 0, 0}, + { 0, 3, 5, 6, 0, 0, 0, 0}, + { 1, 3, 5, 6, 0, 0, 0, 0}, + { 0, 1, 3, 5, 6, 0, 0, 0}, + { 2, 3, 5, 6, 0, 0, 0, 0}, + { 0, 2, 3, 5, 6, 0, 0, 0}, + { 1, 2, 3, 5, 6, 0, 0, 0}, + { 0, 1, 2, 3, 5, 6, 0, 0}, + { 4, 5, 6, 0, 0, 0, 0, 0}, + { 0, 4, 5, 6, 0, 0, 0, 0}, + { 1, 4, 5, 6, 0, 0, 0, 0}, + { 0, 1, 4, 5, 6, 0, 0, 0}, + { 2, 4, 5, 6, 0, 0, 0, 0}, + { 0, 2, 4, 5, 6, 0, 0, 0}, + { 1, 2, 4, 5, 6, 0, 0, 0}, + { 0, 1, 2, 4, 5, 6, 0, 0}, + { 3, 4, 5, 6, 0, 0, 0, 0}, + { 0, 3, 4, 5, 6, 0, 0, 0}, + { 1, 3, 4, 5, 6, 0, 0, 0}, + { 0, 1, 3, 4, 5, 6, 0, 0}, + { 2, 3, 4, 5, 6, 0, 0, 0}, + { 0, 2, 3, 4, 5, 6, 0, 0}, + { 1, 2, 3, 4, 5, 6, 0, 0}, + { 0, 1, 2, 3, 4, 5, 6, 0}, + { 7, 0, 0, 0, 0, 0, 0, 0}, + { 0, 7, 0, 0, 0, 0, 0, 0}, + { 1, 7, 0, 0, 0, 0, 0, 0}, + { 0, 1, 7, 0, 0, 0, 0, 0}, + { 2, 7, 0, 0, 0, 0, 0, 0}, + { 0, 2, 7, 0, 0, 0, 0, 0}, + { 1, 2, 7, 0, 0, 0, 0, 0}, + { 0, 1, 2, 7, 0, 0, 0, 0}, + { 3, 7, 0, 0, 0, 0, 0, 0}, + { 0, 3, 7, 0, 0, 0, 0, 0}, + { 1, 3, 7, 0, 0, 0, 0, 0}, + { 0, 1, 3, 7, 0, 0, 0, 0}, + { 2, 3, 7, 0, 0, 0, 0, 0}, + { 0, 2, 3, 7, 0, 0, 0, 0}, + { 1, 2, 3, 7, 0, 0, 0, 0}, + { 0, 1, 2, 3, 7, 0, 0, 0}, + { 4, 7, 0, 0, 0, 0, 0, 0}, + { 0, 4, 7, 0, 0, 0, 0, 0}, + { 1, 4, 7, 0, 0, 0, 0, 0}, + { 0, 1, 4, 7, 0, 0, 0, 0}, + { 2, 4, 7, 0, 0, 0, 0, 0}, + { 0, 2, 4, 7, 0, 0, 0, 0}, + { 1, 2, 4, 7, 0, 0, 0, 0}, + { 0, 1, 2, 4, 7, 0, 0, 0}, + { 3, 4, 7, 0, 0, 0, 0, 0}, + { 0, 3, 4, 7, 0, 0, 0, 0}, + { 1, 3, 4, 7, 0, 0, 0, 0}, + { 0, 1, 3, 4, 7, 0, 0, 0}, + { 2, 3, 4, 7, 0, 0, 0, 0}, + { 0, 2, 3, 4, 7, 0, 0, 0}, + { 1, 2, 3, 4, 7, 0, 0, 0}, + { 0, 1, 2, 3, 4, 7, 0, 0}, + { 5, 7, 0, 0, 0, 0, 0, 0}, + { 0, 5, 7, 0, 0, 0, 0, 0}, + { 1, 5, 7, 0, 0, 0, 0, 0}, + { 0, 1, 5, 7, 0, 0, 0, 0}, + { 2, 5, 7, 0, 0, 0, 0, 0}, + { 0, 2, 5, 7, 0, 0, 0, 0}, + { 1, 2, 
5, 7, 0, 0, 0, 0}, + { 0, 1, 2, 5, 7, 0, 0, 0}, + { 3, 5, 7, 0, 0, 0, 0, 0}, + { 0, 3, 5, 7, 0, 0, 0, 0}, + { 1, 3, 5, 7, 0, 0, 0, 0}, + { 0, 1, 3, 5, 7, 0, 0, 0}, + { 2, 3, 5, 7, 0, 0, 0, 0}, + { 0, 2, 3, 5, 7, 0, 0, 0}, + { 1, 2, 3, 5, 7, 0, 0, 0}, + { 0, 1, 2, 3, 5, 7, 0, 0}, + { 4, 5, 7, 0, 0, 0, 0, 0}, + { 0, 4, 5, 7, 0, 0, 0, 0}, + { 1, 4, 5, 7, 0, 0, 0, 0}, + { 0, 1, 4, 5, 7, 0, 0, 0}, + { 2, 4, 5, 7, 0, 0, 0, 0}, + { 0, 2, 4, 5, 7, 0, 0, 0}, + { 1, 2, 4, 5, 7, 0, 0, 0}, + { 0, 1, 2, 4, 5, 7, 0, 0}, + { 3, 4, 5, 7, 0, 0, 0, 0}, + { 0, 3, 4, 5, 7, 0, 0, 0}, + { 1, 3, 4, 5, 7, 0, 0, 0}, + { 0, 1, 3, 4, 5, 7, 0, 0}, + { 2, 3, 4, 5, 7, 0, 0, 0}, + { 0, 2, 3, 4, 5, 7, 0, 0}, + { 1, 2, 3, 4, 5, 7, 0, 0}, + { 0, 1, 2, 3, 4, 5, 7, 0}, + { 6, 7, 0, 0, 0, 0, 0, 0}, + { 0, 6, 7, 0, 0, 0, 0, 0}, + { 1, 6, 7, 0, 0, 0, 0, 0}, + { 0, 1, 6, 7, 0, 0, 0, 0}, + { 2, 6, 7, 0, 0, 0, 0, 0}, + { 0, 2, 6, 7, 0, 0, 0, 0}, + { 1, 2, 6, 7, 0, 0, 0, 0}, + { 0, 1, 2, 6, 7, 0, 0, 0}, + { 3, 6, 7, 0, 0, 0, 0, 0}, + { 0, 3, 6, 7, 0, 0, 0, 0}, + { 1, 3, 6, 7, 0, 0, 0, 0}, + { 0, 1, 3, 6, 7, 0, 0, 0}, + { 2, 3, 6, 7, 0, 0, 0, 0}, + { 0, 2, 3, 6, 7, 0, 0, 0}, + { 1, 2, 3, 6, 7, 0, 0, 0}, + { 0, 1, 2, 3, 6, 7, 0, 0}, + { 4, 6, 7, 0, 0, 0, 0, 0}, + { 0, 4, 6, 7, 0, 0, 0, 0}, + { 1, 4, 6, 7, 0, 0, 0, 0}, + { 0, 1, 4, 6, 7, 0, 0, 0}, + { 2, 4, 6, 7, 0, 0, 0, 0}, + { 0, 2, 4, 6, 7, 0, 0, 0}, + { 1, 2, 4, 6, 7, 0, 0, 0}, + { 0, 1, 2, 4, 6, 7, 0, 0}, + { 3, 4, 6, 7, 0, 0, 0, 0}, + { 0, 3, 4, 6, 7, 0, 0, 0}, + { 1, 3, 4, 6, 7, 0, 0, 0}, + { 0, 1, 3, 4, 6, 7, 0, 0}, + { 2, 3, 4, 6, 7, 0, 0, 0}, + { 0, 2, 3, 4, 6, 7, 0, 0}, + { 1, 2, 3, 4, 6, 7, 0, 0}, + { 0, 1, 2, 3, 4, 6, 7, 0}, + { 5, 6, 7, 0, 0, 0, 0, 0}, + { 0, 5, 6, 7, 0, 0, 0, 0}, + { 1, 5, 6, 7, 0, 0, 0, 0}, + { 0, 1, 5, 6, 7, 0, 0, 0}, + { 2, 5, 6, 7, 0, 0, 0, 0}, + { 0, 2, 5, 6, 7, 0, 0, 0}, + { 1, 2, 5, 6, 7, 0, 0, 0}, + { 0, 1, 2, 5, 6, 7, 0, 0}, + { 3, 5, 6, 7, 0, 0, 0, 0}, + { 0, 3, 5, 6, 7, 0, 0, 0}, + { 1, 3, 5, 6, 7, 0, 0, 0}, + { 0, 
1, 3, 5, 6, 7, 0, 0}, + { 2, 3, 5, 6, 7, 0, 0, 0}, + { 0, 2, 3, 5, 6, 7, 0, 0}, + { 1, 2, 3, 5, 6, 7, 0, 0}, + { 0, 1, 2, 3, 5, 6, 7, 0}, + { 4, 5, 6, 7, 0, 0, 0, 0}, + { 0, 4, 5, 6, 7, 0, 0, 0}, + { 1, 4, 5, 6, 7, 0, 0, 0}, + { 0, 1, 4, 5, 6, 7, 0, 0}, + { 2, 4, 5, 6, 7, 0, 0, 0}, + { 0, 2, 4, 5, 6, 7, 0, 0}, + { 1, 2, 4, 5, 6, 7, 0, 0}, + { 0, 1, 2, 4, 5, 6, 7, 0}, + { 3, 4, 5, 6, 7, 0, 0, 0}, + { 0, 3, 4, 5, 6, 7, 0, 0}, + { 1, 3, 4, 5, 6, 7, 0, 0}, + { 0, 1, 3, 4, 5, 6, 7, 0}, + { 2, 3, 4, 5, 6, 7, 0, 0}, + { 0, 2, 3, 4, 5, 6, 7, 0}, + { 1, 2, 3, 4, 5, 6, 7, 0}, + { 0, 1, 2, 3, 4, 5, 6, 7} +}; +/* Rejection-samples coefficients below Q from buf into r. Each vector pass consumes 24 bytes (eight 3-byte candidates masked to 23 bits), marks the lanes below Q in good and moves them to the front with idxlut before storing. A pass loads 32 bytes starting at pos <= REJ_UNIFORM_BUFLEN - 24, which is why buf is declared with 8 extra bytes. Returns the number of coefficients written. */ +unsigned int rej_uniform_avx(int32_t * restrict r, const uint8_t buf[REJ_UNIFORM_BUFLEN+8]) +{ + unsigned int ctr, pos; + uint32_t good; + __m256i d, tmp; + const __m256i bound = _mm256_set1_epi32(Q); + const __m256i mask = _mm256_set1_epi32(0x7FFFFF); + const __m256i idx8 = _mm256_set_epi8(-1,15,14,13,-1,12,11,10, + -1, 9, 8, 7,-1, 6, 5, 4, + -1,11,10, 9,-1, 8, 7, 6, + -1, 5, 4, 3,-1, 2, 1, 0); + + ctr = pos = 0; + while(pos <= REJ_UNIFORM_BUFLEN - 24) { + d = _mm256_loadu_si256((__m256i *)&buf[pos]); + d = _mm256_permute4x64_epi64(d, 0x94); + d = _mm256_shuffle_epi8(d, idx8); + d = _mm256_and_si256(d, mask); + pos += 24; + + tmp = _mm256_sub_epi32(d, bound); + good = _mm256_movemask_ps((__m256)tmp); + tmp = _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i *)&idxlut[good])); + d = _mm256_permutevar8x32_epi32(d, tmp); + + _mm256_storeu_si256((__m256i *)&r[ctr], d); + ctr += _mm_popcnt_u32(good); + + if(ctr > N - 8) break; + } +/* Scalar tail: one 3-byte candidate at a time, masked to 23 bits and kept only if below Q. */ + uint32_t t; + while(ctr < N && pos <= REJ_UNIFORM_BUFLEN - 3) { + t = buf[pos++]; + t |= (uint32_t)buf[pos++] << 8; + t |= (uint32_t)buf[pos++] << 16; + t &= 0x7FFFFF; + + if(t < Q) + r[ctr++] = t; + } + + return ctr; +} + +#if ETA == 2 +unsigned int rej_eta_avx(int32_t * restrict r, const uint8_t buf[REJ_UNIFORM_ETA_BUFLEN]) { + unsigned int ctr, pos; + uint32_t good; + __m256i f0, f1, f2; + __m128i g0, g1; + const __m256i mask = 
_mm256_set1_epi8(15); + const __m256i eta = _mm256_set1_epi8(ETA); + const __m256i bound = mask; + const __m256i v = _mm256_set1_epi32(-6560); + const __m256i p = _mm256_set1_epi32(5); + + ctr = pos = 0; + while(ctr <= N - 8 && pos <= REJ_UNIFORM_ETA_BUFLEN - 16) { + f0 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i *)&buf[pos])); + f1 = _mm256_slli_epi16(f0,4); + f0 = _mm256_or_si256(f0,f1); + f0 = _mm256_and_si256(f0,mask); + + f1 = _mm256_sub_epi8(f0,bound); + f0 = _mm256_sub_epi8(eta,f0); + good = _mm256_movemask_epi8(f1); + + g0 = _mm256_castsi256_si128(f0); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + f2 = _mm256_mulhrs_epi16(f1,v); + f2 = _mm256_mullo_epi16(f2,p); + f1 = _mm256_add_epi32(f1,f2); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if(ctr > N - 8) break; + g0 = _mm_bsrli_si128(g0,8); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + f2 = _mm256_mulhrs_epi16(f1,v); + f2 = _mm256_mullo_epi16(f2,p); + f1 = _mm256_add_epi32(f1,f2); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if(ctr > N - 8) break; + g0 = _mm256_extracti128_si256(f0,1); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + f2 = _mm256_mulhrs_epi16(f1,v); + f2 = _mm256_mullo_epi16(f2,p); + f1 = _mm256_add_epi32(f1,f2); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if(ctr > N - 8) break; + g0 = _mm_bsrli_si128(g0,8); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + f2 = _mm256_mulhrs_epi16(f1,v); + f2 = _mm256_mullo_epi16(f2,p); + f1 = _mm256_add_epi32(f1,f2); + _mm256_storeu_si256((__m256i 
*)&r[ctr],f1); + ctr += _mm_popcnt_u32(good); + pos += 4; + } + + uint32_t t0, t1; + while(ctr < N && pos < REJ_UNIFORM_ETA_BUFLEN) { + t0 = buf[pos] & 0x0F; + t1 = buf[pos++] >> 4; + + if(t0 < 15) { + t0 = t0 - (205*t0 >> 10)*5; + r[ctr++] = 2 - t0; + } + if(t1 < 15 && ctr < N) { + t1 = t1 - (205*t1 >> 10)*5; + r[ctr++] = 2 - t1; + } + } + + return ctr; +} + +#elif ETA == 4 +unsigned int rej_eta_avx(int32_t * restrict r, const uint8_t buf[REJ_UNIFORM_ETA_BUFLEN]) { + unsigned int ctr, pos; + uint32_t good; + __m256i f0, f1; + __m128i g0, g1; + const __m256i mask = _mm256_set1_epi8(15); + const __m256i eta = _mm256_set1_epi8(4); + const __m256i bound = _mm256_set1_epi8(9); + + ctr = pos = 0; + while(ctr <= N - 8 && pos <= REJ_UNIFORM_ETA_BUFLEN - 16) { + f0 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i *)&buf[pos])); + f1 = _mm256_slli_epi16(f0,4); + f0 = _mm256_or_si256(f0,f1); + f0 = _mm256_and_si256(f0,mask); + + f1 = _mm256_sub_epi8(f0,bound); + f0 = _mm256_sub_epi8(eta,f0); + good = _mm256_movemask_epi8(f1); + + g0 = _mm256_castsi256_si128(f0); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if(ctr > N - 8) break; + g0 = _mm_bsrli_si128(g0,8); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if(ctr > N - 8) break; + g0 = _mm256_extracti128_si256(f0,1); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if(ctr > N - 8) break; + g0 = _mm_bsrli_si128(g0,8); + g1 = _mm_loadl_epi64((__m128i *)&idxlut[good]); + g1 
= _mm_shuffle_epi8(g0,g1); + f1 = _mm256_cvtepi8_epi32(g1); + _mm256_storeu_si256((__m256i *)&r[ctr],f1); + ctr += _mm_popcnt_u32(good); + pos += 4; + } + + uint32_t t0, t1; + while(ctr < N && pos < REJ_UNIFORM_ETA_BUFLEN) { + t0 = buf[pos] & 0x0F; + t1 = buf[pos++] >> 4; + + if(t0 < 9) + r[ctr++] = 4 - t0; + if(t1 < 9 && ctr < N) + r[ctr++] = 4 - t1; + } + + return ctr; +} +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/rejsample.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/rejsample.h new file mode 100644 index 0000000000..61f3f357a5 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/rejsample.h @@ -0,0 +1,28 @@ +#ifndef REJSAMPLE_H +#define REJSAMPLE_H + +#include +#include "params.h" +#include "symmetric.h" + +#define REJ_UNIFORM_NBLOCKS ((768+STREAM128_BLOCKBYTES-1)/STREAM128_BLOCKBYTES) +#define REJ_UNIFORM_BUFLEN (REJ_UNIFORM_NBLOCKS*STREAM128_BLOCKBYTES) + +#if ETA == 2 +#define REJ_UNIFORM_ETA_NBLOCKS ((136+STREAM256_BLOCKBYTES-1)/STREAM256_BLOCKBYTES) +#elif ETA == 4 +#define REJ_UNIFORM_ETA_NBLOCKS ((227+STREAM256_BLOCKBYTES-1)/STREAM256_BLOCKBYTES) +#endif +#define REJ_UNIFORM_ETA_BUFLEN (REJ_UNIFORM_ETA_NBLOCKS*STREAM256_BLOCKBYTES) + +#define idxlut DILITHIUM_NAMESPACE(idxlut) +extern const uint8_t idxlut[256][8]; + +#define rej_uniform_avx DILITHIUM_NAMESPACE(rej_uniform_avx) +unsigned int rej_uniform_avx(int32_t *r, const uint8_t buf[REJ_UNIFORM_BUFLEN+8]); + +#define rej_eta_avx DILITHIUM_NAMESPACE(rej_eta_avx) +unsigned int rej_eta_avx(int32_t *r, const uint8_t buf[REJ_UNIFORM_ETA_BUFLEN]); + +#endif + diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/rounding.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/rounding.c new file mode 100644 index 0000000000..3ada656776 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/rounding.c @@ -0,0 +1,200 @@ +#include +#include +#include 
+#include "params.h" +#include "rounding.h" +#include "rejsample.h" +#include "consts.h" + +#define _mm256_blendv_epi32(a,b,mask) \ + _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(a), \ + _mm256_castsi256_ps(b), \ + _mm256_castsi256_ps(mask))) + +/************************************************* +* Name: power2round +* +* Description: For finite field elements a, compute a0, a1 such that +* a mod^+ Q = a1*2^D + a0 with -2^{D-1} < a0 <= 2^{D-1}. +* Assumes a to be positive standard representative. +* +* Arguments: - __m256i *a1: output array of length N/8 with high bits +* - __m256i *a0: output array of length N/8 with low bits a0 +* - const __m256i *a: input array of length N/8 +* +**************************************************/ +void power2round_avx(__m256i *a1, __m256i *a0, const __m256i *a) +{ + unsigned int i; + __m256i f,f0,f1; + const __m256i mask = _mm256_set1_epi32(-(1 << D)); + const __m256i half = _mm256_set1_epi32((1 << (D-1)) - 1); + + for(i = 0; i < N/8; ++i) { + f = _mm256_load_si256(&a[i]); + f1 = _mm256_add_epi32(f,half); + f0 = _mm256_and_si256(f1,mask); + f1 = _mm256_srli_epi32(f1,D); + f0 = _mm256_sub_epi32(f,f0); + _mm256_store_si256(&a1[i],f1); + _mm256_store_si256(&a0[i],f0); + } +} + +/************************************************* +* Name: decompose +* +* Description: For finite field element a, compute high and low parts a0, a1 such +* that a mod^+ Q = a1*ALPHA + a0 with -ALPHA/2 < a0 <= ALPHA/2 except +* if a1 = (Q-1)/ALPHA where we set a1 = 0 and +* -ALPHA/2 <= a0 = a mod Q - Q < 0. Assumes a to be positive standard +* representative. 
+* +* Arguments: - __m256i *a1: output array of length N/8 with high parts +* - __m256i *a0: output array of length N/8 with low parts a0 +* - const __m256i *a: input array of length N/8 +* +**************************************************/ +#if GAMMA2 == (Q-1)/32 +void decompose_avx(__m256i *a1, __m256i *a0, const __m256i *a) +{ + unsigned int i; + __m256i f,f0,f1; + const __m256i q = _mm256_load_si256(&qdata.vec[_8XQ/8]); + const __m256i hq = _mm256_srli_epi32(q,1); + const __m256i v = _mm256_set1_epi32(1025); + const __m256i alpha = _mm256_set1_epi32(2*GAMMA2); + const __m256i off = _mm256_set1_epi32(127); + const __m256i shift = _mm256_set1_epi32(512); + const __m256i mask = _mm256_set1_epi32(15); + + for(i=0;i +#include +#include "params.h" + +#define power2round_avx DILITHIUM_NAMESPACE(power2round_avx) +void power2round_avx(__m256i *a1, __m256i *a0, const __m256i *a); +#define decompose_avx DILITHIUM_NAMESPACE(decompose_avx) +void decompose_avx(__m256i *a1, __m256i *a0, const __m256i *a); +#define make_hint_avx DILITHIUM_NAMESPACE(make_hint_avx) +unsigned int make_hint_avx(uint8_t hint[N], const __m256i *a0, const __m256i *a1); +#define use_hint_avx DILITHIUM_NAMESPACE(use_hint_avx) +void use_hint_avx(__m256i *b, const __m256i *a, const __m256i *hint); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/shuffle.S b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/shuffle.S new file mode 100644 index 0000000000..133e05132b --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/shuffle.S @@ -0,0 +1,52 @@ +#include "consts.h" +.include "shuffle.inc" + +.text +nttunpack128_avx: +#load +vmovdqa (%rdi),%ymm4 +vmovdqa 32(%rdi),%ymm5 +vmovdqa 64(%rdi),%ymm6 +vmovdqa 96(%rdi),%ymm7 +vmovdqa 128(%rdi),%ymm8 +vmovdqa 160(%rdi),%ymm9 +vmovdqa 192(%rdi),%ymm10 +vmovdqa 224(%rdi),%ymm11 + +shuffle8 4,8,3,8 +shuffle8 5,9,4,9 +shuffle8 6,10,5,10 +shuffle8 7,11,6,11 + +shuffle4 3,5,7,5 +shuffle4 
8,10,3,10 +shuffle4 4,6,8,6 +shuffle4 9,11,4,11 + +shuffle2 7,8,9,8 +shuffle2 5,6,7,6 +shuffle2 3,4,5,4 +shuffle2 10,11,3,11 + +#store +vmovdqa %ymm9,(%rdi) +vmovdqa %ymm8,32(%rdi) +vmovdqa %ymm7,64(%rdi) +vmovdqa %ymm6,96(%rdi) +vmovdqa %ymm5,128(%rdi) +vmovdqa %ymm4,160(%rdi) +vmovdqa %ymm3,192(%rdi) +vmovdqa %ymm11,224(%rdi) + +ret + +.global cdecl(nttunpack_avx) +cdecl(nttunpack_avx): +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +ret diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/shuffle.inc b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/shuffle.inc new file mode 100644 index 0000000000..73e9ffe03c --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/shuffle.inc @@ -0,0 +1,25 @@ +.macro shuffle8 r0,r1,r2,r3 +vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2 +vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle4 r0,r1,r2,r3 +vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2 +vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle2 r0,r1,r2,r3 +#vpsllq $32,%ymm\r1,%ymm\r2 +vmovsldup %ymm\r1,%ymm\r2 +vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 +vpsrlq $32,%ymm\r0,%ymm\r0 +#vmovshdup %ymm\r0,%ymm\r0 +vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle1 r0,r1,r2,r3 +vpslld $16,%ymm\r1,%ymm\r2 +vpblendw $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 +vpsrld $16,%ymm\r0,%ymm\r0 +vpblendw $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 +.endm diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/sign.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/sign.c new file mode 100644 index 0000000000..a39f8515c4 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/sign.c @@ -0,0 +1,445 @@ +#include +#include +#include "align.h" +#include "params.h" +#include "sign.h" +#include "packing.h" +#include "polyvec.h" +#include "poly.h" +#include "randombytes.h" +#include 
"symmetric.h" +#include "fips202.h" + +/* Expands row i of the matrix by calling the matching polyvec_matrix_expand_row<i> helper and points *row at the result. The two elements of buf are used alternately: for even i the helper receives (buf, buf + 1) and *row is set to buf, for odd i it receives (buf + 1, buf) and *row is set to buf + 1. NOTE(review): the role of the helper's second polyvecl argument is not visible here (presumably scratch or a head start on the next row) - confirm against polyvec.h. Rows 4-5 are only handled when K > 4 and rows 6-7 when K > 6; for any other i the switch has no matching case and *row is left unchanged. */ static inline void polyvec_matrix_expand_row(polyvecl **row, polyvecl buf[2], const uint8_t rho[SEEDBYTES], unsigned int i) { + switch(i) { + case 0: + polyvec_matrix_expand_row0(buf, buf + 1, rho); + *row = buf; + break; + case 1: + polyvec_matrix_expand_row1(buf + 1, buf, rho); + *row = buf + 1; + break; + case 2: + polyvec_matrix_expand_row2(buf, buf + 1, rho); + *row = buf; + break; + case 3: + polyvec_matrix_expand_row3(buf + 1, buf, rho); + *row = buf + 1; + break; +#if K > 4 + case 4: + polyvec_matrix_expand_row4(buf, buf + 1, rho); + *row = buf; + break; + case 5: + polyvec_matrix_expand_row5(buf + 1, buf, rho); + *row = buf + 1; + break; +#endif +#if K > 6 + case 6: + polyvec_matrix_expand_row6(buf, buf + 1, rho); + *row = buf; + break; + case 7: + polyvec_matrix_expand_row7(buf + 1, buf, rho); + *row = buf + 1; + break; +#endif + } +} + +/************************************************* +* Name: crypto_sign_keypair +* +* Description: Generates public and private key. 
+* +* Arguments: - uint8_t *pk: pointer to output public key (allocated +* array of CRYPTO_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key (allocated +* array of CRYPTO_SECRETKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { + unsigned int i; + uint8_t seedbuf[2*SEEDBYTES + CRHBYTES]; + const uint8_t *rho, *rhoprime, *key; + polyvecl rowbuf[2]; + polyvecl s1, *row = rowbuf; + polyveck s2; + poly t1, t0; + + /* Get randomness for rho, rhoprime and key */ + randombytes(seedbuf, SEEDBYTES); + shake256(seedbuf, 2*SEEDBYTES + CRHBYTES, seedbuf, SEEDBYTES); + rho = seedbuf; + rhoprime = rho + SEEDBYTES; + key = rhoprime + CRHBYTES; + + /* Store rho, key */ + memcpy(pk, rho, SEEDBYTES); + memcpy(sk, rho, SEEDBYTES); + memcpy(sk + SEEDBYTES, key, SEEDBYTES); + + /* Sample short vectors s1 and s2 */ +#if K == 4 && L == 4 + poly_uniform_eta_4x(&s1.vec[0], &s1.vec[1], &s1.vec[2], &s1.vec[3], rhoprime, 0, 1, 2, 3); + poly_uniform_eta_4x(&s2.vec[0], &s2.vec[1], &s2.vec[2], &s2.vec[3], rhoprime, 4, 5, 6, 7); +#elif K == 6 && L == 5 + poly_uniform_eta_4x(&s1.vec[0], &s1.vec[1], &s1.vec[2], &s1.vec[3], rhoprime, 0, 1, 2, 3); + poly_uniform_eta_4x(&s1.vec[4], &s2.vec[0], &s2.vec[1], &s2.vec[2], rhoprime, 4, 5, 6, 7); + poly_uniform_eta_4x(&s2.vec[3], &s2.vec[4], &s2.vec[5], &t0, rhoprime, 8, 9, 10, 11); +#elif K == 8 && L == 7 + poly_uniform_eta_4x(&s1.vec[0], &s1.vec[1], &s1.vec[2], &s1.vec[3], rhoprime, 0, 1, 2, 3); + poly_uniform_eta_4x(&s1.vec[4], &s1.vec[5], &s1.vec[6], &s2.vec[0], rhoprime, 4, 5, 6, 7); + poly_uniform_eta_4x(&s2.vec[1], &s2.vec[2], &s2.vec[3], &s2.vec[4], rhoprime, 8, 9, 10, 11); + poly_uniform_eta_4x(&s2.vec[5], &s2.vec[6], &s2.vec[7], &t0, rhoprime, 12, 13, 14, 15); +#else +#error +#endif + + /* Pack secret vectors */ + for(i = 0; i < L; i++) + polyeta_pack(sk + 2*SEEDBYTES + TRBYTES + i*POLYETA_PACKEDBYTES, &s1.vec[i]); + for(i = 0; i < 
K; i++) + polyeta_pack(sk + 2*SEEDBYTES + TRBYTES + (L + i)*POLYETA_PACKEDBYTES, &s2.vec[i]); + + /* Transform s1 */ + polyvecl_ntt(&s1); + + for(i = 0; i < K; i++) { + /* Expand matrix row */ + polyvec_matrix_expand_row(&row, rowbuf, rho, i); + + /* Compute inner-product */ + polyvecl_pointwise_acc_montgomery(&t1, row, &s1); + poly_invntt_tomont(&t1); + + /* Add error polynomial */ + poly_add(&t1, &t1, &s2.vec[i]); + + /* Round t and pack t1, t0 */ + poly_caddq(&t1); + poly_power2round(&t1, &t0, &t1); + polyt1_pack(pk + SEEDBYTES + i*POLYT1_PACKEDBYTES, &t1); + polyt0_pack(sk + 2*SEEDBYTES + TRBYTES + (L+K)*POLYETA_PACKEDBYTES + i*POLYT0_PACKEDBYTES, &t0); + } + + /* Compute H(rho, t1) and store in secret key */ + shake256(sk + 2*SEEDBYTES, TRBYTES, pk, CRYPTO_PUBLICKEYBYTES); + + return 0; +} + +/************************************************* +* Name: crypto_sign_signature +* +* Description: Computes signature. +* +* Arguments: - uint8_t *sig: pointer to output signature (of length CRYPTO_BYTES) +* - size_t *siglen: pointer to output length of signature +* - uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int crypto_sign_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t mlen, const uint8_t *sk) { + unsigned int i, n, pos; + uint8_t seedbuf[2*SEEDBYTES + TRBYTES + RNDBYTES + 2*CRHBYTES]; + uint8_t *rho, *tr, *key, *rnd, *mu, *rhoprime; + uint8_t hintbuf[N]; + uint8_t *hint = sig + CTILDEBYTES + L*POLYZ_PACKEDBYTES; + uint64_t nonce = 0; + polyvecl mat[K], s1, z; + polyveck t0, s2, w1; + poly c, tmp; + union { + polyvecl y; + polyveck w0; + } tmpv; + shake256incctx state; + + rho = seedbuf; + tr = rho + SEEDBYTES; + key = tr + TRBYTES; + rnd = key + SEEDBYTES; + mu = rnd + RNDBYTES; + rhoprime = mu + CRHBYTES; + unpack_sk(rho, tr, key, &t0, &s1, &s2, sk); + + /* Compute CRH(tr, msg) */ + 
shake256_inc_init(&state); + shake256_inc_absorb(&state, tr, TRBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(mu, CRHBYTES, &state); + +#ifdef DILITHIUM_RANDOMIZED_SIGNING + randombytes(rnd, RNDBYTES); +#else + memset(rnd, 0, RNDBYTES); +#endif + shake256(rhoprime, CRHBYTES, key, SEEDBYTES + RNDBYTES + CRHBYTES); + + /* Expand matrix and transform vectors */ + polyvec_matrix_expand(mat, rho); + polyvecl_ntt(&s1); + polyveck_ntt(&s2); + polyveck_ntt(&t0); + +rej: + /* Sample intermediate vector y */ +#if L == 4 + poly_uniform_gamma1_4x(&z.vec[0], &z.vec[1], &z.vec[2], &z.vec[3], + rhoprime, nonce, nonce + 1, nonce + 2, nonce + 3); + nonce += 4; +#elif L == 5 + poly_uniform_gamma1_4x(&z.vec[0], &z.vec[1], &z.vec[2], &z.vec[3], + rhoprime, nonce, nonce + 1, nonce + 2, nonce + 3); + poly_uniform_gamma1(&z.vec[4], rhoprime, nonce + 4); + nonce += 5; +#elif L == 7 + poly_uniform_gamma1_4x(&z.vec[0], &z.vec[1], &z.vec[2], &z.vec[3], + rhoprime, nonce, nonce + 1, nonce + 2, nonce + 3); + poly_uniform_gamma1_4x(&z.vec[4], &z.vec[5], &z.vec[6], &tmp, + rhoprime, nonce + 4, nonce + 5, nonce + 6, 0); + nonce += 7; +#else +#error +#endif + + /* Matrix-vector product */ + tmpv.y = z; + polyvecl_ntt(&tmpv.y); + polyvec_matrix_pointwise_montgomery(&w1, mat, &tmpv.y); + polyveck_invntt_tomont(&w1); + + /* Decompose w and call the random oracle */ + polyveck_caddq(&w1); + polyveck_decompose(&w1, &tmpv.w0, &w1); + polyveck_pack_w1(sig, &w1); + + shake256_inc_ctx_reset(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, sig, K*POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(sig, CTILDEBYTES, &state); + poly_challenge(&c, sig); + poly_ntt(&c); + + /* Compute z, reject if it reveals secret */ + for(i = 0; i < L; i++) { + poly_pointwise_montgomery(&tmp, &c, &s1.vec[i]); + poly_invntt_tomont(&tmp); + poly_add(&z.vec[i], &z.vec[i], &tmp); + poly_reduce(&z.vec[i]); + 
if(poly_chknorm(&z.vec[i], GAMMA1 - BETA)) + goto rej; + } + + /* Zero hint vector in signature */ + pos = 0; + memset(hint, 0, OMEGA); + + for(i = 0; i < K; i++) { + /* Check that subtracting cs2 does not change high bits of w and low bits + * do not reveal secret information */ + poly_pointwise_montgomery(&tmp, &c, &s2.vec[i]); + poly_invntt_tomont(&tmp); + poly_sub(&tmpv.w0.vec[i], &tmpv.w0.vec[i], &tmp); + poly_reduce(&tmpv.w0.vec[i]); + if(poly_chknorm(&tmpv.w0.vec[i], GAMMA2 - BETA)) + goto rej; + + /* Compute hints */ + poly_pointwise_montgomery(&tmp, &c, &t0.vec[i]); + poly_invntt_tomont(&tmp); + poly_reduce(&tmp); + if(poly_chknorm(&tmp, GAMMA2)) + goto rej; + + poly_add(&tmpv.w0.vec[i], &tmpv.w0.vec[i], &tmp); + n = poly_make_hint(hintbuf, &tmpv.w0.vec[i], &w1.vec[i]); + if(pos + n > OMEGA) + goto rej; + + /* Store hints in signature */ + memcpy(&hint[pos], hintbuf, n); + hint[OMEGA + i] = pos = pos + n; + } + + shake256_inc_ctx_release(&state); + /* Pack z into signature */ + for(i = 0; i < L; i++) + polyz_pack(sig + CTILDEBYTES + i*POLYZ_PACKEDBYTES, &z.vec[i]); + + *siglen = CRYPTO_BYTES; + return 0; +} + +/************************************************* +* Name: crypto_sign +* +* Description: Compute signed message. 
+* +* Arguments: - uint8_t *sm: pointer to output signed message (allocated +* array with CRYPTO_BYTES + mlen bytes), +* can be equal to m +* - size_t *smlen: pointer to output length of signed +* message +* - const uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - const uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int crypto_sign(uint8_t *sm, size_t *smlen, const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t i; + + /* Move the message behind the signature slot, copying from the last byte down so that sm == m does not overwrite bytes that have not been read yet */ for(i = 0; i < mlen; ++i) + sm[CRYPTO_BYTES + mlen - 1 - i] = m[mlen - 1 - i]; + /* crypto_sign_signature stores the signature length in *smlen; the message length is then added on top */ crypto_sign_signature(sm, smlen, sm + CRYPTO_BYTES, mlen, sk); + *smlen += mlen; + return 0; +} + +/************************************************* +* Name: crypto_sign_verify +* +* Description: Verifies signature. +* +* Arguments: - const uint8_t *sig: pointer to input signature +* - size_t siglen: length of signature +* - const uint8_t *m: pointer to message +* - size_t mlen: length of message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signature could be verified correctly and -1 otherwise +**************************************************/ +int crypto_sign_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size_t mlen, const uint8_t *pk) { + unsigned int i, j, pos = 0; + /* polyw1_pack writes additional 14 bytes */ + ALIGNED_UINT8(K*POLYW1_PACKEDBYTES+14) buf; + uint8_t mu[CRHBYTES]; + const uint8_t *hint = sig + CTILDEBYTES + L*POLYZ_PACKEDBYTES; + polyvecl rowbuf[2]; + polyvecl *row = rowbuf; + polyvecl z; + poly c, w1, h; + shake256incctx state; + + if(siglen != CRYPTO_BYTES) + return -1; + + /* Compute CRH(H(rho, t1), msg); mu first holds H(pk) and is then overwritten with the final digest. NOTE(review): H(pk) is taken with length CRHBYTES here, whereas crypto_sign_keypair and crypto_sign_signature in this file use TRBYTES for the same value; verification only matches if the two constants are equal - confirm in params.h */ + shake256(mu, CRHBYTES, pk, CRYPTO_PUBLICKEYBYTES); + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(mu, CRHBYTES, &state); + shake256_inc_ctx_release(&state); + 
/* Expand challenge */ + poly_challenge(&c, sig); + poly_ntt(&c); + + /* Unpack z; shortness follows from unpacking */ + for(i = 0; i < L; i++) { + polyz_unpack(&z.vec[i], sig + CTILDEBYTES + i*POLYZ_PACKEDBYTES); + poly_ntt(&z.vec[i]); + } + + for(i = 0; i < K; i++) { + /* Expand matrix row */ + polyvec_matrix_expand_row(&row, rowbuf, pk, i); + + /* Compute i-th row of Az - c2^Dt1 */ + polyvecl_pointwise_acc_montgomery(&w1, row, &z); + + polyt1_unpack(&h, pk + SEEDBYTES + i*POLYT1_PACKEDBYTES); + poly_shiftl(&h); + poly_ntt(&h); + poly_pointwise_montgomery(&h, &c, &h); + + poly_sub(&w1, &w1, &h); + poly_reduce(&w1); + poly_invntt_tomont(&w1); + + /* Get hint polynomial and reconstruct w1 */ + memset(h.vec, 0, sizeof(poly)); + if(hint[OMEGA + i] < pos || hint[OMEGA + i] > OMEGA) + return -1; + + for(j = pos; j < hint[OMEGA + i]; ++j) { + /* Coefficients are ordered for strong unforgeability */ + if(j > pos && hint[j] <= hint[j-1]) return -1; + h.coeffs[hint[j]] = 1; + } + pos = hint[OMEGA + i]; + + poly_caddq(&w1); + poly_use_hint(&w1, &w1, &h); + polyw1_pack(buf.coeffs + i*POLYW1_PACKEDBYTES, &w1); + } + + /* Extra indices are zero for strong unforgeability */ + for(j = pos; j < OMEGA; ++j) + if(hint[j]) return -1; + + /* Call random oracle and verify challenge */ + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, buf.coeffs, K*POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf.coeffs, CTILDEBYTES, &state); + shake256_inc_ctx_release(&state); + for(i = 0; i < CTILDEBYTES; ++i) + if(buf.coeffs[i] != sig[i]) + return -1; + + return 0; +} + +/************************************************* +* Name: crypto_sign_open +* +* Description: Verify signed message. 
+* +* Arguments: - uint8_t *m: pointer to output message (allocated +* array with smlen bytes), can be equal to sm +* - size_t *mlen: pointer to output length of message +* - const uint8_t *sm: pointer to signed message +* - size_t smlen: length of signed message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signed message could be verified correctly and -1 otherwise +**************************************************/ +int crypto_sign_open(uint8_t *m, size_t *mlen, const uint8_t *sm, size_t smlen, const uint8_t *pk) { + size_t i; + + /* Input shorter than a signature cannot be valid; *mlen has not been written at this point */ if(smlen < CRYPTO_BYTES) + goto badsig; + + *mlen = smlen - CRYPTO_BYTES; + if(crypto_sign_verify(sm, CRYPTO_BYTES, sm + CRYPTO_BYTES, *mlen, pk)) + goto badsig; + else { + /* All good, copy msg, return 0. The copy runs front to back from a source that sits CRYPTO_BYTES ahead of the destination, so m == sm is safe */ + for(i = 0; i < *mlen; ++i) + m[i] = sm[CRYPTO_BYTES + i]; + return 0; + } + +badsig: + /* Signature verification failed: mlen is a size_t *, so the -1 below is stored as the largest size_t value; the first smlen bytes of m are then zeroed */ + *mlen = -1; + for(i = 0; i < smlen; ++i) + m[i] = 0; + + return -1; +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/sign.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/sign.h new file mode 100644 index 0000000000..295f378c00 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/sign.h @@ -0,0 +1,36 @@ +#ifndef SIGN_H +#define SIGN_H + +#include +#include +#include "params.h" +#include "polyvec.h" +#include "poly.h" + +/* NOTE(review): sign.c in this directory derives the challenge through poly_challenge(); no call to challenge() is visible there - confirm whether this declaration is still needed */ #define challenge DILITHIUM_NAMESPACE(challenge) +void challenge(poly *c, const uint8_t seed[SEEDBYTES]); + +#define crypto_sign_keypair DILITHIUM_NAMESPACE(keypair) +int crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + +#define crypto_sign_signature DILITHIUM_NAMESPACE(signature) +int crypto_sign_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +#define crypto_sign DILITHIUM_NAMESPACETOP +int crypto_sign(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +#define crypto_sign_verify 
DILITHIUM_NAMESPACE(verify) +int crypto_sign_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +#define crypto_sign_open DILITHIUM_NAMESPACE(open) +int crypto_sign_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/symmetric-shake.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/symmetric-shake.c new file mode 100644 index 0000000000..963f649817 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/symmetric-shake.c @@ -0,0 +1,28 @@ +#include +#include "params.h" +#include "symmetric.h" +#include "fips202.h" + +/* Prepares a SHAKE128 incremental context for squeezing: initialises state, absorbs the SEEDBYTES-byte seed followed by the 16-bit nonce as two bytes (low byte first), then finalizes the absorb phase. */ void dilithium_shake128_stream_init(shake128incctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce) +{ + uint8_t t[2]; + /* Storing into uint8_t keeps only the low 8 bits of nonce */ t[0] = nonce; + t[1] = nonce >> 8; + + shake128_inc_init(state); + shake128_inc_absorb(state, seed, SEEDBYTES); + shake128_inc_absorb(state, t, 2); + shake128_inc_finalize(state); +} + +/* SHAKE256 counterpart of the function above: same sequence, but the seed is CRHBYTES bytes long. */ void dilithium_shake256_stream_init(shake256incctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce) +{ + uint8_t t[2]; + /* Nonce is absorbed as two bytes, low byte first */ t[0] = nonce; + t[1] = nonce >> 8; + + shake256_inc_init(state); + shake256_inc_absorb(state, seed, CRHBYTES); + shake256_inc_absorb(state, t, 2); + shake256_inc_finalize(state); +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/symmetric.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/symmetric.h new file mode 100644 index 0000000000..fa49963ae3 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_avx2/symmetric.h @@ -0,0 +1,28 @@ +#ifndef SYMMETRIC_H +#define SYMMETRIC_H + +#include +#include "params.h" + +#include "fips202.h" + +typedef shake128incctx stream128_state; +typedef shake256incctx stream256_state; + +#define dilithium_shake128_stream_init DILITHIUM_NAMESPACE(dilithium_shake128_stream_init) +void 
dilithium_shake128_stream_init(shake128incctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce); + +#define dilithium_shake256_stream_init DILITHIUM_NAMESPACE(dilithium_shake256_stream_init) +void dilithium_shake256_stream_init(shake256incctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define STREAM128_BLOCKBYTES SHAKE128_RATE +#define STREAM256_BLOCKBYTES SHAKE256_RATE + +#define stream128_init(STATE, SEED, NONCE) dilithium_shake128_stream_init(STATE, SEED, NONCE) +#define stream128_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) +#define stream128_release(STATE) shake128_inc_ctx_release(STATE) +#define stream256_init(STATE, SEED, NONCE) dilithium_shake256_stream_init(STATE, SEED, NONCE) +#define stream256_squeezeblocks(OUT, OUTBLOCKS, STATE) shake256_squeezeblocks(OUT, OUTBLOCKS, STATE) +#define stream256_release(STATE) shake256_inc_ctx_release(STATE) + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/LICENSE b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/LICENSE new file mode 100644 index 0000000000..cddfe615c6 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/LICENSE @@ -0,0 +1,7 @@ +Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/); +or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html). + +For Keccak and the random number generator +we are using public-domain code from sources +and by authors listed in comments on top of +the respective files. 
diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/api.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/api.h new file mode 100644 index 0000000000..78caa5c728 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/api.h @@ -0,0 +1,88 @@ +#ifndef API_H +#define API_H + +#include +#include + +#define pqcrystals_dilithium2_PUBLICKEYBYTES 1312 +#define pqcrystals_dilithium2_SECRETKEYBYTES 2560 +#define pqcrystals_dilithium2_BYTES 2420 + +#define pqcrystals_dilithium2_ref_PUBLICKEYBYTES pqcrystals_dilithium2_PUBLICKEYBYTES +#define pqcrystals_dilithium2_ref_SECRETKEYBYTES pqcrystals_dilithium2_SECRETKEYBYTES +#define pqcrystals_dilithium2_ref_BYTES pqcrystals_dilithium2_BYTES + +int pqcrystals_dilithium2_ref_keypair(uint8_t *pk, uint8_t *sk); + +int pqcrystals_dilithium2_ref_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium2_ref(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium2_ref_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int pqcrystals_dilithium2_ref_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#define pqcrystals_dilithium3_PUBLICKEYBYTES 1952 +#define pqcrystals_dilithium3_SECRETKEYBYTES 4032 +#define pqcrystals_dilithium3_BYTES 3309 + +#define pqcrystals_dilithium3_ref_PUBLICKEYBYTES pqcrystals_dilithium3_PUBLICKEYBYTES +#define pqcrystals_dilithium3_ref_SECRETKEYBYTES pqcrystals_dilithium3_SECRETKEYBYTES +#define pqcrystals_dilithium3_ref_BYTES pqcrystals_dilithium3_BYTES + +int pqcrystals_dilithium3_ref_keypair(uint8_t *pk, uint8_t *sk); + +int pqcrystals_dilithium3_ref_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium3_ref(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + 
const uint8_t *sk); + +int pqcrystals_dilithium3_ref_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int pqcrystals_dilithium3_ref_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#define pqcrystals_dilithium5_PUBLICKEYBYTES 2592 +#define pqcrystals_dilithium5_SECRETKEYBYTES 4896 +#define pqcrystals_dilithium5_BYTES 4627 + +#define pqcrystals_dilithium5_ref_PUBLICKEYBYTES pqcrystals_dilithium5_PUBLICKEYBYTES +#define pqcrystals_dilithium5_ref_SECRETKEYBYTES pqcrystals_dilithium5_SECRETKEYBYTES +#define pqcrystals_dilithium5_ref_BYTES pqcrystals_dilithium5_BYTES + +int pqcrystals_dilithium5_ref_keypair(uint8_t *pk, uint8_t *sk); + +int pqcrystals_dilithium5_ref_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium5_ref(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int pqcrystals_dilithium5_ref_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int pqcrystals_dilithium5_ref_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/config.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/config.h new file mode 100644 index 0000000000..eddf13f5ea --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/config.h @@ -0,0 +1,27 @@ +#ifndef CONFIG_H +#define CONFIG_H + +//#define DILITHIUM_MODE 2 +#define DILITHIUM_RANDOMIZED_SIGNING +//#define USE_RDPMC +//#define DBENCH + +#ifndef DILITHIUM_MODE +#define DILITHIUM_MODE 2 +#endif + +#if DILITHIUM_MODE == 2 +#define CRYPTO_ALGNAME "ML-DSA-44-ipd" +#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_44_ipd_ref +#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_44_ipd_ref_##s +#elif DILITHIUM_MODE == 3 +#define 
CRYPTO_ALGNAME "ML-DSA-65-ipd" +#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_65_ipd_ref +#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_65_ipd_ref_##s +#elif DILITHIUM_MODE == 5 +#define CRYPTO_ALGNAME "ML-DSA-87-ipd" +#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_87_ipd_ref +#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_87_ipd_ref_##s +#endif + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/ntt.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/ntt.c new file mode 100644 index 0000000000..5ea8b530e1 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/ntt.c @@ -0,0 +1,98 @@ +#include <stdint.h> +#include "params.h" +#include "ntt.h" +#include "reduce.h" + +static const int32_t zetas[N] = { + 0, 25847, -2608894, -518909, 237124, -777960, -876248, 466468, + 1826347, 2353451, -359251, -2091905, 3119733, -2884855, 3111497, 2680103, + 2725464, 1024112, -1079900, 3585928, -549488, -1119584, 2619752, -2108549, + -2118186, -3859737, -1399561, -3277672, 1757237, -19422, 4010497, 280005, + 2706023, 95776, 3077325, 3530437, -1661693, -3592148, -2537516, 3915439, + -3861115, -3043716, 3574422, -2867647, 3539968, -300467, 2348700, -539299, + -1699267, -1643818, 3505694, -3821735, 3507263, -2140649, -1600420, 3699596, + 811944, 531354, 954230, 3881043, 3900724, -2556880, 2071892, -2797779, + -3930395, -1528703, -3677745, -3041255, -1452451, 3475950, 2176455, -1585221, + -1257611, 1939314, -4083598, -1000202, -3190144, -3157330, -3632928, 126922, + 3412210, -983419, 2147896, 2715295, -2967645, -3693493, -411027, -2477047, + -671102, -1228525, -22981, -1308169, -381987, 1349076, 1852771, -1430430, + -3343383, 264944, 508951, 3097992, 44288, -1100098, 904516, 3958618, + -3724342, -8578, 1653064, -3249728, 2389356, -210977, 759969, -1316856, + 189548, -3553272, 3159746, -1851402, -2409325, -177440, 1315589, 1341330, + 1285669, -1584928, -812732, -1439742, -3019102, -3881060, -3628969, 3839961, +
2091667, 3407706, 2316500, 3817976, -3342478, 2244091, -2446433, -3562462, + 266997, 2434439, -1235728, 3513181, -3520352, -3759364, -1197226, -3193378, + 900702, 1859098, 909542, 819034, 495491, -1613174, -43260, -522500, + -655327, -3122442, 2031748, 3207046, -3556995, -525098, -768622, -3595838, + 342297, 286988, -2437823, 4108315, 3437287, -3342277, 1735879, 203044, + 2842341, 2691481, -2590150, 1265009, 4055324, 1247620, 2486353, 1595974, + -3767016, 1250494, 2635921, -3548272, -2994039, 1869119, 1903435, -1050970, + -1333058, 1237275, -3318210, -1430225, -451100, 1312455, 3306115, -1962642, + -1279661, 1917081, -2546312, -1374803, 1500165, 777191, 2235880, 3406031, + -542412, -2831860, -1671176, -1846953, -2584293, -3724270, 594136, -3776993, + -2013608, 2432395, 2454455, -164721, 1957272, 3369112, 185531, -1207385, + -3183426, 162844, 1616392, 3014001, 810149, 1652634, -3694233, -1799107, + -3038916, 3523897, 3866901, 269760, 2213111, -975884, 1717735, 472078, + -426683, 1723600, -1803090, 1910376, -1667432, -1104333, -260646, -3833893, + -2939036, -2235985, -420899, -2286327, 183443, -976891, 1612842, -3545687, + -554416, 3919660, -48306, -1362209, 3937738, 1400424, -846154, 1976782 +}; + +/************************************************* +* Name: ntt +* +* Description: Forward NTT, in-place. No modular reduction is performed after +* additions or subtractions. Output vector is in bitreversed order. 
+* +* Arguments: - int32_t a[N]: input/output coefficient array +**************************************************/ +void ntt(int32_t a[N]) { + unsigned int len, start, j, k; + int32_t zeta, t; + + k = 0; + for(len = 128; len > 0; len >>= 1) { + for(start = 0; start < N; start = j + len) { + zeta = zetas[++k]; + for(j = start; j < start + len; ++j) { + t = montgomery_reduce((int64_t)zeta * a[j + len]); + a[j + len] = a[j] - t; + a[j] = a[j] + t; + } + } + } +} + +/************************************************* +* Name: invntt_tomont +* +* Description: Inverse NTT and multiplication by Montgomery factor 2^32. +* In-place. No modular reductions after additions or +* subtractions; input coefficients need to be smaller than +* Q in absolute value. Output coefficients are smaller than Q in +* absolute value. +* +* Arguments: - int32_t a[N]: input/output coefficient array +**************************************************/ +void invntt_tomont(int32_t a[N]) { + unsigned int start, len, j, k; + int32_t t, zeta; + const int32_t f = 41978; // mont^2/256 + + k = 256; + for(len = 1; len < N; len <<= 1) { + for(start = 0; start < N; start = j + len) { + zeta = -zetas[--k]; + for(j = start; j < start + len; ++j) { + t = a[j]; + a[j] = t + a[j + len]; + a[j + len] = t - a[j + len]; + a[j + len] = montgomery_reduce((int64_t)zeta * a[j + len]); + } + } + } + + for(j = 0; j < N; ++j) { + a[j] = montgomery_reduce((int64_t)f * a[j]); + } +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/ntt.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/ntt.h new file mode 100644 index 0000000000..731132d5cd --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/ntt.h @@ -0,0 +1,13 @@ +#ifndef NTT_H +#define NTT_H + +#include <stdint.h> +#include "params.h" + +#define ntt DILITHIUM_NAMESPACE(ntt) +void ntt(int32_t a[N]); + +#define invntt_tomont DILITHIUM_NAMESPACE(invntt_tomont) +void invntt_tomont(int32_t a[N]); + +#endif diff
--git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/packing.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/packing.c new file mode 100644 index 0000000000..039a686da3 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/packing.c @@ -0,0 +1,237 @@ +#include "params.h" +#include "packing.h" +#include "polyvec.h" +#include "poly.h" + +/************************************************* +* Name: pack_pk +* +* Description: Bit-pack public key pk = (rho, t1). +* +* Arguments: - uint8_t pk[]: output byte array +* - const uint8_t rho[]: byte array containing rho +* - const polyveck *t1: pointer to vector t1 +**************************************************/ +void pack_pk(uint8_t pk[CRYPTO_PUBLICKEYBYTES], + const uint8_t rho[SEEDBYTES], + const polyveck *t1) +{ + unsigned int i; + + for(i = 0; i < SEEDBYTES; ++i) + pk[i] = rho[i]; + pk += SEEDBYTES; + + for(i = 0; i < K; ++i) + polyt1_pack(pk + i*POLYT1_PACKEDBYTES, &t1->vec[i]); +} + +/************************************************* +* Name: unpack_pk +* +* Description: Unpack public key pk = (rho, t1). +* +* Arguments: - uint8_t rho[]: output byte array for rho +* - polyveck *t1: pointer to output vector t1 +* - const uint8_t pk[]: byte array containing bit-packed pk +**************************************************/ +void unpack_pk(uint8_t rho[SEEDBYTES], + polyveck *t1, + const uint8_t pk[CRYPTO_PUBLICKEYBYTES]) +{ + unsigned int i; + + for(i = 0; i < SEEDBYTES; ++i) + rho[i] = pk[i]; + pk += SEEDBYTES; + + for(i = 0; i < K; ++i) + polyt1_unpack(&t1->vec[i], pk + i*POLYT1_PACKEDBYTES); +} + +/************************************************* +* Name: pack_sk +* +* Description: Bit-pack secret key sk = (rho, tr, key, t0, s1, s2).
+* +* Arguments: - uint8_t sk[]: output byte array +* - const uint8_t rho[]: byte array containing rho +* - const uint8_t tr[]: byte array containing tr +* - const uint8_t key[]: byte array containing key +* - const polyveck *t0: pointer to vector t0 +* - const polyvecl *s1: pointer to vector s1 +* - const polyveck *s2: pointer to vector s2 +**************************************************/ +void pack_sk(uint8_t sk[CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[TRBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2) +{ + unsigned int i; + + for(i = 0; i < SEEDBYTES; ++i) + sk[i] = rho[i]; + sk += SEEDBYTES; + + for(i = 0; i < SEEDBYTES; ++i) + sk[i] = key[i]; + sk += SEEDBYTES; + + for(i = 0; i < TRBYTES; ++i) + sk[i] = tr[i]; + sk += TRBYTES; + + for(i = 0; i < L; ++i) + polyeta_pack(sk + i*POLYETA_PACKEDBYTES, &s1->vec[i]); + sk += L*POLYETA_PACKEDBYTES; + + for(i = 0; i < K; ++i) + polyeta_pack(sk + i*POLYETA_PACKEDBYTES, &s2->vec[i]); + sk += K*POLYETA_PACKEDBYTES; + + for(i = 0; i < K; ++i) + polyt0_pack(sk + i*POLYT0_PACKEDBYTES, &t0->vec[i]); +} + +/************************************************* +* Name: unpack_sk +* +* Description: Unpack secret key sk = (rho, tr, key, t0, s1, s2). 
+* +* Arguments: - uint8_t rho[]: output byte array for rho +* - uint8_t tr[]: output byte array for tr +* - uint8_t key[]: output byte array for key +* - polyveck *t0: pointer to output vector t0 +* - polyvecl *s1: pointer to output vector s1 +* - polyveck *s2: pointer to output vector s2 +* - const uint8_t sk[]: byte array containing bit-packed sk +**************************************************/ +void unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[TRBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[CRYPTO_SECRETKEYBYTES]) +{ + unsigned int i; + + for(i = 0; i < SEEDBYTES; ++i) + rho[i] = sk[i]; + sk += SEEDBYTES; + + for(i = 0; i < SEEDBYTES; ++i) + key[i] = sk[i]; + sk += SEEDBYTES; + + for(i = 0; i < TRBYTES; ++i) + tr[i] = sk[i]; + sk += TRBYTES; + + for(i=0; i < L; ++i) + polyeta_unpack(&s1->vec[i], sk + i*POLYETA_PACKEDBYTES); + sk += L*POLYETA_PACKEDBYTES; + + for(i=0; i < K; ++i) + polyeta_unpack(&s2->vec[i], sk + i*POLYETA_PACKEDBYTES); + sk += K*POLYETA_PACKEDBYTES; + + for(i=0; i < K; ++i) + polyt0_unpack(&t0->vec[i], sk + i*POLYT0_PACKEDBYTES); +} + +/************************************************* +* Name: pack_sig +* +* Description: Bit-pack signature sig = (c, z, h).
+* +* Arguments: - uint8_t sig[]: output byte array +* - const uint8_t *c: pointer to challenge hash of length CTILDEBYTES +* - const polyvecl *z: pointer to vector z +* - const polyveck *h: pointer to hint vector h +**************************************************/ +void pack_sig(uint8_t sig[CRYPTO_BYTES], + const uint8_t c[CTILDEBYTES], + const polyvecl *z, + const polyveck *h) +{ + unsigned int i, j, k; + + for(i=0; i < CTILDEBYTES; ++i) + sig[i] = c[i]; + sig += CTILDEBYTES; + + for(i = 0; i < L; ++i) + polyz_pack(sig + i*POLYZ_PACKEDBYTES, &z->vec[i]); + sig += L*POLYZ_PACKEDBYTES; + + /* Encode h */ + for(i = 0; i < OMEGA + K; ++i) + sig[i] = 0; + + k = 0; + for(i = 0; i < K; ++i) { + for(j = 0; j < N; ++j) + if(h->vec[i].coeffs[j] != 0) + sig[k++] = j; + + sig[OMEGA + i] = k; + } +} + +/************************************************* +* Name: unpack_sig +* +* Description: Unpack signature sig = (c, z, h). +* +* Arguments: - uint8_t *c: pointer to output challenge hash +* - polyvecl *z: pointer to output vector z +* - polyveck *h: pointer to output hint vector h +* - const uint8_t sig[]: byte array containing +* bit-packed signature +* +* Returns 1 in case of malformed signature; otherwise 0.
+**************************************************/ +int unpack_sig(uint8_t c[CTILDEBYTES], + polyvecl *z, + polyveck *h, + const uint8_t sig[CRYPTO_BYTES]) +{ + unsigned int i, j, k; + + for(i = 0; i < CTILDEBYTES; ++i) + c[i] = sig[i]; + sig += CTILDEBYTES; + + for(i = 0; i < L; ++i) + polyz_unpack(&z->vec[i], sig + i*POLYZ_PACKEDBYTES); + sig += L*POLYZ_PACKEDBYTES; + + /* Decode h */ + k = 0; + for(i = 0; i < K; ++i) { + for(j = 0; j < N; ++j) + h->vec[i].coeffs[j] = 0; + + if(sig[OMEGA + i] < k || sig[OMEGA + i] > OMEGA) + return 1; + + for(j = k; j < sig[OMEGA + i]; ++j) { + /* Coefficients are ordered for strong unforgeability */ + if(j > k && sig[j] <= sig[j-1]) return 1; + h->vec[i].coeffs[sig[j]] = 1; + } + + k = sig[OMEGA + i]; + } + + /* Extra indices are zero for strong unforgeability */ + for(j = k; j < OMEGA; ++j) + if(sig[j]) + return 1; + + return 0; +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/packing.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/packing.h new file mode 100644 index 0000000000..8e47728ce3 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/packing.h @@ -0,0 +1,38 @@ +#ifndef PACKING_H +#define PACKING_H + +#include <stdint.h> +#include "params.h" +#include "polyvec.h" + +#define pack_pk DILITHIUM_NAMESPACE(pack_pk) +void pack_pk(uint8_t pk[CRYPTO_PUBLICKEYBYTES], const uint8_t rho[SEEDBYTES], const polyveck *t1); + +#define pack_sk DILITHIUM_NAMESPACE(pack_sk) +void pack_sk(uint8_t sk[CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[TRBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2); + +#define pack_sig DILITHIUM_NAMESPACE(pack_sig) +void pack_sig(uint8_t sig[CRYPTO_BYTES], const uint8_t c[CTILDEBYTES], const polyvecl *z, const polyveck *h); + +#define unpack_pk DILITHIUM_NAMESPACE(unpack_pk) +void unpack_pk(uint8_t rho[SEEDBYTES], polyveck *t1, const uint8_t
pk[CRYPTO_PUBLICKEYBYTES]); + +#define unpack_sk DILITHIUM_NAMESPACE(unpack_sk) +void unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[TRBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[CRYPTO_SECRETKEYBYTES]); + +#define unpack_sig DILITHIUM_NAMESPACE(unpack_sig) +int unpack_sig(uint8_t c[CTILDEBYTES], polyvecl *z, polyveck *h, const uint8_t sig[CRYPTO_BYTES]); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/params.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/params.h new file mode 100644 index 0000000000..1e8a7b505b --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/params.h @@ -0,0 +1,80 @@ +#ifndef PARAMS_H +#define PARAMS_H + +#include "config.h" + +#define SEEDBYTES 32 +#define CRHBYTES 64 +#define TRBYTES 64 +#define RNDBYTES 32 +#define N 256 +#define Q 8380417 +#define D 13 +#define ROOT_OF_UNITY 1753 + +#if DILITHIUM_MODE == 2 +#define K 4 +#define L 4 +#define ETA 2 +#define TAU 39 +#define BETA 78 +#define GAMMA1 (1 << 17) +#define GAMMA2 ((Q-1)/88) +#define OMEGA 80 +#define CTILDEBYTES 32 + +#elif DILITHIUM_MODE == 3 +#define K 6 +#define L 5 +#define ETA 4 +#define TAU 49 +#define BETA 196 +#define GAMMA1 (1 << 19) +#define GAMMA2 ((Q-1)/32) +#define OMEGA 55 +#define CTILDEBYTES 48 + +#elif DILITHIUM_MODE == 5 +#define K 8 +#define L 7 +#define ETA 2 +#define TAU 60 +#define BETA 120 +#define GAMMA1 (1 << 19) +#define GAMMA2 ((Q-1)/32) +#define OMEGA 75 +#define CTILDEBYTES 64 + +#endif + +#define POLYT1_PACKEDBYTES 320 +#define POLYT0_PACKEDBYTES 416 +#define POLYVECH_PACKEDBYTES (OMEGA + K) + +#if GAMMA1 == (1 << 17) +#define POLYZ_PACKEDBYTES 576 +#elif GAMMA1 == (1 << 19) +#define POLYZ_PACKEDBYTES 640 +#endif + +#if GAMMA2 == (Q-1)/88 +#define POLYW1_PACKEDBYTES 192 +#elif GAMMA2 == (Q-1)/32 +#define POLYW1_PACKEDBYTES 128 +#endif + +#if ETA == 2 +#define POLYETA_PACKEDBYTES 96 +#elif ETA == 4 +#define 
POLYETA_PACKEDBYTES 128 +#endif + +#define CRYPTO_PUBLICKEYBYTES (SEEDBYTES + K*POLYT1_PACKEDBYTES) +#define CRYPTO_SECRETKEYBYTES (2*SEEDBYTES \ + + TRBYTES \ + + L*POLYETA_PACKEDBYTES \ + + K*POLYETA_PACKEDBYTES \ + + K*POLYT0_PACKEDBYTES) +#define CRYPTO_BYTES (CTILDEBYTES + L*POLYZ_PACKEDBYTES + POLYVECH_PACKEDBYTES) + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/poly.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/poly.c new file mode 100644 index 0000000000..7983aacdd1 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/poly.c @@ -0,0 +1,911 @@ +#include <stdint.h> +#include "params.h" +#include "poly.h" +#include "ntt.h" +#include "reduce.h" +#include "rounding.h" +#include "symmetric.h" + +#ifdef DBENCH +#include "test/cpucycles.h" +extern const uint64_t timing_overhead; +extern uint64_t *tred, *tadd, *tmul, *tround, *tsample, *tpack; +#define DBENCH_START() uint64_t time = cpucycles() +#define DBENCH_STOP(t) t += cpucycles() - time - timing_overhead +#else +#define DBENCH_START() +#define DBENCH_STOP(t) +#endif + +/************************************************* +* Name: poly_reduce +* +* Description: Inplace reduction of all coefficients of polynomial to +* representative in [-6283009,6283007]. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_reduce(poly *a) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N; ++i) + a->coeffs[i] = reduce32(a->coeffs[i]); + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: poly_caddq +* +* Description: For all coefficients of in/out polynomial add Q if +* coefficient is negative.
+* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_caddq(poly *a) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N; ++i) + a->coeffs[i] = caddq(a->coeffs[i]); + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: poly_add +* +* Description: Add polynomials. No modular reduction is performed. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first summand +* - const poly *b: pointer to second summand +**************************************************/ +void poly_add(poly *c, const poly *a, const poly *b) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N; ++i) + c->coeffs[i] = a->coeffs[i] + b->coeffs[i]; + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: poly_sub +* +* Description: Subtract polynomials. No modular reduction is +* performed. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial to be +* subtracted from first input polynomial +**************************************************/ +void poly_sub(poly *c, const poly *a, const poly *b) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N; ++i) + c->coeffs[i] = a->coeffs[i] - b->coeffs[i]; + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: poly_shiftl +* +* Description: Multiply polynomial by 2^D without modular reduction. Assumes +* input coefficients to be less than 2^{31-D} in absolute value.
+* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_shiftl(poly *a) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N; ++i) + a->coeffs[i] <<= D; + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: poly_ntt +* +* Description: Inplace forward NTT. Coefficients can grow by +* 8*Q in absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_ntt(poly *a) { + DBENCH_START(); + + ntt(a->coeffs); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: poly_invntt_tomont +* +* Description: Inplace inverse NTT and multiplication by 2^{32}. +* Input coefficients need to be less than Q in absolute +* value and output coefficients are again bounded by Q. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void poly_invntt_tomont(poly *a) { + DBENCH_START(); + + invntt_tomont(a->coeffs); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: poly_pointwise_montgomery +* +* Description: Pointwise multiplication of polynomials in NTT domain +* representation and multiplication of resulting polynomial +* by 2^{-32}. 
+* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void poly_pointwise_montgomery(poly *c, const poly *a, const poly *b) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N; ++i) + c->coeffs[i] = montgomery_reduce((int64_t)a->coeffs[i] * b->coeffs[i]); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: poly_power2round +* +* Description: For all coefficients c of the input polynomial, +* compute c0, c1 such that c mod Q = c1*2^D + c0 +* with -2^{D-1} < c0 <= 2^{D-1}. Assumes coefficients to be +* standard representatives. +* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void poly_power2round(poly *a1, poly *a0, const poly *a) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N; ++i) + a1->coeffs[i] = power2round(&a0->coeffs[i], a->coeffs[i]); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: poly_decompose +* +* Description: For all coefficients c of the input polynomial, +* compute high and low bits c0, c1 such c mod Q = c1*ALPHA + c0 +* with -ALPHA/2 < c0 <= ALPHA/2 except c1 = (Q-1)/ALPHA where we +* set c1 = 0 and -ALPHA/2 <= c0 = c mod Q - Q < 0. +* Assumes coefficients to be standard representatives. 
+* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void poly_decompose(poly *a1, poly *a0, const poly *a) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N; ++i) + a1->coeffs[i] = decompose(&a0->coeffs[i], a->coeffs[i]); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: poly_make_hint +* +* Description: Compute hint polynomial. The coefficients of which indicate +* whether the low bits of the corresponding coefficient of +* the input polynomial overflow into the high bits. +* +* Arguments: - poly *h: pointer to output hint polynomial +* - const poly *a0: pointer to low part of input polynomial +* - const poly *a1: pointer to high part of input polynomial +* +* Returns number of 1 bits. +**************************************************/ +unsigned int poly_make_hint(poly *h, const poly *a0, const poly *a1) { + unsigned int i, s = 0; + DBENCH_START(); + + for(i = 0; i < N; ++i) { + h->coeffs[i] = make_hint(a0->coeffs[i], a1->coeffs[i]); + s += h->coeffs[i]; + } + + DBENCH_STOP(*tround); + return s; +} + +/************************************************* +* Name: poly_use_hint +* +* Description: Use hint polynomial to correct the high bits of a polynomial. 
+* +* Arguments: - poly *b: pointer to output polynomial with corrected high bits +* - const poly *a: pointer to input polynomial +* - const poly *h: pointer to input hint polynomial +**************************************************/ +void poly_use_hint(poly *b, const poly *a, const poly *h) { + unsigned int i; + DBENCH_START(); + + for(i = 0; i < N; ++i) + b->coeffs[i] = use_hint(a->coeffs[i], h->coeffs[i]); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: poly_chknorm +* +* Description: Check infinity norm of polynomial against given bound. +* Assumes input coefficients were reduced by reduce32(). +* +* Arguments: - const poly *a: pointer to polynomial +* - int32_t B: norm bound +* +* Returns 0 if norm is strictly smaller than B <= (Q-1)/8 and 1 otherwise. +**************************************************/ +int poly_chknorm(const poly *a, int32_t B) { + unsigned int i; + int32_t t; + DBENCH_START(); + + if(B > (Q-1)/8) + return 1; + + /* It is ok to leak which coefficient violates the bound since + the probability for each coefficient is independent of secret + data but we must not leak the sign of the centralized representative. */ + for(i = 0; i < N; ++i) { + /* Absolute value */ + t = a->coeffs[i] >> 31; + t = a->coeffs[i] - (t & 2*a->coeffs[i]); + + if(t >= B) { + DBENCH_STOP(*tsample); + return 1; + } + } + + DBENCH_STOP(*tsample); + return 0; +} + +/************************************************* +* Name: rej_uniform +* +* Description: Sample uniformly random coefficients in [0, Q-1] by +* performing rejection sampling on array of random bytes. +* +* Arguments: - int32_t *a: pointer to output array (allocated) +* - unsigned int len: number of coefficients to be sampled +* - const uint8_t *buf: array of random bytes +* - unsigned int buflen: length of array of random bytes +* +* Returns number of sampled coefficients. Can be smaller than len if not enough +* random bytes were given. 
+**************************************************/ +static unsigned int rej_uniform(int32_t *a, + unsigned int len, + const uint8_t *buf, + unsigned int buflen) +{ + unsigned int ctr, pos; + uint32_t t; + DBENCH_START(); + + ctr = pos = 0; + while(ctr < len && pos + 3 <= buflen) { + t = buf[pos++]; + t |= (uint32_t)buf[pos++] << 8; + t |= (uint32_t)buf[pos++] << 16; + t &= 0x7FFFFF; + + if(t < Q) + a[ctr++] = t; + } + + DBENCH_STOP(*tsample); + return ctr; +} + +/************************************************* +* Name: poly_uniform +* +* Description: Sample polynomial with uniformly random coefficients +* in [0,Q-1] by performing rejection sampling on the +* output stream of SHAKE256(seed|nonce) +* +* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length SEEDBYTES +* - uint16_t nonce: 2-byte nonce +**************************************************/ +#define POLY_UNIFORM_NBLOCKS ((768 + STREAM128_BLOCKBYTES - 1)/STREAM128_BLOCKBYTES) +void poly_uniform(poly *a, + const uint8_t seed[SEEDBYTES], + uint16_t nonce) +{ + unsigned int i, ctr, off; + unsigned int buflen = POLY_UNIFORM_NBLOCKS*STREAM128_BLOCKBYTES; + uint8_t buf[POLY_UNIFORM_NBLOCKS*STREAM128_BLOCKBYTES + 2]; + stream128_state state; + + stream128_init(&state, seed, nonce); + stream128_squeezeblocks(buf, POLY_UNIFORM_NBLOCKS, &state); + + ctr = rej_uniform(a->coeffs, N, buf, buflen); + + while(ctr < N) { + off = buflen % 3; + for(i = 0; i < off; ++i) + buf[i] = buf[buflen - off + i]; + + stream128_squeezeblocks(buf + off, 1, &state); + buflen = STREAM128_BLOCKBYTES + off; + ctr += rej_uniform(a->coeffs + ctr, N - ctr, buf, buflen); + } + stream128_release(&state); +} + +/************************************************* +* Name: rej_eta +* +* Description: Sample uniformly random coefficients in [-ETA, ETA] by +* performing rejection sampling on array of random bytes. 
+* +* Arguments: - int32_t *a: pointer to output array (allocated) +* - unsigned int len: number of coefficients to be sampled +* - const uint8_t *buf: array of random bytes +* - unsigned int buflen: length of array of random bytes +* +* Returns number of sampled coefficients. Can be smaller than len if not enough +* random bytes were given. +**************************************************/ +static unsigned int rej_eta(int32_t *a, + unsigned int len, + const uint8_t *buf, + unsigned int buflen) +{ + unsigned int ctr, pos; + uint32_t t0, t1; + DBENCH_START(); + + ctr = pos = 0; + while(ctr < len && pos < buflen) { + t0 = buf[pos] & 0x0F; + t1 = buf[pos++] >> 4; + +#if ETA == 2 + if(t0 < 15) { + t0 = t0 - (205*t0 >> 10)*5; + a[ctr++] = 2 - t0; + } + if(t1 < 15 && ctr < len) { + t1 = t1 - (205*t1 >> 10)*5; + a[ctr++] = 2 - t1; + } +#elif ETA == 4 + if(t0 < 9) + a[ctr++] = 4 - t0; + if(t1 < 9 && ctr < len) + a[ctr++] = 4 - t1; +#endif + } + + DBENCH_STOP(*tsample); + return ctr; +} + +/************************************************* +* Name: poly_uniform_eta +* +* Description: Sample polynomial with uniformly random coefficients +* in [-ETA,ETA] by performing rejection sampling on the +* output stream from SHAKE256(seed|nonce) +* +* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length CRHBYTES +* - uint16_t nonce: 2-byte nonce +**************************************************/ +#if ETA == 2 +#define POLY_UNIFORM_ETA_NBLOCKS ((136 + STREAM256_BLOCKBYTES - 1)/STREAM256_BLOCKBYTES) +#elif ETA == 4 +#define POLY_UNIFORM_ETA_NBLOCKS ((227 + STREAM256_BLOCKBYTES - 1)/STREAM256_BLOCKBYTES) +#endif +void poly_uniform_eta(poly *a, + const uint8_t seed[CRHBYTES], + uint16_t nonce) +{ + unsigned int ctr; + unsigned int buflen = POLY_UNIFORM_ETA_NBLOCKS*STREAM256_BLOCKBYTES; + uint8_t buf[POLY_UNIFORM_ETA_NBLOCKS*STREAM256_BLOCKBYTES]; + stream256_state state; + + stream256_init(&state, seed, nonce); + 
stream256_squeezeblocks(buf, POLY_UNIFORM_ETA_NBLOCKS, &state); + + ctr = rej_eta(a->coeffs, N, buf, buflen); + + while(ctr < N) { + stream256_squeezeblocks(buf, 1, &state); + ctr += rej_eta(a->coeffs + ctr, N - ctr, buf, STREAM256_BLOCKBYTES); + } + stream256_release(&state); +} + +/************************************************* +* Name: poly_uniform_gamma1 +* +* Description: Sample polynomial with uniformly random coefficients +* in [-(GAMMA1 - 1), GAMMA1] by unpacking output stream +* of SHAKE256(seed|nonce) +* +* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length CRHBYTES +* - uint16_t nonce: 16-bit nonce +**************************************************/ +#define POLY_UNIFORM_GAMMA1_NBLOCKS ((POLYZ_PACKEDBYTES + STREAM256_BLOCKBYTES - 1)/STREAM256_BLOCKBYTES) +void poly_uniform_gamma1(poly *a, + const uint8_t seed[CRHBYTES], + uint16_t nonce) +{ + uint8_t buf[POLY_UNIFORM_GAMMA1_NBLOCKS*STREAM256_BLOCKBYTES]; + stream256_state state; + + stream256_init(&state, seed, nonce); + stream256_squeezeblocks(buf, POLY_UNIFORM_GAMMA1_NBLOCKS, &state); + stream256_release(&state); + polyz_unpack(a, buf); +} + +/************************************************* +* Name: poly_challenge +* +* Description: Implementation of H. Samples polynomial with TAU nonzero +* coefficients in {-1,1} using the output stream of +* SHAKE256(seed).
+*
+* Arguments:   - poly *c: pointer to output polynomial
+*              - const uint8_t mu[]: byte array containing seed of length SEEDBYTES
+**************************************************/
+void poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]) {
+  unsigned int i, b, pos;           /* b: candidate index; pos: next unread byte of buf */
+  uint64_t signs;                   /* pool of sign bits, consumed from the low end */
+  uint8_t buf[SHAKE256_RATE];
+  shake256incctx state;
+
+  shake256_inc_init(&state);
+  shake256_inc_absorb(&state, seed, SEEDBYTES);
+  shake256_inc_finalize(&state);
+  shake256_squeezeblocks(buf, 1, &state);   /* first block of output */
+
+  signs = 0;
+  for(i = 0; i < 8; ++i)
+    signs |= (uint64_t)buf[i] << 8*i;       /* first 8 bytes, little-endian, form the sign bits */
+  pos = 8;                                  /* index sampling starts right after the sign bytes */
+
+  for(i = 0; i < N; ++i)
+    c->coeffs[i] = 0;
+  for(i = N-TAU; i < N; ++i) {              /* TAU iterations, one nonzero coefficient placed in each */
+    do {
+      if(pos >= SHAKE256_RATE) {            /* buffer exhausted: squeeze another block */
+        shake256_squeezeblocks(buf, 1, &state);
+        pos = 0;
+      }
+
+      b = buf[pos++];
+    } while(b > i);                         /* reject until b lies in [0, i] */
+
+    c->coeffs[i] = c->coeffs[b];            /* move the current value at b up to position i ... */
+    c->coeffs[b] = 1 - 2*(signs & 1);       /* ... and put +1 (bit 0) or -1 (bit 1) at b */
+    signs >>= 1;                            /* consume the sign bit just used */
+  }
+  shake256_inc_ctx_release(&state);
+}
+
+/*************************************************
+* Name:        polyeta_pack
+*
+* Description: Bit-pack polynomial with coefficients in [-ETA,ETA].
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                            POLYETA_PACKEDBYTES bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void polyeta_pack(uint8_t *r, const poly *a) {
+  unsigned int i;
+  uint8_t t[8];                     /* coefficients of the current group, stored as ETA - coeff */
+  DBENCH_START();                   /* benchmarking hook; macro is defined outside this chunk */
+
+#if ETA == 2
+  for(i = 0; i < N/8; ++i) {        /* 8 coefficients -> 3 bytes (3 bits each) */
+    t[0] = ETA - a->coeffs[8*i+0];  /* for inputs in [-ETA,ETA] this lies in [0, 2*ETA] */
+    t[1] = ETA - a->coeffs[8*i+1];
+    t[2] = ETA - a->coeffs[8*i+2];
+    t[3] = ETA - a->coeffs[8*i+3];
+    t[4] = ETA - a->coeffs[8*i+4];
+    t[5] = ETA - a->coeffs[8*i+5];
+    t[6] = ETA - a->coeffs[8*i+6];
+    t[7] = ETA - a->coeffs[8*i+7];
+
+    r[3*i+0]  = (t[0] >> 0) | (t[1] << 3) | (t[2] << 6);               /* t[2] straddles bytes 0 and 1 */
+    r[3*i+1]  = (t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7); /* t[5] straddles bytes 1 and 2 */
+    r[3*i+2]  = (t[5] >> 1) | (t[6] << 2) | (t[7] << 5);
+  }
+#elif ETA == 4
+  for(i = 0; i < N/2; ++i) {        /* 2 coefficients -> 1 byte (4 bits each) */
+    t[0] = ETA - a->coeffs[2*i+0];
+    t[1] = ETA - a->coeffs[2*i+1];
+    r[i] = t[0] | (t[1] << 4);      /* even coefficient in the low nibble, odd in the high nibble */
+  }
+#endif
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyeta_unpack
+*
+* Description: Unpack polynomial with coefficients in [-ETA,ETA].
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void polyeta_unpack(poly *r, const uint8_t *a) {
+  unsigned int i;
+  DBENCH_START();                   /* benchmarking hook; macro is defined outside this chunk */
+
+#if ETA == 2
+  for(i = 0; i < N/8; ++i) {        /* 3 bytes -> 8 coefficients (3 bits each) */
+    r->coeffs[8*i+0] =  (a[3*i+0] >> 0) & 7;
+    r->coeffs[8*i+1] =  (a[3*i+0] >> 3) & 7;
+    r->coeffs[8*i+2] = ((a[3*i+0] >> 6) | (a[3*i+1] << 2)) & 7;   /* field straddles bytes 0 and 1 */
+    r->coeffs[8*i+3] =  (a[3*i+1] >> 1) & 7;
+    r->coeffs[8*i+4] =  (a[3*i+1] >> 4) & 7;
+    r->coeffs[8*i+5] = ((a[3*i+1] >> 7) | (a[3*i+2] << 1)) & 7;   /* field straddles bytes 1 and 2 */
+    r->coeffs[8*i+6] =  (a[3*i+2] >> 2) & 7;
+    r->coeffs[8*i+7] =  (a[3*i+2] >> 5) & 7;
+
+    r->coeffs[8*i+0] = ETA - r->coeffs[8*i+0];   /* stored value t becomes coefficient ETA - t */
+    r->coeffs[8*i+1] = ETA - r->coeffs[8*i+1];
+    r->coeffs[8*i+2] = ETA - r->coeffs[8*i+2];
+    r->coeffs[8*i+3] = ETA - r->coeffs[8*i+3];
+    r->coeffs[8*i+4] = ETA - r->coeffs[8*i+4];
+    r->coeffs[8*i+5] = ETA - r->coeffs[8*i+5];
+    r->coeffs[8*i+6] = ETA - r->coeffs[8*i+6];
+    r->coeffs[8*i+7] = ETA - r->coeffs[8*i+7];
+  }
+#elif ETA == 4
+  for(i = 0; i < N/2; ++i) {        /* 1 byte -> 2 coefficients (4 bits each) */
+    r->coeffs[2*i+0] = a[i] & 0x0F; /* low nibble */
+    r->coeffs[2*i+1] = a[i] >> 4;   /* high nibble */
+    r->coeffs[2*i+0] = ETA - r->coeffs[2*i+0];   /* stored value t becomes coefficient ETA - t */
+    r->coeffs[2*i+1] = ETA - r->coeffs[2*i+1];
+  }
+#endif
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyt1_pack
+*
+* Description: Bit-pack polynomial t1 with coefficients fitting in 10 bits.
+*              Input coefficients are assumed to be standard representatives.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                            POLYT1_PACKEDBYTES bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void polyt1_pack(uint8_t *r, const poly *a) {
+  unsigned int i;
+  DBENCH_START();                   /* benchmarking hook; macro is defined outside this chunk */
+
+  for(i = 0; i < N/4; ++i) {        /* 4 coefficients -> 5 bytes (10 bits each) */
+    r[5*i+0] = (a->coeffs[4*i+0] >> 0);                              /* low 8 bits of c0 (assignment truncates to a byte) */
+    r[5*i+1] = (a->coeffs[4*i+0] >> 8) | (a->coeffs[4*i+1] << 2);    /* top 2 bits of c0, low 6 of c1 */
+    r[5*i+2] = (a->coeffs[4*i+1] >> 6) | (a->coeffs[4*i+2] << 4);    /* top 4 bits of c1, low 4 of c2 */
+    r[5*i+3] = (a->coeffs[4*i+2] >> 4) | (a->coeffs[4*i+3] << 6);    /* top 6 bits of c2, low 2 of c3 */
+    r[5*i+4] = (a->coeffs[4*i+3] >> 2);                              /* top 8 bits of c3 */
+  }
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyt1_unpack
+*
+* Description: Unpack polynomial t1 with 10-bit coefficients (inverse layout of polyt1_pack).
+*              Output coefficients are standard representatives.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void polyt1_unpack(poly *r, const uint8_t *a) {
+  unsigned int i;
+  DBENCH_START();                   /* benchmarking hook; macro is defined outside this chunk */
+
+  for(i = 0; i < N/4; ++i) {        /* 5 bytes -> 4 coefficients; 0x3FF keeps 10 bits */
+    r->coeffs[4*i+0] = ((a[5*i+0] >> 0) | ((uint32_t)a[5*i+1] << 8)) & 0x3FF;
+    r->coeffs[4*i+1] = ((a[5*i+1] >> 2) | ((uint32_t)a[5*i+2] << 6)) & 0x3FF;
+    r->coeffs[4*i+2] = ((a[5*i+2] >> 4) | ((uint32_t)a[5*i+3] << 4)) & 0x3FF;
+    r->coeffs[4*i+3] = ((a[5*i+3] >> 6) | ((uint32_t)a[5*i+4] << 2)) & 0x3FF;
+  }
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyt0_pack
+*
+* Description: Bit-pack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}].
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                            POLYT0_PACKEDBYTES bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void polyt0_pack(uint8_t *r, const poly *a) {
+  unsigned int i;
+  uint32_t t[8];                    /* current group of 8 coefficients, stored as 2^{D-1} - coeff */
+  DBENCH_START();                   /* benchmarking hook; macro is defined outside this chunk */
+
+  for(i = 0; i < N/8; ++i) {        /* 8 coefficients -> 13 bytes (13 bits each) */
+    t[0] = (1 << (D-1)) - a->coeffs[8*i+0];   /* non-negative for inputs in the documented range */
+    t[1] = (1 << (D-1)) - a->coeffs[8*i+1];
+    t[2] = (1 << (D-1)) - a->coeffs[8*i+2];
+    t[3] = (1 << (D-1)) - a->coeffs[8*i+3];
+    t[4] = (1 << (D-1)) - a->coeffs[8*i+4];
+    t[5] = (1 << (D-1)) - a->coeffs[8*i+5];
+    t[6] = (1 << (D-1)) - a->coeffs[8*i+6];
+    t[7] = (1 << (D-1)) - a->coeffs[8*i+7];
+
+    r[13*i+ 0]  =  t[0];            /* each byte is first assigned, then OR-ed with the next field; order matters */
+    r[13*i+ 1]  =  t[0] >>  8;
+    r[13*i+ 1] |=  t[1] <<  5;
+    r[13*i+ 2]  =  t[1] >>  3;
+    r[13*i+ 3]  =  t[1] >> 11;
+    r[13*i+ 3] |=  t[2] <<  2;
+    r[13*i+ 4]  =  t[2] >>  6;
+    r[13*i+ 4] |=  t[3] <<  7;
+    r[13*i+ 5]  =  t[3] >>  1;
+    r[13*i+ 6]  =  t[3] >>  9;
+    r[13*i+ 6] |=  t[4] <<  4;
+    r[13*i+ 7]  =  t[4] >>  4;
+    r[13*i+ 8]  =  t[4] >> 12;
+    r[13*i+ 8] |=  t[5] <<  1;
+    r[13*i+ 9]  =  t[5] >>  7;
+    r[13*i+ 9] |=  t[6] <<  6;
+    r[13*i+10]  =  t[6] >>  2;
+    r[13*i+11]  =  t[6] >> 10;
+    r[13*i+11] |=  t[7] <<  3;
+    r[13*i+12]  =  t[7] >>  5;
+  }
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyt0_unpack
+*
+* Description: Unpack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}].
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void polyt0_unpack(poly *r, const uint8_t *a) {
+  unsigned int i;
+  DBENCH_START();                   /* benchmarking hook; macro is defined outside this chunk */
+
+  for(i = 0; i < N/8; ++i) {        /* 13 bytes -> 8 coefficients; 0x1FFF keeps 13 bits */
+    r->coeffs[8*i+0]  = a[13*i+0];
+    r->coeffs[8*i+0] |= (uint32_t)a[13*i+1] << 8;
+    r->coeffs[8*i+0] &= 0x1FFF;
+
+    r->coeffs[8*i+1]  = a[13*i+1] >> 5;
+    r->coeffs[8*i+1] |= (uint32_t)a[13*i+2] << 3;
+    r->coeffs[8*i+1] |= (uint32_t)a[13*i+3] << 11;
+    r->coeffs[8*i+1] &= 0x1FFF;
+
+    r->coeffs[8*i+2]  = a[13*i+3] >> 2;
+    r->coeffs[8*i+2] |= (uint32_t)a[13*i+4] << 6;
+    r->coeffs[8*i+2] &= 0x1FFF;
+
+    r->coeffs[8*i+3]  = a[13*i+4] >> 7;
+    r->coeffs[8*i+3] |= (uint32_t)a[13*i+5] << 1;
+    r->coeffs[8*i+3] |= (uint32_t)a[13*i+6] << 9;
+    r->coeffs[8*i+3] &= 0x1FFF;
+
+    r->coeffs[8*i+4]  = a[13*i+6] >> 4;
+    r->coeffs[8*i+4] |= (uint32_t)a[13*i+7] << 4;
+    r->coeffs[8*i+4] |= (uint32_t)a[13*i+8] << 12;
+    r->coeffs[8*i+4] &= 0x1FFF;
+
+    r->coeffs[8*i+5]  = a[13*i+8] >> 1;
+    r->coeffs[8*i+5] |= (uint32_t)a[13*i+9] << 7;
+    r->coeffs[8*i+5] &= 0x1FFF;
+
+    r->coeffs[8*i+6]  = a[13*i+9] >> 6;
+    r->coeffs[8*i+6] |= (uint32_t)a[13*i+10] << 2;
+    r->coeffs[8*i+6] |= (uint32_t)a[13*i+11] << 10;
+    r->coeffs[8*i+6] &= 0x1FFF;
+
+    r->coeffs[8*i+7]  = a[13*i+11] >> 3;
+    r->coeffs[8*i+7] |= (uint32_t)a[13*i+12] << 5;
+    r->coeffs[8*i+7] &= 0x1FFF;
+
+    r->coeffs[8*i+0] = (1 << (D-1)) - r->coeffs[8*i+0];   /* stored value t becomes coefficient 2^{D-1} - t */
+    r->coeffs[8*i+1] = (1 << (D-1)) - r->coeffs[8*i+1];
+    r->coeffs[8*i+2] = (1 << (D-1)) - r->coeffs[8*i+2];
+    r->coeffs[8*i+3] = (1 << (D-1)) - r->coeffs[8*i+3];
+    r->coeffs[8*i+4] = (1 << (D-1)) - r->coeffs[8*i+4];
+    r->coeffs[8*i+5] = (1 << (D-1)) - r->coeffs[8*i+5];
+    r->coeffs[8*i+6] = (1 << (D-1)) - r->coeffs[8*i+6];
+    r->coeffs[8*i+7] = (1 << (D-1)) - r->coeffs[8*i+7];
+  }
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyz_pack
+*
+* Description: Bit-pack polynomial with
coefficients
+*              in [-(GAMMA1 - 1), GAMMA1].
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                            POLYZ_PACKEDBYTES bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void polyz_pack(uint8_t *r, const poly *a) {
+  unsigned int i;
+  uint32_t t[4];                    /* current group, stored as GAMMA1 - coeff */
+  DBENCH_START();                   /* benchmarking hook; macro is defined outside this chunk */
+
+#if GAMMA1 == (1 << 17)
+  for(i = 0; i < N/4; ++i) {        /* 4 coefficients -> 9 bytes (18 bits each) */
+    t[0] = GAMMA1 - a->coeffs[4*i+0];   /* non-negative for inputs in the documented range */
+    t[1] = GAMMA1 - a->coeffs[4*i+1];
+    t[2] = GAMMA1 - a->coeffs[4*i+2];
+    t[3] = GAMMA1 - a->coeffs[4*i+3];
+
+    r[9*i+0]  = t[0];               /* each byte is first assigned, then OR-ed with the next field; order matters */
+    r[9*i+1]  = t[0] >> 8;
+    r[9*i+2]  = t[0] >> 16;
+    r[9*i+2] |= t[1] << 2;
+    r[9*i+3]  = t[1] >> 6;
+    r[9*i+4]  = t[1] >> 14;
+    r[9*i+4] |= t[2] << 4;
+    r[9*i+5]  = t[2] >> 4;
+    r[9*i+6]  = t[2] >> 12;
+    r[9*i+6] |= t[3] << 6;
+    r[9*i+7]  = t[3] >> 2;
+    r[9*i+8]  = t[3] >> 10;
+  }
+#elif GAMMA1 == (1 << 19)
+  for(i = 0; i < N/2; ++i) {        /* 2 coefficients -> 5 bytes (20 bits each) */
+    t[0] = GAMMA1 - a->coeffs[2*i+0];
+    t[1] = GAMMA1 - a->coeffs[2*i+1];
+
+    r[5*i+0]  = t[0];
+    r[5*i+1]  = t[0] >> 8;
+    r[5*i+2]  = t[0] >> 16;         /* top 4 bits of t[0] in the low nibble ... */
+    r[5*i+2] |= t[1] << 4;          /* ... low 4 bits of t[1] in the high nibble */
+    r[5*i+3]  = t[1] >> 4;
+    r[5*i+4]  = t[1] >> 12;
+  }
+#endif
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyz_unpack
+*
+* Description: Unpack polynomial z with coefficients
+*              in [-(GAMMA1 - 1), GAMMA1].
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void polyz_unpack(poly *r, const uint8_t *a) {
+  unsigned int i;
+  DBENCH_START();                   /* benchmarking hook; macro is defined outside this chunk */
+
+#if GAMMA1 == (1 << 17)
+  for(i = 0; i < N/4; ++i) {        /* 9 bytes -> 4 coefficients; 0x3FFFF keeps 18 bits */
+    r->coeffs[4*i+0]  = a[9*i+0];
+    r->coeffs[4*i+0] |= (uint32_t)a[9*i+1] << 8;
+    r->coeffs[4*i+0] |= (uint32_t)a[9*i+2] << 16;
+    r->coeffs[4*i+0] &= 0x3FFFF;
+
+    r->coeffs[4*i+1]  = a[9*i+2] >> 2;
+    r->coeffs[4*i+1] |= (uint32_t)a[9*i+3] << 6;
+    r->coeffs[4*i+1] |= (uint32_t)a[9*i+4] << 14;
+    r->coeffs[4*i+1] &= 0x3FFFF;
+
+    r->coeffs[4*i+2]  = a[9*i+4] >> 4;
+    r->coeffs[4*i+2] |= (uint32_t)a[9*i+5] << 4;
+    r->coeffs[4*i+2] |= (uint32_t)a[9*i+6] << 12;
+    r->coeffs[4*i+2] &= 0x3FFFF;
+
+    r->coeffs[4*i+3]  = a[9*i+6] >> 6;
+    r->coeffs[4*i+3] |= (uint32_t)a[9*i+7] << 2;
+    r->coeffs[4*i+3] |= (uint32_t)a[9*i+8] << 10;
+    r->coeffs[4*i+3] &= 0x3FFFF;
+
+    r->coeffs[4*i+0] = GAMMA1 - r->coeffs[4*i+0];   /* stored value t becomes coefficient GAMMA1 - t */
+    r->coeffs[4*i+1] = GAMMA1 - r->coeffs[4*i+1];
+    r->coeffs[4*i+2] = GAMMA1 - r->coeffs[4*i+2];
+    r->coeffs[4*i+3] = GAMMA1 - r->coeffs[4*i+3];
+  }
+#elif GAMMA1 == (1 << 19)
+  for(i = 0; i < N/2; ++i) {        /* 5 bytes -> 2 coefficients (20 bits each) */
+    r->coeffs[2*i+0]  = a[5*i+0];
+    r->coeffs[2*i+0] |= (uint32_t)a[5*i+1] << 8;
+    r->coeffs[2*i+0] |= (uint32_t)a[5*i+2] << 16;
+    r->coeffs[2*i+0] &= 0xFFFFF;    /* drop the high nibble of byte 2, which belongs to the next coefficient */
+
+    r->coeffs[2*i+1]  = a[5*i+2] >> 4;
+    r->coeffs[2*i+1] |= (uint32_t)a[5*i+3] << 4;
+    r->coeffs[2*i+1] |= (uint32_t)a[5*i+4] << 12;
+    /* r->coeffs[2*i+1] &= 0xFFFFF; */ /* No effect, since we're anyway at 20 bits */
+
+    r->coeffs[2*i+0] = GAMMA1 - r->coeffs[2*i+0];   /* stored value t becomes coefficient GAMMA1 - t */
+    r->coeffs[2*i+1] = GAMMA1 - r->coeffs[2*i+1];
+  }
+#endif
+
+  DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        polyw1_pack
+*
+* Description: Bit-pack polynomial w1 with coefficients in [0,15] or [0,43].
+*              Input coefficients are assumed to be standard representatives.
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYW1_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void polyw1_pack(uint8_t *r, const poly *a) { + unsigned int i; + DBENCH_START(); + +#if GAMMA2 == (Q-1)/88 + for(i = 0; i < N/4; ++i) { + r[3*i+0] = a->coeffs[4*i+0]; + r[3*i+0] |= a->coeffs[4*i+1] << 6; + r[3*i+1] = a->coeffs[4*i+1] >> 2; + r[3*i+1] |= a->coeffs[4*i+2] << 4; + r[3*i+2] = a->coeffs[4*i+2] >> 4; + r[3*i+2] |= a->coeffs[4*i+3] << 2; + } +#elif GAMMA2 == (Q-1)/32 + for(i = 0; i < N/2; ++i) + r[i] = a->coeffs[2*i+0] | (a->coeffs[2*i+1] << 4); +#endif + + DBENCH_STOP(*tpack); +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/poly.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/poly.h new file mode 100644 index 0000000000..d2fd989b6a --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/poly.h @@ -0,0 +1,79 @@ +#ifndef POLY_H +#define POLY_H + +#include +#include "params.h" + +typedef struct { + int32_t coeffs[N]; +} poly; + +#define poly_reduce DILITHIUM_NAMESPACE(poly_reduce) +void poly_reduce(poly *a); +#define poly_caddq DILITHIUM_NAMESPACE(poly_caddq) +void poly_caddq(poly *a); + +#define poly_add DILITHIUM_NAMESPACE(poly_add) +void poly_add(poly *c, const poly *a, const poly *b); +#define poly_sub DILITHIUM_NAMESPACE(poly_sub) +void poly_sub(poly *c, const poly *a, const poly *b); +#define poly_shiftl DILITHIUM_NAMESPACE(poly_shiftl) +void poly_shiftl(poly *a); + +#define poly_ntt DILITHIUM_NAMESPACE(poly_ntt) +void poly_ntt(poly *a); +#define poly_invntt_tomont DILITHIUM_NAMESPACE(poly_invntt_tomont) +void poly_invntt_tomont(poly *a); +#define poly_pointwise_montgomery DILITHIUM_NAMESPACE(poly_pointwise_montgomery) +void poly_pointwise_montgomery(poly *c, const poly *a, const poly *b); + +#define poly_power2round DILITHIUM_NAMESPACE(poly_power2round) +void 
poly_power2round(poly *a1, poly *a0, const poly *a); +#define poly_decompose DILITHIUM_NAMESPACE(poly_decompose) +void poly_decompose(poly *a1, poly *a0, const poly *a); +#define poly_make_hint DILITHIUM_NAMESPACE(poly_make_hint) +unsigned int poly_make_hint(poly *h, const poly *a0, const poly *a1); +#define poly_use_hint DILITHIUM_NAMESPACE(poly_use_hint) +void poly_use_hint(poly *b, const poly *a, const poly *h); + +#define poly_chknorm DILITHIUM_NAMESPACE(poly_chknorm) +int poly_chknorm(const poly *a, int32_t B); +#define poly_uniform DILITHIUM_NAMESPACE(poly_uniform) +void poly_uniform(poly *a, + const uint8_t seed[SEEDBYTES], + uint16_t nonce); +#define poly_uniform_eta DILITHIUM_NAMESPACE(poly_uniform_eta) +void poly_uniform_eta(poly *a, + const uint8_t seed[CRHBYTES], + uint16_t nonce); +#define poly_uniform_gamma1 DILITHIUM_NAMESPACE(poly_uniform_gamma1) +void poly_uniform_gamma1(poly *a, + const uint8_t seed[CRHBYTES], + uint16_t nonce); +#define poly_challenge DILITHIUM_NAMESPACE(poly_challenge) +void poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]); + +#define polyeta_pack DILITHIUM_NAMESPACE(polyeta_pack) +void polyeta_pack(uint8_t *r, const poly *a); +#define polyeta_unpack DILITHIUM_NAMESPACE(polyeta_unpack) +void polyeta_unpack(poly *r, const uint8_t *a); + +#define polyt1_pack DILITHIUM_NAMESPACE(polyt1_pack) +void polyt1_pack(uint8_t *r, const poly *a); +#define polyt1_unpack DILITHIUM_NAMESPACE(polyt1_unpack) +void polyt1_unpack(poly *r, const uint8_t *a); + +#define polyt0_pack DILITHIUM_NAMESPACE(polyt0_pack) +void polyt0_pack(uint8_t *r, const poly *a); +#define polyt0_unpack DILITHIUM_NAMESPACE(polyt0_unpack) +void polyt0_unpack(poly *r, const uint8_t *a); + +#define polyz_pack DILITHIUM_NAMESPACE(polyz_pack) +void polyz_pack(uint8_t *r, const poly *a); +#define polyz_unpack DILITHIUM_NAMESPACE(polyz_unpack) +void polyz_unpack(poly *r, const uint8_t *a); + +#define polyw1_pack DILITHIUM_NAMESPACE(polyw1_pack) +void polyw1_pack(uint8_t 
*r, const poly *a); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/polyvec.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/polyvec.c new file mode 100644 index 0000000000..40032b656b --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/polyvec.c @@ -0,0 +1,389 @@ +#include +#include "params.h" +#include "polyvec.h" +#include "poly.h" + +/************************************************* +* Name: expand_mat +* +* Description: Implementation of ExpandA. Generates matrix A with uniformly +* random coefficients a_{i,j} by performing rejection +* sampling on the output stream of SHAKE128(rho|j|i) +* +* Arguments: - polyvecl mat[K]: output matrix +* - const uint8_t rho[]: byte array containing seed rho +**************************************************/ +void polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]) { + unsigned int i, j; + + for(i = 0; i < K; ++i) + for(j = 0; j < L; ++j) + poly_uniform(&mat[i].vec[j], rho, (i << 8) + j); +} + +void polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + polyvecl_pointwise_acc_montgomery(&t->vec[i], &mat[i], v); +} + +/**************************************************************/ +/************ Vectors of polynomials of length L **************/ +/**************************************************************/ + +void polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_uniform_eta(&v->vec[i], seed, nonce++); +} + +void polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_uniform_gamma1(&v->vec[i], seed, L*nonce + i); +} + +void polyvecl_reduce(polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_reduce(&v->vec[i]); +} + 
+/************************************************* +* Name: polyvecl_add +* +* Description: Add vectors of polynomials of length L. +* No modular reduction is performed. +* +* Arguments: - polyvecl *w: pointer to output vector +* - const polyvecl *u: pointer to first summand +* - const polyvecl *v: pointer to second summand +**************************************************/ +void polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyvecl_ntt +* +* Description: Forward NTT of all polynomials in vector of length L. Output +* coefficients can be up to 16*Q larger than input coefficients. +* +* Arguments: - polyvecl *v: pointer to input/output vector +**************************************************/ +void polyvecl_ntt(polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_ntt(&v->vec[i]); +} + +void polyvecl_invntt_tomont(polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_invntt_tomont(&v->vec[i]); +} + +void polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v) { + unsigned int i; + + for(i = 0; i < L; ++i) + poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]); +} + +/************************************************* +* Name: polyvecl_pointwise_acc_montgomery +* +* Description: Pointwise multiply vectors of polynomials of length L, multiply +* resulting vector by 2^{-32} and add (accumulate) polynomials +* in it. Input/output vectors are in NTT domain representation. 
+* +* Arguments: - poly *w: output polynomial +* - const polyvecl *u: pointer to first input vector +* - const polyvecl *v: pointer to second input vector +**************************************************/ +void polyvecl_pointwise_acc_montgomery(poly *w, + const polyvecl *u, + const polyvecl *v) +{ + unsigned int i; + poly t; + + poly_pointwise_montgomery(w, &u->vec[0], &v->vec[0]); + for(i = 1; i < L; ++i) { + poly_pointwise_montgomery(&t, &u->vec[i], &v->vec[i]); + poly_add(w, w, &t); + } +} + +/************************************************* +* Name: polyvecl_chknorm +* +* Description: Check infinity norm of polynomials in vector of length L. +* Assumes input polyvecl to be reduced by polyvecl_reduce(). +* +* Arguments: - const polyvecl *v: pointer to vector +* - int32_t B: norm bound +* +* Returns 0 if norm of all polynomials is strictly smaller than B <= (Q-1)/8 +* and 1 otherwise. +**************************************************/ +int polyvecl_chknorm(const polyvecl *v, int32_t bound) { + unsigned int i; + + for(i = 0; i < L; ++i) + if(poly_chknorm(&v->vec[i], bound)) + return 1; + + return 0; +} + +/**************************************************************/ +/************ Vectors of polynomials of length K **************/ +/**************************************************************/ + +void polyveck_uniform_eta(polyveck *v, const uint8_t seed[CRHBYTES], uint16_t nonce) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_uniform_eta(&v->vec[i], seed, nonce++); +} + +/************************************************* +* Name: polyveck_reduce +* +* Description: Reduce coefficients of polynomials in vector of length K +* to representatives in [-6283009,6283007]. 
+* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_reduce(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_reduce(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_caddq +* +* Description: For all coefficients of polynomials in vector of length K +* add Q if coefficient is negative. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_caddq(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_caddq(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_add +* +* Description: Add vectors of polynomials of length K. +* No modular reduction is performed. +* +* Arguments: - polyveck *w: pointer to output vector +* - const polyveck *u: pointer to first summand +* - const polyveck *v: pointer to second summand +**************************************************/ +void polyveck_add(polyveck *w, const polyveck *u, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_sub +* +* Description: Subtract vectors of polynomials of length K. +* No modular reduction is performed. +* +* Arguments: - polyveck *w: pointer to output vector +* - const polyveck *u: pointer to first input vector +* - const polyveck *v: pointer to second input vector to be +* subtracted from first input vector +**************************************************/ +void polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_sub(&w->vec[i], &u->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_shiftl +* +* Description: Multiply vector of polynomials of Length K by 2^D without modular +* reduction. 
Assumes input coefficients to be less than 2^{31-D}. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_shiftl(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_shiftl(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_ntt +* +* Description: Forward NTT of all polynomials in vector of length K. Output +* coefficients can be up to 16*Q larger than input coefficients. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_ntt(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_ntt(&v->vec[i]); +} + +/************************************************* +* Name: polyveck_invntt_tomont +* +* Description: Inverse NTT and multiplication by 2^{32} of polynomials +* in vector of length K. Input coefficients need to be less +* than 2*Q. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void polyveck_invntt_tomont(polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_invntt_tomont(&v->vec[i]); +} + +void polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]); +} + + +/************************************************* +* Name: polyveck_chknorm +* +* Description: Check infinity norm of polynomials in vector of length K. +* Assumes input polyveck to be reduced by polyveck_reduce(). +* +* Arguments: - const polyveck *v: pointer to vector +* - int32_t B: norm bound +* +* Returns 0 if norm of all polynomials are strictly smaller than B <= (Q-1)/8 +* and 1 otherwise. 
+**************************************************/ +int polyveck_chknorm(const polyveck *v, int32_t bound) { + unsigned int i; + + for(i = 0; i < K; ++i) + if(poly_chknorm(&v->vec[i], bound)) + return 1; + + return 0; +} + +/************************************************* +* Name: polyveck_power2round +* +* Description: For all coefficients a of polynomials in vector of length K, +* compute a0, a1 such that a mod^+ Q = a1*2^D + a0 +* with -2^{D-1} < a0 <= 2^{D-1}. Assumes coefficients to be +* standard representatives. +* +* Arguments: - polyveck *v1: pointer to output vector of polynomials with +* coefficients a1 +* - polyveck *v0: pointer to output vector of polynomials with +* coefficients a0 +* - const polyveck *v: pointer to input vector +**************************************************/ +void polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_power2round(&v1->vec[i], &v0->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_decompose +* +* Description: For all coefficients a of polynomials in vector of length K, +* compute high and low bits a0, a1 such a mod^+ Q = a1*ALPHA + a0 +* with -ALPHA/2 < a0 <= ALPHA/2 except a1 = (Q-1)/ALPHA where we +* set a1 = 0 and -ALPHA/2 <= a0 = a mod Q - Q < 0. +* Assumes coefficients to be standard representatives. +* +* Arguments: - polyveck *v1: pointer to output vector of polynomials with +* coefficients a1 +* - polyveck *v0: pointer to output vector of polynomials with +* coefficients a0 +* - const polyveck *v: pointer to input vector +**************************************************/ +void polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_decompose(&v1->vec[i], &v0->vec[i], &v->vec[i]); +} + +/************************************************* +* Name: polyveck_make_hint +* +* Description: Compute hint vector. 
+* +* Arguments: - polyveck *h: pointer to output vector +* - const polyveck *v0: pointer to low part of input vector +* - const polyveck *v1: pointer to high part of input vector +* +* Returns number of 1 bits. +**************************************************/ +unsigned int polyveck_make_hint(polyveck *h, + const polyveck *v0, + const polyveck *v1) +{ + unsigned int i, s = 0; + + for(i = 0; i < K; ++i) + s += poly_make_hint(&h->vec[i], &v0->vec[i], &v1->vec[i]); + + return s; +} + +/************************************************* +* Name: polyveck_use_hint +* +* Description: Use hint vector to correct the high bits of input vector. +* +* Arguments: - polyveck *w: pointer to output vector of polynomials with +* corrected high bits +* - const polyveck *u: pointer to input vector +* - const polyveck *h: pointer to input hint vector +**************************************************/ +void polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h) { + unsigned int i; + + for(i = 0; i < K; ++i) + poly_use_hint(&w->vec[i], &u->vec[i], &h->vec[i]); +} + +void polyveck_pack_w1(uint8_t r[K*POLYW1_PACKEDBYTES], const polyveck *w1) { + unsigned int i; + + for(i = 0; i < K; ++i) + polyw1_pack(&r[i*POLYW1_PACKEDBYTES], &w1->vec[i]); +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/polyvec.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/polyvec.h new file mode 100644 index 0000000000..615ac52990 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/polyvec.h @@ -0,0 +1,93 @@ +#ifndef POLYVEC_H +#define POLYVEC_H + +#include +#include "params.h" +#include "poly.h" + +/* Vectors of polynomials of length L */ +typedef struct { + poly vec[L]; +} polyvecl; + +#define polyvecl_uniform_eta DILITHIUM_NAMESPACE(polyvecl_uniform_eta) +void polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define polyvecl_uniform_gamma1 
DILITHIUM_NAMESPACE(polyvecl_uniform_gamma1) +void polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define polyvecl_reduce DILITHIUM_NAMESPACE(polyvecl_reduce) +void polyvecl_reduce(polyvecl *v); + +#define polyvecl_add DILITHIUM_NAMESPACE(polyvecl_add) +void polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v); + +#define polyvecl_ntt DILITHIUM_NAMESPACE(polyvecl_ntt) +void polyvecl_ntt(polyvecl *v); +#define polyvecl_invntt_tomont DILITHIUM_NAMESPACE(polyvecl_invntt_tomont) +void polyvecl_invntt_tomont(polyvecl *v); +#define polyvecl_pointwise_poly_montgomery DILITHIUM_NAMESPACE(polyvecl_pointwise_poly_montgomery) +void polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v); +#define polyvecl_pointwise_acc_montgomery \ + DILITHIUM_NAMESPACE(polyvecl_pointwise_acc_montgomery) +void polyvecl_pointwise_acc_montgomery(poly *w, + const polyvecl *u, + const polyvecl *v); + + +#define polyvecl_chknorm DILITHIUM_NAMESPACE(polyvecl_chknorm) +int polyvecl_chknorm(const polyvecl *v, int32_t B); + + + +/* Vectors of polynomials of length K */ +typedef struct { + poly vec[K]; +} polyveck; + +#define polyveck_uniform_eta DILITHIUM_NAMESPACE(polyveck_uniform_eta) +void polyveck_uniform_eta(polyveck *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +#define polyveck_reduce DILITHIUM_NAMESPACE(polyveck_reduce) +void polyveck_reduce(polyveck *v); +#define polyveck_caddq DILITHIUM_NAMESPACE(polyveck_caddq) +void polyveck_caddq(polyveck *v); + +#define polyveck_add DILITHIUM_NAMESPACE(polyveck_add) +void polyveck_add(polyveck *w, const polyveck *u, const polyveck *v); +#define polyveck_sub DILITHIUM_NAMESPACE(polyveck_sub) +void polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v); +#define polyveck_shiftl DILITHIUM_NAMESPACE(polyveck_shiftl) +void polyveck_shiftl(polyveck *v); + +#define polyveck_ntt DILITHIUM_NAMESPACE(polyveck_ntt) +void polyveck_ntt(polyveck *v); +#define 
polyveck_invntt_tomont DILITHIUM_NAMESPACE(polyveck_invntt_tomont) +void polyveck_invntt_tomont(polyveck *v); +#define polyveck_pointwise_poly_montgomery DILITHIUM_NAMESPACE(polyveck_pointwise_poly_montgomery) +void polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v); + +#define polyveck_chknorm DILITHIUM_NAMESPACE(polyveck_chknorm) +int polyveck_chknorm(const polyveck *v, int32_t B); + +#define polyveck_power2round DILITHIUM_NAMESPACE(polyveck_power2round) +void polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v); +#define polyveck_decompose DILITHIUM_NAMESPACE(polyveck_decompose) +void polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v); +#define polyveck_make_hint DILITHIUM_NAMESPACE(polyveck_make_hint) +unsigned int polyveck_make_hint(polyveck *h, + const polyveck *v0, + const polyveck *v1); +#define polyveck_use_hint DILITHIUM_NAMESPACE(polyveck_use_hint) +void polyveck_use_hint(polyveck *w, const polyveck *v, const polyveck *h); + +#define polyveck_pack_w1 DILITHIUM_NAMESPACE(polyveck_pack_w1) +void polyveck_pack_w1(uint8_t r[K*POLYW1_PACKEDBYTES], const polyveck *w1); + +#define polyvec_matrix_expand DILITHIUM_NAMESPACE(polyvec_matrix_expand) +void polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]); + +#define polyvec_matrix_pointwise_montgomery DILITHIUM_NAMESPACE(polyvec_matrix_pointwise_montgomery) +void polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/reduce.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/reduce.c new file mode 100644 index 0000000000..75feff8bc5 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/reduce.c @@ -0,0 +1,69 @@ +#include +#include "params.h" +#include "reduce.h" + +/************************************************* +* Name: montgomery_reduce +* +* Description: For finite 
field element a with -2^{31}Q <= a <= Q*2^31, +* compute r \equiv a*2^{-32} (mod Q) such that -Q < r < Q. +* +* Arguments: - int64_t: finite field element a +* +* Returns r. +**************************************************/ +int32_t montgomery_reduce(int64_t a) { + int32_t t; + + t = (int64_t)(int32_t)a*QINV; + t = (a - (int64_t)t*Q) >> 32; + return t; +} + +/************************************************* +* Name: reduce32 +* +* Description: For finite field element a with a <= 2^{31} - 2^{22} - 1, +* compute r \equiv a (mod Q) such that -6283009 <= r <= 6283007. +* +* Arguments: - int32_t: finite field element a +* +* Returns r. +**************************************************/ +int32_t reduce32(int32_t a) { + int32_t t; + + t = (a + (1 << 22)) >> 23; + t = a - t*Q; + return t; +} + +/************************************************* +* Name: caddq +* +* Description: Add Q if input coefficient is negative. +* +* Arguments: - int32_t: finite field element a +* +* Returns r. +**************************************************/ +int32_t caddq(int32_t a) { + a += (a >> 31) & Q; + return a; +} + +/************************************************* +* Name: freeze +* +* Description: For finite field element a, compute standard +* representative r = a mod^+ Q. +* +* Arguments: - int32_t: finite field element a +* +* Returns r. 
+**************************************************/ +int32_t freeze(int32_t a) { + a = reduce32(a); + a = caddq(a); + return a; +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/reduce.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/reduce.h new file mode 100644 index 0000000000..26d9b4ee2e --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/reduce.h @@ -0,0 +1,22 @@ +#ifndef REDUCE_H +#define REDUCE_H + +#include +#include "params.h" + +#define MONT -4186625 // 2^32 % Q +#define QINV 58728449 // q^(-1) mod 2^32 + +#define montgomery_reduce DILITHIUM_NAMESPACE(montgomery_reduce) +int32_t montgomery_reduce(int64_t a); + +#define reduce32 DILITHIUM_NAMESPACE(reduce32) +int32_t reduce32(int32_t a); + +#define caddq DILITHIUM_NAMESPACE(caddq) +int32_t caddq(int32_t a); + +#define freeze DILITHIUM_NAMESPACE(freeze) +int32_t freeze(int32_t a); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/rounding.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/rounding.c new file mode 100644 index 0000000000..889f0a296b --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/rounding.c @@ -0,0 +1,102 @@ +#include +#include "params.h" +#include "rounding.h" + +/************************************************* +* Name: power2round +* +* Description: For finite field element a, compute a0, a1 such that +* a mod^+ Q = a1*2^D + a0 with -2^{D-1} < a0 <= 2^{D-1}. +* Assumes a to be standard representative. +* +* Arguments: - int32_t a: input element +* - int32_t *a0: pointer to output element a0 +* +* Returns a1. 
+**************************************************/ +int32_t power2round(int32_t *a0, int32_t a) { + int32_t a1; + + a1 = (a + (1 << (D-1)) - 1) >> D; + *a0 = a - (a1 << D); + return a1; +} + +/************************************************* +* Name: decompose +* +* Description: For finite field element a, compute high and low bits a0, a1 such +* that a mod^+ Q = a1*ALPHA + a0 with -ALPHA/2 < a0 <= ALPHA/2 except +* if a1 = (Q-1)/ALPHA where we set a1 = 0 and +* -ALPHA/2 <= a0 = a mod^+ Q - Q < 0. Assumes a to be standard +* representative. +* +* Arguments: - int32_t a: input element +* - int32_t *a0: pointer to output element a0 +* +* Returns a1. +**************************************************/ +int32_t decompose(int32_t *a0, int32_t a) { + int32_t a1; + + a1 = (a + 127) >> 7; +#if GAMMA2 == (Q-1)/32 + a1 = (a1*1025 + (1 << 21)) >> 22; + a1 &= 15; +#elif GAMMA2 == (Q-1)/88 + a1 = (a1*11275 + (1 << 23)) >> 24; + a1 ^= ((43 - a1) >> 31) & a1; +#endif + + *a0 = a - a1*2*GAMMA2; + *a0 -= (((Q-1)/2 - *a0) >> 31) & Q; + return a1; +} + +/************************************************* +* Name: make_hint +* +* Description: Compute hint bit indicating whether the low bits of the +* input element overflow into the high bits. +* +* Arguments: - int32_t a0: low bits of input element +* - int32_t a1: high bits of input element +* +* Returns 1 if overflow. +**************************************************/ +unsigned int make_hint(int32_t a0, int32_t a1) { + if(a0 > GAMMA2 || a0 < -GAMMA2 || (a0 == -GAMMA2 && a1 != 0)) + return 1; + + return 0; +} + +/************************************************* +* Name: use_hint +* +* Description: Correct high bits according to hint. +* +* Arguments: - int32_t a: input element +* - unsigned int hint: hint bit +* +* Returns corrected high bits. 
+**************************************************/ +int32_t use_hint(int32_t a, unsigned int hint) { + int32_t a0, a1; + + a1 = decompose(&a0, a); + if(hint == 0) + return a1; + +#if GAMMA2 == (Q-1)/32 + if(a0 > 0) + return (a1 + 1) & 15; + else + return (a1 - 1) & 15; +#elif GAMMA2 == (Q-1)/88 + if(a0 > 0) + return (a1 == 43) ? 0 : a1 + 1; + else + return (a1 == 0) ? 43 : a1 - 1; +#endif +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/rounding.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/rounding.h new file mode 100644 index 0000000000..b72e8e8d66 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/rounding.h @@ -0,0 +1,19 @@ +#ifndef ROUNDING_H +#define ROUNDING_H + +#include +#include "params.h" + +#define power2round DILITHIUM_NAMESPACE(power2round) +int32_t power2round(int32_t *a0, int32_t a); + +#define decompose DILITHIUM_NAMESPACE(decompose) +int32_t decompose(int32_t *a0, int32_t a); + +#define make_hint DILITHIUM_NAMESPACE(make_hint) +unsigned int make_hint(int32_t a0, int32_t a1); + +#define use_hint DILITHIUM_NAMESPACE(use_hint) +int32_t use_hint(int32_t a, unsigned int hint); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/sign.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/sign.c new file mode 100644 index 0000000000..9298ad2177 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/sign.c @@ -0,0 +1,341 @@ +#include +#include "params.h" +#include "sign.h" +#include "packing.h" +#include "polyvec.h" +#include "poly.h" +#include "randombytes.h" +#include "symmetric.h" +#include "fips202.h" + +/************************************************* +* Name: crypto_sign_keypair +* +* Description: Generates public and private key. 
+* +* Arguments: - uint8_t *pk: pointer to output public key (allocated +* array of CRYPTO_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key (allocated +* array of CRYPTO_SECRETKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { + uint8_t seedbuf[2*SEEDBYTES + CRHBYTES]; + uint8_t tr[TRBYTES]; + const uint8_t *rho, *rhoprime, *key; + polyvecl mat[K]; + polyvecl s1, s1hat; + polyveck s2, t1, t0; + + /* Get randomness for rho, rhoprime and key */ + randombytes(seedbuf, SEEDBYTES); + shake256(seedbuf, 2*SEEDBYTES + CRHBYTES, seedbuf, SEEDBYTES); + rho = seedbuf; + rhoprime = rho + SEEDBYTES; + key = rhoprime + CRHBYTES; + + /* Expand matrix */ + polyvec_matrix_expand(mat, rho); + + /* Sample short vectors s1 and s2 */ + polyvecl_uniform_eta(&s1, rhoprime, 0); + polyveck_uniform_eta(&s2, rhoprime, L); + + /* Matrix-vector multiplication */ + s1hat = s1; + polyvecl_ntt(&s1hat); + polyvec_matrix_pointwise_montgomery(&t1, mat, &s1hat); + polyveck_reduce(&t1); + polyveck_invntt_tomont(&t1); + + /* Add error vector s2 */ + polyveck_add(&t1, &t1, &s2); + + /* Extract t1 and write public key */ + polyveck_caddq(&t1); + polyveck_power2round(&t1, &t0, &t1); + pack_pk(pk, rho, &t1); + + /* Compute H(rho, t1) and write secret key */ + shake256(tr, TRBYTES, pk, CRYPTO_PUBLICKEYBYTES); + pack_sk(sk, rho, tr, key, &t0, &s1, &s2); + + return 0; +} + +/************************************************* +* Name: crypto_sign_signature +* +* Description: Computes signature. 
+* +* Arguments: - uint8_t *sig: pointer to output signature (of length CRYPTO_BYTES) +* - size_t *siglen: pointer to output length of signature +* - uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int crypto_sign_signature(uint8_t *sig, + size_t *siglen, + const uint8_t *m, + size_t mlen, + const uint8_t *sk) +{ + unsigned int n; + uint8_t seedbuf[2*SEEDBYTES + TRBYTES + RNDBYTES + 2*CRHBYTES]; + uint8_t *rho, *tr, *key, *mu, *rhoprime, *rnd; + uint16_t nonce = 0; + polyvecl mat[K], s1, y, z; + polyveck t0, s2, w1, w0, h; + poly cp; + shake256incctx state; + + rho = seedbuf; + tr = rho + SEEDBYTES; + key = tr + TRBYTES; + rnd = key + SEEDBYTES; + mu = rnd + RNDBYTES; + rhoprime = mu + CRHBYTES; + unpack_sk(rho, tr, key, &t0, &s1, &s2, sk); + + + /* Compute mu = CRH(tr, msg) */ + shake256_inc_init(&state); + shake256_inc_absorb(&state, tr, TRBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(mu, CRHBYTES, &state); + +#ifdef DILITHIUM_RANDOMIZED_SIGNING + randombytes(rnd, RNDBYTES); +#else + for(n=0;n OMEGA) + goto rej; + + shake256_inc_ctx_release(&state); + + /* Write signature */ + pack_sig(sig, sig, &z, &h); + *siglen = CRYPTO_BYTES; + return 0; +} + +/************************************************* +* Name: crypto_sign +* +* Description: Compute signed message. 
+* +* Arguments: - uint8_t *sm: pointer to output signed message (allocated +* array with CRYPTO_BYTES + mlen bytes), +* can be equal to m +* - size_t *smlen: pointer to output length of signed +* message +* - const uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - const uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int crypto_sign(uint8_t *sm, + size_t *smlen, + const uint8_t *m, + size_t mlen, + const uint8_t *sk) +{ + size_t i; + + for(i = 0; i < mlen; ++i) + sm[CRYPTO_BYTES + mlen - 1 - i] = m[mlen - 1 - i]; + crypto_sign_signature(sm, smlen, sm + CRYPTO_BYTES, mlen, sk); + *smlen += mlen; + return 0; +} + +/************************************************* +* Name: crypto_sign_verify +* +* Description: Verifies signature. +* +* Arguments: - uint8_t *m: pointer to input signature +* - size_t siglen: length of signature +* - const uint8_t *m: pointer to message +* - size_t mlen: length of message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signature could be verified correctly and -1 otherwise +**************************************************/ +int crypto_sign_verify(const uint8_t *sig, + size_t siglen, + const uint8_t *m, + size_t mlen, + const uint8_t *pk) +{ + unsigned int i; + uint8_t buf[K*POLYW1_PACKEDBYTES]; + uint8_t rho[SEEDBYTES]; + uint8_t mu[CRHBYTES]; + uint8_t c[CTILDEBYTES]; + uint8_t c2[CTILDEBYTES]; + poly cp; + polyvecl mat[K], z; + polyveck t1, w1, h; + shake256incctx state; + + if(siglen != CRYPTO_BYTES) + return -1; + + unpack_pk(rho, &t1, pk); + if(unpack_sig(c, &z, &h, sig)) + return -1; + if(polyvecl_chknorm(&z, GAMMA1 - BETA)) + return -1; + + /* Compute CRH(H(rho, t1), msg) */ + shake256(mu, CRHBYTES, pk, CRYPTO_PUBLICKEYBYTES); + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + 
shake256_inc_squeeze(mu, CRHBYTES, &state); + + /* Matrix-vector multiplication; compute Az - c2^dt1 */ + poly_challenge(&cp, c); /* uses only the first SEEDBYTES bytes of c */ + polyvec_matrix_expand(mat, rho); + + polyvecl_ntt(&z); + polyvec_matrix_pointwise_montgomery(&w1, mat, &z); + + poly_ntt(&cp); + polyveck_shiftl(&t1); + polyveck_ntt(&t1); + polyveck_pointwise_poly_montgomery(&t1, &cp, &t1); + + polyveck_sub(&w1, &w1, &t1); + polyveck_reduce(&w1); + polyveck_invntt_tomont(&w1); + + /* Reconstruct w1 */ + polyveck_caddq(&w1); + polyveck_use_hint(&w1, &w1, &h); + polyveck_pack_w1(buf, &w1); + + /* Call random oracle and verify challenge */ + shake256_inc_ctx_reset(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, buf, K*POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(c2, CTILDEBYTES, &state); + shake256_inc_ctx_release(&state); + for(i = 0; i < CTILDEBYTES; ++i) + if(c[i] != c2[i]) + return -1; + + return 0; +} + +/************************************************* +* Name: crypto_sign_open +* +* Description: Verify signed message. 
+* +* Arguments: - uint8_t *m: pointer to output message (allocated +* array with smlen bytes), can be equal to sm +* - size_t *mlen: pointer to output length of message +* - const uint8_t *sm: pointer to signed message +* - size_t smlen: length of signed message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signed message could be verified correctly and -1 otherwise +**************************************************/ +int crypto_sign_open(uint8_t *m, + size_t *mlen, + const uint8_t *sm, + size_t smlen, + const uint8_t *pk) +{ + size_t i; + + if(smlen < CRYPTO_BYTES) + goto badsig; + + *mlen = smlen - CRYPTO_BYTES; + if(crypto_sign_verify(sm, CRYPTO_BYTES, sm + CRYPTO_BYTES, *mlen, pk)) + goto badsig; + else { + /* All good, copy msg, return 0 */ + for(i = 0; i < *mlen; ++i) + m[i] = sm[CRYPTO_BYTES + i]; + return 0; + } + +badsig: + /* Signature verification failed */ + *mlen = -1; + for(i = 0; i < smlen; ++i) + m[i] = 0; + + return -1; +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/sign.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/sign.h new file mode 100644 index 0000000000..295f378c00 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/sign.h @@ -0,0 +1,36 @@ +#ifndef SIGN_H +#define SIGN_H + +#include +#include +#include "params.h" +#include "polyvec.h" +#include "poly.h" + +#define challenge DILITHIUM_NAMESPACE(challenge) +void challenge(poly *c, const uint8_t seed[SEEDBYTES]); + +#define crypto_sign_keypair DILITHIUM_NAMESPACE(keypair) +int crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + +#define crypto_sign_signature DILITHIUM_NAMESPACE(signature) +int crypto_sign_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +#define crypto_sign DILITHIUM_NAMESPACETOP +int crypto_sign(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +#define crypto_sign_verify 
DILITHIUM_NAMESPACE(verify) +int crypto_sign_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +#define crypto_sign_open DILITHIUM_NAMESPACE(open) +int crypto_sign_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + +#endif diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/symmetric-shake.c b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/symmetric-shake.c new file mode 100644 index 0000000000..963f649817 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/symmetric-shake.c @@ -0,0 +1,28 @@ +#include +#include "params.h" +#include "symmetric.h" +#include "fips202.h" + +void dilithium_shake128_stream_init(shake128incctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce) +{ + uint8_t t[2]; + t[0] = nonce; + t[1] = nonce >> 8; + + shake128_inc_init(state); + shake128_inc_absorb(state, seed, SEEDBYTES); + shake128_inc_absorb(state, t, 2); + shake128_inc_finalize(state); +} + +void dilithium_shake256_stream_init(shake256incctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce) +{ + uint8_t t[2]; + t[0] = nonce; + t[1] = nonce >> 8; + + shake256_inc_init(state); + shake256_inc_absorb(state, seed, CRHBYTES); + shake256_inc_absorb(state, t, 2); + shake256_inc_finalize(state); +} diff --git a/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/symmetric.h b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/symmetric.h new file mode 100644 index 0000000000..211de3b860 --- /dev/null +++ b/src/sig/ml_dsa/pqcrystals-dilithium-standard_ml-dsa-87-ipd_ref/symmetric.h @@ -0,0 +1,36 @@ +#ifndef SYMMETRIC_H +#define SYMMETRIC_H + +#include +#include "params.h" + +#include "fips202.h" + +typedef shake128incctx stream128_state; +typedef shake256incctx stream256_state; + +#define dilithium_shake128_stream_init DILITHIUM_NAMESPACE(dilithium_shake128_stream_init) +void 
dilithium_shake128_stream_init(shake128incctx *state, + const uint8_t seed[SEEDBYTES], + uint16_t nonce); + +#define dilithium_shake256_stream_init DILITHIUM_NAMESPACE(dilithium_shake256_stream_init) +void dilithium_shake256_stream_init(shake256incctx *state, + const uint8_t seed[CRHBYTES], + uint16_t nonce); + +#define STREAM128_BLOCKBYTES SHAKE128_RATE +#define STREAM256_BLOCKBYTES SHAKE256_RATE + +#define stream128_init(STATE, SEED, NONCE) \ + dilithium_shake128_stream_init(STATE, SEED, NONCE) +#define stream128_squeezeblocks(OUT, OUTBLOCKS, STATE) \ + shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) +#define stream128_release(STATE) shake128_inc_ctx_release(STATE) +#define stream256_init(STATE, SEED, NONCE) \ + dilithium_shake256_stream_init(STATE, SEED, NONCE) +#define stream256_squeezeblocks(OUT, OUTBLOCKS, STATE) \ + shake256_squeezeblocks(OUT, OUTBLOCKS, STATE) +#define stream256_release(STATE) shake256_inc_ctx_release(STATE) + +#endif diff --git a/src/sig/ml_dsa/sig_ml_dsa.h b/src/sig/ml_dsa/sig_ml_dsa.h new file mode 100644 index 0000000000..e4b94a7902 --- /dev/null +++ b/src/sig/ml_dsa/sig_ml_dsa.h @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: MIT + +#ifndef OQS_SIG_ML_DSA_H +#define OQS_SIG_ML_DSA_H + +#include + +#if defined(OQS_ENABLE_SIG_ml_dsa_44_ipd) || defined(OQS_ENABLE_SIG_ml_dsa_44) +#define OQS_SIG_ml_dsa_44_ipd_length_public_key 1312 +#define OQS_SIG_ml_dsa_44_ipd_length_secret_key 2560 +#define OQS_SIG_ml_dsa_44_ipd_length_signature 2420 + +OQS_SIG *OQS_SIG_ml_dsa_44_ipd_new(void); +OQS_API OQS_STATUS OQS_SIG_ml_dsa_44_ipd_keypair(uint8_t *public_key, uint8_t *secret_key); +OQS_API OQS_STATUS OQS_SIG_ml_dsa_44_ipd_sign(uint8_t *signature, size_t *signature_len, const uint8_t *message, size_t message_len, const uint8_t *secret_key); +OQS_API OQS_STATUS OQS_SIG_ml_dsa_44_ipd_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key); + +#define OQS_SIG_ml_dsa_44_length_public_key 
OQS_SIG_ml_dsa_44_ipd_length_public_key +#define OQS_SIG_ml_dsa_44_length_secret_key OQS_SIG_ml_dsa_44_ipd_length_secret_key +#define OQS_SIG_ml_dsa_44_length_ciphertext OQS_SIG_ml_dsa_44_ipd_length_ciphertext +#define OQS_SIG_ml_dsa_44_length_shared_secret OQS_SIG_ml_dsa_44_ipd_length_shared_secret +OQS_SIG *OQS_SIG_ml_dsa_44_new(void); +#define OQS_SIG_ml_dsa_44_keypair OQS_SIG_ml_dsa_44_ipd_keypair +#define OQS_SIG_ml_dsa_44_encaps OQS_SIG_ml_dsa_44_ipd_encaps +#define OQS_SIG_ml_dsa_44_decaps OQS_SIG_ml_dsa_44_ipd_decaps +#endif + +#if defined(OQS_ENABLE_SIG_ml_dsa_65_ipd) || defined(OQS_ENABLE_SIG_ml_dsa_65) +#define OQS_SIG_ml_dsa_65_ipd_length_public_key 1952 +#define OQS_SIG_ml_dsa_65_ipd_length_secret_key 4032 +#define OQS_SIG_ml_dsa_65_ipd_length_signature 3309 + +OQS_SIG *OQS_SIG_ml_dsa_65_ipd_new(void); +OQS_API OQS_STATUS OQS_SIG_ml_dsa_65_ipd_keypair(uint8_t *public_key, uint8_t *secret_key); +OQS_API OQS_STATUS OQS_SIG_ml_dsa_65_ipd_sign(uint8_t *signature, size_t *signature_len, const uint8_t *message, size_t message_len, const uint8_t *secret_key); +OQS_API OQS_STATUS OQS_SIG_ml_dsa_65_ipd_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key); + +#define OQS_SIG_ml_dsa_65_length_public_key OQS_SIG_ml_dsa_65_ipd_length_public_key +#define OQS_SIG_ml_dsa_65_length_secret_key OQS_SIG_ml_dsa_65_ipd_length_secret_key +#define OQS_SIG_ml_dsa_65_length_ciphertext OQS_SIG_ml_dsa_65_ipd_length_ciphertext +#define OQS_SIG_ml_dsa_65_length_shared_secret OQS_SIG_ml_dsa_65_ipd_length_shared_secret +OQS_SIG *OQS_SIG_ml_dsa_65_new(void); +#define OQS_SIG_ml_dsa_65_keypair OQS_SIG_ml_dsa_65_ipd_keypair +#define OQS_SIG_ml_dsa_65_encaps OQS_SIG_ml_dsa_65_ipd_encaps +#define OQS_SIG_ml_dsa_65_decaps OQS_SIG_ml_dsa_65_ipd_decaps +#endif + +#if defined(OQS_ENABLE_SIG_ml_dsa_87_ipd) || defined(OQS_ENABLE_SIG_ml_dsa_87) +#define OQS_SIG_ml_dsa_87_ipd_length_public_key 2592 +#define 
OQS_SIG_ml_dsa_87_ipd_length_secret_key 4896 +#define OQS_SIG_ml_dsa_87_ipd_length_signature 4627 + +OQS_SIG *OQS_SIG_ml_dsa_87_ipd_new(void); +OQS_API OQS_STATUS OQS_SIG_ml_dsa_87_ipd_keypair(uint8_t *public_key, uint8_t *secret_key); +OQS_API OQS_STATUS OQS_SIG_ml_dsa_87_ipd_sign(uint8_t *signature, size_t *signature_len, const uint8_t *message, size_t message_len, const uint8_t *secret_key); +OQS_API OQS_STATUS OQS_SIG_ml_dsa_87_ipd_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key); + +#define OQS_SIG_ml_dsa_87_length_public_key OQS_SIG_ml_dsa_87_ipd_length_public_key +#define OQS_SIG_ml_dsa_87_length_secret_key OQS_SIG_ml_dsa_87_ipd_length_secret_key +#define OQS_SIG_ml_dsa_87_length_ciphertext OQS_SIG_ml_dsa_87_ipd_length_ciphertext +#define OQS_SIG_ml_dsa_87_length_shared_secret OQS_SIG_ml_dsa_87_ipd_length_shared_secret +OQS_SIG *OQS_SIG_ml_dsa_87_new(void); +#define OQS_SIG_ml_dsa_87_keypair OQS_SIG_ml_dsa_87_ipd_keypair +#define OQS_SIG_ml_dsa_87_encaps OQS_SIG_ml_dsa_87_ipd_encaps +#define OQS_SIG_ml_dsa_87_decaps OQS_SIG_ml_dsa_87_ipd_decaps +#endif + +#endif diff --git a/src/sig/ml_dsa/sig_ml_dsa_44_ipd.c b/src/sig/ml_dsa/sig_ml_dsa_44_ipd.c new file mode 100644 index 0000000000..62130cfe0a --- /dev/null +++ b/src/sig/ml_dsa/sig_ml_dsa_44_ipd.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: MIT + +#include + +#include + +#if defined(OQS_ENABLE_SIG_ml_dsa_44_ipd) + +OQS_SIG *OQS_SIG_ml_dsa_44_ipd_new(void) { + + OQS_SIG *sig = malloc(sizeof(OQS_SIG)); + if (sig == NULL) { + return NULL; + } + sig->method_name = OQS_SIG_alg_ml_dsa_44_ipd; + sig->alg_version = "https://github.com/pq-crystals/dilithium/tree/standard"; + + sig->claimed_nist_level = 2; + sig->euf_cma = true; + + sig->length_public_key = OQS_SIG_ml_dsa_44_ipd_length_public_key; + sig->length_secret_key = OQS_SIG_ml_dsa_44_ipd_length_secret_key; + sig->length_signature = OQS_SIG_ml_dsa_44_ipd_length_signature; + + 
sig->keypair = OQS_SIG_ml_dsa_44_ipd_keypair; + sig->sign = OQS_SIG_ml_dsa_44_ipd_sign; + sig->verify = OQS_SIG_ml_dsa_44_ipd_verify; + + return sig; +} + +/** Alias */ +OQS_SIG *OQS_SIG_ml_dsa_44_new(void) { + + OQS_SIG *sig = malloc(sizeof(OQS_SIG)); + if (sig == NULL) { + return NULL; + } + sig->method_name = OQS_SIG_alg_ml_dsa_44; + sig->alg_version = "https://github.com/pq-crystals/dilithium/tree/standard"; + + sig->claimed_nist_level = 2; + sig->euf_cma = true; + + sig->length_public_key = OQS_SIG_ml_dsa_44_ipd_length_public_key; + sig->length_secret_key = OQS_SIG_ml_dsa_44_ipd_length_secret_key; + sig->length_signature = OQS_SIG_ml_dsa_44_ipd_length_signature; + + sig->keypair = OQS_SIG_ml_dsa_44_ipd_keypair; + sig->sign = OQS_SIG_ml_dsa_44_ipd_sign; + sig->verify = OQS_SIG_ml_dsa_44_ipd_verify; + + return sig; +} + +extern int pqcrystals_ml_dsa_44_ipd_ref_keypair(uint8_t *pk, uint8_t *sk); +extern int pqcrystals_ml_dsa_44_ipd_ref_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t mlen, const uint8_t *sk); +extern int pqcrystals_ml_dsa_44_ipd_ref_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size_t mlen, const uint8_t *pk); + +#if defined(OQS_ENABLE_SIG_ml_dsa_44_ipd_avx2) +extern int pqcrystals_ml_dsa_44_ipd_avx2_keypair(uint8_t *pk, uint8_t *sk); +extern int pqcrystals_ml_dsa_44_ipd_avx2_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t mlen, const uint8_t *sk); +extern int pqcrystals_ml_dsa_44_ipd_avx2_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size_t mlen, const uint8_t *pk); +#endif + +OQS_API OQS_STATUS OQS_SIG_ml_dsa_44_ipd_keypair(uint8_t *public_key, uint8_t *secret_key) { +#if defined(OQS_ENABLE_SIG_ml_dsa_44_ipd_avx2) +#if defined(OQS_DIST_BUILD) + if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { +#endif /* OQS_DIST_BUILD */ + return (OQS_STATUS) pqcrystals_ml_dsa_44_ipd_avx2_keypair(public_key, secret_key); +#if defined(OQS_DIST_BUILD) + } else 
{ + return (OQS_STATUS) pqcrystals_ml_dsa_44_ipd_ref_keypair(public_key, secret_key); + } +#endif /* OQS_DIST_BUILD */ +#else + return (OQS_STATUS) pqcrystals_ml_dsa_44_ipd_ref_keypair(public_key, secret_key); +#endif +} + +OQS_API OQS_STATUS OQS_SIG_ml_dsa_44_ipd_sign(uint8_t *signature, size_t *signature_len, const uint8_t *message, size_t message_len, const uint8_t *secret_key) { +#if defined(OQS_ENABLE_SIG_ml_dsa_44_ipd_avx2) +#if defined(OQS_DIST_BUILD) + if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { +#endif /* OQS_DIST_BUILD */ + return (OQS_STATUS) pqcrystals_ml_dsa_44_ipd_avx2_signature(signature, signature_len, message, message_len, secret_key); +#if defined(OQS_DIST_BUILD) + } else { + return (OQS_STATUS) pqcrystals_ml_dsa_44_ipd_ref_signature(signature, signature_len, message, message_len, secret_key); + } +#endif /* OQS_DIST_BUILD */ +#else + return (OQS_STATUS) pqcrystals_ml_dsa_44_ipd_ref_signature(signature, signature_len, message, message_len, secret_key); +#endif +} + +OQS_API OQS_STATUS OQS_SIG_ml_dsa_44_ipd_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key) { +#if defined(OQS_ENABLE_SIG_ml_dsa_44_ipd_avx2) +#if defined(OQS_DIST_BUILD) + if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { +#endif /* OQS_DIST_BUILD */ + return (OQS_STATUS) pqcrystals_ml_dsa_44_ipd_avx2_verify(signature, signature_len, message, message_len, public_key); +#if defined(OQS_DIST_BUILD) + } else { + return (OQS_STATUS) pqcrystals_ml_dsa_44_ipd_ref_verify(signature, signature_len, message, message_len, public_key); + } +#endif /* OQS_DIST_BUILD */ +#else + return (OQS_STATUS) pqcrystals_ml_dsa_44_ipd_ref_verify(signature, signature_len, message, message_len, public_key); +#endif +} + +#endif diff --git a/src/sig/ml_dsa/sig_ml_dsa_65_ipd.c b/src/sig/ml_dsa/sig_ml_dsa_65_ipd.c new file mode 100644 index 
0000000000..142d20db01 --- /dev/null +++ b/src/sig/ml_dsa/sig_ml_dsa_65_ipd.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: MIT + +#include + +#include + +#if defined(OQS_ENABLE_SIG_ml_dsa_65_ipd) + +OQS_SIG *OQS_SIG_ml_dsa_65_ipd_new(void) { + + OQS_SIG *sig = malloc(sizeof(OQS_SIG)); + if (sig == NULL) { + return NULL; + } + sig->method_name = OQS_SIG_alg_ml_dsa_65_ipd; + sig->alg_version = "https://github.com/pq-crystals/dilithium/tree/standard"; + + sig->claimed_nist_level = 3; + sig->euf_cma = true; + + sig->length_public_key = OQS_SIG_ml_dsa_65_ipd_length_public_key; + sig->length_secret_key = OQS_SIG_ml_dsa_65_ipd_length_secret_key; + sig->length_signature = OQS_SIG_ml_dsa_65_ipd_length_signature; + + sig->keypair = OQS_SIG_ml_dsa_65_ipd_keypair; + sig->sign = OQS_SIG_ml_dsa_65_ipd_sign; + sig->verify = OQS_SIG_ml_dsa_65_ipd_verify; + + return sig; +} + +/** Alias */ +OQS_SIG *OQS_SIG_ml_dsa_65_new(void) { + + OQS_SIG *sig = malloc(sizeof(OQS_SIG)); + if (sig == NULL) { + return NULL; + } + sig->method_name = OQS_SIG_alg_ml_dsa_65; + sig->alg_version = "https://github.com/pq-crystals/dilithium/tree/standard"; + + sig->claimed_nist_level = 3; + sig->euf_cma = true; + + sig->length_public_key = OQS_SIG_ml_dsa_65_ipd_length_public_key; + sig->length_secret_key = OQS_SIG_ml_dsa_65_ipd_length_secret_key; + sig->length_signature = OQS_SIG_ml_dsa_65_ipd_length_signature; + + sig->keypair = OQS_SIG_ml_dsa_65_ipd_keypair; + sig->sign = OQS_SIG_ml_dsa_65_ipd_sign; + sig->verify = OQS_SIG_ml_dsa_65_ipd_verify; + + return sig; +} + +extern int pqcrystals_ml_dsa_65_ipd_ref_keypair(uint8_t *pk, uint8_t *sk); +extern int pqcrystals_ml_dsa_65_ipd_ref_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t mlen, const uint8_t *sk); +extern int pqcrystals_ml_dsa_65_ipd_ref_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size_t mlen, const uint8_t *pk); + +#if defined(OQS_ENABLE_SIG_ml_dsa_65_ipd_avx2) +extern int 
pqcrystals_ml_dsa_65_ipd_avx2_keypair(uint8_t *pk, uint8_t *sk); +extern int pqcrystals_ml_dsa_65_ipd_avx2_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t mlen, const uint8_t *sk); +extern int pqcrystals_ml_dsa_65_ipd_avx2_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size_t mlen, const uint8_t *pk); +#endif + +OQS_API OQS_STATUS OQS_SIG_ml_dsa_65_ipd_keypair(uint8_t *public_key, uint8_t *secret_key) { +#if defined(OQS_ENABLE_SIG_ml_dsa_65_ipd_avx2) +#if defined(OQS_DIST_BUILD) + if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { +#endif /* OQS_DIST_BUILD */ + return (OQS_STATUS) pqcrystals_ml_dsa_65_ipd_avx2_keypair(public_key, secret_key); +#if defined(OQS_DIST_BUILD) + } else { + return (OQS_STATUS) pqcrystals_ml_dsa_65_ipd_ref_keypair(public_key, secret_key); + } +#endif /* OQS_DIST_BUILD */ +#else + return (OQS_STATUS) pqcrystals_ml_dsa_65_ipd_ref_keypair(public_key, secret_key); +#endif +} + +OQS_API OQS_STATUS OQS_SIG_ml_dsa_65_ipd_sign(uint8_t *signature, size_t *signature_len, const uint8_t *message, size_t message_len, const uint8_t *secret_key) { +#if defined(OQS_ENABLE_SIG_ml_dsa_65_ipd_avx2) +#if defined(OQS_DIST_BUILD) + if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { +#endif /* OQS_DIST_BUILD */ + return (OQS_STATUS) pqcrystals_ml_dsa_65_ipd_avx2_signature(signature, signature_len, message, message_len, secret_key); +#if defined(OQS_DIST_BUILD) + } else { + return (OQS_STATUS) pqcrystals_ml_dsa_65_ipd_ref_signature(signature, signature_len, message, message_len, secret_key); + } +#endif /* OQS_DIST_BUILD */ +#else + return (OQS_STATUS) pqcrystals_ml_dsa_65_ipd_ref_signature(signature, signature_len, message, message_len, secret_key); +#endif +} + +OQS_API OQS_STATUS OQS_SIG_ml_dsa_65_ipd_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key) { +#if 
defined(OQS_ENABLE_SIG_ml_dsa_65_ipd_avx2) +#if defined(OQS_DIST_BUILD) + if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { +#endif /* OQS_DIST_BUILD */ + return (OQS_STATUS) pqcrystals_ml_dsa_65_ipd_avx2_verify(signature, signature_len, message, message_len, public_key); +#if defined(OQS_DIST_BUILD) + } else { + return (OQS_STATUS) pqcrystals_ml_dsa_65_ipd_ref_verify(signature, signature_len, message, message_len, public_key); + } +#endif /* OQS_DIST_BUILD */ +#else + return (OQS_STATUS) pqcrystals_ml_dsa_65_ipd_ref_verify(signature, signature_len, message, message_len, public_key); +#endif +} + +#endif diff --git a/src/sig/ml_dsa/sig_ml_dsa_87_ipd.c b/src/sig/ml_dsa/sig_ml_dsa_87_ipd.c new file mode 100644 index 0000000000..2bd6542e5f --- /dev/null +++ b/src/sig/ml_dsa/sig_ml_dsa_87_ipd.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: MIT + +#include + +#include + +#if defined(OQS_ENABLE_SIG_ml_dsa_87_ipd) + +OQS_SIG *OQS_SIG_ml_dsa_87_ipd_new(void) { + + OQS_SIG *sig = malloc(sizeof(OQS_SIG)); + if (sig == NULL) { + return NULL; + } + sig->method_name = OQS_SIG_alg_ml_dsa_87_ipd; + sig->alg_version = "https://github.com/pq-crystals/dilithium/tree/standard"; + + sig->claimed_nist_level = 5; + sig->euf_cma = true; + + sig->length_public_key = OQS_SIG_ml_dsa_87_ipd_length_public_key; + sig->length_secret_key = OQS_SIG_ml_dsa_87_ipd_length_secret_key; + sig->length_signature = OQS_SIG_ml_dsa_87_ipd_length_signature; + + sig->keypair = OQS_SIG_ml_dsa_87_ipd_keypair; + sig->sign = OQS_SIG_ml_dsa_87_ipd_sign; + sig->verify = OQS_SIG_ml_dsa_87_ipd_verify; + + return sig; +} + +/** Alias */ +OQS_SIG *OQS_SIG_ml_dsa_87_new(void) { + + OQS_SIG *sig = malloc(sizeof(OQS_SIG)); + if (sig == NULL) { + return NULL; + } + sig->method_name = OQS_SIG_alg_ml_dsa_87; + sig->alg_version = "https://github.com/pq-crystals/dilithium/tree/standard"; + + sig->claimed_nist_level = 5; + sig->euf_cma = true; + + sig->length_public_key = 
OQS_SIG_ml_dsa_87_ipd_length_public_key; + sig->length_secret_key = OQS_SIG_ml_dsa_87_ipd_length_secret_key; + sig->length_signature = OQS_SIG_ml_dsa_87_ipd_length_signature; + + sig->keypair = OQS_SIG_ml_dsa_87_ipd_keypair; + sig->sign = OQS_SIG_ml_dsa_87_ipd_sign; + sig->verify = OQS_SIG_ml_dsa_87_ipd_verify; + + return sig; +} + +extern int pqcrystals_ml_dsa_87_ipd_ref_keypair(uint8_t *pk, uint8_t *sk); +extern int pqcrystals_ml_dsa_87_ipd_ref_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t mlen, const uint8_t *sk); +extern int pqcrystals_ml_dsa_87_ipd_ref_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size_t mlen, const uint8_t *pk); + +#if defined(OQS_ENABLE_SIG_ml_dsa_87_ipd_avx2) +extern int pqcrystals_ml_dsa_87_ipd_avx2_keypair(uint8_t *pk, uint8_t *sk); +extern int pqcrystals_ml_dsa_87_ipd_avx2_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t mlen, const uint8_t *sk); +extern int pqcrystals_ml_dsa_87_ipd_avx2_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size_t mlen, const uint8_t *pk); +#endif + +OQS_API OQS_STATUS OQS_SIG_ml_dsa_87_ipd_keypair(uint8_t *public_key, uint8_t *secret_key) { +#if defined(OQS_ENABLE_SIG_ml_dsa_87_ipd_avx2) +#if defined(OQS_DIST_BUILD) + if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { +#endif /* OQS_DIST_BUILD */ + return (OQS_STATUS) pqcrystals_ml_dsa_87_ipd_avx2_keypair(public_key, secret_key); +#if defined(OQS_DIST_BUILD) + } else { + return (OQS_STATUS) pqcrystals_ml_dsa_87_ipd_ref_keypair(public_key, secret_key); + } +#endif /* OQS_DIST_BUILD */ +#else + return (OQS_STATUS) pqcrystals_ml_dsa_87_ipd_ref_keypair(public_key, secret_key); +#endif +} + +OQS_API OQS_STATUS OQS_SIG_ml_dsa_87_ipd_sign(uint8_t *signature, size_t *signature_len, const uint8_t *message, size_t message_len, const uint8_t *secret_key) { +#if defined(OQS_ENABLE_SIG_ml_dsa_87_ipd_avx2) +#if defined(OQS_DIST_BUILD) + if 
(OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { +#endif /* OQS_DIST_BUILD */ + return (OQS_STATUS) pqcrystals_ml_dsa_87_ipd_avx2_signature(signature, signature_len, message, message_len, secret_key); +#if defined(OQS_DIST_BUILD) + } else { + return (OQS_STATUS) pqcrystals_ml_dsa_87_ipd_ref_signature(signature, signature_len, message, message_len, secret_key); + } +#endif /* OQS_DIST_BUILD */ +#else + return (OQS_STATUS) pqcrystals_ml_dsa_87_ipd_ref_signature(signature, signature_len, message, message_len, secret_key); +#endif +} + +OQS_API OQS_STATUS OQS_SIG_ml_dsa_87_ipd_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key) { +#if defined(OQS_ENABLE_SIG_ml_dsa_87_ipd_avx2) +#if defined(OQS_DIST_BUILD) + if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { +#endif /* OQS_DIST_BUILD */ + return (OQS_STATUS) pqcrystals_ml_dsa_87_ipd_avx2_verify(signature, signature_len, message, message_len, public_key); +#if defined(OQS_DIST_BUILD) + } else { + return (OQS_STATUS) pqcrystals_ml_dsa_87_ipd_ref_verify(signature, signature_len, message, message_len, public_key); + } +#endif /* OQS_DIST_BUILD */ +#else + return (OQS_STATUS) pqcrystals_ml_dsa_87_ipd_ref_verify(signature, signature_len, message, message_len, public_key); +#endif +} + +#endif diff --git a/src/sig/sig.c b/src/sig/sig.c index c45084947f..b953af7562 100644 --- a/src/sig/sig.c +++ b/src/sig/sig.c @@ -18,6 +18,12 @@ OQS_API const char *OQS_SIG_alg_identifier(size_t i) { OQS_SIG_alg_dilithium_2, OQS_SIG_alg_dilithium_3, OQS_SIG_alg_dilithium_5, + OQS_SIG_alg_ml_dsa_44_ipd, + OQS_SIG_alg_ml_dsa_44, + OQS_SIG_alg_ml_dsa_65_ipd, + OQS_SIG_alg_ml_dsa_65, + OQS_SIG_alg_ml_dsa_87_ipd, + OQS_SIG_alg_ml_dsa_87, OQS_SIG_alg_falcon_512, OQS_SIG_alg_falcon_1024, OQS_SIG_alg_sphincs_sha2_128f_simple, @@ -31,8 +37,7 @@ OQS_API const char *OQS_SIG_alg_identifier(size_t i) { 
OQS_SIG_alg_sphincs_shake_192f_simple, OQS_SIG_alg_sphincs_shake_192s_simple, OQS_SIG_alg_sphincs_shake_256f_simple, - OQS_SIG_alg_sphincs_shake_256s_simple, - ///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_ALG_IDENTIFIER_END + OQS_SIG_alg_sphincs_shake_256s_simple,///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_ALG_IDENTIFIER_END }; if (i >= OQS_SIG_algs_length) { return NULL; @@ -57,103 +62,161 @@ OQS_API int OQS_SIG_alg_is_enabled(const char *method_name) { #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_dilithium_3)) { #ifdef OQS_ENABLE_SIG_dilithium_3 return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_dilithium_5)) { #ifdef OQS_ENABLE_SIG_dilithium_5 return 1; #else return 0; #endif + + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_ml_dsa_44_ipd)) { +#ifdef OQS_ENABLE_SIG_ml_dsa_44_ipd + return 1; +#else + return 0; +#endif + + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_ml_dsa_44)) { +#ifdef OQS_ENABLE_SIG_ml_dsa_44 + return 1; +#else + return 0; +#endif + + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_ml_dsa_65_ipd)) { +#ifdef OQS_ENABLE_SIG_ml_dsa_65_ipd + return 1; +#else + return 0; +#endif + + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_ml_dsa_65)) { +#ifdef OQS_ENABLE_SIG_ml_dsa_65 + return 1; +#else + return 0; +#endif + + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_ml_dsa_87_ipd)) { +#ifdef OQS_ENABLE_SIG_ml_dsa_87_ipd + return 1; +#else + return 0; +#endif + + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_ml_dsa_87)) { +#ifdef OQS_ENABLE_SIG_ml_dsa_87 + return 1; +#else + return 0; +#endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_falcon_512)) { #ifdef OQS_ENABLE_SIG_falcon_512 return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_falcon_1024)) { #ifdef OQS_ENABLE_SIG_falcon_1024 return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_sha2_128f_simple)) { #ifdef 
OQS_ENABLE_SIG_sphincs_sha2_128f_simple return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_sha2_128s_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_sha2_128s_simple return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_sha2_192f_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_sha2_192f_simple return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_sha2_192s_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_sha2_192s_simple return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_sha2_256f_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_sha2_256f_simple return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_sha2_256s_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_sha2_256s_simple return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_shake_128f_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_shake_128f_simple return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_shake_128s_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_shake_128s_simple return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_shake_192f_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_shake_192f_simple return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_shake_192s_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_shake_192s_simple return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_shake_256f_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_shake_256f_simple return 1; #else return 0; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_shake_256s_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_shake_256s_simple return 1; #else return 0; #endif - ///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_ENABLED_CASE_END +///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_ENABLED_CASE_END } else { 
return 0; } @@ -171,103 +234,161 @@ OQS_API OQS_SIG *OQS_SIG_new(const char *method_name) { #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_dilithium_3)) { #ifdef OQS_ENABLE_SIG_dilithium_3 return OQS_SIG_dilithium_3_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_dilithium_5)) { #ifdef OQS_ENABLE_SIG_dilithium_5 return OQS_SIG_dilithium_5_new(); #else return NULL; #endif + + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_ml_dsa_44_ipd)) { +#ifdef OQS_ENABLE_SIG_ml_dsa_44_ipd + return OQS_SIG_ml_dsa_44_ipd_new(); +#else + return NULL; +#endif + + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_ml_dsa_44)) { +#ifdef OQS_ENABLE_SIG_ml_dsa_44 + return OQS_SIG_ml_dsa_44_new(); +#else + return NULL; +#endif + + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_ml_dsa_65_ipd)) { +#ifdef OQS_ENABLE_SIG_ml_dsa_65_ipd + return OQS_SIG_ml_dsa_65_ipd_new(); +#else + return NULL; +#endif + + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_ml_dsa_65)) { +#ifdef OQS_ENABLE_SIG_ml_dsa_65 + return OQS_SIG_ml_dsa_65_new(); +#else + return NULL; +#endif + + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_ml_dsa_87_ipd)) { +#ifdef OQS_ENABLE_SIG_ml_dsa_87_ipd + return OQS_SIG_ml_dsa_87_ipd_new(); +#else + return NULL; +#endif + + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_ml_dsa_87)) { +#ifdef OQS_ENABLE_SIG_ml_dsa_87 + return OQS_SIG_ml_dsa_87_new(); +#else + return NULL; +#endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_falcon_512)) { #ifdef OQS_ENABLE_SIG_falcon_512 return OQS_SIG_falcon_512_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_falcon_1024)) { #ifdef OQS_ENABLE_SIG_falcon_1024 return OQS_SIG_falcon_1024_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_sha2_128f_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_sha2_128f_simple return OQS_SIG_sphincs_sha2_128f_simple_new(); #else return 
NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_sha2_128s_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_sha2_128s_simple return OQS_SIG_sphincs_sha2_128s_simple_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_sha2_192f_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_sha2_192f_simple return OQS_SIG_sphincs_sha2_192f_simple_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_sha2_192s_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_sha2_192s_simple return OQS_SIG_sphincs_sha2_192s_simple_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_sha2_256f_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_sha2_256f_simple return OQS_SIG_sphincs_sha2_256f_simple_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_sha2_256s_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_sha2_256s_simple return OQS_SIG_sphincs_sha2_256s_simple_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_shake_128f_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_shake_128f_simple return OQS_SIG_sphincs_shake_128f_simple_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_shake_128s_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_shake_128s_simple return OQS_SIG_sphincs_shake_128s_simple_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_shake_192f_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_shake_192f_simple return OQS_SIG_sphincs_shake_192f_simple_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_shake_192s_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_shake_192s_simple return OQS_SIG_sphincs_shake_192s_simple_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_shake_256f_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_shake_256f_simple return 
OQS_SIG_sphincs_shake_256f_simple_new(); #else return NULL; #endif + } else if (0 == strcasecmp(method_name, OQS_SIG_alg_sphincs_shake_256s_simple)) { #ifdef OQS_ENABLE_SIG_sphincs_shake_256s_simple return OQS_SIG_sphincs_shake_256s_simple_new(); #else return NULL; #endif - ///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_NEW_CASE_END +///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_NEW_CASE_END // EDIT-WHEN-ADDING-SIG } else { return NULL; diff --git a/src/sig/sig.h b/src/sig/sig.h index f7f48ec327..97a40cd88f 100644 --- a/src/sig/sig.h +++ b/src/sig/sig.h @@ -38,6 +38,18 @@ extern "C" { #define OQS_SIG_alg_dilithium_3 "Dilithium3" /** Algorithm identifier for Dilithium5 */ #define OQS_SIG_alg_dilithium_5 "Dilithium5" +/** Algorithm identifier for ML-DSA-44-ipd */ +#define OQS_SIG_alg_ml_dsa_44_ipd "ML-DSA-44-ipd" +/** Algorithm identifier for ML-DSA-44 SIG. */ +#define OQS_SIG_alg_ml_dsa_44 "ML-DSA-44" +/** Algorithm identifier for ML-DSA-65-ipd */ +#define OQS_SIG_alg_ml_dsa_65_ipd "ML-DSA-65-ipd" +/** Algorithm identifier for ML-DSA-65 SIG. */ +#define OQS_SIG_alg_ml_dsa_65 "ML-DSA-65" +/** Algorithm identifier for ML-DSA-87-ipd */ +#define OQS_SIG_alg_ml_dsa_87_ipd "ML-DSA-87-ipd" +/** Algorithm identifier for ML-DSA-87 SIG. */ +#define OQS_SIG_alg_ml_dsa_87 "ML-DSA-87" /** Algorithm identifier for Falcon-512 */ #define OQS_SIG_alg_falcon_512 "Falcon-512" /** Algorithm identifier for Falcon-1024 */ @@ -69,8 +81,9 @@ extern "C" { ///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_ALG_IDENTIFIER_END // EDIT-WHEN-ADDING-SIG ///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_ALGS_LENGTH_START + /** Number of algorithm identifiers above. 
*/ -#define OQS_SIG_algs_length 17 +#define OQS_SIG_algs_length 23 ///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_ALGS_LENGTH_END /** @@ -240,6 +253,9 @@ OQS_API void OQS_SIG_free(OQS_SIG *sig); #ifdef OQS_ENABLE_SIG_DILITHIUM #include #endif /* OQS_ENABLE_SIG_DILITHIUM */ +#ifdef OQS_ENABLE_SIG_ML_DSA +#include +#endif /* OQS_ENABLE_SIG_ML_DSA */ #ifdef OQS_ENABLE_SIG_FALCON #include #endif /* OQS_ENABLE_SIG_FALCON */ diff --git a/src/sig/sphincs/sig_sphincs.h b/src/sig/sphincs/sig_sphincs.h index 34a7381cde..a717a636ed 100644 --- a/src/sig/sphincs/sig_sphincs.h +++ b/src/sig/sphincs/sig_sphincs.h @@ -5,7 +5,7 @@ #include -#ifdef OQS_ENABLE_SIG_sphincs_sha2_128f_simple +#if defined(OQS_ENABLE_SIG_sphincs_sha2_128f_simple) #define OQS_SIG_sphincs_sha2_128f_simple_length_public_key 32 #define OQS_SIG_sphincs_sha2_128f_simple_length_secret_key 64 #define OQS_SIG_sphincs_sha2_128f_simple_length_signature 17088 @@ -16,7 +16,7 @@ OQS_API OQS_STATUS OQS_SIG_sphincs_sha2_128f_simple_sign(uint8_t *signature, siz OQS_API OQS_STATUS OQS_SIG_sphincs_sha2_128f_simple_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key); #endif -#ifdef OQS_ENABLE_SIG_sphincs_sha2_128s_simple +#if defined(OQS_ENABLE_SIG_sphincs_sha2_128s_simple) #define OQS_SIG_sphincs_sha2_128s_simple_length_public_key 32 #define OQS_SIG_sphincs_sha2_128s_simple_length_secret_key 64 #define OQS_SIG_sphincs_sha2_128s_simple_length_signature 7856 @@ -27,7 +27,7 @@ OQS_API OQS_STATUS OQS_SIG_sphincs_sha2_128s_simple_sign(uint8_t *signature, siz OQS_API OQS_STATUS OQS_SIG_sphincs_sha2_128s_simple_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key); #endif -#ifdef OQS_ENABLE_SIG_sphincs_sha2_192f_simple +#if defined(OQS_ENABLE_SIG_sphincs_sha2_192f_simple) #define OQS_SIG_sphincs_sha2_192f_simple_length_public_key 48 #define OQS_SIG_sphincs_sha2_192f_simple_length_secret_key 96 
#define OQS_SIG_sphincs_sha2_192f_simple_length_signature 35664 @@ -38,7 +38,7 @@ OQS_API OQS_STATUS OQS_SIG_sphincs_sha2_192f_simple_sign(uint8_t *signature, siz OQS_API OQS_STATUS OQS_SIG_sphincs_sha2_192f_simple_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key); #endif -#ifdef OQS_ENABLE_SIG_sphincs_sha2_192s_simple +#if defined(OQS_ENABLE_SIG_sphincs_sha2_192s_simple) #define OQS_SIG_sphincs_sha2_192s_simple_length_public_key 48 #define OQS_SIG_sphincs_sha2_192s_simple_length_secret_key 96 #define OQS_SIG_sphincs_sha2_192s_simple_length_signature 16224 @@ -49,7 +49,7 @@ OQS_API OQS_STATUS OQS_SIG_sphincs_sha2_192s_simple_sign(uint8_t *signature, siz OQS_API OQS_STATUS OQS_SIG_sphincs_sha2_192s_simple_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key); #endif -#ifdef OQS_ENABLE_SIG_sphincs_sha2_256f_simple +#if defined(OQS_ENABLE_SIG_sphincs_sha2_256f_simple) #define OQS_SIG_sphincs_sha2_256f_simple_length_public_key 64 #define OQS_SIG_sphincs_sha2_256f_simple_length_secret_key 128 #define OQS_SIG_sphincs_sha2_256f_simple_length_signature 49856 @@ -60,7 +60,7 @@ OQS_API OQS_STATUS OQS_SIG_sphincs_sha2_256f_simple_sign(uint8_t *signature, siz OQS_API OQS_STATUS OQS_SIG_sphincs_sha2_256f_simple_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key); #endif -#ifdef OQS_ENABLE_SIG_sphincs_sha2_256s_simple +#if defined(OQS_ENABLE_SIG_sphincs_sha2_256s_simple) #define OQS_SIG_sphincs_sha2_256s_simple_length_public_key 64 #define OQS_SIG_sphincs_sha2_256s_simple_length_secret_key 128 #define OQS_SIG_sphincs_sha2_256s_simple_length_signature 29792 @@ -71,7 +71,7 @@ OQS_API OQS_STATUS OQS_SIG_sphincs_sha2_256s_simple_sign(uint8_t *signature, siz OQS_API OQS_STATUS OQS_SIG_sphincs_sha2_256s_simple_verify(const uint8_t *message, size_t message_len, const 
uint8_t *signature, size_t signature_len, const uint8_t *public_key); #endif -#ifdef OQS_ENABLE_SIG_sphincs_shake_128f_simple +#if defined(OQS_ENABLE_SIG_sphincs_shake_128f_simple) #define OQS_SIG_sphincs_shake_128f_simple_length_public_key 32 #define OQS_SIG_sphincs_shake_128f_simple_length_secret_key 64 #define OQS_SIG_sphincs_shake_128f_simple_length_signature 17088 @@ -82,7 +82,7 @@ OQS_API OQS_STATUS OQS_SIG_sphincs_shake_128f_simple_sign(uint8_t *signature, si OQS_API OQS_STATUS OQS_SIG_sphincs_shake_128f_simple_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key); #endif -#ifdef OQS_ENABLE_SIG_sphincs_shake_128s_simple +#if defined(OQS_ENABLE_SIG_sphincs_shake_128s_simple) #define OQS_SIG_sphincs_shake_128s_simple_length_public_key 32 #define OQS_SIG_sphincs_shake_128s_simple_length_secret_key 64 #define OQS_SIG_sphincs_shake_128s_simple_length_signature 7856 @@ -93,7 +93,7 @@ OQS_API OQS_STATUS OQS_SIG_sphincs_shake_128s_simple_sign(uint8_t *signature, si OQS_API OQS_STATUS OQS_SIG_sphincs_shake_128s_simple_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key); #endif -#ifdef OQS_ENABLE_SIG_sphincs_shake_192f_simple +#if defined(OQS_ENABLE_SIG_sphincs_shake_192f_simple) #define OQS_SIG_sphincs_shake_192f_simple_length_public_key 48 #define OQS_SIG_sphincs_shake_192f_simple_length_secret_key 96 #define OQS_SIG_sphincs_shake_192f_simple_length_signature 35664 @@ -104,7 +104,7 @@ OQS_API OQS_STATUS OQS_SIG_sphincs_shake_192f_simple_sign(uint8_t *signature, si OQS_API OQS_STATUS OQS_SIG_sphincs_shake_192f_simple_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key); #endif -#ifdef OQS_ENABLE_SIG_sphincs_shake_192s_simple +#if defined(OQS_ENABLE_SIG_sphincs_shake_192s_simple) #define OQS_SIG_sphincs_shake_192s_simple_length_public_key 48 #define 
OQS_SIG_sphincs_shake_192s_simple_length_secret_key 96 #define OQS_SIG_sphincs_shake_192s_simple_length_signature 16224 @@ -115,7 +115,7 @@ OQS_API OQS_STATUS OQS_SIG_sphincs_shake_192s_simple_sign(uint8_t *signature, si OQS_API OQS_STATUS OQS_SIG_sphincs_shake_192s_simple_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key); #endif -#ifdef OQS_ENABLE_SIG_sphincs_shake_256f_simple +#if defined(OQS_ENABLE_SIG_sphincs_shake_256f_simple) #define OQS_SIG_sphincs_shake_256f_simple_length_public_key 64 #define OQS_SIG_sphincs_shake_256f_simple_length_secret_key 128 #define OQS_SIG_sphincs_shake_256f_simple_length_signature 49856 @@ -126,7 +126,7 @@ OQS_API OQS_STATUS OQS_SIG_sphincs_shake_256f_simple_sign(uint8_t *signature, si OQS_API OQS_STATUS OQS_SIG_sphincs_shake_256f_simple_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key); #endif -#ifdef OQS_ENABLE_SIG_sphincs_shake_256s_simple +#if defined(OQS_ENABLE_SIG_sphincs_shake_256s_simple) #define OQS_SIG_sphincs_shake_256s_simple_length_public_key 64 #define OQS_SIG_sphincs_shake_256s_simple_length_secret_key 128 #define OQS_SIG_sphincs_shake_256s_simple_length_signature 29792 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ff68438829..d9da81647d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -74,7 +74,7 @@ target_link_libraries(test_kem_mem PRIVATE ${TEST_DEPS}) add_executable(speed_kem speed_kem.c) target_link_libraries(speed_kem PRIVATE ${TEST_DEPS}) -set(KEM_TESTS example_kem kat_kem test_kem test_kem_mem speed_kem) +set(KEM_TESTS example_kem kat_kem test_kem test_kem_mem speed_kem vectors_kem) # SIG API tests add_executable(example_sig example_sig.c) @@ -92,11 +92,18 @@ target_link_libraries(test_sig_mem PRIVATE ${TEST_DEPS}) add_executable(speed_sig speed_sig.c) target_link_libraries(speed_sig PRIVATE ${TEST_DEPS}) -set(SIG_TESTS 
example_sig kat_sig test_sig test_sig_mem speed_sig) +set(SIG_TESTS example_sig kat_sig test_sig test_sig_mem speed_sig vectors_sig) add_executable(dump_alg_info dump_alg_info.c) target_link_libraries(dump_alg_info PRIVATE ${TEST_DEPS}) +# Intermediate values vector tests +add_executable(vectors_sig vectors_sig.c) +target_link_libraries(vectors_sig PRIVATE ${TEST_DEPS}) + +add_executable(vectors_kem vectors_kem.c) +target_link_libraries(vectors_kem PRIVATE ${TEST_DEPS}) + # Enable Valgrind-based timing side-channel analysis for test_kem and test_sig if(OQS_ENABLE_TEST_CONSTANT_TIME AND NOT OQS_DEBUG_BUILD) message(WARNING "OQS_ENABLE_TEST_CONSTANT_TIME is incompatible with CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}.") diff --git a/tests/KATs/kem/kats.json b/tests/KATs/kem/kats.json index b4ea3ffa2d..750b6752e6 100644 --- a/tests/KATs/kem/kats.json +++ b/tests/KATs/kem/kats.json @@ -99,6 +99,30 @@ "all": "b3a12005fe1ce49f5df510aea6a56bfa4bdc2d3d706afb0361d70dc88188a2a6", "single": "89e82a5bf2d4ddb2c6444e10409e6d9ca65dafbca67d1a0db2c9b54920a29172" }, + "ML-KEM-1024": { + "all": "2a655868dcbb4d3f901b2eea4b7be06c7c218796a39701ad76612e5307354d63", + "single": "03d6494b74c45d010e61b0328c1ab318c4df3b7f9dbd04d0e35b3468848584b7" + }, + "ML-KEM-1024-ipd": { + "all": "2a655868dcbb4d3f901b2eea4b7be06c7c218796a39701ad76612e5307354d63", + "single": "03d6494b74c45d010e61b0328c1ab318c4df3b7f9dbd04d0e35b3468848584b7" + }, + "ML-KEM-512": { + "all": "03041d19a70df3695891e45f78a7b040cace4f065750f9e743265a4cd28f3063", + "single": "76aae1fa3f8367522700b22da635a5bc4ced4298edb0eb9947aa3ba60d62676f" + }, + "ML-KEM-512-ipd": { + "all": "03041d19a70df3695891e45f78a7b040cace4f065750f9e743265a4cd28f3063", + "single": "76aae1fa3f8367522700b22da635a5bc4ced4298edb0eb9947aa3ba60d62676f" + }, + "ML-KEM-768": { + "all": "00f8b3a8059da63d0dc5c7767c3846033fceee14e6abd9f7aa96543b981a5cc1", + "single": "c7e76b4b30c786b5b70c152a446e7832c1cb42b3816ec048dbeaf7041211b310" + }, + "ML-KEM-768-ipd": { + "all": 
"00f8b3a8059da63d0dc5c7767c3846033fceee14e6abd9f7aa96543b981a5cc1", + "single": "c7e76b4b30c786b5b70c152a446e7832c1cb42b3816ec048dbeaf7041211b310" + }, "sntrup761": { "all": "36e1e53d4e6e295e8fb804449958ad9a3719aa350e91933c65791b9117382d57", "single": "afc42c3a5b10f4ef69654250097ebda9b9564570f4086744b24a6daf2bd1f89a" diff --git a/tests/KATs/sig/kats.json b/tests/KATs/sig/kats.json index 86d5ce46c9..73595b1039 100644 --- a/tests/KATs/sig/kats.json +++ b/tests/KATs/sig/kats.json @@ -19,6 +19,30 @@ "all": "f4f23c1153682007d5dec02c35e47061c17900fcf0adb3fd0437f1988fa13655", "single": "da27fe8a462de7307ddf1f9b00072a457d9c5b14e838c148fbe2662094b9a2ca" }, + "ML-DSA-44": { + "all": "183bc0c4398ade4fc17b6a7d876b82545a96331139a4f27269c95664b8c483f9", + "single": "e6f3ec4dc0b02dd3bcbbc6b105190e1890ca0bb3f802e2b571f0d70f3993a2e1" + }, + "ML-DSA-44-ipd": { + "all": "183bc0c4398ade4fc17b6a7d876b82545a96331139a4f27269c95664b8c483f9", + "single": "e6f3ec4dc0b02dd3bcbbc6b105190e1890ca0bb3f802e2b571f0d70f3993a2e1" + }, + "ML-DSA-65": { + "all": "3af4bdd2567fca1016583f917067dd5624bba2df2210934f62b2f6127cf88547", + "single": "7225c4531086d88c9b7fa18101b0f78dda2d38df88812c65ddc1ae94fe3c01a7" + }, + "ML-DSA-65-ipd": { + "all": "3af4bdd2567fca1016583f917067dd5624bba2df2210934f62b2f6127cf88547", + "single": "7225c4531086d88c9b7fa18101b0f78dda2d38df88812c65ddc1ae94fe3c01a7" + }, + "ML-DSA-87": { + "all": "cfd95d8ff8b92173685805ad8e3380095e4991bb3947b73f4c7e108ab47c5052", + "single": "f5cb5ed44a261a4118f9cfd5d55b4210939cb5b8531968a10c37060551a8927f" + }, + "ML-DSA-87-ipd": { + "all": "cfd95d8ff8b92173685805ad8e3380095e4991bb3947b73f4c7e108ab47c5052", + "single": "f5cb5ed44a261a4118f9cfd5d55b4210939cb5b8531968a10c37060551a8927f" + }, "SPHINCS+-SHA2-128f-simple": { "all": "4437eb44516630184c3cb5d3a4392e8bb955c2bf59ad17ab3c607fb7b7285780", "single": "cd1e13db3a56c0a6b3486a7b12bcddfda50cf5d1e4d14d3113e6456e969b8114" diff --git a/tests/PQC_Intermediate_Values/ML-DSA-44.txt 
b/tests/PQC_Intermediate_Values/ML-DSA-44.txt new file mode 100644 index 0000000000..b9e6612aa6 --- /dev/null +++ b/tests/PQC_Intermediate_Values/ML-DSA-44.txt @@ -0,0 +1,11 @@ +Origin of the values: https://csrc.nist.gov/csrc/media/Projects/post-quantum-cryptography/documents/example-files/PQC%20Intermediate%20Values.zip +seed: 6CAE2E9C2CF64D2686C31C2118E0F24A47DD46DB85590910AAC9DF4C1B854E44 +sig_message: DA5FBC7F78116BC8537E8E522DFBB6F68710FC36AE5AF0ACE5CFA3BA4FEBF6C86D966A44C2CF53CCC4FF5B9CC4E6403CFF3C23B4F292AAC01E35A21AB11F0707726E88048DD05887448B870741FDCDFB4451E1216FA2F89D90D872B67B4BA546C8CBF504A46FC02036FB5B287BC82DB9E2D835802DDE3075C7B703ADF4FAE4F98840670964A1ABC61105C3B99C78609605E7F17CD262BE67F7E7A2C6ECC519ECDE8D5B76C21FE5C15859FC94382608A15C7E656AD8E0042CA649EB8EEB59B8E266C244591B265B672D4654C5FF28296707815C647DD11AB3148AEDE24E96D7ABD6C479C0C367B00E464804184140152063153EC3982987618F7D9BD9812DF3B95DDBC47D1F96C752C01B742255946461509EF7A7A67DA0123D670845BE07725D9C1E9F419B4B9133EBA36BC90DE45666C40EA664D93A16948F848CDB9688E116B00B0A4F03B26BB836DFBA93EA6247550FD983371E360F04C9F2793EB769A00BF4049E94F225EE035676E983FC9CB01D15EEB621CC0D4B840EE777ECE351EC66C4B28A36FAF02C42B24804E8126E3852735C2A6F6E201662DC18E125210C78197372B4787A8163ACF4B758921AB23ECC4AB8F3C1DB3549050E26F9C35594DCC1C27AE5999DB695A19681FFD9CD4416E0C79E3457C0B5144F3993E945F6006EA0F541B1C3F473D78F4FED4011E07D46F8DAF7998BBD4A9F5161050D9982DFF28C72E15D158A630B8972E8E2B70611C299BE4BD3405350D2FEB84C93E97FE4DFBDCA5BCC9B6708D7E542D7101BD8D5589BAAEC5649E1C9844C37C5DB0556BBB6B2EB577FB65909B1EBFA7F1590D53D98FC9975BC06F7AC581FE22066BACCA0375AE09DBADC8940E98A38B15FC190523FA6D0CF326D7495B5CC948AC994181EE0DFCAB11722E1B46AD6FD2065442D1FA06ADF21FD9187B55320F554B929BC795F367AFEF11A5CE4128AE32E6C1662D013A91456AD292DFB8B4C161EA7A4D5B43AF63BE7BFC5EEAB9D1E3198F441AD70DF9D6BDEAEAFBD0A293F1A6DF07339A4C34C7375CB6096EB06EC035777C22344912DCCF3D0F8DAED1D0B046118A22886299DA5782DC77487CA5FD0D3CFC724769D658131FA415672996CE8E8BE
D83E057E3AFE26703012DFED31C732D770FFC7FA55C59BC3FD68C34D00738FC9E3DBECCF4B695B99BBD6A0082752E3A95150AFFE793D8F7FF19BAEFF7744688445E7B561294371E1909035929D654606EED6BD011D2CB0512B1A0987827BB07F14E0E8F0207519DF24BA69AB788EFAB21F1BD69BCB14AB44AED8FEF80F1F63685C6A85100DA1671405F8D6454FCD7D5FB6656959AD68D3E1896C6EB4967615332993D4207DB6B0F5A6E33EF4B2494DCDBA368A052FB8CD48A9E215D30C8C9D37DDAA6B4F3080744E6861B9889E23E9D964F75C5BD5E1DEA98EEEA8E66C60DA73ED1E39115EE0274F2747858D5D644C09B5EFC978D4496A007DAC35F2F68B1F9A3B43A638F38F663E6124740234119D6E8DBAF933ED0FD5A02FEFFF7AE3235C82D90B2F8557DDDC9102FAAF8181E6EE3E9BCEC78E4C61F73FBDA23D4382E5050A0E151E3F51BC22347F9B1A19D74649558ACFF8B21AF69EFFF78BA3E83CEB9EA36D4799272E10F56D3C151209C99105A133B665EA51E4A245D37E7A0F84F5222E3A0401E67BFC1BAB6269CA4A87D3CF062CFFEEE0B5EE5FBFBE1029E93F46FAE68A91CFBCBD5CF8B78F122AF22A11E77095DEAEBC96F1613911E02237F795AE717301499936B440E07B32DDB09008276C9A5A314889E8B9ACDF32D6695AE2931CC29ED1DAFDB61B708B52F9888DA67ECED25217F1A5FC1EE7D482917C7C7E44CA7F0508FD3BC94BEC116A630ADB08DDD882A221DC5D2C60A2E9F2F849B3E8B5DF150EF753C35251850D0B4980164F8C07339E035D5B2EEC4BEEFB885E261E401BDBA408869951075DEEED3C7EE7452AB39856D240E954FC8A4BEB01A0F79187BB72DD2925E63B3DE9E16CDDC041BC3BE47F318DA6DC6A9AD8710B18883367E374DA58D97F00B1E75AC8D4658F04295223C8C4F61B2CF7104492DC7A0C6267F45B5A4A2D258239455A31D265BDBAC9FA6E4A1812043C9112ACD4240625F08B901263EC456D2A5877BC9223A6C483673BE5CAB8C32516AD5B956688A01374AA48789D53AE79E7FB109632969E45684804DAD54FAB1E94A19C2A9B53BB9079F484C64A924C9C940FE5A3ECF29C739E3826311E42B310CF75A47B932BEF104CB27FF35D51E3FED9D724D90A17982F2897E2FD09E555977CD668E67A34216637380B8720A593FFC4175BB27E293C652C2DD8C506F4D1F551542EDBB31EFB3C1BD5DE6315280BB04BFA53CDC2D38C385C04E9BDF389745ED394147063CE3639ABCAA429C68475F9D466321EEBDE9392A2A2A6EAF53FDE20803B76B0903DDC6C81874A81AC147FCFB9FDA970BA5D87E4C346DDB83B60A718882E16F256A0F13969606C43D84AFD193D7BA3CFAB875646401547458F36DD53C2182B36B84DE8F8A50E62921A5B011319C55576
6E46EFECEB239297DFBFABEF3AE370CF48A08BC073B1E16A8C840F2604F75F39B3F0F4847A445A15F0B946D423210BD32A09A9EDF4CCEAA6E90AD1379E5928B66BD3A02C5E2AEAB56E8987B342EBFD9D3C70EED3465D78F12AF4423319B3106AD8BDBD9AA5736E6CC15C7AEE0DC78A1FAB83015CF880DE6249734153F6058F567D648AECF70D1BAB8397879617FD6FB06DA1AF6713E3CE584B214742587A5D35C6DFD33AFDAAF00AFE00E1CB516C955DDA9E0A5FB78DEF914B532F556D7E72E9717BA49A2D0F9EAFB5F526565C58E0FFC1B55138C8DE7A9D6CC06FC9927F6D04440512163B58A236F57B093E80F3CE0F4CCE282ABDCCE284057C4DB6314E05EE0A32B5F20C481F8BA4ADF943CA1B0277119DA1BBC4EA1D24CF7E3DB4C759E98753EFCABE2FB5B1FA593F2AE2B7ECEA96B8E0E5B3FFEEE97DD6C50CFE8602613FAC2D71174984C2EB59E532A35CF4FB0BFD60B8CF7CA3E7D5C0EEAD822648533D3CDE72C787DAA412FC70674E95CE2F5BF58D3646B4881D4CB6DD96659BB1A937B24795B435A103C03CEB6AC85E8FFE9C495082A87854392BEA530D61A7F60B3431455DB85414E6C365F9721ED2DD4F7C8A37B88131F5B2FFB5FC4A40BDB12EACE61AA956F8724C99DA98DAC18F6D33DB5F4A189454C736 +sig_sk: 5AE5192442A0894AC775D84419BB7EDE9E8143A6E21CD709A47B58545EAEC7F59EAD08050900CBCF4093F9327CAD67B7600D9F8425084B6905BB6550F426F58B27B47C434BC194D466D83C14BF5ABEA249E05287EC80D1B56BD0824E4EB84CE1CD4CCD0DBD1BDE59106AE10516E1150E841B03E759DFDD930F6C7BAC857530451C438E128029E436491980441BA56913074A8A8831E084641A152C40B865C9A22100800C13828C1C080A84C850221688241100D220085302695B1881524812C832881CA970D91445A122221226449106800413504BA86DC126321C312288486280A64463C261C220220A210653C8910919925308200988640A3100CB9868434050C2A06C1830482138640CA80D03339254C44D42B29014B9500400220BB38D8C269010B62DCC481261980522A03050A489DA2225A120859BC80480129252906193985182B470584481D2806448C669C8B65164B6219C964143262D1294641887111414500A166D64364EC282095C980502202683C48113A20559B02D10090DC084519CA6215C8885E1088990200E11126D182129D422421C2811214611582829482830A1400EA4400A428040D0882504C72111B20C8A148123002C53B688943411E0448140044A03A4111B042C6330715204285114864B8004E3B649E4B831D13264D814094B468881424A01430C41388008478A20378A01A8080831228CB285CB880D0A13499C860191848D
04A72192A80181248959964D0805202341511A36081012842407300A1820D39881E0164A0AA669A2462044240C54084D1B208C923869588428D1144E63920022C94DC9320800497111A28909B384234591C22452D8C220C810610106669B062959B24C11B38941149083886108C488D1462682B00D14B8685AB65080409119B38DE2A44D1A0161981489A196610181514106019B0804C3C461A1823010A32D21876098103190425252145293A241534692212332A1828D40286ED3288AA38689D1048E1C936910970DD4125184C891D4246A194609E33426D0A485481605618489124126C9B6281C2111A3860543C05158386121828C1835645AB68C04892C03422EA148065280910146852219205B3292E1440404A908C9C884E4928C5342400C181010064A1BC07142887010284D2241602393905A88455A186E618445400884139084D4406964140288006A63380E18362E98160C04A7894B964D04844051B08419450612912411B410E186895A00318C129097956ABE59E75ECDE4E7482F0BB70D6502309163C9467EA51E7B467C114BA884CBDF37291400166F7E2450D27C5A20FC1130192F1B482740712446605F49EA14E3C35515D05EFB062339E69349C7D416830F426282A4C0C0BF875B0F8D98BCA9B55BC1E199C9482A8D5CFEC68166F9545F54F3B0BF14B2117DDC6356966BE0DD3CD42FA47EEC9FE4058A0679F3EF50F7306EDF0DBC58D2020702F3C1F0B23A70C4B9023B43E7EC670EABB06869A3AA3D315E9606F0216BAFE585E0006BB6656E1C146E2396A44D3B545A1D722B6BEDD33B40433B7A9A0B91DB43A0AE280F57DAFF4EC8E534885C436A6745FDF857A093F9067BA95229CF52D0C8DF29C52736EFD4FA98FC4C71F6A94983F8F008F29178BB53CBDD56A045408151FBFDE28941F7D7446D5C2B2CAB3C418E04C851D230A49F288713F3E7221CEE86FEB67B469B076871A7CB576D423B5CE20FDDCDBC6F3BFE841C494E6D6050BE92AED653F7B387CBD662AFB7B01401609D66EE5739BC0E9188EF3F61B23F6402A8C60D6C272EB27E3C36ED15A7D95E1BB15C8DF6774170CCBF50A2628F1E80F3007BE33D0C5D86C61BD927BA3EADCF000C5B4A1BBC0889C42183A1FDFFDA0FDEC7A2CB5148953B3C428F1153E802B0E60F39EC4CF4108DE315B8870E80790E6B67799499A768A60F0E70EE861369F0B0110498B206B01AEC9C56CC335221E675F0DB77B6A2F9BE317407442F152D32E2FA57054C2F05757079D29BF85077E571973C5410743C679AEC39B51163A9B3AF4049647FA008AC5E304F82AF0A0F28E5CE3040C8D9AD6799BD4E70B7CA23F965FE96539EBE04ABAD5DF057FE26437FE44289ECF68807FD2C8900A0D3C5B3910BAAD7AF92BE09B83D0E16CC
617607623DD8A2A8ABAE82C48C1B383C13DFFBC62CFC3F45B7A0150E799868D55AB5A9F67EA0D4D35D9D2EB0317768E1F67FFB8DCDC9A6FF9FAF58BC260DE56A0165900F4C54DF209CC148CB93469C59CE5D0576BD2CD40BB0D0A9EB21A41EADEE0039D603FC2917366B7F238E75E431ED7DE076E9E72701DB873D86F73A4A99A82538FDF5611FC42BA4D899BBEE81EC55E997A697A9591AFCC2425A570BD026CF0F3DA4999D0CE7C2238037D8077B3E13290DF420A41771548FF2C2D89416FE776481236B3AE22DD2A21EF8EDFD5E2CA526A796B568AC47B3CD4FFB0743771E647138AFD6ED35AB242B0252DD19862AC4DC3310D905FF68DB2F53038D176D22D350401857794DC2E2C1443696E65D7886759A5ED653F14D9F804FE55E45DB4090C0E166C01F1FA8AA939B8D83CB3B1B65ADF4B31656CB8D04F21452BF2B6B36BC18A8EFA7A0E4AB67479844DE5EF231C8E8168BCC252411E67EB3F6598176F4D6739DA69AB952CA12B7A951B2333A991477CA51E1183D2769CCF90FA87EA2C62DF0145990C787AE739D5802E9362E5EDF969DC71FE7D2542A4DD1EBF12A08240BAB3FB4EC2126DD0C0AC740EBA06CCA2AC6ECA57E4094ED4C818965105037D1EF2E739DC2D58F2554C40AF69ABBD483A896654C5A26668691A6A8C005ADCA4EE51BAA82DB5D7495E29FBDE6DADC67929A60FA95267A1AB764A1A41B3CA0C3FE03FB5A8400130E6ACE93197D3DF50FA789F16BFBE6B292FCC2E9C03C792473A70AFE13935DF76C2E5A2FAB154B5CC5723E5611BB363B621EEF74343CE19AEED62BF771DCAAE501977CD573A412F15FC8D294A512B7B412F462298713889F6F9E7DC35A0541A2A306CFB9AECFB74C0F94817AB81C804A0508718269E5691A24A2CFAEE8D0BAA69BB30FE04C45881485B2D67AD09D5EBB43A3508AA45BA2D54CCE4E7FD7B679E735A4D78E8DB1BB1583F1C91B0330D2AD7A80C7D2BC60DB220287413E9B4DF7F68E8A8541C1F1F85D15BDD51C6902DBA86A67B2E4CD86B92E7BF58724AC5A53219216B78BD1B50CFBE9B468A667FC6C4F35A610E7AECDA8A12DAB0C5A658005879E2608EA6549999DE0C396E2E0E19895E77A68356EA29BD2A1F24BE4552E679DFF66EFF39235183E26DF4D23CDD284BE2D9D706A301CCF33D0C85DAE62ECCBACC1503246B853603F68DB14C2E0B972BE2AAE6856BEEEF83DE2227AA876E5DC62636E2E95BCE79597E01A5A3FEAAFACC205AB09FFF7641BCE3ACEBDC6E6AC2C95276148543F2AA1A4E949DE5F35F4DC1048824389CB90910D47C7ECEF591BAF68D6A456B79278F356A6C34AEBC370EAFB722F0791033708C24A3CCFAEDCA515C6DC1B83B85251A2F665B3777E1805FC8F98F9789944139C60BB8AB0BFA34D37835997
07BD44CC99D2E4ED71F31EF7AE6B8FBAAA4A5377DF40F8D6A4C1F720159E57718C78FE40 +rnd: 0000000000000000000000000000000000000000000000000000000000000000 +verif_signature: E98901A3F79293983D935DCF3A4DC9BA8966F70CB2991E6E1E5942643D37A1523FA43A15CC894A81285C4BD0E5063267D317BD1EA3E3A2F0AEA6BFAADF074926F5E522140F00CA40BE0C60D492C7BF420EEA18F2C922A0570FBEFBF07CC3D084793D4D07F9FAADF386C6D1C24AC98F153313FD1C5957D02C884B21CED24B3BC5148F9F837EE94E9FD03371342353D627FB43FEF9472C832A1DCFAD355DABD974AC185C2FF7A7CEED63124BA86696F095D419810E381FA62B66C12D2B75E8CF59B0CB76E9D0546D71D9BF154D0D056603666212ABE8910E60C08DF738EF0EE8BB794B35C6E47E7BFEC734A6451909972093BC17E080518053EDE94905978F397DACABE24A6C4194F0305C0F6839F8D9993A6F6948B9EF3984A90B1C3B2E9991449D1B1BAC5DD54A2134608851662F6F25E8ED9BCC65D199A770F20D2A28C35BC56500E4BBF0823615FCD07452B8A558F8146081861DCA6A6EE9E24C688DC7367BEA1E00181D8F0FCFECD8D46A607C08CCADC32499BB73A9C664CF07817C7D496CBB428AF3A06ABB3C4A140C681CF8A6B7216A64AC4BD3505E4AC5C08C076C9F34F7DF9157BA1141628837B0F3BE1064D3F4908FF0DFBD83426C90ACA12D8FB9C9F9955B5959EF6DAEC31140CE117D9448C813F2A940D54DEBF844E28CEC621CEE34A9F336C0294572CD03AE5EA6A9B7199C6FCD9E20BE47214FC0F7E0125A6D8B8C397CA9D497671BC41CA08CCC153AEE53ED7F52D678E2C1873D70ECA96AB6A43ACF1786BB018057F98A4311751CE6C3D8B0C4601A370AC792B081CAE7581AC836F0C0BD3F11D2A78EBC5E268F8D19355F3B4995B57736C042F1B7B90EA55899011DE84A2CD0F6BD440D51DB2A6BFD93E5ABE620BD0CF9452ED0C5F4EEE9B724B06FE1981A16CD14D554859ED4A1B205E4527B66085587769ADFEFC289D07B09CABECDB95CE52587B7BE95D6D5068C05AAC009CDDC7AB550CD49C66EF55A86EA04765C46E74CC3ECB02650797966A6F125DB780C3C507C2E2B4EEB148B243673FB5FD2865533CEDA9938821221A01A8585EF76086E429A80A99AE31A2A6D4541AA3393FAF0A092B3744D8C742239C18F7AA3FA258FEE01736D446785BB9C0086B6BEB05AE13BFE5E5C462A87C35CC5731218EF2AC902B4505A593457F553F8F4DA1533B2B144DB83963663C3EC991C6F834DD9C75B21E40D0060646E323468C1D44F6071B6135CA41EADD17F4C36FF21C5DEA01775FB519597D4F9DDDAB30CD63DB37E8F5C0620EDCBCF2C84595C8DE1E10EA2D8D7A8A2
B7D45BF84C0784392E0C7FC74FC8787C118782E57802DE412A1E99D002C2793AA614DAA5685C5D4253FADF0A630D1150DAA8D8D54F4EE0F6081E59434C6664A933167C61262FE7A16E3CFC18BF9AC42C49C5E0F9928F2DEAE020F5E105BF247E4432F4263E6BE8ACE9D8D8269B215E31782DC83B0805538BD75503C2DB6E90E99741D120951598493176499DFABDE262A16F28EA1FD183485478BD4E906DDA0FA037A7061BB28C7031BA4DE21CE4B1F736B1DCB12ECBB755A1DBD11469A924F417383E0B0AE22739C8888477FBE24336C2551040B3F044B02F28D4D5C3CB4852871DDA6584303882AAD3DEC7D6436E0089F2E2E4F6E2B5FF628DC565CAAD8775C1DB24CE7FEF3FCD524821CD58E43443D1669F3E4DEB508C7A2EAF0AE8742C06C537520EFFC894EE1F58EA2D335E9B8986783CFD4E3687D966A03BE25A8DA8F8565FDCA33EF9F5E8444F0966F73952354A985B61C2A4520F7758721FE9E667EA36A746F999B6E1906747555A361CAB9603A68795B065CEAFE7625F3EC8CCD9589C9E4800AE2ABCA461734BEDF46582E2820F129E4030D6A2E967BE349BA20DDBC5DD04EF8006351EB39AAB145E0E5FF648491C016AB7CA26E5789BB184DA0824F699AF597ED6A252C7AAE5F60B0119F74C439AF414545B2C1299DC546EC0C66589AD70C2DB8EB517E3CE2485C0F13D96E699B0C7C3AE607619637FE5732A31DBDE4565A894478A00DFE78BF559E0E9BB65125B1CB5F1A1A221C9C8BCFA3D294646AA9E21F4EDA1825880FCB7E1A4B7F42522442697A677ED9F3777F8CB8BE61DA12BB182C3148E9FA395E95311C0C374698C462407FE98E188FAB09057099915732CFE16DD35909F912A42E2C5EF0059139DF8AE6D85FAB29B33973E40EA0C592B066859611BDE45C96F2397FC0FB5F07A2E8F2177FC2C8EA2E3F357B5617892B071F6366488D0FA27FDA31D674997BBE9B0234459EEE7E5FB606B405448DF0322782B55D5DD83459B55F6041E62D47E2EA4389A2A29F2C4FC27CCB0AB9907952C9678650C57C0B909EFA205CA27F25C13FCA3E572E484F0F7EC947CD0955C1020FFC6F1612BDA856C8E540E6A41EA9D0A84C7E9DC6255BAF32C3D135A299F00354463C3E1CC0B3EDCC309F8F046BD329A19F9B747828807B958FFBB7D6F05D77B3D354948CFA9215EA79DD62E0EA95BFED33F9BAB1B92623AE9DBD4512B4A4F6B8C08A70CDFFB6A91609309752707C04D183651191B1C3964D6AE450334E0BFF52F7E1B64273955056E944D47041CAEF51385F01F0ECA4973BF2A56FFD8ABD679BA6983161B7EAB18758A5BD62C61693BA3C724395F5FD69D94E242C6F174FB26E7BF0B089BDF92FA03C4C1E663C99F2E22A23ABC3810B0EEB6953B1EAF7F2207616400CF79E530
AF203873BCC0DDDA656BCAE9FBED9B20F870877002DCB79FFCD2CA91266FC8F8EABEBDBF71F7032EDEF77C3FDBBA35161A45C22F1248106A4AD3B2FF1132A24891FBD3E79DFD2E0F44F4CD5EFD5D07BAA5ED8E13A406FC9E813460FAAA196E4DFAAAB4A8F686A59ED6753DE8A7DFF287BF24BFD60FD7A84A431F80D84062F4157A4B0712B89A3743C80FC0B5D8BA8D4A6A6D113C029F5579CAF30A9E458BEF1903A989753237CBC69109DC435121FDDD10892547F41EF4DA88F35CACD6F5199E9973682440ED4AAA0F98106CF86B30766B31C55DA18EA8C0FB67B0C937FF4727CD8FE157F6E96E73138F1BF394E7AEA8162CBFDD548F3F4633CBE99CF91A9364D4C93E6ED58167670DAED4358D517D9CD0C9B5F771EB2F9198F91D833CD92ACBE244C6888F9577BC8CB4B514E89C03DF1AF38CD355426A21EDA4EEDB8791B41BB13ACA2A5B6637B9985C1B19AC3D5004B177B8DB3844D3A7D0573CC2F4237A712D2D4F42D3429C13E189913EFD4DC4B382B8E14873C8FB7D3B885F208DAABDF186601DAFE85E02D2B47DA258B5D554C2ACFF9D12C2260AC96D5E150C98A226E212151A49788186899AB4BEC6F707203D48526F75848C9AA0A6ABB2D1E200181D2C5758606873777C85CDD2DBDEE8EDFF09162527435861626AA1A4A7A9BFD5F900000000000000000000000000000000000C1C2F3F +verif_pk: 
5B003CBFAF3E5166A85F8A45B9C1A4533FF216FB226CFEB83A81A20EE6E97E540FE2E3C6E44262A8C344330126E881551371383EA34EA2ADEDAD1185908B34905B09FC1E1304BD96225F36056C1B2099C624E770227D1E7CC310EC1D24A8F034FD91CD01FFDE608FAFAE157C6589DBD5F63DC8F57E857844AAA44E0B644E5F6BD684239D145F3D45A8454BE4BB588AEF4245DB3A0BB949322987B9C40A7DBD37A4526363FDE5EC3778C1F72E85230187A9E7B35028C3EE5CF8AEBE8748C45D50E8E22A81E70494C11C276375A8AD230411DB26C8100F07C471D69095575F09A1EEBCEC4C9C0E050B84DF0D4F95E558B921DDFC8F26B2067680998E9C99488EFC128D5BF927ABA361FB5E9CE7CE32C0524EE88FBA0F3BE5A1A7C55AE6C518AD4C7A33C05E2956CFC8DF6EB2A81FD1EDDA40F67AECFD715E4E3DC042AA939ADD3D275AE72CA7EB85F0F0B38884D0D7DE81CFC8487411F8A84247B82B2BC28F76BBA2D80BB7009697A7FE729DC123D11E695E60C024FD31D5F94F5C3CA6B76A13B7537FE6402DDF86EA6D8D77718D03B32505444ABF4BE76C01FD43CC86CDD5736E469DAF3B9A239DE67FD4C4D99319B0BA690C424DA2F3E68831E5CDBA77B49F20D138A3CAED4EAEC8968F33169C3A2A7A28B39B875A6C18F5A3A7F49E6AAD46D475C99FA980BCA322FC69633B576B30E1E98F771412A1267F0C82653562F755BF39DB93090235870598E405BE0F8A58F033F19BBDF126639FB85D6030CCBF4666384943E7F4DF69EA767982D82147D8F52955EBB3D3A8D90664AE9B9610CE8A8F66454B2C518BE42853BA93C434C8D9E072726BBC8EACE2A218F6DA2639737541FEB2016E3478E3443442381A4AF18004C49198D2A87154DC6D8975BBE20C22DBDC1B95DEB7EB74E61E5AFF999833B746221FA13BC442F9F25C6035861B7A5E75615CA6749716E8CEF56CAFEAF04CCF6B824FBE295C55C2D796AAA7992C49C4903D362AB50AFABC4AEAF6644106ED2F749CAE8C970D4D49B79A98ABF6CC3933563E499C07B52B80F963DE9A5C54FDC1EC4898B7713630D0C6F4CBC84DC245EFCF821FE382E0FA855AD32E3B70D38772341C0C3000D2595F749E26C5EA692A0BD42251A5A9C5653B995EB6B83A2FB8635C0EC0E29F9760A4684591D0CEF71D46F529204BC56087EDAD2D37939040F834610B9BD3B616C87E43B49E8B38D21B9E8B936B4DCD90D73C12F3F6F66A111738B16A2EAEEF8DF66C61E78D29AAA2D1378DA0039ECD56CE7231A5ECE7EC2ACAC469A42B9E323AC420D86A100DCFB78B1716A0365DA724EE0EDC9C4E1115E3BE71F4FF0794A10AB766D49E11EC1DB556816627DD54BFCC22CBAFB4F2A2568192638B9789E3020A2802A4B90F9DDAA36F650546D41600B2F33A86
B1B10E802EF9B7C4DD1975970A930B0CB383E3916728CE1E2C698DB19AA027CFB620D88840C51DF17657BEFD3240510D95A0B6C480FDC1A1346E00C195CFFA6BA7822625F9A47CA029C29858F07AC0A86F4A1D0792635F82C15D5DFB19CCC415A2B9556AD67B1806E8AF681827B1C465D8646B481A00B7AB68C7CAFC623656D2FDD9431EAE0171B96885140196EB7F1ED76BE72AB001CDE7CBDB220DA5EECFE8D341EACBE12878011CF85681AE8F4BF0A9E8C40087FF51692207C57E0BD3C1F45E90F22B5D139008CCCDF10795EF0858C5DC3F6FD9F78858DFCA5D81510A9682EB45D0E094DEED0454101B0B28EAD2BFB7CF1230994E03BC98CE986F4CE7720577DA9C8406BAEFCB3F2A8B9EAC2D1A5C598A076E52BCAF2951CF1D4339757F5C6C522C3BC4B93C71E47B639A6FB8FFADB117C191B4B9D0D9 +verif_message: DBAEDE95F7793725C9DB980AE6544EB2E2C4FC165C28A12B6EE675764F020C01C048BD0DC8064612E4B6858FB6871F71D104ECC4AA0FB27B9B79D1D95EF34E1072743826CA9E4AC0F1DC608D75695F1D39B5BC2B52758ABC11FE8BFCDAB36DA01B713B1434B9FA141ABA354EF1C50220757425B486682DF64FD3C584DFE147180657C15E6E21A9888219BDAEE8FD883A41177A6F6537F4DBE6809A0334D54582325C80119B6D4B37D45CDFCE93683FEFDD684F180119B88558D4737FCF1815063A06C0D0CC2F653DA98C272883B71BF463AC57A104F02C1944999E3788DB99F3F26D752F8D286049D0FBEFCA4BF5E1E5765FD0E3DDB9B72550A725DD96F2E017CC99937812D037FE476C613541DE88498A2CB72DB2120EA3232629709F551C4134372E58BD6EDF8366FC5F00DB38DF6E281962CB5C68FDA2CC4EBC135D438AE84E908E6DAFF39AA1A7E09785F8375D3E9950041679E86DEDC7398798EE624067A7D31E313A509E16BC25564DBD96F7FA811A6B5128819CB35396FD2BFDE8200EC146192AB727516FAD0FB85613B1C922203B4CC0617E076BDEFEA2A178DA9CFADC2044A89FF9034C23201F11D3C8B3EB98BADCB3E767812D8F71733885B6B6E13BBDE5811CA2DA120D8529FE5EBB21910E25ED49364F8E17EBD49901C0F235049258C97BD24186E5BC3ABE0D1FE6C448739760CC586BE39DBBF9043FAC6DCD5AAD1F5CCEA91994E75125F24DE6CC0495C2017EE37D35263294D1DCA2903A571D3511A1E38E575B0C1469E0B02ADA0EBE331CE290DB49F353C1C7ACFBD25C715D7B8154310B1042D73CC78145752A93B07BE7D1125F8B122A38849CE7AD7B69D0729822333DF209EDE90783CE95039E856002834F09BE1F41C213012B9569AB2F0AB29FFB084BE293B387B823F62E14F0F38DE03E4DE40F5A753C71A00DEC36750855A1771A06FCB
D8B8448C67F08806812B72FD7C56EE3FCE1EBE2E2DABAA9196A2FCD9B4D479D553229D7C69B359ED53BD7132A2129130953F5EC0753703C202649F6D218776E6FB023A1188ACE6FDC49FD56BAD40D7936ED945FF0C5403F24377EABB1A3D97ADC8916EB8BF67B7C8DD0A48F8C3E62BF1A12A009FE4A3B3C6D7FDB87F64FA200285C6DE922BEE5C5CD28C0CEA9ECD6740C5966EE76C948195B626830725AE7D048955339A095DEEEF7C9DCDE9EE169B2FF233AD7213959231E74BF2132CC60566AB84806910894A0BD2ADFC562DBB4F64722280FFF3DCDAB54D5F96826DB5AF6BEF32068343A5F22F55FEA30A417C76B620BADA6A44B09228136516CBA30E70ABD4CDAA603A0FB5EC5E1268E47665D5AE9FF70468A3D19283A5276ECA45A847FDE13E3446F1F17CF057E581E071FBA06AC4321880B820C4E01329EED052A67ABA632B73896D0BE7C6DAFB5BC674CA11FAE0F1AF7A69CAE1C43A81733186902192F06ED2C73864D68B0584076353DFD8FA10F0556F8652C04520EF5712EBE2E4B9B4E62E308DB848D58106B1EB82FFA1DD6DF689B1C92C62C237200A38BAD10F5E622C9026329D48C0BEAE51BAE803DB0FCD68FAB0E0C1E00C8F0990D09D44DA36DE0C5B8BFF16D18E1AFCC465EDC575C9381334103B0098D209141F870E51D80813220A6408B4287C12228BC4403846E2A687A0DB1390BA6CBA6F0A16ADCE5E2EA6BF3FCEF7DE0DA0915638916AA437661F278A2652DC20FE96F84C8FB76612FA8B11D6E2FF0EACA6F1B1A4F680B4537471C24CD878EA34D725E6CB37A75F97DB8A4AB052518D798B0A8EC06A7094ECC524251CD06F5FAED9439299B0E988371E0AF0BBE7E97B1B601068BA3950E9F000D50CDADE4018A160A96ED4DB2FE500ADDEF749E1056FE8C6C9CA82E8D0FD9FA7F5EFAE2F196475BA73D4031E5BAECB0A83575D203D8CB9D6F49DB3B6FFFFA0864FEF2847BC36C3DAA3F19987B54784B84BCB8D5982FA6BB7145FA5BB9726813E24647D70C23401FB7FD4E1CA26D7E43E5FBA1E27FADBAC64D1B8004C201C7D29336DA4BAC7A18A850E42FFDA6E7C586B94021E3FC989864E2A40EF0233CFA7DAFCCCFA519FA6D8690D6DFA6948BD88D904BB9E6E9B9949A65F13D8F32F910C40A5410F8D71BCBF7B71716A73271D8355132776E6C56748CEE9ED6226FE340C704D1FBA5F58204AE56673C148B2C6C7D94020BE854D49F0E3FC5CCBA6981CFE3180521469E42FA3B458922F9BE4E23EC93324FD73BBCAB4C43C70E8FEA1D232A92FCCF4D7BEBA195D024C67A66E93F68618FAF32C943DB3622FBD22CF777084F54B638C774D6D3D8C91763C20693A92712456D42DF9D2FEAF0ADB1A6D9B4D500AB899A1AD2213A7008AA21E2FD7D00879FBBF765EBDC7526B
8FA2D13BE83FD2717945670B5D73E96445A948AE028165CB3A73D3F2541238CF40C1B6EB26F33FC8F69D22BB899CEBCBB739BFD073EFC6993E221BA2EDEEBF35922FABE93B254C438A12A22E0BCF74D149043BDFECBCBB6EB1A55D928150F601EFC4DF082322B83C8395555135CA1936D690CF3ACFDC19E5164280905E3F3C5F62155184714EB9F61EEEFCCE338BC0217045AB2910F4E9DA330283CF93DB2D0B4EC2D81877266785BBB52AB0F81E0A06A7C5B736C1F58D234094F74DF7EE550C03404E9A192EE63163C079DF3C5ECA1214F20A2BC0683C66F22AF3F1AF532F5FD828EF3F2F9FF1724FC2BDEB2E7F706B2A219A2188D828D57255300CF6E29A7B992BED0D3BC532719B475A08D759CA7A888E0895683027D9A3AD3FDDA5F021E5193844382B1A963945AA49BCE0CF4231133C585807E0CA86D4779564801C05F2E474DF49A9C0AE7000D65B35B0896916CF18D584B24FC41A29AC67103A240025D316BBA272ADF68F06CFB19012F846DF8F5AA45EFC76B89FE08EC9CF0DFC7CDC63A392B83ABDD51DE93728996C99812052E83DA07DD3026A15E4186A19D2A7CA6232C9F54C886423B7ED35F9E8A97B804F401AAE8B3B7DDADF5D6851C4C4DD4FBC15D55520837123C11499A6F6B4C024F457C3D9AF50D7D82E31F433101834930C183DF8427922887CC36D9DFEC570C204E77E8C54119EA5D9210F82100E1C1E846F763397ED5DB39B7C086F3730617D91413CBEDB2AADC91C95B7997D45C8977D8D17E9BF86B97E3E388BBBFCD1264216C3BA773030FF49ACC1E79B2BFDF73C890E68A1EC42A638D2E5F57 +keygen_pk: 
C8BEADEDC6DBA5BF3BECA52C67CEAFB4F3EBF84190B2CFA6BCA132883129A28BF331E6D638B1FFFE8824C347E16B9D992FE95FDD825B68A5F54CAA876EE5A27E0FD5B5A895A0267F4B61CFD52E1E4EFCD108167C9AA92B781D6AAAE3C7159D934C1DFFE5A461D4C5E526E989D391DE00A09E1FADDEC8B774384AEA3C24E780EA7AE96368B0C2BF70A0E99CAD475760CC7C44B71F50E355CE6372D7EE0735702A57001A0751C10D6E743AA74B69EAF732E359281F3413D4CE40B59E106D3D8DD3564FA213FA00FD25F5E4EAB27BD774F163A8DD94A6861139D7FB07FCB1FEAD2C55452518CD8258187CFEDA4FE6DC28C54F404E14086E12773BDBFD0CDAC55C9A589029493926F07EE52C66426441B6CC3A56C6ACCC60958614A511304D57DB804AF7E5CF78C44AFDC6F7D830AFC2B1E98E113631AD4D12C40101CAA78F2669372B5C6EF96115715B18F215CF7A68EE7518AE902320F8DA67A354E82E876B5018A259A77CDF9211085F6407C8F46BB15C3CDE68505C2D0104D65EA71939337903E0303AA3CB424D1354AE3CB4A9D8DA0BA8378C2FB6320DE562DFF0ADEE6781815415E1BA48FAF1B8DBCC67BB0F8BE31640A392439AEABBA37BC0C9821D4361E4D04099D9670CDB74B2A2BF7E31F9E09E0D344D6B0CBCDFB5A601F789A27C75956C1F4E35D3C8E6B06A6741D64432A62DBCE5860120F06CD40361D8A2F7755E8A3DC1A408B054582A6BCFD52332DF9A5DCA40E8D2B021F180EC5996AA7CEBA2991286F4876FE9363B05C1A8998BC97D1C2D8A0F78ACC27E32A5C9FBA7D406DBC77FE3855E46DD49B6F8CECD6F69F4AF85EB1D40D1905F0C3119E6ECD0E9699EB00A5BD7D2D8AAEDAA10DD2284F959584B701C54BE425FE9CBF4A3BBBAD5CF726C64EACAF84DE406547F611626AC55DC18EC0B37AD8255A93907474987FD106F169D3461CFD77E355C32866C8CA18485F535F268F61611482838E25A86B3A60B892A0AA6CDD8A6354A16650E7BE46EEAC38B625ACF388A1A6AD039554B1A0734CD39CDA81075CD80EE63724E7B1322BA4820CF8B9BC5CDAD1C69258A20B888D2E6D7C6B0B3C4391DE103955FD4AC48B0EEB08BFB543CE01F88201B527EB8DC54C46172C2383445119C896A116F8650485D7CC5193A2F5060E0D368B0C1EAE2D5D743827433C8730F1A61B8C1C5093F2D06D588A5B2083517095AE4EB63394CF337281EF2C2E14A9DB625B3D538D5FAA4B6E9F0DBE13F147FC0260F5BA78716170761408D59B79794A191B96E66420258C02839213164EC8D333555B3C91FBAC4F33FB72976BB0A7BAE88E4FA5D1C2A29FC5F938F934AF5017CB24B32278401E177A6D4738D0BBA1179EDE6CA2E3C91C26C89DBBC9342FE543DCC0E21BEC793B3314F21DD4DB9AF7392
7C38A9D30252AE0F23F88FB8BE43B9F91427970B498A60B393165233486340FEA8DACC7EFC178A1EDE2B1C95C244A962F5CBF09C0AF6D3BA8F087C9483B75B675B99913D92F023BED773CD3D5EA57036C1D0DFFA33478B30B6AF1B9B0EE03D274A4F6B3A8351405DAAEB47412118804B47756626E60F59427F4122DD836CF6CB0940680DE45436EE698BB58E6813B580BF5D1869F2B3D0900CA958277AE0E33B605F64291DB4E4F4CB7E0954724A6222AD04CD37B542A3E667B52D5510AFC15950F44813A037A6FEC94AECC4C3F27912D0DE5456A64E506A11801B4CA224658BDC41A0A664B174F89F1417F99000A20A623F5B064D8FF58C61772F49BA4B00E9705FE11D6D364488F5C63045C914F8256F533F11286FAABD2F7E2E16F7DB234B5F807DC14E782EA141032C0D93B82590C4753DD72558DFCF +keygen_sk: C8BEADEDC6DBA5BF3BECA52C67CEAFB4F3EBF84190B2CFA6BCA132883129A28BB149C045A55EADA0C519069A8EE0602FBEDA8D2EDFEA09CAE01D542D47DCBA1E75A821E4FF2B52A3AB3DDD0C77C3A9F96FCC9BE360C2B75C97D7F9DEC97D1BDDE028D36C4FE18093AF6C5794AD19F9FA090C19A76F05A7F3B930B11792A13A7A5CC0014B440814300C2225826042718A40620C25418B381163006402848DC0C07123434C0C80052323101BA54CE08051CB18440009100894291047281290292495652229722017210C2426990466610221002088DA926C4C2664E43822A4148689244809B4494B844D124270E1328024923009228854A849C14842882609941686C0104910B84101222504972911B28CE2202C83148E0BA72058C209D9848CCB129124C444D2320264002DCBB408D4C60121464AD430101904420B444D14B6881A85655C08085B284112080C10B48863A26D82384DD9042221412DA4A29084C62D0A228E42C000123524DB448423C1081997401AC25090B465D19241403012D8922814328499188E8A92851A1590634292E0282A43208C2049881083841A144D22004209096C4C4871618661222104A1C02D42484564488420826D802064043900E2306C138948E0802980160E0830420A1044024888A3122493A28DC8420D04438501207292381014B16089060C603230CC222AA0C0650047420C8040103331C9948191C26148846C020580E4886D8036208CB069D9104C10066258242A4834444430400316125A90649B4691408060A00690CC04089A86256336888C4266028965D2346060B80002341220090D0C100219363152A24093488ED2B601E2C820508249CB440D88882D601809A4986962B461D4C060C4206953408C2004229482095AA04D813645DB106E23824D14454120368010130E1A018109C010A2080CD2820D0B0200090352DB3
265D3268DC41642001961188230C1C870A4A46DD9A851E2180462108E62240D9AC805CA026552986D5196205820104AB26D149301D9264C0C4905524071422660D4A40CDB844CCCB6215C36710C208159060C9C9868498080C3264913894D1AC26112C77010C73113108D4A082903B65150B490603262419251C8B088C8388DD8362C4A126C62302598A8088A48899C94900401069238401B1564E2A02DC8949041826024A270932006111005C82248C9988C8004219C98891A914442A21112426A0338699424528A107114A424CC206913932D8C0820D68932E6CAB4257F007558859915D3119DBA40D5E23225C1D606DDB7338ECA33285F893C62979505D91BF0CE00A4AB85E6A56400DFE3C3AC6DF1B46743FF5CB7279FC0FED4FC7DA7546B5E8BB48EF3EBDBCC32B2ED9F3F8E42B67960A10EAF01CD7C84012701895C0B43E5E74065B231167C9B19D7608748F662357FDDA261748EB44AFBC207146174D812E41B69B177DF4EA9E9ADA8C541845751F924342FE5B1209108AD57F90D66AB9197B135B91A64965CDB2AFBBFBF74688CD93032A82852A20CB69C26B0D5E000799B1EB51EDC006A4C73A6C555C1DB95B7F844AD8E3F08B0489B4FF720D35D9B0EA3E782D43016D4908885D749DA0BCCAE012C0E282E807E4AF842C4443A0527D62D769C1D09CB755ACBBEDC9453B08AB58F6AD4999495AA48A32801B5282123AD8097F8E3349A897308C59B32EDD7DC509E55B79AF2D7EF85E2741AD010118B1C242AEECA7E405EC3CCA9F951E44E82254DBA3CE2D8DCA0B1F8485CE1FB73EEA1F23CFD71044CDEF4CC05CF8FD134056B32E6524DD950E8AFE9155F5E309E0D9304166A0EC11E7068EE9C2AB61659709AA9CEECF7C3474E3061B9D580F9883021075A02E5F10ACFB20EE611E045DA345BA6A3DBBAC0456B6A3BBD00AC196E920560125D832514A80A68F0B5D32A5C062FD785ADB47C087A54AF90A25AADE3573F13C20DBD8A7DBC23EDDBA8C82177C7C9E3AE1AFE144343E9EBFF5D0D2F4AFFA2181E75BCE2A20D8055764991CFC45153E8DD19368307EA502F1E9FE9ACAEFF83F541F9D900749259F5F7FB52AC73A901522F200C48AE427BDB9D46A8655F5752FE7C0B3865CF79BD62538AC6D90C02CEF067E3A3F1A37DA28FE7C12CB6AE09037CF6BE78285CC38039738191CFA8264D192ED68D758718B90542A0C420C993AAD236600A787A11176A5328940600DEAED4473FA3E4E246D9AA629A5A96C755027D2DD7B42A10C2C1D8DD9AEE4529AF5F52E2FC3FB6E94E6E6400AC053EBFA0ECDC7D7B51336339AAC992749471629C487043853FF373EF44BF57A122B2C81629C2F530EAAF5FF76B42DE24D4FB51E87EBCCC15847F13E9B6AC5ECB4C44F83AB765D59A8FB9DAA
85D13A80D84A2D151DC78739B54D754A89E88EE87331CD2845FC952716A684999081B10CFD3ECABBA346E64EAA328C510EEA170C7FEFF023926DDDD59C52DCDF1D4422CC9D55950371380397FF86260A31806FB8C350B12289FA0D7898CBC09D753CF2FEE8358D364304E9A03CAD19D10CF091B9E782650AA3B0EB6DFBB0D833EFD9DD307254211685880D8D4999A52078F4B5D338494FF450AA761EBDBBA4575581E6DD793CDF79FAB0B50C69ACDA5CA67D212368914F1B68B9A5C3F63C30F7ECCF8E34D5EA40D70F49D6A177F878821B5DB7337164E49424F29138D7DDC67AB91E4AAE157F395474B9502AFBDA2BD72D2F9551B0D3D41D55DF9477F5DF5542800D1681409DA49E30D4C05B38A678EEB5D78C6F602ACFF96C5BCBA08243D6138DBF48EBD4854F0D97AED1E9EEFB381016D873570E3B3BA942F17C2EE97D2BA99D3470DC73904361FAB66FFB9274B3939A15B75E798B81FF5E30866DE7D48276D572B71FD4B241B43FDD98E481C4AC45BF37D3C72F5557040E6E7FAE7736EE8B79DB8CDB3F976D4134669A5C716747D50E60843DD18BB1C540B92524C933BFD8E82DEBEB47AA964CEEF0BB5402F8A6C5DEFA8DCE7D3D59ADC93CF2840316F12124D36F451AC068539E3EC98B9D97CC4AF0330E733297BA72BB6BEA4445F54CF2220B297C87C4036F428850DA5D5097A54F588D9D0A393BDB16234517B2D512A796D95DA11513FFB797FF3000A272B318866693C03CC7EFCB5ED9FC51F2C5462E70F15124F983C5AD9627ECB46CBEFC9B81996E4C5FB0A8C19B71CA1ECFB2694D90746758F0AFF9EC625F0E10120428A20F92FC8FFBE955605D497FE0DF67A8F324C8CE22B3D2B026F632F34B7C2EC3BAE0DD2B30957633F977723FF4F2FF22109692A60C0B03C628A7290E3D6F47CFA73D64F914416C816B9EAB8EBF280627F60628CAD1B607F3B9D05860E48A632FB34AD55601F296D45970399BA2FED0E7EF1DFB031D792A192F685BB448A86791E11EE86DE1A789A84128A553856C69BC7E8CA59A3F8E93E088A9778F2FD59559BEDDD2AE1C258EA8114B0C2C3AA61F821ADEDEC72A36E941AF1ACAB8F1E073727E55F8B15A9BDF578524D7379E369C44D1918FB5C931487C52A1015095BDD5F72928A49F6331561393AD6EE55BC54B501747EEB0B4A0BC7074EED9CEF04E140C86F543A05C7D8D4B9C58DC900C11BCBCECAED16A3816D304A934C492ABFEEEC +sig_signature: 
3202542EF1E239D32BE1BCE5AE4AC8052D578899D653E368E11BC11C5480BA06FED24E83A4361E358121DA338108794DDBF93ED0FDE9AD07C50F983BAAF01E985F9A6F380C6B14148AC829B67467CCF9F2A16D2594DB895BBE6774F4DF57DF40239AB2C2FDA9FF98F9B1A2ED58F405631AB0B2D62FE573E82961B1495ABB2A572CCEFF02696899A6778F9127E1249AEBC8593EB8CDE2780C5E503DF4C6D80C449225E7A96FDD199973553EC15D964285943207447F1D6E8575A38522270AC356E200C1260CF4B5AFF21FB5EB8DD724E79029084DB5E26C1BE84D19C5C0DAF7B5D78384B57D3B915E37492604984016721CD3D532ADB65B3E7F2D93196A04E05303D3ECEF7C5FD110BBD61C18AA94C4C1606C4D542C82A55B0FD7E824BCF900DAE8926D6C8C1454BDA5C5C4B3788B96BFFEBC2A771F999FD1A2413FCB5142158CE3A8D06764931E1F465BC5D9F8FA18867650A2074472C361038B5B7BE8ECDBCB65268AC7AF4B53AC3182B1AC7BB651AA40AD4310AFC80631D1DEE6FF01186E4AE1E1B1797B8A6281F8F72EA57B77DE4C17DAAED4EDB6F93714AC56A04B61A411CBAF2C4DCA9A01767FC5644647250ECD8BD20C2168042A61477CE280FDC8BF91FCE5C7C525A7A56555BA9644E6BF90C8DE121C31CAE7178F56B0B2A332661FFFF4457BC6BBE68A092D70065CFA5F1022534B317CAFFE4EE7C19FE1CB374FBB0B8497B0AFEA62023680D9E54B1782079C7E090EE27FC7451C030DE8438C697C9436117A21A68B0615AF126C1DABF41EC1E39A4E488AD8EEEA11ABCE4685B7B50CC9B6666CD831400CA28B443675E8F93107EACEE1B287CF85C6565EF3330D07104A72E69164B2F64185688C91622775AD7E27F563CBF45B6BDFF3D9A2A8619DA4E45098197B0E72F559F3B60D201C81CE35DD3EC4CD7F2970F44D2BFBCF28D77A483E887DEA60E0BC7AFF1F1C0B0E703253DECE61B981FBBC9441935840025DFD196A1E3FBE956BA1756AFFF5958806F76CF5DC33C1D7E9C43DF25FE21B689D4245550D1F84BD226CAD79E48AD242B41CD10B387270628435A398E9CD217ABD388AF6154EA7B584F44A9D00870968880E313A2DF84EECFA6473CA3CD444416E6950F78A8AAB03AF462E07273AEAFC2B18ACDCDF7F0162963261C0EF0184700F4348E81A21404DE8154B5A9FF7B3EFE257556A480A066A13055E4CC01CA4C4F13FA74A33B15DE11511BFA41FB2BD70D6B09BD9D31ED6E3DF746411B35C587EF37487D7B234C4FFC4FF4D4875CE93E9D8C5D68C079A066B6D07D895EB61977BEC207377903D97877A04FA70D900C721AD009E0512CE023ECC6999FAC6BEBA12BBB7DBBC2D2C631E45018477E014E1206D17ECDA5DC7AFA24E9650A6AAC21EDD94816100E411C5DC3281
1C7628A5D0708309B1E2A6D8400D83E370423E76DB209DA20B8F20188205BE7825A9318E5F75CAAA0CEBA4F3CA801074426968F8BC1935830B7A3DFC07D3AC0F0CAFBAF805127604EDE45AB5E121A798341E0C59CBC681E49194D9B6D24CE563E38F0425D145A31B799AF07FF0F4D1E4E50928E8DE470900A9DF217AD64771F25165987D7524897B1C03C6A2220A93EE2F020BEBD37328C0FBC0A751F9601AEC1878B866E168BE948DA82CE3A80DE14E0E599558CE1634947335F68262B4F8FAA55711AA91192E9AC3DE2BCADD6FE6FC40910E9AA060B23C2FBFD838D760B235BE7902718491832CEA6910FE7A712068AEC8868524FB90E60926B5BF3ED8D4A10726D78DD64F8C83A907F8BE398C990300119A29B4A7756146E77A3D4C352FCFE015EB3C033624C2CDB43A9C1C6FA803ADE04F7790DC15615FE67D176B6826094ECD6F189124E307BA001D7C769F6A2A88B2006F4E300E85FC2AE85362F375A8DB00F23B12E87CD5D4EC5D635A8C8E974D4D2186FDC7CD73A2D7DE4F6D45BD6B9A6C6F6C24E0953FD059DCD455F1F9AD7FE01BCD351BB7A28D36C3A7277A544BCC3CBF2145D1FCE6AA1F278427026B79513010A07DF441F99E6D36AF18DF7255E46A3118B4429028D3C93689C28F95C407346F27AFFDFB4548BF8D90596F5EA3EFF931E37903E8438B16BEB8CF4CADA30EAC62BDE20BA3D905AE77755498510DB493BFBBDEE2EF1AF7C57B517D9E9DCE3DC861E151262561CA0038FB9CB9D44C421B9AFB4C8E260B08B257437998E8E60904777B86B4DE3BB33D5894B66E7F7DD1437F5B18C4D169D910B2AE5EB9F114680C8AA5C9D6A56FFF02DAC97430D2AF8B1AA166374AF5A4BBBCA6BC7A68DCA1BBF09F578E49B646CD23EDFE7C9002320ECEB200E35244DA38408D6C1AB67FBFC8F4D1E7FFA1183778FA3A596E507A42FC99390F4B78FC53313725E8E70111798DE45160B26217AFB38E81C315ED732C63FCAD5ED861D5AE901E3F8E34A011A2066A432F016A5D7FA626A6B09C3CED3E5530380C990F754A038243761145A591AE814C4FEA5F62B8DB21DCFD8E37C13E8FD212893F0DA9F656DEDE606F9665B1DDD250E4C0C3FFD55B91E4D62618DCDB859EF34EC7D99F618DDC754389A0CC397697BBAC7EAEAB94F93B7348CA02BF1C8A039162BF3428D532C464A785FD793875B89BE8C920A11B5BB54A290BE14A725065CDF92F102575BFCFC8325B6595DDE6E81FEE3DC65FACFA6C20D6F96F365D0FC6DB45E4AA9F852CD6F4A2E2A6DE867873E5F5B5B64449318A007EBA3B1976FCEDF4AC097D7F1033D6EF45EF2261C1D8CC696CD0EBABCC31E1646F9E0CABBF65806969913CB4EF213924F27142E8067BC51BD9267F460B3BB2F0B69A97CB47FBC25E4DD56781E2
AF1AD93CF4F1D8B9136D8F144D27D6D6DF6B29CE568865E5591668E8D6FFBC4185525D5486DE774615249790526F3F4C29A256EDB6DCD41500C0F37140F2114A9773941B8BB50606666D38E76D6A6CC7300F6A5184C428A4866CDDDF175F98AEF74A8243587C436E9646E284FCF3965DD6CAA444A0AFBA1C04050F47AA539AF3A4E5338797093F4DBE2BF7919C7CFAD9517C15561E679428D1768DB77632738C44BECDB3686D2C793A5B66384CE3C57556B59454BA5BD9E9EAC1E4257735860EB7B6478804F23FC430D411A4A6EEC4AF1EBE026569C68475083058E8574D4B33BE929FDE5FAB363D0DB7C7B256996815AB796E27D0125D41B05D229F8E532DD93089EAE920B20313D286939732E0BEDCAF328FB6ED0D265E33ABC9B7BE7B87BDA7FB94C8CDB1F0726ED51BD7323682F473B3BF68564F1CB7FE7B67F38ED3D5DF369C9171B30040886D5105945CC2E3D117203A425F6263656C858ADCEAF707212E3543465355575F7080838494999FA1A4B1B8C0C8DB2527676C7686A5B6C8CED3E0ECFAFB1D5A6A79909EABB6F60000000000000000000000000000000000000E26353E diff --git a/tests/PQC_Intermediate_Values/ML-DSA-65.txt b/tests/PQC_Intermediate_Values/ML-DSA-65.txt new file mode 100644 index 0000000000..7ba88ad5be --- /dev/null +++ b/tests/PQC_Intermediate_Values/ML-DSA-65.txt @@ -0,0 +1,11 @@ +Origin of the values: https://csrc.nist.gov/csrc/media/Projects/post-quantum-cryptography/documents/example-files/PQC%20Intermediate%20Values.zip +seed: 72C3C5E0CC9F332F49D0FC0FD6399DA75645A3E33DBF56F1E96897662D0A9B37 +sig_message: 
4D3C4D952A1DAC151736AE9D0AD81CD37F7C492539FCC916A4B2251309E06CED54D7145868D145BC8D16F8B364E5D6026E113BEC4824F6BFAE3C9F6C8B888EC6B8254CE59E9E8E158EB04077841243EE3A89D25C0B30B4A64B723697330AA87663B20EA4513189558466D264C79088415EFA09DE9B64AB01B640B288D1D5562ED744E80CAF0B6E2C29FFA1C321129EBC70CAA73DCA7DCE899571260B2E7DA1B47CEC5BB00FD66AF8B89BB84392897BDD1A5EC7186670DB81604E0B949B5AB1D19522A3F0828D0E77B300A63EDCB74C48F9A01CA91816FBEE011A5B0145C6A0B4EFD5A3008A91E0B1AC96DD8CB9410D56A952E74B7FB684A0B166E5F0DF9AD100EE1C9E3B7B851B4CBF337A3328DB13977451E992C4A0B454BAC5E3A2D0E64D106DC47C216E0C0AC11B0F7C48589E573051133D01F29579C7E659F8FB8E6539B0F618D053F4323AD0237A58949F689458ACC109BEB7C81952A879A10BF8BC01CE057E936B62017E226DD66AB2470852C000A618B9B130050F6B047746D068FE7D3CC1277E4FF4400F372D50A85F469B4C7A35AA7FE88DD923ACEF5FFF47A95F971EFEA6B65D33E65F20173C5BA5A320DD16CAAB59AA7A7BB99956EB5ED54EB93E89C887A30D6D384345E21291B8E9C6CDCBE2671F810A058B858220146A2161B7412CE2DB817CC8C99528195F567A92E198330AAA418A5FAB754E27221A2165EC4D50B391BB8AB2BECCAA449CA1D4D3D5C17E28573048452EE4120693505515CF8D1E2F8984D363464DB786E9B8B7BDEDFA2301A21F3C469F013E27742405CFD0D15E70BE0560E8B18C82327861A8DF64200A51E69D2791D41E1E12BF371664270AFBBE1C84FF19D11D4C233ECDB8033BC79BC7967C410C13E8E3F9340F848510760DABCCAB6EF7BB87398B1421E5804F6D41ABC09E6542D72B87115F1281450C66AF3BF4FDC21CB9B8C27D0FC6DDFA73732A877CB7D7C09B8E9F4603D9865E4AF91C58E9EF6FD9A32C91EEAAEF170BF4F44BF586D789EF4571DEBE6FFF0C43E842617FA50FA6EAC8C7F70EB680BE717688188A430484F3388F6219A70EA38BF3C6DBBC7FF3A749D8C8E51A1365AE372988D248F9FC0F2B67E896CBC86D2AED35F7AD8D4E93C21DA09E0204C7DB92FAFA7119A63BD0CC00A029CE7DB93DF34C33ECF673D48CE3F40948DE86CA5467CC159960B329F560B20E5C8AC37AC4C8960B49B455075489E7217D6BC53AEE7C19B3FDE9FFF667EAE89AA6CE8C415E807B348A14598329A32D9E508C3409DF18EB426BDBFDDD88AEB3A9BCB9D84C2388AD79AABFA3445F80A9A68EF74096E34516CB5316DF599E5836F123271CF729472952E06C817A67C219382F22C4B0A16F7B7DDE9ECF4AF6927BBF4424F43FCEB1A48F1F2CBD98E7593464
EBDE45D604B6AF95E32AE728D921B390F712ADC280257F4420741C0D29E0E6B98EC1708569381BC845420DB0C1BDC9A23DAEC209378A01703B9C16C10FC29763A52FEE64F0A728198513361CD981DAC739937FC472CCCF666E61DA3D90C3656F0BBDE6B942D8A0496C9BC703F7424BF00848800AAD519347A4747676C81088109A2B67848EE0A08BD1409E774434EFFE754BDECAFF2328EDC30FDB85B45488DE0A5D9052397F9DDB0F13865DCA85A36F6B43B84ED044E5F40BB960E56DD0CD6EC08C37B017FEEDBA4A7F20A174E1146C441DB99A2BF34BB8D8FCAEB874B2313AF2E20B01D38B56B42451047EAFF592236CB241857754F6E2DA3E4FAD070FD195B641567375B6F2744FE16996D7A73D095F5CFFDD50A96508FBECF802625E5F168DA08C8D5198BB2702F1CBDEA8221BCFC301195B717A0D174EEC9BB96E7C49E15CD2A26B933E0C6BB11D702F1A21D33BE48039E89E38C2A5E96F55A557359BBC8E191719EC5E5669B2F168912C9177B320A5762474CB2303F54E78F4DBB914EA354F8C9A3E81342A10ACF5FD4A3D8CD6F85A204E165D59E4FD12D5E5E900AC94DB984DD8A5A72372D8B5D8958EC7F9E64CEA4E560A8D9525CB24D3B2F36ED7D46F897229A776E7E422838D4E2B551B28D0127F2B7644F35FBC35BEF9D856EF447F699151F73AA70CEEA55F6193060762B28FA150A83344A2E23B9E5E1B103805DE85C520EACCFD67A5C390334B036028D4B696AE3D6F6DD80304F7CF915F564409AE65AB7504D8FED434966AA4D4DB3A3C90B160332FB8E6A33A392122E3C02F27C38E37B8B217958912D0AB987D232B65D7FDF3F28CD507D53F5CAACE75224AA3F63A4D4570627561AA146A32645C89BA18CD4C7D2AC0D9123C89AA7D4AEE9CF811DC877D7D0321AB5C7EAE86F2EBD61AAE681EB53EEA73D0D7532937AAF8F8A23E0AE64FCBAF782169422ADB6CE8F3F003CD2EE9EFC37E5EA1A8902D3D1246A89736C6C7C9A46FE2D2DB3BEC00E69166ABD213A6D30494244FEE28050BFAE566DCC91C9528883011F215995DC92E577421C9645478FD3C3D7D8FE06BD1824AA20B4A67CF9E8C2A69C031E4E1FCBCBB6170D60838C61AF3731C7FD4DE4D6277FD6652B63318CF093F7DF46D800368A71333BA657171ADCE4D360E249B32F49F93E38C22ECF79C89647564B7F83E26A59D403B239A25F7629CEC50EDE1EFB58D9017ED44CE34A3136518FA009C8BEC057BA054C794EA0F49F3C581FD91009835946742B806D115D4E2DD109E1797F7DB5575CF233980B2D072D247E704EF40A6DD6651369C67B576FA7DE4C794E469EB2DB8C0E424D07CA01753671116FDCB9B37C0517686D85111B8E12A530C25F0D01ECB20E259CCCDB0A081ACA5CD06E9FE50D79B9A7412E02F
41B53CE3555E986833D253532CA5D27A4F0C72FB698CADE01B36CF94C5A05907AB3AC631455B4AE91803F2A3A83E9EE23EB0538840E1C9408F73205D7930AF7CAA7B4EEC2D47A71B99C07B09FB4BA1ECC036BAAF5230D600550CEBFCAF49A19D63B3B50016CC8077D97C27376E2AAA370BB3CA213968D242C11AC0A341 +sig_sk: D26EE59A89F67C98B20F890B03422C8027B776FC305BEF422CDED403AA705DA5DC380AD909E3C13A76F7F59BDD27A843F5F9A1B8E5B7B524EC445A916B7A44A15167716BC609B3EC0C653B210C6569BA2F8233E7F6E404B92EB21F39729FEBC06A644087C75FA7DA3DDC5B85A89F55C9F87A15014E27AB325EBEE0FE472B2F5620584486038253401882886644147657568564120364808766558518362551655542005887175160271771124611563571180127360112780646800502116176811048156556431380408615622477525336508572667670254212877701521765638422146616701854171256077565330535678835206734654372733842647142528141503853320265612885167130011775551345230735131537085430187237215758730315608111032111225873640443826470524430874533055735845516047300453165044242772430375327364354554615124108154741243307824331134370372351858138857323488300252413820311881810671272627500516448584670343542318031608665264717433863738510277733111245232802704882613683360402762352056767856100744330011802641776217247041410056768423675527675026503880702365431353050020174633258676213768670043122420377786842623460750137580760611864364334318285448711806737571721676368623463144572747052122561586501262430416388514665064516832415675712502763176118137833536728642640525817846376704674434640538555860741636531231160765578303261417111803642633318262423674786480765635382324657700157654760153613724564816788154835570344161663536566153512786412161546022647530862885617332132161087456255350678472373538538646305637023507323682268078360636557516737242348137027378732170627475664051583275531752441764352713178277673302362656322628434441466481418075187301747360663056844760450582247824547254571770448761471487206020443170560447751262275520875411164411453355505707067401375675661302860283461350752870564321823645637833731788160126476275272106433467502746616454738106646
6654324575825520882682277558861126808652842067242706502602822135413806761150412241623142132062854561462005282463527054222067084232463834703022563303120640700613028248186135862267807514517034808871887028651376603357053471356848232460123720236623604868157088117331708032612544633466027487881011822577840213584603838688167482874577534023608684717551178060878605807013607371146861810111452118252851727546664116442725718666627745565706128063346337837125850568444871521347471236266350161674306083251415448031242478881077307231468143328543380706263487802348512856658522881606460085147711847660265787214085344812640116713686701544018882508606025375436400485704788868344567583740386811256420867877166077712432136288331062381178450376522316153520153726880635206125157517874821078632831518110002578434628677277256811181641133083303108474041885821315177237034415740262877735530252155663683621050363812382062703541636072037888130651437727453164255777504346625374221717666355341021461642300018161635551232553767782843483040236111880701245700154018276321853454261144802277261072016656336147062500850107086111551067005227866654812551566680414042612307370316834001206560231706668641615317857425305313402768758031066346812502214187812016774121074448086641371280564240151566227543442570306101083513862873685631407576010245281712830883456301434255351012FADA607976F6837B0DBB68127C5D2CCA1E3BFE9CE0441DAA2FC7F0145F96BFA473054009EFB11184E4718B045758DAB9E41CC03A8A581F86F05C9D76F3BBEFCEE4A67DCD2CE920CD15B7241D7BF641B462A72E287C3DB90AFF454ED1C8B0794AC1055BC6B4BE81669FE7154ED8FAB43BB21E2D787FED316B81CE03DA5019A0948BF4F19CE09D317736CF91FDDD4B42B29B9AA0C87D58C81D2A74F22F44AFEFDF6DD0BB2689BC4E54C42DE56F5FC6796941631931525D7B51FED90894F88B20BC819BE6F19D080EFC03E23CB034D06EAA41D552CCB07B42AE79486825EBEAEE22CE193D5B9174E20491E23F376E3EA9731981FDEE6E0C837A07C9A6575A038522C8F36FF0E481625E1D693C69E2B4B4910B35CFCD7AFF76301F15E580C5F2E4118CB9754D90ADC4140E4AA8DA02FF7502514652F07D14B0AFDD6DE58EB896CF51E562E0B7972E85D334EE77E09E7
882EF080275786B91E1BDB949E94BCFBE45333D0CFD6305746A24B4B2AF2311F964151D4F89D12D1D88A6149351CB3294926A3B05051FA1F613F4854A1727E3FC195F4F5F99D7A6F6766A250839C090B4D81E71C242A58DEBE5DDCA556B97DE9790DB89ED15CEDCF7EBC6D44DC61881BE060F905CBB77025694FB382A66A2809A759AE66DE102CF059E9339C26112F76C50B39EF61ADF393B92B69BD88F6AC5E7EED1FA1A8BA7201696DBECC8FB9AFC1946C744D92F3ED30F8834C398F42C53720510D16817A8074A2F12D27FF98CFCFFDFF7D94FF793993991FD44C799D8BAA5E0BCC789D3F3EF467104BDBF2092509E530F920DA1F663A5E58AECA50EC6F1DC82986D37EE6C195D64E2D4960BEFB09D3EEAADBF9873340A5B86D29DE380DFC41A99D7350D9A5055706E3C2037245DF35B9DF14B810772C3A40887CA02CF5CD42993E360852219A9A384647CE912D95DFF0E80706A2696296C7290214CEC72EEB016D34E03FBF5E65F4795E714B35DE183FEE6CC6C35E349289CC8DBB37AE9F968CF0BC3654D3B82175B0C5AFFD9B1D421B7DCE8015AFD77520A983E1C1E9EB0708118FA4BBEE072157E7CD8CE9381091D3F1D4FC3980E5F8C95D54F4E6BB7AF368AF6BC9B332B80D7F8E76C2E203049C3380C5E0F5299DD6E57CD59AED1CCDC998F2FAE72C29A0C1D4025C34E506AF77760228DC3B8B680AC8A2FAD784EA6B86EF34F1842E66F46FB0F68F0101425D16341825DD6605DF6310E7C9811F4CB81EC3C7AE7CC6F528C782C6D33D6883E895B3A06E3D3EA84B03E3603394B44A1A3C289871A59269F4B80F4C7CF404A4B99A529A574BE46AA6B52B4E544FB7022C90904BE1C767FB8DB744B9C69FBC503C234F6618794F71AD8C95E5087F54B4F82154454C0E7386F21EECC0DDBAF454382A973BE4ECEFE846E0C21B60B56448E6E5EDDBAE084132A1397F16314047144ACADD6CC342447DB6A65365747A63C404AE623BC3EF1703C18B14FDA178AB7392E9EB6D2C9045EEFC8A7368DBCBA7F09CC88D6EA54505BFB82373819FB990F0F64C6FBE421E35CEBD9B79CE77BDF3C8946C58C6071757284E5B53B56DFEDB2EEF928DC9AE02BA21A2B5E284E285C9A6CB05710E2634CB3627C52E577F7E5CA7475607A7D2CCF1452F9BA703D322F69A47F16BD9851C7528D89B59C03B426AEBB3B372328CA9102F624E901DC8A315524B941706ECFE0AA8AEB0AF8EC33D2E1CA203B489AE23C03D880B83D738CC277EB6FF1C7DD31886C639C6C658396F60920D8A0A4E58278D63E5B393295FE8887E8F9F6A64C10CA241F699B99D72D13722E42D4615D9F3D0545E320D38EF809076F4216428C29D130F2F83052B9D0BA4A8A1872A242BDC59722281DDECE5407CBBDDBDB0B8C04E9385AD
9660D252DD28AA881166694207BB04F9A24FA92216ACAF264C9F1CC200A5E2CBD2694C91238A8972496097B3BD3785345EE7171A9EB96619722C64571DE6327CD0345B788871D7D5D0D84EF2A47DDE6680B7743AD61B164E558B672E4CC4B46D6E6C236C2133965C8B3C7D0F240AB0EDEC78E8EAB4C80040A037F0080FA3B15ACC3EEA34208065EB6E6E21DA65601454E6DB037C08B051A1942DAB90C92B0BDC8C572C52F0AEF84B85F0444A6C15E84801448DE8147804F796A1A05341E75DF015FEEA7E191C461D3D3D7B2D74A9C4C7E18EAFE0966C26731F0A960461DE0387A917A6BBD645C4668030642D70DE30D9954556CDACDE00F82219E7A3EECC8354FC5A4388195CE568016C7C63661559EF65E2FF09F1C98A6668C23AE4863B04287A3BB6210B3F5689A3E7978517B95B0D06A831F28A3FF55B6053E38535B2AA97FD66B42980CDAF9E5DB6F1212BC23CC740BB7DDF8CD40391C0190F62A0A2A287DCC2AB8FA65C71675619C451EC07F3D67EE81A6370EA2144D82CE63CAEB4FD76E6FE4979FBC149BC54A9236D4CF23CD5C09E83DE5BD888F2EB03008BB9FBA0CD5CB79DDB62E2F9F8F2E11B2210DDD11590E21214015085A3CD8DD1665AD81D28D552B214D91550FFB3010102098E6F7649A5E06057998E25B4F789B2FF356299C589B1CE03D4AADDC9177B47974FBE205516B3A9E88F0B3955F133C29C1EDB94969E89B2DA7B64EE423A51672521CB4E1008D0AFCF68D6172A174F0ECABB5FB7648444032E6443CF9C7FA4ED4E1728427C2804699A143EDF11CD6CF9F5AFF6341A69DE7B7710C223C799861242C0D68841F36D9200122381EBF94D22258B38375C0D25DC1E3EEE00E312433CF8032F46EA0A0BCA36C679F5FFC82898372D631FA7368595ED7CB6571E8C00C16AFA4F4791E63813141D800111C1EDCEFD8872BEA41B2DD159676F23BF85C7DE07A30D2459EB16B24BDAD322C177B174BA83EAEB352B2C0D8140B33D50B0C9F2DDD39F305F9AB4565D3ADF35550F023F346900C59903CB972579395840D6A89169DA96D7012A821632E66821591FD3731EB068CBAF5F18589E7541CB4AEB0C4859020D0A3B99887D12E78F690073D29A242682D4677E2F904DFD7266E0514F6E460AD2F1FC5CE875D106432C5F91D6246D80AE5C7D8C4E4EE122F5FC58380391ED536CA4F073E2B55F5ED19751479CB3A71E72E8F330E086981169140A8AD069A49B1FD4EB950F9FB9084D1E702B957D081709FC510D31CE2EB1E42AB474D73A793231EC274EE399257DEC4B9D0704265A77E745F4CD8F9E4909FB9299304CE6AC7FA95C3F47B210DA388EE748ABD41CDEB3CBB187A40BB3C3F4150ADA3472DDB398DF8D8D5865EF27F54CBD4164DB0465DF7443E1D7782A2403387E
F0118E8EB10283A39072C36A46E724165F49ED170A8FCC00FADF037EBFC652810697CD7108A56989491BA2232BED54BA167DB9D526FAD348B7787AE71557C59B38D1054CBE9BCB45E17BB146ADFC04F6816334D3D815BF73FBECF1C9D437D3A47A7862C286884B638DD7FB64BE01B7981D7CC82A90938EBDD9664DEBAF0E03A595368B89338AB076BD22AD579D04F1C7EA075D220F8CCDC0B9E372F5194AFE834D28 +rnd: 0000000000000000000000000000000000000000000000000000000000000000 +verif_signature: F7789A45A3587330E7FCF70695F7F69688A2B8D0CE54F090214F109F56484F98C3AD1A53A5441C2CA72A3B3191BC046F46373045B9E540C73DFE91B61F0588D613593FCE1B00EEF1B227034C6FD3B18B3F221110FB345AA78631B8B59FBDFDCCDAE6A24D259D34AABAD218B3AE4E77186653B8563AA6120A0A531A4E913730DC914FE5E008BECE6869B02B07FDC16214540D316C43FA0C211B41AC7E52656729C773E4C4B88ED311886DD4D275417D70196644EED15FA315066003E309F832AF91262C949011FCB0AD2CCE65DD9EFF567EE29CC40A6FE0664E7D9F236568FC94295DBB34288233E8C511D28815EC721032296E1EDECA7F726A6EB0F76CC5828011C0E4013CC7EE4329B81ECC0D52ED1E491DD6D55C5265665ED8AD219B894F31C68C619AFCDB7358E5554C495B8B6E3325688FB8C1A25331D57BD348A27D390929BC46A1496AB35B46BA61B6B9D23CD06315FB72C24776016130ADB1CF2DC72959EA9CAD96AF5DA996126CDD85B134CC927A51FD23F84791A3FCDA077E15991748A0394F334EB8BC48A99AB9DFBB0F2AAD6FBE484961D3A4E8F8B21A6AC092B226D6E119FAD44D8E576FE96C6CDB6840EA614BAFC70786C519E1D5DC0F984443C8B1E54F8EE176D98B2C7027F57D7E3DE9B2A0A36911B8E47121DE0C07EBBA5D7B594EF244C68327EC6C6D1DD501F483FE9B9570597E70DF413E7AF03847F409ED61E2846E6C641E6A7FFA79DE6BFA373A0644B00BF41A034992A794DA17C88885239032C851764E3E4DBDE7F12A16C5A263E964C1E7FDD3CCE576DD6D56B18182848B7563645D4E42FF22742A996785169D7F503B48A7158B3CBD29935ED32049BEA1AD953EF707327B778BFDDDFC60511DA113A34F655712E4E59D6CCE404E94ABA61E8135388FC21C8E41344F324B01AC8C069F92575D34F88BCA22CB307E37070063320256B8BAD6EB7A81AFE9A254016E1C8A125089AAA3EDE84E5B6C2ECFAEFAA52B9F5709602C06AEA4A0384E9B09E5B88164B274EA3265FB5152397DFF5A3A0861E2BC12D2109289729747E83FDF243A1D17B98348379845A9E955E2D6F938DAA5918E2A14F97BA2BE501CCCAFD681910F4A4F0671
5CE84096F37A91DCCA2A8A4BE8DA7921DBF8D3F4EFB98C6B4F940ECEF832B549D068947C3DFB5809CB7B060A3A0EF3B21C0164501DDEA7C9E5E7897C6B1C46348B2C3E805F6F2287BA158CF925A7BA7F08254989C87D24979AD986AA97C51B01F45D4A1F24752991F04205EB551FD02D415F2DD1EFF142B0D70416C6D815EB91732B268FB20D0867442D71DEC057B286CD93811FF3F646EBD565D51D09A42D3ABAAC0F34CC817B18938ECCBB1FEF05BD3C2B494FA529ED4C634C9325A48173F20FFAC32DC101E6EE03B2FCBEC2468DBC8F76758C3215474F7EF24065F79060ACA3C8D5D74AF70F48301DDB30C05DB3EFA726CF88555901841282AA08F666A65351A6A24EED6BE211773107E185E1B488A2E491B6C141528462A86494B54FDCCECCB6AA21253686693AE798C9CE9E0BDDC6AE53D9B706DC4F4D81B9C73C461ECD7035C5172EFAE5602CAF88C64E79E5324030555DE211F89FD424C338C3883C83CA9405C2B5D1445F7C98C43ED3D2BECBE25F5F3F544CCC5B5AEAE47DDF3FB5649FF5D61EAA02EDEBC75CE478BA00426CAF474FA79E5B089EB1A882F15354592695952BA0A8EE91E649E3F2C382264DAA30F6A6D217F6129C1939B6DCACCDA5B637326E8A8361C3B56FCFFC485036865822B9BB87B43510BCDD55BC350DE7B2AE90A21E9E19978EDA10DF667614A44FE2A84D16BE043EA8773633EA6BADF65710052F341F65CBE928D3962A5A2FE64E46D6BFB8FD0D9978F0423CBD195F72F3CB19D7EFD9EBE33CD2F5709A57807DF944ECE568AACA4336422083B0697B6AA00586E4BF7DD673A3D596B8618AC3B4061750C6BE97CB53753D02395556075A26F140B93F577DAD505E1CF2B551A04C98C7F0901831B3CA61D75DA793AC72A44C7A07F7DBBAD60A55F49CBD79DEE4739FFD36778EBD08EBDB79EC07A16239C5B921599FEBFEA46DDF966AA4A01512E610943F5DC54B4C76B764B380BF2F84EDE32124912F54F7B6E207B7381F670F7AA0F3C3ED1015740384DD61A9765EE4696EACF82EA410691805CB688903535D7046100DCC2BA7D8302ACB0430D506CCC1C0DDEA7111A76F45B454E25CDDFB639B3D664C36D8843513A3FCAF9E6057E9BC068237FE2419A2D2D90B4A1FC2A71A146D2BD04364C79B8EBA8E3E88CE11E916E4A7528421328CF54FAAB2B19F44468781F8AB84B7DD972FF5615071430A4374DAFCAE1E6044AA98E985941BA6B9DB8C02F589603EEB8BE90A70EFC088D795E6DA1F1F2E6ECEDD031D8199E65912D434D09BFBE594406DC1150E99358CEA7FAD2E7C44C38B6E0CEEAB9BDE0DB97BCF5AC99410C9470E266B8BE45F6690831F4145E26379DB807C26DDF91E309D4F4A3E7ECAB7362F15D20EA433B7E70A7DDE7416CEA871498B2CE3F58D
29D8628C531840F022DD3BD2F3809B1168D38E63C7F69308A31A2D4D5EEB974239B34A62BC85E4ECF90C336A0C37BD9E0EF4266B835AC8906A83CF0B35138A65E5D9A61FCC9B2D5A337B8ABEF88A7FB3C0945D7CAF35611AE0E44693A5BCE0A6E2FECAE9BDF4E356D6536B581A18F03A59164ED5447C7EC8BD997BE953DED932535B5F438A04319F5E0D8B0FEBC8DE8146658E52B9759C73935B120DC9B854F3C8F94EC9339057D7D7CD91F7E0B98D84EC7B2F92328D736018B03165A8745F8E77EB8029F9782670CBD86B4316C7BE4A880338BACFB015699BF30D3A4B05325435BA5FA3B9D2B2FE0B519C2CB246E53D1A343D661A66143C6F468C5538645CC26D4E2A8703EC9B10FC89BE6F859997708F31194F0DFEE92998B25E93B97070DE14409D5BA43DF88D15C2FBA97BDDE618CC3FC042F7748184BA9EC9CBA1B2006881D0514264198FB691C5C038E04950CF69099377FE66BA64E21952A44581719664F5D92397D22AA7032BF589AF8ACA48DF6D14EB43CEF0A9C8A8F9AD329525EF0AAA4F9E09C3513CF029F3DEFCBB4114FA0F668DB4722FCCD9C207B66F109ED95B454BB6195D59C4A678BA6F5A9B234121AD0516A1D4123D3826D92A61B35DEB295BAA2FE1B5EE25021DAEF857B5DF192E175E3A2A0D3F082F211CB5BDC236274F86C5DC74C39BE97CCF5F5794EB64EC645545210FC667D1E0740E66CBEDC20648CA1FA73414596BA08917A19A463AD3027C81836B8F4F02B99FC5083F06F34BD2309C2342AD88A84FA96E207C0108F6825414944F264ED6C4667C788D61A6BC2C456AF66C2F769E16901706912CC90D4B6C90DCA16CAC8FFED8397020E2975E24FF4C807C8AB731C81D36CA84C9121A8513E0C9D0F41BC68F88EACAA35599FAE3BBA6FCC6528D47E40C0764CF9C8383B3A44515E61D92CDAEC9CB9082B5A0C0379460D9179A7D9DF29E0B4B6A4118285215E87B6F118E9731E466FB3FEBD195E144FD2037D116627579AC55FED5E32585EC6638A0DFBE6ED6C5876CF8114C902AEFA363F4C9B72E7D5C852DCC1AF2B8852A9D0F995938865084CE5213B308A9CB37F681960D84EFE1DF5134A5915AE5878B10DA0FD4D9AC2AEF0C7E01C2E9E7C017E7BA740CEE1A899459BB75033EEAF3190D6779ED9EDD846A74E321528C03084A5D30874839718A53549B2EC6B2B730AA935CA6E1C4FD8BE0357D93F62174EEEDF8DAB7755B46657E59D7AA00B9F2F85E4C0F77FA11A5D69A23B1EF3A09F219D83B1F391F841318EEF35A326367BFA2B15FD714032092B9D02BF613AFF7696FADF1DE2C817077CB7C996776D69EC241A24254DA2D13987691EAC7EBA8CD8DCFB3947B1D99EDF962D215B318BB5F9AA04D1C82626A4173D02D410C586BCA4E51CA4F3E151B54F1
7A6BC9677609BBAF6C3038A67CADA66B4FDFB51029E07807D705969D96C9ABFB7162E45810A1DC4B56DA1477ED900A89CCAC298E17884269C39E8D7AB966F33DDADBE56A384CA20A7B1899EC18E2AE547000B904E34E46801D8574DB008417BCFDD1A74DC018E507B76B0FA08626235B1CE24BCFC320FAE3551C1C929B94C7C4965341829D8A1347D6A7385803B08BCDA84A27EA5E49CA1E6006EA232A53EE417EC881D3328A156382A6B293894DDF9B369CDE6B2FF59CB6A564E21C9279ECA0311F5D80CE39B98BF90DB327F74D3F762D117DF5F9132084FFB555A5D147221AF863ABF78715B72194529A0E334D4A191D42A99BEA52ADA2C7CC4A9774D5CB28D4ED82B61F94E89F60F0C8EA52DC079D4658BF8C856D6152D92251948B3BA014D8BAF3DCD36BC71F8E5B2CE6F535B7B9AE13DA4A1EAFFC253BE43A9F608EACE733CFCE52EA5CDA8359DB53FF3AF2CEFE8779BCC53C24A4B18D5E0D781BECF75B5477473A2024AD56C54A7F990EF6B1DFAC501088509D3A37F1C8D5C26487E420B7F4358E9269761FF1FA3AFCBECAEB68F5DDDE3AA8FD078CC4224CEA67132D7EBF5D232E43BADD218C0B4DBE1E16529866B9AB935885ACB415FBB1EEE69408A521B462EC59CD0D3C5496D985AEB0CE374F6772A4E6393A4EF007438090A8A9E52D2F55666D70F0000000000000000000000000000000000000000000000004080E121920 +verif_pk: 
6C8414380856CB52D79C4B29139FB1839B8606F5948B9D72A956DCF10116DA9E2D79770186FC74D942C0F4A3B595FF6C19804B49901C6AD5FAF71601C2B600315E1F40C2054767B00925DF3AA490E8C76F05FBFB74911075E6518C5F1D91B8A0E5B59830D3DF3994760411EBB911ED4CC2C160E3849A93762DFCA7B9812BC7AEB2DDB2767BEF36505605AE069260BCC8DC4787C428CB3C076EF2A6B93561D8943F45CABE8F0553FF2EA1AC95C1CE21593A175459D7DF12C4070ADB0EEE55B4ABAE59BE69C3FF0DE5A9B027FC7D8E6E057B7152EE6AB480D105D30B0F5051B60C7901C525C4635FE668CC00E9D3097DB99D66323715CE4F0B79B426B4545E09F4DE39323DD14CCB0D17108CD46DEC6138CDFA2872C1C4C8AEAD5C8CE04157E553A37558C2346A06194CB50B4981BF4D090CE4E860126A8254A4D4C084C3E2020BC0753521049B0FD88997E027AC51E75CF1350C3F303A0ECE426487153DAF1FAAD6808B9D9907DA9F35185BD3BE8D9CEBE916CED1FA2928D885A9CBA88149703F5E4772E48523125DDD026E714C49F4FB4E544BBF617A40B00B68DF8F155F5880D411877E25B42B2448B36BEC2F1F8F9A770C545150A0278E9B724500AEAAEA471C11CFF04E30EAB2F473BC048E32CD31AEF21579B699225BF9E1B6700C57E509FCA1F236294A5974DAA15FBCAD62D4BDDC4532B2614144DBE28807368C281A770EA22B1E5A3FA5BA14926DC55A54F84A2A77C5A70841F07BC1DEEF7403B247AB42B84ADF141E030C98468424DAAEB99D2577F950C2373CCA1E2DC2761B8EDD6D08FF79E528880FFB51C36ED420AC5D50F2582AA664E54EA5F4189EA0176DAA6122F6235A70B15CEB4DDD65D3BE6EBF3DC43189EE0A2E3105638F23873695280F1B74274352D60A48E5D3DD02FB7A5ED83FE27A698251421C8E9C98806102396E537390ACFD8C1D0B4F99B702A9EA659878583D92758941B30ECE507C104B2CE487679ECF68B4D8B980698ACF6AA6A57E8ED6AF3FF18D2668950428B57D182F73BB49B9B038CCC82D561278A386D56645EC3FAFFB4125E0E7F36B48B14B452547A0B481AA6B334229249153E42EDF7E49DD6E7636BFC615A23A401EFD4034C81B4DCEF027D344DDCCE0A71618EB5910CEC62228819385033E8D0ABD493D983E4FC087D72B455E4DB63A2F82CEFF65C1E628EAE630596DEC27FB98B84DBFDCDFAB40E472244914AFF179326D542D401A3CBB86E5FF8351EFE53A73C51ABB63FF553E7D7957EF89135E0F5BB1BD0C24F9E45E3236413C60E1396A47567C9439510F00D4A43C149A5CCC04F3D47E67A8E294A461A5F693DB0CAE22CFAC61E853477D339A4E45F7B17C3C116D56F3A068FC5ADFEF38FF85332BD5153C4D8FB8F148F117659C2EA94DB42A
A0B0BEBB475A110412F3CD3349FC1AD041B7D5304A8593144EFA3A361D1B0C7613B82C086EA7126E43C616CEE8F1444E9956E87F5CAB95C7C7FB1758EC7D97019E5BA93543EF3BAC1A174299CA48BF7859DBFBDFF243B114F6BF423CE98B4D4D091DA44F3274D573FDC904BD885E35C9152A65354888F11ED4F3D63F26A7BE2F5726EADAF48586592BBDF6CEE246769E0EDA2A80771FED347D67AFEEC68B89463FA0496DBC15C89E8D569983D1D674733F2BF9DF4A980EA8C5E3AF15560A0E28D672B580AB6552ED76AACB5F80260B9703769D33F4138ABC10BF5B0582DCC62DBE58C890F51B4100127734FB7DB7447A720AAE009D00BE8C610792C64F131F2D72115C7E058E48B9DE64F55B4D610C36D112716A31A3DFE26699E9C2ABA05658CEF1B2B0867CF8D5233DB74FA8DC3AD145F5D28574360A85E3B0B10AC0A6467A7B05984628ECA10463F348A3111E00578D3CE5480F5375A1EE23EE82087BAC41233A14AAA724734B1874A4ACE1133706258F5FEA3A0C1609E30C7FD210DA0C4FDE9162DF66FBAF792FA2AEAA512F0FF7837B9CC02EE9BD95539F001BBD60DD8B42D616B2CA95F3835F5E47D43B1434C4563FD81C15BEFA202CF3D9540873F684AFE19AB5C01FA92E95A8CD6F360730856E59C9C6AB770D6575962AF75878572A2A26413D01AB318C100DFC34DC1DEFA5927C4B459925D73E1EB91470E37A58455C22A961FD53F7D99026FF884BF4A2579F706335EFB6FB2250D52AE561898BA1606E51E96D37C9ED3EC6CFCB33BFBE9C3143FD3B6B334D5F61922B369AFBB31C3E6E9B5F3AEBF95CB708346FECF7159CAD94A93D8CD4B8C4894192DFE53EA436FBF3AF4E864E8C3991EA020A811F0AF50B4257436A3FF522BE7367391D0F950BA6452FBFD8FD8728F40BD2FCB894529985B432DFEF6230EB4DEE737A8D10A3BCDFB763E0869B225C1A8D0E1FBF2D161C2C65D6DFB958E982D11777ACBEAD8DFB6B1F5EB21EA942F7C40DC20D2E4EB3E729B4E29F7501DA34234561F6288812D612D41DFA83C5B8D90FF38BA548201B575B5293AD78120D91CEC059CAE2E76A9AB43EF1281E2BEF3E348D28F21947C88848960459489775176F8E40EE06427953687FB63E470F7D59FB60DF569F8A11E28E0937162C46AFC7D2210A885FFA21B3DBF5354B2941F4ED5D50790890840CC3B973D2C3D02602B29BACCB6CE17CEDB97B085A2AB310572BA7371D1F8120FFE37D0B0FCA35AFC5B562AA8499715A299CE059CCE3B0D11CEF0D9238961AD4BE11E9A6D1A4692177C8B0C53F11A8ED2650212E7A2F80EBFF6DCFE4672103658434D0327ADDCD66BCB6 +verif_message: 
DB8494BA19C4118FB15D0ACF4254FD37483FCF4748FD1844F717CE6F69589E61772CFEFA7F9758653409D4EE5A264B834E60D6BB96499EBEB2B06B0BA874BF31E641394CFAA6A2D30DDB8F045876208D2F51DE15E205E8C91B87ECEB05FF3183271B2649665DD3CC49BFDB998D539DA809305516BBBE9C906021191C5223E525A8FC3616A1765EC3F9C5DB53CC337E039F186ACFEA91148EE2A79CCA3689EDB62AAF28B5D752FDE265EE5280B519726C1CA9803295C674B7EFAFA4D61B306A79E3F6E7A887C2FB535B3B0FB3D9EBC87603EAFEF170C1F1D28E99BB +keygen_pk: 88C93DEB901A24A945FA94AE54DBAF46D81CACA943FBE320421C61B33204849F65E833B5EB94C60E74A60DD2C8EA1DE7FE99609355875FFC400ECD83F12CDDD1B015AC750DF7759D3D0FECE79542F12BE30A473921499A927CDEEDF9C62F0A755835B9FF464040EF409784BA7D5091BC92C7A0C623DEC017E95C3AD43C865B595D04091629A56114A3197B0A2117E9ECC8F3E48E1E9C772BA61E5AE3138189DF4A9E149291A6694ABB1005CFA8ED766DCD8C8CF72AE9C45503BB7165B84140B0CB5CBD33FFC43E6384CD905995B5A787FE484BFE17D2BF30250F78BE5CEA26E201FF1F20574BB677F4D208E7C5B96CABD6416EEC1E21E0816F352C848EEAA47CDD8E02B47158BCB44451D360ACBD9DCF523EB9694FD5E1557DA8F2BEED0A0AEE52E23AEF1BEB449D913276E88AF188151F6876E4A5F570A240A695B06E4D60132DA2B62F4CAB5C881F7277ACD1DA48687368CB8ED49FAE2A4B2E5EF3DE0260A731D659A3F42A19F466C9F0D43A102B129CDAE4A6BF084196D6FEA7EEB38336EF1F256502EEC80A492F0A4F8A182345CC78978C061C563D03E07AC6FC2A1CF5CB8D25C04887E5D11391E0B3CD1DB105C7345CB3C826FF058C30E0228C3A857B7CE2F064A508AE80E33CE2CA82FE36246D61578C52D69A8F8B844F997D8B4A3341C9162FF5320228DD59C0911FE479E72FF6AB43F4A7B9F805A86F984512F973D091370F681F49827296964341ACAED3E311A32BEC5174A77689A4921D029F21F6C40EA55F1E73C1AFF2143ACD3E5037FCD5B311CD825F3326710C3263A0CF6016D9EAE3E50FE2575E0AABEDFCB3495B9E77764E97A263D094806FA124A8C153E5975D35FA42597683C95D838067028EBDA97658E64478580CECAA772EC6CA319F11803A648D6AF0F9484B1582DE0C72AF6C4912EA7915FFEC900A95DE235DEFFC4F754BED724E3612F0696A1AE1FF05E8BE2590029CC0D34EDD8398250BC0D391DEE871A54F15685D9CD67F85EB1BB50F39A8A19415F863527430F4EFC297BC08A03DB9DF86D24C6D0F63D0399D2E86426331574D1C6798FBAF4633CFBE10B17890330965906E5
F8B01DB125C0FEBAE1F3A498F45BF243267D9BAD096AE0F9C159A2B4D11BF6ADB35AA7CCC3A17E7CE14B5648281F856DC7D29BE92F560C105DFC2684D2D5D16257E22DBA99C93002BE24FC145440EF8F77B2ADAE7EFD6766200BED5AF94FD9F23ADEB7EF3F3E786E5D5FBC83F09120BD15935DAC68847F507E4561069915E02A65396ACABCBDBA9481227C0261C7FA80392EECE162FAF98E7CC5356FD0ACB3F5F9B7B60EA43664D053CA41865D068854AEEF2AD80A5626DF66AC0032B225197D52C7A55F6DD56D375A44DC234E355E6B7F100CD5DF2E307C20C8EF44C77976ABFC4542A0EBFBDB79AFF5E79B49A3F08EE63C0F78D124B621B7BF4BD3550AD63246FAF748C46E94091E4CC1A981B41464BD27CF6EDA5DED5E92C0A838E59E2EC6FD20C02590EA2807E1BEEBAA91B5A5F0312543F6FC63C0B82241532D2C497ECEE54C0E65E4F93836A8A690942673A6C61113966CA566C2CE9606BD897B900D7661B9A0CB5360C0A8D655F7A5D999136AD8ECDDAE06C11F2144DD64529B62CB1F1294DF222BFA6E5780FEC07FBA3F7ECA86B593C2611A745BC206FE307D4103A3B68D3CA41A77295411D44853B08F52BC74678E6C22C1700764564F3A391F787FF5F97A950C72F271F493CF64E808925231209B8BE77C862C77B7C04C3611C86397AF49DAF0BAA4EDE81714586C0A6C5210E46AB49AA56AD0DC86B0207E5A2F8DE9885B84076C310D09CE324E9CC275687B054A0B4C619760A5F3C4E73A61CDF37C276261DB1E07F5361044EEC1E69CCA9BD48C3ADCF680B9E47D15A2F39235437CD40E7C9541A06C0DAB13A30294338561FB326E8275ED2832F77D95C63915C069FD4F59AFF388458B9B39E4E0E24C9932477DD52FF83DBDD24F2B639FF9A32156673DF051C35B6F4F32F2178B2ECD09E7E9AFD4B62DD5516A0E2BE2C895387284BAF828F886C0CF467405B0A67FAF76E4D1AD93311C8AF07EEF16F1729F60646F3DBB92DB02044F9FE563338DAC30EC1658AFFEAACC7BC4212588C322519B3B90B6655C2EC509C73E99E4B2750BB711BBCD39FBDC5399CA93EFEEF0CE82E51D08AC350BC0B5E503835EBE1B6F84C42E1AB43C9D886D14AFAAB89A0BA4A7C949B9D115C7F837D7495A780103BE8BE9F514A349703FD0E418DBE88F227880CE5B2DD54C0BCE1FF6189C3CA89216CDD230D5987BCD46C3B6C2DE7EE54B7DC96123BAD7A9E2F926AF1952F424F39C890D61A9DA8CFEBE177085D1A8760876F68648A82DDB799951A101B828F57E0CFE0FAC6D9085962CF45113CC5BF6D7FC2ACBCEEB556DF2BC3D2FE67A863312134E73E506DA9DC78B7C94347217EABB73E2498D5F3A5297F1859BD13748CDB3B70E51971CC01F05B04EB080228D5959CC8FE5961A02092EC8D0A4D1
6806EB5D60864C3ECF801244294022A0823873DBE8DFB6D06B6BAF69CDD107C960AA4B2C3536BB442461374F8FCB04832648B60032864FA6F0AF76F34355B9343DCF3C8F99FEDD88CA3D83CAD67351B4562064EA1325180606362D79A76A1B3C676152BA80E82B843D85D537ECEFFDDCC95B4661CFA4A70B06F924D692D87E1FB8E1CEDD85F7AB9324FF5D0A753D862591BE239ABC963B50A7EF870B05F2721215EB90A60E466955D522CA4216944693546 +keygen_sk: 88C93DEB901A24A945FA94AE54DBAF46D81CACA943FBE320421C61B33204849F9B2E348E4865616680FC184376A8CCB8DBE598365A23A2723944F5351F42F4742077582102F6F313C22564F8414E3766704FFD18017FDCAE570DB8C8FCB567E413219CB4B45175B83E6BA5E322A112D18CF9F356B57697230350D2591EE02AD265451584132322258032104012064260837618855017663473151068333326407828761532512724305338533576475065141135288641244744185465642658217785681138500213684186378547261448564420728486374627511602124436247207643285421718401042177518416458514808671034850731532802753363255445727207263713530301860752844615585361123502722185450156240084534826874011863642313011260000830471152854731053044863574345671411473604423801360157585642581467563320164605407168660573621115107663741335771420055514820632827544848682081527306858186581388874241073357847814557578451048741348866606263487680016463701683640830321167666076363780835674701707516043123786370655604458235511476078627284456461551450288277148868515651841655025535111613138072216804020781754132102610533237251358856048666676360515340068603678177353230358458558538255614587323170265182160161547561343545320525364650307512437501631831462423277766380187842642483780774744453033574384025372453307681141512652232080130736122236426137630038486183313462330505007516641850620517586874553451433068728111886001635037118666332818831083074527365072010518273722372538563530521102417861382658377483104354811361726026488505783804076030211070182174068105688032637566438226401281116047850328834426674172226708300432217054505537652886202565427450641412773825118476618561473748804708588018241204543644213081145675284320215013875572760885648262283842440284458723
3774173341673110070483014068730277187116367374840580222778655865752756186171868068267623570340728087440262064413321784407530432675404153510222801127828578286328620245514484038018554816208205504456242220260650730645146723062131036724555002752342572057672566440784215365574084145012466856044503712682574544182267478348533103222153480526415458043351332432388544762882581714034065842506052121857400516021227658746582612353405307057267338344888677344061742714328142333252663347632168324801205742564851867411268402233626284578365081135203388317585381768678341467536821887133776615067477507772088788475223160138804715728535786023260646826021885712306281647812062372811286363611046727808057720442776132781067716352488361062733251866054685857840457420542023545155765377857084582244508465135035172458261816554334254317166171313824372288330486348830232135368717232633204480245362724325702261536128252007641320831467776802114887488465410826053602543010762601427424208671532371636210147224850133520033281870045574587735040533704783165412680078731157477051152614324517581088431785417275162632401473321083815678856545746776374874304800626007433863048305242168847128485157785460127241373680123031320510753557476455618482257671271040403050182053635446611842371782003385206425686324506278743843608760167647052812756434850150304062123835645244000555334854840304846070522672406004825581021471435363611118441785683041814020154570744330533742113803627262112418474C221B25DFE65E913E32545DD2C3335E6C6CFF3F73D2A4D2EE3CCCF8150F4A4AA34FE4DEEE188ADF128FB7A2B549A96D67E4ABE280089CF16DE078483C9D88CCB292170D585A4382BF1C69FE4E9386CA1D5BBA673B92AB78AEC293545221DEFF4019081BDFB9374441C9328E45A306B3193BC8F207F914A5E4EE76C4FD5213CD1795294A0CF570155D124148657B5E209A64BCEB017F58C3F851CB7916BF22A0F94D248DB7F6E3758545AA63F89757BFB1C03F43BBA700EEBAA84EDDE961CC205154411CA10481A88FE101DB0BA375CFBCC6E777ACD72F00202BFBA22FE5C0964CA287D00CB93A01C235A3A6E731687039A86FF1C5C211868DD41B32614A48E109156079F7668DF08879AFC7D0E23197AAF7E630169E53D7
3797427F0E8C4727EDACE5E7B6930D5E1A8591B6007680B197642A71DC645D1A240DA6930FDDC2AE6A5AF09CDA16201E0B41484BFB1CD9277CA676D46A48D66637FCC32D2EF5DE2ED4974238347C81BF183F5BBDE7901D56F8A8F194E836432697BDDD22B0C82B37476893F52FD2A3A4E3D7D701FE3701B5945736D97F5CD029302D9C14F0FF981ADF7CC0B1A52E0490D5E8B3EB8D5E95C8F2599143849ADA2D1A9F2AE88C695635695C1F860D5E591911E79EE66D742C1441974FCBC29C42D9BBFAC0BC71ED751E56EFAF0C87D31520CC17AA910EC90CFF31D91527DCD940DC0280EFCC3C2BD2B9BC8F0B47E6BA575ED4C5A473B26C547E82ED737F4676CF6E2EE6F945BECE53DF764E6FFFAB31394D4E0D56219DE2B604A04D4DFF79C1115D7976D571D67003619C801F05288E2FCBDE519A5ED10C1742C780B2EAC28015CE8EC3CCC5D1AF22A8AC5420871EC6E80E8527B3FFC5D2AB8CE7201513476D0AC44B3515B37AB1E6D92379B100D7E36156DFB2586CCE1E2FDDA6FF51644A0B1B71B4B976578D05D4BB8F2BC0E9160A0F714FE270DF89947477D1CF24436A166588D465CEBA4E7AC85312B4819384C9D032AF59A68E08AD106A3AA00CCEA3F1C0B1F13250425160D5B446EA5D1798A2B9A2813445BC70EC445E13E371FAB63B6A711554C4B4BEC8BE672012E9C056A0898A8658383E093B2F5715D0C026F8F5D7103B9B8C8CDF4A52EE26E537175957ECE287733D17DF403A571E8BE539797E546BBBB28719DDB148ACB29EB044BF5F6D4D24F08524EB5409F8DBBA7B6472D68E9474660617CF5D6CDC1EDF9417E63CB38B8B00E4B0FAB413477F234A48B43D107FD9931B7F7B505ED5882A50F09A59B65865C06BEC97915FF3B04DCF95CBA410E930E9D5284B366C606FBC9BCDD07764D6AE6995B51B18407DDD910B774DC45A7F692DE66EB87F54BBD835F95E4912A4349F8B898AA5E0804AAB4607FFFE3CF45F40FA1E4EFFC0A42520C7859442CE7FC685F45A9CCBC15A5AB5B9927BDA21F85908D2DE4B9A8A82B7C0FEDF5B52F124D5770A080B07FADEB552464708DCD93D89D9BB3525A525B9F63DEF3AFEC97DD3F737F612CF5445D6BDB089B2FFD1276E1125A38EA1B1373E28AF987845EDB8CCAC3204C988DD64360181D34B5F2BD21104FBC86F19DB1D732ED4298C72E08C1D75D8800978E4CBAA3FA8E8B2991922882CC8841D33F8BA9FE1C24931A54121B9B3C24523797B585F78BB38FFAB9F291ED86E79B36363729632BB6264A95E24F19665029764994903E3330ED3967A6B1ADEB5038885B6188186CFC979C2077BDB2AE531C713CD58C258D5D767FB0587C0F2C37730EF326B9825A18578B9522C7108803553E54A9724300C69B5342FFD887D1D7AE744E721E161
001D2FAD52AFA93A63F9E1DD06A1ACD7532D465ED15B07575C5B09917CB55B7B174A037E0A03B5148466531D5D315DB5156A58875C0CA2596554D6A97B3AE68DB6BC27F32E2EF54F8E181CAEF3B9F329643C8291225975009032F9DC9C409B9759BEE8759EAB07995DAE0D87A10B2B1157364666C9C974098C0A52B46C2ADE69ACE08D1E92726B33CA127CC26E775D1B4123FD4832B3954F2555010157AB5664410667E828A765CC56FE2B7D2AACB27B58B644DB35ADD1638C9E522BDF7481F1E3265C8D91081139B3B82A4E05DC449EA94ECDC639074B5E617C6B21AD33F79527D3D10BEB575ACD3816A9939BFE766B8F4F07A1AC9CA21927D6C609D71A8219C0D39A229C8EB231FBDD53E756D89250BC03811041A8BDD2A60C392DAE899F6F5C57935650649B6F21A21046978D4F2A5DD105FF16780D54DF3F7B0D1A6D68FD0915D13181292AD463354B90DA2C7709FE3F248C4A469EAE9CA57A8EABADFD0EB2BAAD5482B14553DBA05377CB62AE9AD76ED07ADB931E4384FDD1E55E4AE829B97509B3C84B673DFF72686365322859C521F366F38AFC2E8F4926F4D1D00431120127F1FDE25467DE4542D8EB43690337A7B405B88C3DB52A460D1B0BB8F87E1EB7DBC6D9BB50DC561D76F23A5C4A743412ADEF08393764FEAFFB3EE272DAC0420649790AFB070F5FB83B6283A6C64CBF69C49D9A0CDCD605E69FBEAA2A2672DC7DD1B454C8E30E0022EF2AAB91C7866AF43B2545B7D866100E56E67DF12A6318CA07DEC5726A14CCDC9728B169A1D039509C72C029AE04C28C1D6F0402E7819ACB651E03D0358794B0D32576331B9C0004243819B0D4A2006E81A36FE2A67F70473C90A12DD6BA669D5A1F4DA3D577FE9A8EBA7263D5837EB600976461E5DD64B93F484511EE2881949A5974FCE1575BF5D528CAB654CEDC7C5E6E4752024EC7117EDF5BC1ED2EE7F6B3C2873BD4EB23EFE00291DC81EF1B17CBB60723EAB8435AC2794E9A13D1454360FB18CB0B9B0733343FA008DC45046C826728C383FA6720A9DC2CE17AEFA0AB548AA0138A184751E7CC9F17FAF759EB18793F040DA465AB6706AFD8E02C7AF51807E2F50D90E872A4DD039863829C1DFFEB1684E6945127881BFF2E42107D643BEBB254A2CE489E32D7D52A99766251907770A2A1F4CEF9C1171340BDEDF1D539B4C304F8B22408AC24DD1E307CB747EF11F6BB35EED04B9C98E477B918A9F4C5EEA6502F3D0687EC5BBE77F2A5C7DD17BECF5E1F2DCFD18B1C49C8C9A1A7CCDE1443F3F71D77FE07732C9CBA7A43290185CAD7CD6741B9DA4FE0C984AC6EE6C175C7D6DE9EC65ED3C0A6EEFE713DBCC2D6478D657A1E417B8F9481463A799879531E99383FD3F7D4ED22D08043105FFA6761C0AC5B54CFEACD885664E5
503EAE79A9697013B6272D69E48A60DCCFEB3AD28E402334E7721472930CBDC945A98ED5D78CF8FCC68FB267C0D497925A9C33829B3C6B87A0173DB9BC55A74A9F2C86859ACA54B158984B8FF71F6361691AB6EA01B5A922749058D0E188747E088387FD42B6879E879F0F66B276FCFD558011808212537AE4A35B308AF27687E6FB1ABE6C9017CAA6E47788925468926FEF07841D1BF4996FB9D3BB0E819773E84929E43C28E4B50696C08E1C7EF51E46C94C99401FE6B28125DA89A66D65D38B7A9DE8B02B59572A0F724667358C696B9EBE5DFC511B1A +sig_signature: 765AB68AC204B255699618463D65D693A1845D574181D1403972888A76B3A553F79FEFD05FF22F37A99C525FCE21F283E437FDC36F8EF4E9C3628BDAA7CEB6EB228C231180B68DE63421238141664AE3005860A4221AFAB18DA83F3A586491B192C852017781FE449A54E5AB166D972A182D3B4BD3D0AA5D0BE63CFFA78D3B1EC7014D1D65BB94153CC4AAF1CDAD92DD3F9460E91238E551A67DA3980CBC951F58250D32984BDF98837486AA9E1D7F0072BB731CA62C2316DBF852C8ACC3696CAC3037871959B7C4633E17479D53611F8AB80257A8D27A3C32EC62D6E68CDE16CF4ED867D33CEB4A963C91951333862788D42162BE8173166A1042E67FC81F37E4B33D68E3906FB51F09EF15B083F2F26BBEB14BDE765A9CC830445C1B8D4C3BBC438FCB76ACC8B43D43A94E85EC2D64CE0276A3BE11D5696AC29FE8624C30619A7F9E716F06D004FCB9BE4513AC1719A688FEBBED40B7781340E6AFCEAF523A337B90DC964F1580C78E26093CD952E1F94DAEFC15F0E5B289C0704964D9DE70BD8DFDC6F857D925C5249806A16D4233040B049D31E7201B52756F863836E6142A8CC6A2ABD11CF405F063683535BFB9B8A495404B98E6EF53E37065440FE868FB78E88E5E8998615F788E1CB6A3AA3707084F17C314C40376644E2DF461203103DE0E652038F90923F42C31C4A0169EF5D3A02FC575CE3F731D7B1A9DB444386708CA4A1AC239081A3F66E269A9070B7A3AEB7B76A0FD76C26B1FD91823F84D7C9F8905E4725B528896221CE7DB23530C62343F5EA3DB1F90EB1452D54EE1AB1E109EF962F0419CA0111F95C1A8034F969FBDC107F5EFAD0C2BD5BC174B29D65975FAD98A6127284227356A42866CAF9AFA816341C8E7C4C8E4862B7140B65FDA457E9B1EC13F78EEB051D371213134186969327D0E45163638E939E6A49C10B0E0ACC5BAC904E24F22A1F2B1298F4F59BCD085EB08302C55DECD53FA84E128647D3BA84D06ED092A0A416085BCB22F21A516BAC01F49C54488BF91E249C00A655EABF6525F0AB99BBF15DB355782ABB7626AD60B13EDB2E413715C129EDF21543059A3A5646F9
A55E92570A683E6EF5C7E097C4DFA5B4A725C82C0F60E94149A5D1E6E1405DF204BE749A9A403D0A2EE26B7113B2439904190BEAACE77F25E3F62C122F7925FD8913A3331AD4313B6C34790E5940990761519EA6B17394F51FA7CE89CB77AD9A15A184261B4E5F2F95871E9CA59EB996B93518312EA1FA3B6DE7990363FE65259A7DE67256E3E8DC857E350826E95C1953689FB6A5BAD826A1E9C6ADED1AEEE12491A978DE2EC35FD1DC76B48322B8772A7B7E55E8BFC5A8BD0409411F0FF3BE693549F04B3463CD04669DF95CDE04D7EB6BD13DABB39AA3916A503FD1570D5C5D4DFE294A9E82BE575B27F9AB1EB21BAE06407E2698CC4DD533CBB78C92E227E9E81361049B92EC1999DC30BC9EC30E9BBB57255BEE750F884449EFDFD2891D69706C3CE2A19D4B64D26FCCDA924544D69BB87D50D17F640D5FC5FDEAFC6E4D61F2911AC6317D18AD12CCA6D6A6139EE3420E9E082527FEF6D111A885841227207BB79331AC0A20AE1317D09C7537DFFD182DBD4B25EE483166F6CE03AC5E19560F282169661F9F861DD5933597EAA06DD34B24924021D1D35CA30640147FA2AAF1BA782650DDECE73A7287FCB16E6444ED53131B9D9169BF6A9E09776D22C05EFC740A76F7F2FBBDE0BD4AC63F50B3FA5C718285567E8ECAD96DAF89F48FE1F6C4B2D34E01914669C1346F0AE30F549EDE7375C731203EE4AA29047BC31D98F7C2E472DA8224DFD0D05EDFAD589D9AB17811559C8C338641933C778FE572202BC4114EDAE552FD9A9A44EAF8BC66BC643A6CCA708CB9FFB8EFCD6CB5C87F16F821D88959B40AAEED4ED6A6D1F0EB7F50D90E1D40DC323C38E6667195FACD3590745D7A2899E558132201032C5782B343EC162C71066D8B752E5DFA9114B55AAF70A8C29179C8852A60308C7326EF0B4FDE20CABFFBC7D22AFC9400994669312EB78F0FD45843A6BA5A0DABD7B3AB585B7338A848C85E80B536EE9599BF7BADE5867A28B12B4EFE464639897E6B6FC79F42F785BB4A7DCFC0CA546D88CD8F98B930F072062994FBDE90510E6D61A7CE1BA63A66C4645CF995F2C7777E41D2876010FD7CF887E9DB602D96E864B5B9F64102E00F19868779A780D516B6945F5828E9807D415C768822A3B9A363CFF263E04BA72FF41D508B75924D17CCA35955ED102ECCF934FD2208656756B8DDA6FFD5A2B1889072E13141B9DBCF8B8A69E8DAC83403E7D4245A3118EC51BEE519669B84921E9EB9B85450A4504AA4763B5C1FF0783559BF7D04A3832D5C1E690C4A27B6F0E6DCBB4DA80249CA9825FAA25563267FA0237244B2DD0AE1A8F1010ABDF3D052707DF8DB9BD3B206B61D2B262B7BB0C0F4372155F6E27F1E036849DA92A792249E29D876E0C9763A244CF583950D6CDEC348DA4836
0F9B7CEEEC046BDFC7EE401D708590713E6F145C90610EBD1BEC8FB9518C39A2FA9EFEDBC61DEB4D0B61101E373930F2D5776BEE5E1AE6DFB2D8EAF3DD071753BB700E0C8CD997984905417AA2BF805C1B944FA67BC0CB961D848AD3B0725C269CAE77B61F01DB3BB160F8FA28B046136BEACAC81174BAA18B7E4DE4E210832232ADA0BCE8005B450C249E05B796E312B691FF1365E9A397605DFFA63A2F612CB46E9F2A1B5C9E0BCF1235D1C551FE568B1F2634F901592BD547B8C6068324EF660EBFFB6CEBB74AD033A614259146E30E02B456FFA038C2A0A28FFE5F882BFFCA4E7E36B07A6CB9BED8FDF775D4BCDE854DEBAA18B219F158ED2491028D2A418A190A8DB4F07900467F9B612B4EF9642AA1F7C09E08EE8B124401BAFB5BACAC225047483E5362A99F39CBB22D2DB63E3883812B8425360F79B1C3518C1D10C3E1D64DC1FC2C51FC20E346ECA8294E37C9116871485DB47395F8BC0D708EB2A52EE68BB2ACE0CB070AB9938C33FB341B81480418F8D39CF0E6F923EB65206F80D3AE5391392782F148EDB6EE3939431DF8FEE91DE625FD0AD8953D3A50D338179B4D1F7F42ABA9F5189C7DDE472B1F89BFA60586B750B2217580FA518549E502A30181AABA3217C1A5DC96D2A862DD8C9D91F5A556367C2D4449D07606069211EE65ABE35CD87025F89BF53F064FDDE4CB5B4C1FB6BAF6DF3149784B54382F100188E62EF5B56600412541F21019435657E53266F865D8623E2182945BF30D133B2F695D1B75DE11A3328FFEEFF041F4B5C3E9642040E13A825F117676AD5D40B827AAE31564F9143E8586F394BFFC0507329DC6AE496A6E0E0C1664E1CDF307C394D19515FC1D6B805F5ED3FE9FF20DAD5703C4304DEA97EFDED617C20DFB82BE300229227EFC33645A469E2EBF8B80B4D157BE5CBF9AB9BFD27350687BA5FF5BD6BCA72848E6E30C95EA3552A7FBD4662358B38C5EECFD5488A1FB3E98AFEA479B9DDAC0E91C16FCE75FE37CAAF089E6F84B8CE6331E6B57828A67A12B4D2616655E594C461834B228D38D323C509C2701D0327EBD8E9760A3CC8FD9892F10C84BFDF00A1F92DB6976F236BA7F430CEB0C13C28BF5D1B7C92E16637B699760A04C8871521719378292EDFF085900F364D1D2FD14BBA8FD437564B863009900F4C80CB8697DB7DD85941E8F9F459FA6C2DA26F2F783DA51986B344C5B8E8A8B993F40C7EA295046535B9AC02FC56769DC141E0ED3A6FC5C9ACE38611047042ACD43AE40CC19CF1103490B0107B3497471B2FBA1F078B7793C6EDC920DA777355251C7CC35A0124AB3C86371D7AB11C8BF083BFB879EE9CF02FE5570DD0503983E24C82EB211E47BBB0465B4BD47E2ECA814B68960CCDAECC5172C87F0488A0276499591FEE17639
873F21146198D164C5C20DE69D110E87664406745F679E67626FDCF2202761A2C5C96EBE8303F05A9746973170A7CC1105F9AE50A53307901AC66CE27A65CF9A6AC0D2F40AA0D158AFEAD737E1611847995DEE84BC748A5ED57EA2AC58B2FD3D2D928666EA09D070A1E87953FAFB3AB8D3D468A153EF87CBCA1CE7A6C278AC890098D2BE52B3AF9F02187EB63883D5264171D485FFB1ECB272F18385C6B8E81788F37B634E0EC96728ABDCFC8769A5F2DEB8BB1FCA844DAFAF6D63AE11E8E9A1A1BD70E14D0583BB12B7E0C13C7A988054EEDC5DDDB57BC255F6EA90D6619B542FEDC5653078232070410AD7514FB50E9E74D51C6F8162AD25AD6159770762F36F74587E1AB3E9F50F7C04CE75CBC78BACB26FCE243E13CE74C28671157599651016BB041459DF4EA0B6F6F35E30EC1EA6FEA94BB53D347E26CE7BE6E4C569E60F9A428EB55C02F651A0B84E2D652A242A5512ACB2943F029BEE7793AC385BF7EA077F200654EDA0A23182A1FABB5001087AC7B45DFA79FAB72BAD1349093EC40870BC88DA3CD660E6E61711BFBCBCD507A2947DDC4114FE27E93A346182543F7C5706C5C8E58804F421AA775BC824D9B27C758A69042697F046A17C8B997A8FDA096A2A373195DA3E5FF02FA1AFE5166F8C243FF65497F673D6F9597BFD911234D7C81C3C5F1798FB4CAEA4F7D84E9F310343B51A9B44D93A2C0F90000000000000000000000000000000000000000060E13181E23 diff --git a/tests/PQC_Intermediate_Values/ML-DSA-87.txt b/tests/PQC_Intermediate_Values/ML-DSA-87.txt new file mode 100644 index 0000000000..03b881831a --- /dev/null +++ b/tests/PQC_Intermediate_Values/ML-DSA-87.txt @@ -0,0 +1,11 @@ +Origin of the values: https://csrc.nist.gov/csrc/media/Projects/post-quantum-cryptography/documents/example-files/PQC%20Intermediate%20Values.zip +seed: 6DDC6B90E85615F0B14B4404DF3980684561530D0836B13E83E3D0FCB6BAE3A7 +sig_message: 
802CFBC8E848587F98DBEC233DBB54909D07F8618BCF2FE7E858FF059742251554A5CD255AA87FF73D8327DD46633397926362B78C66B289E30E7EC19DB39261FC7F6AE86D8CAEAD185A27363A580F75B3D2E9ECA8332FAC06D08F7A7FFE386AF28FF78FB32352071DCB8AABEB29C56FE0DEA9D939BD8E10EEE39C37BBE4580C8C5369591ACA24C1F8AE9DDAAC5696CF05E1D89D71698816923A912498B8869CBDCA80B1F1B1B32FA4BEA289471C2573EAC36CBA4390F7FBA389266EBFEF3934F1BF6BA92536C65DE59D1CAEBEEC5BE972CFB0C7E4FF45B51B805D64B72A7FB9E693C76C2A7279AA0A27FDDF344F6636187418F0C7B857BB0953B67F919C79AE9088A47E2CD97CF2ACD2D665236CAFA82DC16966D98ABF04CB20CCCA0859A90C007957EBA715AA56C682F1EE2026687A024F1A713E688093571F830D60346776733F14B344B5544487C1C74140EEC00301CAED1A4478B235A03FA1919EEE7A6B65502ECAE4037C12B93B8C1CC6C5E4A5FC1BC0592D3116E16E4AA4FD19A2E1AD1901F5D891A279F158B0F3A61D1FBF9F8DBA0F1BB032BA0105B0E21BDF8F0A2A413B4EDAF7DBA61351E1E97A1396EFCE7B0A6B453DD189881ABA27A5A7746A77946E5A075092C9B283BB8800F506C73326B8FD8C58CD143B483225FA630969408FF62EA5DE9E2F1D2CE20E8003F43A99A197EC9AD2E2AD99F443B20D03888D1ABC89ABA4B11CE70B4FD83FD76D16E217E3CA687897270FFEE03CDA625809F31336D6EA0578AACB860CB674052E5B28922AC92B8C89B1CB17572A74D6265A14483280F3446520EAD954BD327DF6D275E59AD9CA389C6390260B0D9E1EDA0DF517122CB77A818F802ABD3B61A37B6B4E03CD732B1DD9274A5FF55C918BF8F582B95C4ECA7C2B441CB1FF501A5831A90F7B8C1E2C2440B4BF286CF806CAC55E1A464763FD0743B32305991770796C5C0D1CDE024CB1708C5FCC49442B5CE1D599E615781B47DBF1695259E03CC2DC0A424627F369748A3F3246D9A1824EDEFCC2919838C1C5845A0921956768B9DFEC8BB320F8BEB2E72B03969F1362DF33F53E920E12B62AAE258679526EC39BCC2608068FD6A4A7F9E406D439EEF46969BBDBEF6AD5AD85CD648F0BE4FFE62990C8589523D64816682BF3C7E0983321D2272C9D76A218C468149092FBAE8757E5D1E152A7CF9396B7714E013A8234B22026299945245D89803F13A94DCE5DFB832FC9CC646E42473177E7B61B585A7D11A195CCEC39ACDFA40016ED9C3E161A61BFFB3739E05A371068B8CADE321A2AC1ACDE72C816F1B6373C68AA457686BE571745BD30EAA42264AD560F69176D58769CF5420A25DF869D4A1F1B3EE800DBB86B5D2AAEFC9A24001105758D02F8885C94502259B9F3072A65394B
73F40F1C483AF92E1A69143B1C305B5A555DD5668504645B470487BA105CD91B3687990D2A413A78BD905DB5EF7A09C679A6C8AAE2D7A4A8B2FC9EF49903C748ABC3F1B72A45A88401AF8349EC12C4D8407B3A76F935F2FB09C49CCAF2D7A66808B103D0D5AA05AB431E805113A944451C3FCCD785AF6539F859A27B1B41D49C5E057835CCD21BDDA0C1A4A748C39F8E37ADF41DB811CB2A027FC39DB2ED4E3A98C1A0D9252FBC103426EBCFFBE25752D5674C28AB7FE18E9586F1FA782422784AD31CAFF5EFED0B0D694BA7F98E492B9F9364E603994AF55AED1486931FA4631E38EA7C20798182FBBDBF08287D8EE51B2D4724DF6D625580340A1E40FBF35F344C48924178F615C48BD529EE0302043559952A01F6AEED6DEDE1479C06B638E73F0A3A5538598BC958E46E68D4BADC6A0EF2743B1DCB874082D03346D2EA9FA445D006621B23998BE5213D5E2DB4E3A51039EC1EDC16F5C7098AE20FEC3495C3B35FB9FB4F06B267F01FD79085036D5477D0CBD6C7CA513C4B68195F7D63A81F71083CFA2DE23270D588F8641D443C9A23642833EDDDD4600D5EB138CAC283CDE3964C2C49932ED6019EA56976DEA8E39AFFCA7CBB9524C54A1D010F0BD58F69AE6D77DEFA0B96A7BF532CA9236B514CF12DC859F14B4ECCE46222763743CFD8966B0A77C0585D79CA6178985DAB8B61355DDCF9971F204E1C01460DDBA43BBBA92B1F20E9992DA2734F632DB4CE9FCF279B1ADB3F47381EA6454ED9E04F6EB28D28F7F12B99D0517129FE641356A7F7B3AB15125038C792F94C495A7FE6E52BEE569CF7DDFFEB4A4CDDBFB56B19A195A307FA91238C7F9C781E1FAEE386E60BE80807D98618782DFACD3F5E42B9E6F8C564BBA44DFCF63DF93B4FBAC416C2DE588DCFC079AFD0F0BF37801304A37B280D084A04E2CBD8618B2E61F91304AB8E7AC10263AE11D6532D6D1A82F26C2C7205AAC631E9EC522C1B15EBE6438D854A3E23AF3CDC5BA37F31597F43CA2EE94C25E75C59EBDF2128FA21D279DD891FB7D9103126872F110DAEEDB92DE68DA7859681062C9098186A9F18324E9C35AF960C539224F63C1C6CB412B63ADFF779CB487D02E7CCEAC3B7A90148D7CDECC185F5E6D587E351ABB460DD2E3E4F4BC0A77AC488BBC0C634394F32E926FBAF33B6086DD64302388B492E55F6C08388963C81B5F045202A5BC783F5EDAF7D7EB6351633D4838AD6F797B6CF8C66E12F804AC4179D31BD5D24ADF2DDD1A1DB43C22CF98020853FB34E314527E1249A22B8CC5FC471D2DC13DED0CD1D9740EFD671065723A05C46B33AA8B5DE6BC4DBD4F31432780DD6323C209F977B1412100B95CFAE7EA901160A1EB67A9BA88970517CC1D95D1003322C44DC73873677B63D7F41E2AB9B43F64DC8
7D70C110510D80198FA4B067A696D99D3A2EAA0B0B37482508C4387CB5ACDAF36C06799A4C184474BD2B100ED665DB48C1789DC5129795DF1A161978889D7B2B0B0858600D94CA1565487BC8317A2FFF9AB4FD649E9ECB24EB4FB7829E04A3E0F2087C518C30596E390D62530ED44CE64A754F57016C1BB3C91FEBD66D4770C8C7406B4A33D58DEB3EA43E70D3D217AE3FB841CC981289575ADCE06027DA7A86CB73E42628D1C33583BD907B1723D025 +sig_sk: A3E61204447E6C0C9438D5D349EEC3919F9C406DE5823B6B0C10102F4BB447B4292101DF48738E3DB3E1B68947D346CFF9B9578F61DD718A51A9BE1ED6FFF1E5DE1B58833F1677A5DE1215BE80CBF1456094A9C7029B228EDC3DCED21B1888A31DF46356C3B953250C0C85A62252245CE7CC1646E75CD6B35EABC4EEDE18E04999404C03206AC3A871810224E2A6099142421A488614A149D3C0301B125051A00D833812C8264800148DC192888996118B321204922851B2448AC8001237902120081012090AA02DC90886200609D1A2251046881211444A46119CA48819412E0BA248E4A228C0406A60A4711C15710CC6112389115AC04C42348C08000559342A50020E2480044A1865D0326CE232260BA380DC182600306D623428A33281C9C22CE438801C819101846CD4C26C093922D28090E4844008862004466A4A061202B060C33884CB225100300D48B848093920CC146904008C11192D21358ADC9629D2A65153B06D19164421A0315C82310991440B48311C46521AB0488882604008490804065A447189C2650000900100645A360A08810508156CD4464C43B46064B8604AA62894422908062A59C24D498865044902D0264EE3A0652123801A890C63166109A00912388D0B0262A23000C3444E91B4210AB481DBC4050423208C96682034292184248434451C816154B06412226121004D58268924982551320C0B112684022D1101091BC46043446E22990D49A06014241221838C18B0709B844408A28059B44D63448C410049103889C3928DD3884150B22898406C9A8465E2448591C824948491C21211CC422C1B927100C5904B380E49068CE2480E21C18911448610008901B1644CA0899BB42964068614120A041026811485DB12884A344C21256618076204486D48368E24A020D0385161387112A5854A862D12812D22374614302D02B3404806000B3360E2926DE24266189625A2046D4146055034308C32294C068419C5819C8028C0A828A2366C049964C848450B4232A0987083386EE1408122817119958524002E6390619CA08498B6604A8445A408094CA6616228468B023021494920A664C1B8214B887082242A4A260D914249131724A4A8841CA50C80C66881980551484CDC00642113118A
C06919158C52429114B6010226329A826810018E881261442884C406644A268198484991404CE0889022042D8C9844D020112422698C40011980852403224A104263028E0BC34C0C314241A82120C28599C82900058041442A4CA8000914880A378402052664944421C50922920C1B086D18270894B684192321D89868590269A0940C13B29013958D8904481C94710B28720A9069108281A4B64101240201C0050034611BA031C2942191160660988002354D593872D4A20193C00D1825051C946CD8326CE3482ED0C2211C230263C62460205004B609A2C088132468623642A13040491806C4848041360562A27053420814180A8CB48009325193182A80380A94A4700A2580D4160513944C4AC09100C26C1A184EDAA069203332034325C838610A05080C8764094629123709D8C20CA228288402840008021A892CE4202619098E4B826402A04D61C46549B845D10689141310598848D0A28512278660B42019974C43B231894030544690098591890621C4389241320019C444020900D302868CB8214912451C325024020AC1084D43988CC496081820626090309C000AE028506038819B22259902488244802008329B1065CA942D9CB2814A408D6148618B326113222C589810CB122999064E5CA08D1835660C0592A0242524152D0B090A02066113002200445004A10513C284523492D4C424E44424141732A34028DAC06101C03121364890A869809265939281E1300CDCB400C1424C03C424414624E006445A162D20010208473182160E9C1430C9044904122EE314010921818214055A4440CA06520042868AC4808B224952486010225008C4000B918454B49102B52101814DD1382012A780E2B86840A24448140813854558A881522291A12851D1404464C88821C5242122614408701CC328480202D9167119256999C28903188D20C0896140211308109C205063384923994C6396095B443253104A482601989884D01069D1066C0214611215218A088851124218192EA10481C9A00D23972CDC945184A46D02166D111432F0788FCE7E7F0EE474116D6706545275DA90CAD1D759D4A09832990245BBA7321755489F7D92AA7C662E1D4A38FF96C239099E511BC1D006E48E033CF6B2E09CD7F8220BAE963AE1E6AB53AA5ED38F954472A26FC644530DB78780F7CA5401243C53AEA1D660CDA71B493F8BCF93AE1CE14DED6B98CD5B05D8C737A53C79A2C632A4DCA846AD1BF916B289354D02464A14D4E9BCDADCF705D7604AA0C18E20439E485EF7E3B114107A6D80491A2541FB6D7BC81333A050C5D3B8E7A6F0DE7673F657F9AB0D03FA890B70957B5B3F9A27577916982A80F6DC8E1CB8E95F59763047A6388CCFA633D7845F7981E7A34353D5C91CE65ACBEA6E1C7239
18046CFFB9C4B3F4CD1B1F570B5C517D5398782052ED2AC30D98140A1509BE1EB315E45A7B04F0486F2E831C07D63E7ADE3434EF383808CABAF0044AD6783A1228DF5A8C2FD308FC95F59D719621E82F610D66B922A2BECD6A42A6D62666EF797DCF28CDD268177F4CAAD9D6C07BC7BC0002EFFF575977FD943967B6179A09153BDB5C62DF3ECB167F15EEBC521A66FABB3012730F40B2A5ACC1D5C0B649BDA549BB069C58DF5F17EDDB69731908A681D6CAD1961B8D592B810523DAEB6FD3D50E2A559DBB10FBD336ADDFAB2EC5F82F56D18E42970F424E69B87DB37FD085E2026FE20FBF8EEF4588526E9C6DE6EC11C2305BD1B7B166AC6A7E58A4843B5F62006E6F2C7DB6ABACDABDB87688256F2FD425AE70EF03843397F0D05319BEC5DE36143FF973CB88A2BD16C43E1E6D70851F7EDAA8BFF9AB9CAD55673C310BE0F47FE1389248BDA7D5795CD0B920512C4B7D0A9D1F2BF5234B83ED03DD8AA2F633F9A598261D75F71220738AD2A4CA6261848C0D692B55ACCE6FDA95D7AAAFFA79AB017A009B12DFA083CF57C5DCD73B0DC46E18C546EF424510AED9460648D0E330E1766030A175F189733C3A8DC4E72C61D3D804D159150BE96A42690186A3959805CB671EE3A9FD60BE3CD7FBEA3807B507DE39507362FCBDBFB86B205F1144C7B11414B71A918B74B55BFED381F239B31AD61CAED288842294F3922A7F3DFEFFE1443EA774830179FBFA4301F8E3BA74BF38F401D672DBCB22CB6A9E0F1984420AE9257AF1BA122EB230161D28BB4CCBDBF06B3C1C5C4F5B551DA818E8022F782CDCC64A55FE92CCDA962877AC69667AC304B85643C1098702265ABD659162A4FB5FBFADEFA39DE27F230FB40F4B7A08DBCDAF5E2AB579E703D62B3F4CFE4DF5D5C84AFE5C6C6C2D6287382683C6BB4AA2A02ACBDEA75DF548F336178EC5F0E4D45A1295FE29CF11516ADDB737E4D830102EA0FC16CF24929A00A144D702C61FDF8F12E282134AB2DCDF490271108A0B0B5A5145C384F7758D602B108C9F563DE0A3239F4AFEBF161051898C05030BB8D6BE832E398E5EE032AEA53057C85B3035939F88C9215D71E20B98C1DC4155DC74A4A41F33224DABCE76DD8C385BAD9C0CF4B16760A04BC6F23D9B4AAEB36A8AE39ED03CCF1465CA8D8725503F3C8CADCDAB4A872F25A583FE97889AC54822781173F9E5AE6FC9FFB3B23CA8A61E620F6D234B0DDA0366BF571DDF33A27289B87DFE30C2EACD665B5F58344E862C37FA65C1E960DDC210F2FEC6807E454D1C4546F28605A64C0748073DC0EE40F01A1BD55D69A7D8DC0A223ED4BEBF83878D64C63F15C95E4D212134073929E2E925A74066DC8EFC77B6D6D049B7F5516EE565C0A4315E8EFBC941A054F5E9C73CE00A5B7AAA4015F6B0
7447BD7A6E45BB66A49A760613AA751D2A01C76D34ECAAB559B0B1DA951A0597F029B4595F347261FECE8A4EC72C70941275D2983F43697DD841DA7F6B31EF22A04932F2857395176C0F331AEF9307296E2E0DC0AC598DAE7D16BF3D84FC1D5340AA75D2AB4CE0D5E24FD6A76BD0E40AFE3DFFC4503F859F2A7190F07CC643372F3B2748EBCC77AB5AD93F9E2C1A3C84C5821F5F4367B0FD3FD1E05808398D75C4BCD8D69B0F42D573609E1B1871365BEB68ED23645F75D878CAAC40722A934AEADF80AE6145A3B36844903CBD1B082D210B72F2638501C1537BBE084E051C0BE28EE589489A123309C89071DCC21042967D0574CCC55C946E2EB0F2CAD161C317C229712FD88BB7DF52FCF071963427B4F2520205115C89D5CAEB88AA0C7218403FCB720D72B7EE900540E89F0E6C6FEF1A0380EE643B4EE84CD7DA319034D452E3468337A54EA647AF1AC8721880C2FFB2D4CEB94DFBCAD4084DD6985451E48E7F8C55620EA005683004567A643C5E32163C68E9BB90A093366E27CB831C0D668D5C8EBF3BFC17A472E183237BBCF61B077476DE3A40200DCA000E1FC6CCC19ED348BC1CF2C2C6CF99052A09A8DE314C9938E57603B96EE9A52F2B37D48F89C16F8F28FAF9CC5F5655BC7BAEA98088853957156AFF3D10C81456415FB8FA584BCFA86519435D7AB2119C2EC968683857B604264D798627EC56B1F68DA21F9CC4FAB4E35970626185A60A4ED690F77ECA24734B652C51B0683AE397988D648C652E4A8323D1912613BE403E1A7F775075F9BC8804BFF23E5077C372663EABF0EA1B8884CBDF6FEF70436796EB7A97575250CC4DB9580A7266CC0C99E3061ACB9A37886DFA5D0E68C447B2D81F7E72337367BB8AF51C9AEC0D5659F196BAE1510A126DD2D8D3087734F21BA4DCCBE7E6DA79D8C722015CEBAA3C1C14549FA9FA384DF76A4B7A252CDDC3A7DBFAB8BE3CBCDF9C28A9F3637957B40949068D7184E9DDCE545B9EAA555BA94FDFFB94538C21D19C489B42ECB437ADA6F1B64318C8CE0167FE0950C1706D95FAA6C1661090434DBA6F8E1486143E76A6C06B0705D6CC85CD5366C58F0CE1AA31B121F1C5AC32BD65B2D46D5F53B4B6AB0B5EA8556230DB5EDDC69BF37D0DD919CEE40543BBB29C11FE6DB2A7873BFF976B3F3DEB95AA26F42504EDEC1FC793BC58E01DB60C966407764F259345C125230A3B1002DA094BAB0F101182C14AD0B2B801D775518D3B772E9C791F5F8DDC6BDEED7A7DA1D7F0A561BEFD8C1D2891771DB57F38254707A7D54CD57945D43C75790C25DCC2D1E45A1C98993C5A3E4EF0FD44BEA2860E97298C0F02427E5348FCE051706A54B44CC4A4CE8F4B397E472C7AAF8BFC5600912559DA30223EDAE65AB927CA3D467A09414670A8BD60
ABE00F029C1512302027961B1E0BF8784A6CAA78B62EBDD102D06E04666E262EAFFD3F2700C2193F27A43B50B4773CC0B5184AEEF2561852997BAA75C3081111C3627629202FD1151F8F4296175D5C2F892DA7C140C4AC4E315F48F2C70A564A11245D61F4F10A1C933A4E67B341C946D99FF6B448B672435B39D377CF6AA2A5B4423C419BA2470A39912E0B35045AAF80DF3BC0EE7A971031C84B4A4202C501EDAB24CCBD4740623920750C6157F183245A1070992812F6A8A8115994BCFEE0D830FA2AAD7A7EB9DB1016C051CF3E7BA59ECBDEE39303642ACDDAD1327CFF02CA309089388620CCAD00C30AD60CD93DCE6A9BB43191232D0B75B7E1919238BAD92FDA382B8A01F22DC62CAB6CC86A3B57896DF669B1413102A2073261D5BA44FC19499A4E73513697FA16FA459EBFCD195F74EA3358ACC61996E6959331CCD8565C402583EE89C5F5C19BC94B66B239956FB1B727411EFC6632C46D7AA04C1405A1C64E0C8E6804A21E55E38855D9F56E268B0CB759229B9671C1CE3F099FFA70F727F11750750601D05B5302D252DE9F4585B9DB3AC499227FD64F0AA95760F9547C1DB5DE3974EB12CD24B596EC2927C52AE20910E87CF173780112CFF71AE4CC7D2B6F942E00C6E29ACCD0CB922E8241B7EFE1D579AD97CBA5DACFC76B2A97D4256AC5F86F05C4F38035930FFB1A7174F2B35BC19D97F8B0A25939BCD7DB31382B73123D1CAE9ABA6CEE945F46250C403E8D1D498DF461DF0B26705BF97AD1515F2EF495472FDD7D79513CD46E2DBDE020983439D51ED9595125A024B4ED280EF8D5D749AD9795AD90CFC99A7941F28722A5EDFE8716495597BF867A506601B89D6B8BA62E488681561B45B733596D4B77AFBF7947C4ABC756D48087F452E23C57E90C3AA1197B7543D7D7D1E645EA18F892E43CF656C60DA3FA1F0D574C89637875CD6178AE2DD35FF2CD22D01D93F4762DC2B60B0AF1E071EB71DB0C9CFEBBF7174A9AAB12764E89410CDC985F34DFBD283B7F9A9007694031A090B9CFE10FF42A6A80ABE3EDA818CA84589F02CE63EC08169D28A1E7F7FEE76CD2AB2836F2751F0BD2060D58CAF80B0389756CCA79E3D29098CC7FB37E82D94DAF337FB7F45D22812C66617D0DC046A19590A12455B405630F0838DEC509B4EECA3A61263555967062FEDDF475E627E149F9F5A4790621DC25D5AB637E09F6428D1240078467E7302F5386CD251A6C7685F121712B649B78B3B88DF801FC5D943D5EF968E9C0197F9B30C8FD199C6178DC19572FDB83F84DC88EC1E10103B9D58223D8BCCA95FDE199EA3F3D11862CE64C3F622E324AF3AF4352775FA2ACE2AC32B30C9632D30834CB90BDA8A2C635C69236892CE254847D00CDDDA467FA9B52FB1E13F397F9BA6B68E6A5
CCFE16A9824FD3B73862BAF10CD43AD3E097258D5B9BDA93E87828A1DCE2EFDF969505E0E4F7E2D144984674D1BDD790FEA906D3B72B6A2A6325E040F5759A68120D2441095D6986B33D333892 +rnd: 0000000000000000000000000000000000000000000000000000000000000000 +verif_signature: 13E899EEDCCC0FBA629144E4AC067906B5326B8F9A6CCBABE1444ADD4645160D2257828710D1EE106021B5641E78815575D4F095D015D8465C92D2DDF4ABDFBEB11EE5E070E6DA52E548DC04FDEF5472E7E5F18210AACBA04F4F18AE6686B9AF9657E38E3B9BDDB4AA84E67B4D8192D003873DD3EEE74700FBD81E381C2198B7CCC137C171B22F9353419C48C14B8D630F996340275F6E604B95C435208AED2BCA1B419F8363F0950E240D6F9EAB118E4BD3DA0EC3A2BE26A8A09857713C36DD69C34EDD2C619E88267071CF9EE5A60CA3142DF163F01D8D796AC850CFF3666078B318FB5BD17360C876C9C90D8A7F412C8A31616BE7A3745871548486715C94263A17B36CA49925450C578AD9D4B1C20043F45E8431994FA6D26A141BAD9E496E009E914616CA570C09F638D062BEC687333AC7283834537EFB6042F37D83F7295DEA30D50090B6384C1729EF17A0D58750C0037514E5E1227853BC5AA31E95BEEC37B1518269262EA35ADA4FDA77627EEDDAAF57971BA36D467B19A90B991CD255DB79B0154886523031D6C5B1AE8FCF9A4310BBC8197484B2923BFE0B1215A1C4D8C68390898AD53E3369B7053FB18B0D874070902A5D3B3D91D81D4DF1087EF7DC0584EBDC63D7BA3C0D31F86DA6C0FD08115C53F6AEFEC0829A68D2A3442EEE4736702D66810D62308A8CC82AA62182F598F44E253711B5D60788BD0D690EF98F9AD593E03CEF38B9C977983F6911BA1AB9F735E928CDA38C03E6AD8362F460AE4CD0F46E00EEEC74B6123498AB31E7A79D334D72A7A7EEF3B551E78D31BC2CAFFB139CACA4D79C8BBD52BD78F4906509BE42E7763AE6ACB898285EC9323E68676A8CC74A58C8DA8BE911ED6F513B6608707310FB45CBD97D5FF0D2ABA36FCEF73D46CB7F01C2CFE38E68E84F4A301916D2F510D82B4969BE7A0E9CC60EFF5C0A8717B822838C77AF4206B12545089BB2DD6A3FF012C86415BBA04FD7D4EC707AF3B17F25574766F1E92738E06210F48A5EF2550EBDF85A5CA34497CF1D4D3A758648EC41172443835E5091BE8F047823D9620C2AD51C9611AAEE39B21E6D6AEC870C8915E266476A50EECA599622F7091A34C23F14B40429D95E3EF98FED3E749437F04BB4A337522E6809FC104503E253B41C4F0301AF467F74D33125FA83EF712445A171FA40EBF4E6553E454AFE2568021D2B2A198DEC9BF720F9D72F81520BE87466AF70D00E0E860F
F9ABD03978C3E429B5AA17B97F9AE93448853D6EFD168A30C6CBE8DE2D288D9A24EA5D2A5823332B84FD2CE793A22BEC439848D4E60F3BB9C75D7EB0871E803D61B07E749ED76072B27C87B69D6C014261F647AFA88C4F1EC55A75A50FB4C79D2C94C0503DB20DFDF71F628874188CDD7385C03381DABB854D4AA9F47B66438C43FF53EF5E78AB450B450191278AF6E26A7B5E6461F577F9852F81C90203C713F5B1F6C3EF558C9032516D8D62FD5E24E4F0F50718F56B5A59A009D5938DAD5591F61F4C659A760526EF41202FA7E5F6C7D5E0B0C0C43B524B66712C5A7C53C84C50B83EB9C98D2FD084C9C5F21FEE7742E6EFC8CBBE5718B70C062D82E2F986F38DF1E71589DC7987243562A2319D7C00B26E531E93C38444618CE758734FDECFD0C6853728C61000784EDFFED7B33086E168D6CB63E3DACAF3552F885B478262DE5E1E63CE7A4C6695D1193835E45A67918C42D39BF880385330310F2C7BF91E6C3E29B781D09870C26D76BD8AE209C42BC7432DBB4C16526357A5634EECDE93C51DD4D6F0065B2EC57AD3B58266539597C8F42B55271D6F90E986F6828D959EE800DBEBCF48236BA3DE2527E0ECA4A3C2A34BBCDD6CBB3A9C96DC3BE110D3499466E2857FBA98123A6DBA9014877E24EADCCA40F8AE94B2FED236CBE5BCA9DFE0CBA9A0F86241331859F9D6C087B276DEC9356F1FEF69B359F9FB384A84022DECB70108DAC8E93BB6C300C0345FC640C006EAEBC15113812FB37DD96E2A06A463AFCE66C59F8D714AA1FF494F086FB9EADA184563CA9D8808B16C19A824AD857DDE51E508B7041235F300ED2C799C182305389576CF393CAEB0D3BA3E4EE4B577A3E37B275FD8051942AE9154E5BD7C35E0F895523A29B0E6B7AE20BE21DFF567EC8252FF5BD0AA145015E11C6A1B941BCC7601BF039442F200619658D9D04021FACE6BAB5D49D8D7BC9A66C2BA3FDC490DA55CB4670838EB2D07245BB1227B024A8A5338E9428EA55741D671A79D6A14D27D13FB59D0DAE5239E1BC42187BB78E038011DA0D1363FD0A78F86261EB026DE7E173A90FCC017DD78F5A32D3E29CE384576A95511B6B4E56EDD014B160799BD1977F5D79E399EAA8E2B75C5EB33566CD8B63F3F4E817E290A68ED1E9FDC6BFA18E3E57D057F22FAA2F60FB634567255165EF418D182FADDF7B89F7D301069C485D8E83489D493BE56EEDC43D48200FD1E2B0669071BBF33613928CA31910BF2EA328EA864139AEF791A9ABE52133249937DA88C48D4C01D108A4685AD29DF2ECD4183820128440EE5378D6BCA6198DE89A97BBB4448A28D823A5740607C6E69989893FA7E299A7453D8DCB34BDB7EFE95B0C72314EFCB493C09D77BD0119BACF2C22E7CCBCD597F6A09FEFEDFA0A7AC3C90BA7519
F4016056D5FB412BA02D0D45CFF3A63D36EEE1E468E6EA2F673A7A02926BB318BA73EE1B2C137DEF4A39E803FF573553E9A5C6AA1A1721CA54387CB1DFB8FA7DA726B2AE7A05453B400A19E53252789DC3206324B2584B861F00A250F99FD9DC7D513DD7A65A04034EB33D2D56A496B36ABA0A3008E30FC13824885D9E6F681A7DB62DDDE3501BD40775E2E2CC09CC8E4E67027202A81170A57F4AC198C17FBF95BBCED36D4930B9508CFA3E8BF6E554E91BD7D6E53233BB91ADC815761A0435DECCE167264C2F4E34343D1E5AF7BCE60C9B7B7EE5DF729A0DDD4BE66F82FB5E2CC07B038576110EFDC7D55026BE755EC1F02E4762D6F1DADFF41CEE6352C44537E685A50A075463217B92F7330CD929CFE3ABB5FCAA262093558A0733B27D95027A769E7DBBC1F36E8410304B5D597368EC2A632D46E8C2F8EA2BC44FA76EF474EB96A364409B2363424B8F85004304AD617693BDC388C3FC2961BDB15A1F5B20EF95ED998496B2938182FFE3B927EA9A23F6428DD35C8611C839E316E9A5327CC9EA82509B215CC966BE1C7848EF392DA1C6F369A336253AA1152B6DCFDAA7CADD4D9A1D589F73D3EF0FBF03882FDEB944B5B6CFE26F6AB5123829558C4C736F0B687AC7068380FE7F61BE6B40E3F04D7B36820FD86329B3109D02EC6390EAFC8CA730562B68082424FDA98D0B64BC9734B40B63F7E37AF6890AF7C2D92F79EEA3CCEAC60A6F380692F802B1556A78FE5583FF20A9C6A7BFCC863A9E7B62014D1605DE894FB585E2D4F94115E029E5857E6A0A7389275F530D3D80CFAB1F225D38335D24679197D48A018A34187DE3BCCEDE94FF8EC534C02DA724D4598D669E85A9C60E45214FAA6544D6A47D1C4ED7409D55B1A7F115AE15443A1C310640D116238493EF3EE2879BB8461F7D68736470D4B573AE454993F532301E35CB9EEEDFFEA82FAC497753F75019F2B3B02C70B64A579531C326072ACF1BD0AAA09F0A978B78AB22BD6119F88DD2D572F8919D474F591DAE9FCE4753C985FB250425F26561FFA9443F2376689FEB48C4CE514604526A100AF33F0D4337D1604222C4D9F93A8E69E4CCD36669090C5DFB0E95494229FF9B20CCB1AC81B81A36D63A850DDB33334DAA5146BF36FE18801E3BEBD0E91B5E1CFE7A9826850AF4397D1B07D3B719E57BB832AF4234C0CD9FD40B882FCEDA937EF9A2DA24592BCB5D1BE83EC5F03DBDFBCB335D90D5C8A02EE53D508EB5DE4A961B958F751E5F89A1D28895A3DB7B62EF4AE16D28FB789B3203AD2463D6EAB83A6D20CEA1314AE02A3FF6F653154AE144238186214741C236148183BC39AEDF44DA97F731CE3DCB61A4CFE14F9E84AA05AB1C1B951D20155233FAFAF16CF1BD0BAFE199E65D563453BFE55D5F474AB10594D738
A8C106288D69D07A1688601463F3BD2146819C83726D14C6A80839B8790B5716E772F6C24C2BEB7E2CF37B3F42ACDD473E8CCDBE484D6E07B073DECB174AC3B8BB2EF54E6DF9E02071FA600AE55967EB6F702F719159F0EB065CC46048E875E7CF4271AD2EDAF910829AF613BA89FC612A00FDAE537B093AE8CBE6B70D0301FA2E13A916381C92ECB451A36E3FA8B7373620C071A30534EDCB4A3F113117A502D6A72DE6C77BBBF6AE99859AACE64A928C374BD2C4652AC97EB744D29A70CEA9A19D7013497BCAB69631433F9ED1FE20F80B5983E1288BB6A2BE91543ED47928BB5E462D01E9C0B7FFFAC06C10F152F43C329E89DF8A79996A09798A367640BE9FB53DCE27BD0BAA9BF021BF10D2FCFE5B13FD7D84D1C1EBC0BCEC26D08780D13B9947672661E0FA5FAE6F315B6DE40168C2351DE31F41FF6C533226E1BCE3F8E216AF3BE64C693372A066B175F726CFCD642BAE980292C1CB65E01F0729640AB009CB98892D6CFE40033455DEE73033B6D5E19C599F8A400EB141527DF2BBDDEF50BBD5FB55AA5EFDB35D08569B0297E2481469F17B87B508936A9C5C11089AE9E4B0CAC5749393C803E47039F51B5CBD42A6C9E19EC3F63C2332E87768A960FA02186B7A2B029265091146730463DF8B375F24AA83BDD41D1304FC2FB52DA10F1FED652908CF8C528FB2625F393FC8C7B33FAD45BAD47D383D2C04CF32E807425F93D2350721B7B2F596648EB5E1386B43D12EFDDB8FE2436AEC278EE76875B523C5431D99485773D9ADBCD014DD87BC68FB82EE474B22A5433AF9F991FC34B25834DF13099A46F568AFD1155F321B9DA9E9C06347AB3C1F59F7EA0ED6CF47B3E9AF657AA7AE9BF8260B969DE4AD24D3A8CE95E577D0441305064E07B9A2C75C3C43801FCEB736FE3D271BE1F36BFFC8E43DB14A162476BAEA9D346B5211ABD00608B15AF3B5E63A00FF928D1EA1A18D75FA7C6C1B0FB6272E55C3FE7E4D4205E5CF0A1F8718304E14F2B4CC543D0437341A4A311601A92E92566B7DFB4264E870E1B3A875EDBC003A561970CF8A669F3D1B69288CC6E359CE28CA65F9DAE8CECA743C1C8D9FFB5508824A8361E33B431A2E9E9A997847D2E6E43C83F02262E2946DF7726D54E3E6C9CCDB6D3F136346C11E5942E7A1BF850C2E99B4FACE75FD40886933907CCDFC0DE117702031941D001E2A683C5578FD3354212CEAD969BF1C81239EECC774FD0B883D0EEE824B10B879CF707CB268474522061E927B1243244115C669E9EB272B60A644F519EFEC063408B658472E9161A1F744FD66169F0CAE36B42E2379CBE81E6E51A0F53415184EA006B2270B33E2CA364CDB33AAAE77FFD953DB39704D490CE9AC6F2DD1C7A18E617419A9AAFB37E7239B236A4B74CE63E4A0ADFF85
5DCD78F6458E760BFD1D2AB95E83C03B6FAE0CD3C5CEEEEE1C69515965A335FCF78E80AA739339542127170B2C3EE10B0EAA099AC7AD4CD76E7FE4C1164E62F4E5807DC0061F77E4A8A528D710375930CB755B28BFFD928CB07BB4A107CDCABB308A48650DA4E574D9BF5607F583DAC340D72093EFB12BBF93410F1EF5C9516C744D2315EC9E000A8DC5D17A7B6F0D079D784B6D90193F6E3EE7EA0EABFC6F68C52B37CBCE8218AFA3670A80BC17B95D7B40536226358F04ACD92A1BE15B26A4E5817E628BA679B3527203CD3632628EC83AA4F2186D2F005D5DFE6F7FDB4FEDAC9E89D666E303BB568306156C56F09534E25C619AB3B95018F4896BACAA4834F6D2D8FE14A938AA10E53054F0008444AC2EEA2538C1230E6A18C92B01D9147FDCEFC9C8DAC1D4ECC8CF1F962EFA1B8CD3C969000B7EBAC598DCA45EB40BCFB19851483851CF340F3E8C237A9EFF1C9F21E4975541C61A8FEF2AC6057F59DCB23A80E80610CD85DB203C35D24BC82B9CD78246F59FEBB24832D7CD664C995188E0281CD7867900DC0DF44D409080268B79E95682885F228770734FA53518EC80CE2306CE1448524EF0184303D450C76EA63B733EB0C8DC48BF12423AD23889CFCDD891E5950047240DC0C38AB2DBC165B81E631002EA6F74119E27F9F86073BF2DF710818676980C4CB6BD53F9A5721778B89F59C68C8935F5031C8A93367D717057FD4D5EFABEDE702CC645EFB6D7F44C860FFF7637AAD972248C844D15133920073891C3135D297868B7DA86F097D8FB39C13BA14C4F247516ABA4C5F8CE3818482C8FF60CCA51FDB2CEE96BC1138DC04A86F857727591AAE6F87C30059B3E81B68055B24EA2FA983686498BFC9D9E7D595079EB646E85B212CEDD21D0087E0F2AF663EB772A9847B1DF2197AF13626B897C24637AF5BFE81816A8C90D3048375B699497143E577185A70E115058A3A9112B2C4351B6CAD009282B4F7CB8BDFC285777D7DFE8F5000000000000000000000000000000000000000000000000000000000003060B11171F272E +verif_pk: 
2D1E6BED8452EBF126EDE70CA0A2B50D03342D5B13B2AE210F4562A3BF670CB15CE925FD22F26242BAE310B3AA413B6E78D442D935D1728A3248CC205CCD8D3FD834955520CDFB2C73E90E608B2C3FA8B7D179FDDCC88111C9E84171E9709B535933E492B6819C6A92EDA25AC407771A8FEDB4E711FB89EB7BDFCCEAC53B4EF46B6FBEE132A9D7ADB436E74A6D671183AF311A7A31429B012117527585F7920F348A6911885A0208B66DE30793B13FE1D57BD951F7AAC0349A785D26DBF1F0A91E5C9F4FA7435C44A943F1381145EDEB1C8A05EEFFAB202CF62CEE7742363EE69D8E450FF67C3962D6FF97BC3D02D6DF4A35DA3F89A48833CDF290F0E9372F65A58865FD4044AD090992AA159EEEF72B0DA7CB3A5E0AEDD67D828BBACFE59EE462AB696BBAD0E5A9BB1F5A51E0FA5DD44D8EC0DC4306DF2367B24AA2FB752F82D844E4C0CE159E3FD6B4705F3BD0563E0A7A4B94BFBA012B9C8B9135F2DB4C8C8DD6EEC8658DF30559BE3A17A772105614EFB8C1BE18110BE670F839A5727DF947FBACFD1FC37133584415D37C932E7092FABBF2D09D25C4CF4AB8ECBE5D8B7FA47CABADE71E938392861E8D15A41C5B4225DA3D16D393F28550860A86356B14AB5F22D0CF037CEBB40EAC87A24142A0219300B6476F96D041D1C30E3C52D245AB6AE7A1E5FD73C5829D60628B6D87FC889C3EEFAEAAB61C18EED7511A96C4932505D3833DD83316144488E2AFC4EC591812B999C1C95F31790003F6C95514AA29087824AF1D991236D94AD950EF66FC7FF4BC3BA0F6FDF262CAA59D2B55B833BCA67AA51EE1145F94E2DCF05BBD4307D8B1E0813F845490BF2359923CA598AB7D99D2F0ED8E0BC99FAFB013EDC7DDB86172073DCC3573A0CF0CD97E93DC63B882ECF430CE4392EA5ED8C8A1EC79DCAE64D433EB538CFC4979BF7A28651E8CD521B08ECAADF8969A981000356D589AEF84848672BACD3866969BC283B065C1ABCF638C2DC342B27DF6B8F03D26218FAE4E96F25566BC6FEDE719D38DC0CD55205F10CADA09ED914A4333D382115C2F5DECCD54F96CE4E5F268BCE927B21DCAB5CD04011E92F5F601862B20209BB0F956D933D50AEC1BF4CED2B2C2D43F9A25768E29875264864AA57B5A91726EBE6D730A8D89538233704420BEE0B01B763043A55B8FAB7EB8615F43701B1A716156F913312A643314009872EC328809FB64463D5602D976D3AA900FBDF0F996437B621926226A9391EC0734F52232B36566E06B117F979F1A8946CE8FBDFD2FCC3DBFF283A430E10272F874E6219677E1578AF79EB331AFD8C5D720DCFDCF79060F1FE5843D0B9CB3C7ABB8F1C0D0B5C701E20E3BAF7EAC445A75500A761C13DB25D40D19754C02D9F3DF6DBBCF47A6AEF6D1FBF4B455D3A587A1
55FBBFCDF6A1645712759A11A3CE427084549312E13A0FFACAF22591F14D8F84B1B535ACE98177344D6F5D149DB9E1F03F3CE7AD48E68C5186F44AB4D098EC3A4EAB582F089E5A9D4530B085DF4AE792C6C8189308CE9A8CE2918D91577B37C880A231100D4EEF5107948EF83C3C2ED50326B8727FB9BCD795C43108EC6FEE11AFC0A2ECD7C80BBE15AEC917BE37E2408365DEB34EB415B35C14F65FA91F70B5239378B947F91D2B1E8DB1257EE5853C169FD0C2678B0DD2724E7430E1AFB866CB53DFC4FBA56D03F2AEEE90FED730AF339809EB75C73EC82FE7225F2F0ABDA42288281935831286EE72B426892FC7116EDD149822E7733EFA4675F940C18422BC7536C782D3AE6E0DBF6FC34B674919F34B12F283FD395644053A246A356912CFE493FE26CCD601A04A84A81D85E6830F3CE66DD2CBB1148CEC10B3634B9CF511E0F9866FA7C03B9D25D754CA404D26BA718E25F5A7E39B25207F2905B6271417672610ADA30603FE82855D01044DE06438385E831E219A3902F8F9698552E5EC6AAC9686A78869B5B57E031DA968CA450FF914D67BCF9C036FD1D96F013DF8F311F3291790E89BED589BF0BCC7BAF460C8AA30B42F228FD3AC18C2B7C47B319E0F7E9DBFD463C28B1B585033536D79BBF80D9133D907E7B081D4B4476193F0FB68BC1B41C2F543307E76F9B1A3D6D426EA7775127AC8309BCF45BE747D8A8BECED11E6A1D1B8F190AD6D6AC654E9DBAD4C9739C8D844A91A37167E68450CBB10F4AE8E2B69FA953EA5C991D3F1A3893F9086931BF1A089C7F22357D48E2FD571CD36F190B3983E19EAC80F129DBF58EDDC6B9A7984FCF04CC3B40DB87A8DAD7540D5D5DEC8CA393E45E4BCF433EA64E15E944291ABBC422AB3D06023CE578EFFADA22B64D994A0800F8E5017081D16CF51D0B928B659EF78CCC996F9CA877AEED9155EDF5DBCC258E604EE17DCB3F990F988329EA1DB1C3856539030692E52002CF30FD5802E025B99BFCD1112645B56C60AE638E74D21E598789DE6CB60B42EE49856CBADE6DD53F4C567A29FA05C7CFB245AA772D0E763F25DBFD8E9F16BB429A628E693D387B6D93C398DEA28C0963DF5C23C29F280218A039D64F8BA81C1DDA2882A842E3CB50395EDAA6EE26F5E993C63EEB84F6632774223362989B0ED5FF25A65663FD28B486865DCE0B0C27273F1A4C6562C5DD8C65C41CE308959A9D64596D08E7B25E013FBFE7CEAF3670DB29A213CCE9975A913CEF4236E64003087709CAD6181710E951926CA55297199A608AE545875CDC38FE383C14562B48DCA6602EA34055D983F38E61CCE531AD93F58EC162845F538CE4843871D3C4ADF05F35E297EA62EFCDD5EF9401BA042A235150A09D9474A3FB03AAA19E7E37A228D5F5B07414C3D
A2AD2E5C75ECF04C112B90769E19960E975E8D1917B3BFDA84FDC6D2326FB8A3B00F95D9C52650111572BEC21B12127CA570D8A98AB977EBD8D79A59375EE14F64B5B04FD969FEB03D0AF73489E3BAEFE7C7BC8DC7E85483EE62F02398580F83B96DD84477B9C48F0BB39F5406A37036D6F36E2B1B6B53FE6FF61C327B29D4E05DD2B81174C60B59C79CB1976BC06E7AC34DF3E38F7D2C1C0E3151B7147AB831774770143B927B5FEC5DF776C1D72DB6BC9981D65867713CF297C8B0F1E98D0E16F0CC227A39E47E50BA0116156D5B5467536604BE05CC2EF40ABCE852F15DFA2CACF86A789E5B7B0E5BB4B777CD7CC9F654779B102F78B5AA4B94C3B4FDE55FA7F7BF54AC225E1F26165B65F16D0321669FD9F6E47FCA1DD347096DF5DDA86466A57C5B068D9C67B7320366EA19C8993FF90BD8FB0693FBA370E66D2B203B997011B0D15B94E28BAA2EBF01774F7AE78F84EDBDAD9F65A450427A4774C60CC89A020B37DA21C791DAC8F7A7457E30D08B01375160039C301B6051A965E8A7CCA2AEF93BD52F82C020BECE90A129024EFEA4B2FA21270F8EB5ED6AAAE55929AAC599A577972957660CC47AC4E3CE772BBF10052DE7EDB1B8A44941F884C9F8BE13174669945629F46DE2467444F3106A73FA279CF02A800A047E20BD4D820B389C3BB6A868A5384CF5724C204CEFB1A6A1BEB9723E36DDDDD9C707C8F63E8BC26683CC8B43C7DFDAA408AC4DD2BA9AECBC3B6DDAEDCE094AAB58FF732B196638D8B8EFC428BBA9615793C4DD9F00F90D62C676D127A0E18C14C6EE9C990510B054ADB4B4170AC7127F93175C1EB22512 +verif_message: 14426334940960773BFF65F08D1DE489C4C3ED36 +keygen_pk: 
A3E61204447E6C0C9438D5D349EEC3919F9C406DE5823B6B0C10102F4BB447B401CA97141E75D5D7E6ED21E0DFCD76121C7CE91B2D2A9E518EBDAC3DA4EB711D158A9849579F2C9ACC8DA603B7A37C2176E263A45D9A31F35BDA4ED1DE5A17F08BB3342E89CDB0997B2FADC53AD8FE46E6D2EB8A804A00D7D72DF8AC76918C4A6A1B224473B69E080E52C818202C60A44C3501794681AC6ED2E767510392512C03D1F2CCEA0A60C46FCD946458B128E16241C12619DC0921F34C53DB45E727FFC9925C13ACAF2BFB19A09347B716D41818B5F8E3EC31B7A4284275FD1DAA4F387FC4CE92E04A7483C11100D11E7CB241767629FE017CFC107964C4B2DBD50905306CF2500757717B874D5E1932A7C224EE1611D1E82BF6EB300F2E328AAF0599DD16727811B0FA79E04EA07DC436C30BE7DC82A7E9C3F2E31937DC2E17841D4FB4C207B88F1840E9B3BA0177A4A2C47013264FF5770B1924A5EF82755B9A99729F15DEEEDB367E24EEB5D25F1A2C889827038D8631DCFD7A05063BE9DB602325D4F9B4BB166F225864931A4DFC664B3201582D97BEF1929902B300AFC1D415B632119740B7EBA2F8B868291FC4C74FD75D42623FCEC02D0ECBF23063DC3046A2D484DC68CC4B3AD775A8A90AD7837961F1D500C6BDD47F7DC6F8A03186717734A81DB143ACAD1224765AC849EFAF752EC9947AB028994A790EC4C938D7698DA826047148489F772183633F24259913EF522192A3C55CB1450486045B47534C0FB4909F53DD466CAA6379FBE37AA023D59F65D0B8CB36C1F3341DD695876C513DD44A40DEDC7F67215AEB25696596371AD621470AFB261B90FEE5AF42BAA2FA78CD3D530C16E80B793F27CBE4ACF94C7EAEA56F962F8A5A659F33E7A2ECD18C19A298694F57ECF611CDBCA668CFF31C1C2BC0133278F229454FAD2D4251E2113ED7DF79320CA80435D5231C12D75045444E2FD108B813FB8FF0422615B74AE7D1859B224906B048EF127637A6D3D16CFC970853F958D494C24C9BA09832E7863AD95CA237830CF04DAD2B41B85A891E8A32B62EB27B921C3B53700FBD23D3BD4C2FB4A39A9D1B695307D249E6214923B03B9B998DDF88DDCF6D1819C7885AAFAACBA3EC60A3B1C3B135183676AAC348DFDF719B122EB80EFED0C68D89C7F225B800F17958D0774C559EB4B4751576C39410CB3EB51607A8404D7C52F0B628B54FB8FDAA05CE6BF0CD3F68C64D2E1953EE461ECEE1A219DCA24CEBEF904BFDF794EDF49DFABD5E8E486314D0C5992662B9F481730C052CEE9F4916D62D79887748FF8252C515D02958B009F9FB1F57D6557465789ADDE9EC6ABC253A08F4DCAFA28BC4282D4E3608A97D741AFC572F7EA866948C3237A07CB7DB75EA7E4EAFB1EAE9CA382C97EDA8DE
D02A58A089C7B5F5F69CC9EC35CE2BCF6D106BFD4EACB1F3388A6957D64DAB8A7A9FFC23BCE34A8482009B9667C6B86C10D832950D3A071D9AE8517DEFA420EBC60613CFB021B4548FAD32506826B434C25A07B55F841B80C8FBE4A8DF63A0B63073E4EA3E17FF07E4016FA0CD0E8768095006A74048AB9DBF958CB60ECC5B9077770C620B117FBE6DA4612F23327EDD258A0C691B329011ABCF87A86E18216D7FB8876598E9611971093B7F4DD7CA425113165C1827F9F1F4AAC8735BC62777A17ACC9260411028791A25D4DB69A78A12C569AD73EA984B56F96D2FD7D2BBA126ECCF2ADA1AF7B82E9DA007B7A4FFA519DE072CC3000EC22272D718B172B127B21CA01FA129A315D76077DC3E9F67884FBB1AF8C8B71BB48AD1D7937C3ECBAE83C61C93EE71B67AAD525A5463A0E9C70878C3DDE1D966534AF80FF43C427101798EE894C8D4F48148C89D57A5657E50B831B98A7E6699A731F2A0E5C7C30D5C4AC4ED7F9500D1C3A2C1E0E8DA800D8ED77A96D239D59E7977227290DFC5A436191C6D68125F0B1A98906F19E2707809022157BED26D8CA0CB3055601B002134A6263BF6F93BB6C38AAB2AD8855ECACE472F9E48F83B6CFF455E05966FD186A7691FD18A9249908326BE552E9FC508600BF6C9506FEA5A1BA85882240B5D0D8FBDF4348B66E872DD8F3A6518F551CB4D555FDF6F30A01A6438D61553E90DB1ADAD595EA4B12D421B4B6BE1B41A819C3F0F4F780ABADE83EF405D6811F2CD177260B435A0C275013C33622AE7CADAAF4C145CBC7141E3392A3D5D37EDA4B22F465C46EA94AB6DAA9AFCC1FC4789F104E6497C96962A51B5DE28F386641A671BE8A1C3A1529E4F4922F7DC9C1C2443245F1D3EA83ED557692C7CCD4383842C08FA9F899161591BFA60EC08E1028F0CB38743CC9C1953B962071C1F7C60362DF16C48D52948ACE7C2E63E4243044E6872EC64AA580BC21BD9AC77C3E07C526FA04C1BDC2E219D891DE3A87BFC0E7C2FEB000713FB41069D3DD5196AAD53CB5B475783862067B859BF67E3132C0CA0C4539AE6ADEAEEDF6AFD814D7A58BF67B6FB46F938D9B989AA179D68CA7C2038F929BF60760200DFBC247AD11C419686CA7D569743893DE8E1972F1D72704E8EBA17E80E8ECB63A4D6D34C37AF725CD3F948A1C421EB31649EB5BC4F0D96AB872B599128657BC131958189495C8DE9EED7C97816511E34649601AF168C6A55F67425496E433D5CC57801DA38840EA60808F805A3146178D30DF77041FDBB7F6DF01EA8AFFD279A33B876A091D1E0FB9AB50E446B2BF2F045BA9D77C21AB29F6E108CA7B90F2E695F6F1D060289C61523F49183FE69D319604C244D695B8E734D6F100B3A9E521CE39CD0EBC42199FD419E2DA9ED2D187996A18307
3FD06A6113E96C2A28DA6950700B6F1E45412D98CC2101ADE58D99625A0CCA99430AEC190781E4F96376753920DD4E248324F3BBF8A324DD2E70B3B3DAC583B6052AEA72D7D0D3C449ABB9C33826F9209AC20A2EE9B7926DA3ED239986FE62E07F79DB2095BBFFA001B0592C475CB15E4552204B1DFB00A8F32A5A4CEC6E1B0FB601F518831A306673758D2BD6F9795A28547D0C7CC38D25FAEDAA2E3DF673A0BBFB64848305D43F1A44ACD517EF4A637A465DEB8D6B609E50F49F05932AF8BE0878ED3E5AA9EADFA1E959C9C56D516EC8483FE2F536BC7AC721235D75C6C2AF77653BC353F1ADB048024711FD500A274D74F8FD403406FDAB67CB665AA10B4349F8EEAAC3B7481353F285AB9817FBAE7D93965304E81B18E84DBCBAFA21E63E1EF16BFE132BDACB8E875F6AAF8EC5E0DB549675C40DA184EA75087508EF928BA4F3E2BE35215D32A18CAB85FDA74EF466796D515049661B595E6811D34A646DA2B3859EA8DC5A5BA870497934DA0B88C03807A21D87B8EB9E0D7A61D54046660C4C93467A25B06901B297975884B59D318F1676D52620763077FD2C4ADA01A6949936B0DB6538E2B110C7C0BCAE3B45B05B74AAC263EC2FBA5D1F130B5BC22D50164CE72F3BFEB71EDFC2A8BE6BA3AD280A7A828E7CCFF8D99B95E8CCAD000E91671D05F7948B43FA7EEAFC41AF4BD9B11AE951C746FCE2C6B2CDBC05DD8322424E9AC2C857B5204686FFF33131C05F81645727E6DE9B2229C0E527CB6A05F2679D2B9860042E188D3DC014748070A72AB9F4C369ECC16F2F4FB46722CACF6A12E969269A0743C0417330D612E4D6C5 +keygen_sk: 
A3E61204447E6C0C9438D5D349EEC3919F9C406DE5823B6B0C10102F4BB447B4292101DF48738E3DB3E1B68947D346CFF9B9578F61DD718A51A9BE1ED6FFF1E5DE1B58833F1677A5DE1215BE80CBF1456094A9C7029B228EDC3DCED21B1888A31DF46356C3B953250C0C85A62252245CE7CC1646E75CD6B35EABC4EEDE18E04999404C03206AC3A871810224E2A6099142421A488614A149D3C0301B125051A00D833812C8264800148DC192888996118B321204922851B2448AC8001237902120081012090AA02DC90886200609D1A2251046881211444A46119CA48819412E0BA248E4A228C0406A60A4711C15710CC6112389115AC04C42348C08000559342A50020E2480044A1865D0326CE232260BA380DC182600306D623428A33281C9C22CE438801C819101846CD4C26C093922D28090E4844008862004466A4A061202B060C33884CB225100300D48B848093920CC146904008C11192D21358ADC9629D2A65153B06D19164421A0315C82310991440B48311C46521AB0488882604008490804065A447189C2650000900100645A360A08810508156CD4464C43B46064B8604AA62894422908062A59C24D498865044902D0264EE3A0652123801A890C63166109A00912388D0B0262A23000C3444E91B4210AB481DBC4050423208C96682034292184248434451C816154B06412226121004D58268924982551320C0B112684022D1101091BC46043446E22990D49A06014241221838C18B0709B844408A28059B44D63448C410049103889C3928DD3884150B22898406C9A8465E2448591C824948491C21211CC422C1B927100C5904B380E49068CE2480E21C18911448610008901B1644CA0899BB42964068614120A041026811485DB12884A344C21256618076204486D48368E24A020D0385161387112A5854A862D12812D22374614302D02B3404806000B3360E2926DE24266189625A2046D4146055034308C32294C068419C5819C8028C0A828A2366C049964C848450B4232A0987083386EE1408122817119958524002E6390619CA08498B6604A8445A408094CA6616228468B023021494920A664C1B8214B887082242A4A260D914249131724A4A8841CA50C80C66881980551484CDC00642113118AC06919158C52429114B6010226329A826810018E881261442884C406644A268198484991404CE0889022042D8C9844D020112422698C40011980852403224A104263028E0BC34C0C314241A82120C28599C82900058041442A4CA8000914880A378402052664944421C50922920C1B086D18270894B684192321D89868590269A0940C13B29013958D8904481C94710B28720A9069108281A4B64101240201C0050034611BA031C2942191160660988002354D5938
72D4A20193C00D1825051C946CD8326CE3482ED0C2211C230263C62460205004B609A2C088132468623642A13040491806C4848041360562A27053420814180A8CB48009325193182A80380A94A4700A2580D4160513944C4AC09100C26C1A184EDAA069203332034325C838610A05080C8764094629123709D8C20CA228288402840008021A892CE4202619098E4B826402A04D61C46549B845D10689141310598848D0A28512278660B42019974C43B231894030544690098591890621C4389241320019C444020900D302868CB8214912451C325024020AC1084D43988CC496081820626090309C000AE028506038819B22259902488244802008329B1065CA942D9CB2814A408D6148618B326113222C589810CB122999064E5CA08D1835660C0592A0242524152D0B090A02066113002200445004A10513C284523492D4C424E44424141732A34028DAC06101C03121364890A869809265939281E1300CDCB400C1424C03C424414624E006445A162D20010208473182160E9C1430C9044904122EE314010921818214055A4440CA06520042868AC4808B224952486010225008C4000B918454B49102B52101814DD1382012A780E2B86840A24448140813854558A881522291A12851D1404464C88821C5242122614408701CC328480202D9167119256999C28903188D20C0896140211308109C205063384923994C6396095B443253104A482601989884D01069D1066C0214611215218A088851124218192EA10481C9A00D23972CDC945184A46D02166D111432F0788FCE7E7F0EE474116D6706545275DA90CAD1D759D4A09832990245BBA7321755489F7D92AA7C662E1D4A38FF96C239099E511BC1D006E48E033CF6B2E09CD7F8220BAE963AE1E6AB53AA5ED38F954472A26FC644530DB78780F7CA5401243C53AEA1D660CDA71B493F8BCF93AE1CE14DED6B98CD5B05D8C737A53C79A2C632A4DCA846AD1BF916B289354D02464A14D4E9BCDADCF705D7604AA0C18E20439E485EF7E3B114107A6D80491A2541FB6D7BC81333A050C5D3B8E7A6F0DE7673F657F9AB0D03FA890B70957B5B3F9A27577916982A80F6DC8E1CB8E95F59763047A6388CCFA633D7845F7981E7A34353D5C91CE65ACBEA6E1C723918046CFFB9C4B3F4CD1B1F570B5C517D5398782052ED2AC30D98140A1509BE1EB315E45A7B04F0486F2E831C07D63E7ADE3434EF383808CABAF0044AD6783A1228DF5A8C2FD308FC95F59D719621E82F610D66B922A2BECD6A42A6D62666EF797DCF28CDD268177F4CAAD9D6C07BC7BC0002EFFF575977FD943967B6179A09153BDB5C62DF3ECB167F15EEBC521A66FABB3012730F40B2A5ACC1D5C0B649BDA549BB069C58DF5F17EDDB69731908A681D6CAD1961B
8D592B810523DAEB6FD3D50E2A559DBB10FBD336ADDFAB2EC5F82F56D18E42970F424E69B87DB37FD085E2026FE20FBF8EEF4588526E9C6DE6EC11C2305BD1B7B166AC6A7E58A4843B5F62006E6F2C7DB6ABACDABDB87688256F2FD425AE70EF03843397F0D05319BEC5DE36143FF973CB88A2BD16C43E1E6D70851F7EDAA8BFF9AB9CAD55673C310BE0F47FE1389248BDA7D5795CD0B920512C4B7D0A9D1F2BF5234B83ED03DD8AA2F633F9A598261D75F71220738AD2A4CA6261848C0D692B55ACCE6FDA95D7AAAFFA79AB017A009B12DFA083CF57C5DCD73B0DC46E18C546EF424510AED9460648D0E330E1766030A175F189733C3A8DC4E72C61D3D804D159150BE96A42690186A3959805CB671EE3A9FD60BE3CD7FBEA3807B507DE39507362FCBDBFB86B205F1144C7B11414B71A918B74B55BFED381F239B31AD61CAED288842294F3922A7F3DFEFFE1443EA774830179FBFA4301F8E3BA74BF38F401D672DBCB22CB6A9E0F1984420AE9257AF1BA122EB230161D28BB4CCBDBF06B3C1C5C4F5B551DA818E8022F782CDCC64A55FE92CCDA962877AC69667AC304B85643C1098702265ABD659162A4FB5FBFADEFA39DE27F230FB40F4B7A08DBCDAF5E2AB579E703D62B3F4CFE4DF5D5C84AFE5C6C6C2D6287382683C6BB4AA2A02ACBDEA75DF548F336178EC5F0E4D45A1295FE29CF11516ADDB737E4D830102EA0FC16CF24929A00A144D702C61FDF8F12E282134AB2DCDF490271108A0B0B5A5145C384F7758D602B108C9F563DE0A3239F4AFEBF161051898C05030BB8D6BE832E398E5EE032AEA53057C85B3035939F88C9215D71E20B98C1DC4155DC74A4A41F33224DABCE76DD8C385BAD9C0CF4B16760A04BC6F23D9B4AAEB36A8AE39ED03CCF1465CA8D8725503F3C8CADCDAB4A872F25A583FE97889AC54822781173F9E5AE6FC9FFB3B23CA8A61E620F6D234B0DDA0366BF571DDF33A27289B87DFE30C2EACD665B5F58344E862C37FA65C1E960DDC210F2FEC6807E454D1C4546F28605A64C0748073DC0EE40F01A1BD55D69A7D8DC0A223ED4BEBF83878D64C63F15C95E4D212134073929E2E925A74066DC8EFC77B6D6D049B7F5516EE565C0A4315E8EFBC941A054F5E9C73CE00A5B7AAA4015F6B07447BD7A6E45BB66A49A760613AA751D2A01C76D34ECAAB559B0B1DA951A0597F029B4595F347261FECE8A4EC72C70941275D2983F43697DD841DA7F6B31EF22A04932F2857395176C0F331AEF9307296E2E0DC0AC598DAE7D16BF3D84FC1D5340AA75D2AB4CE0D5E24FD6A76BD0E40AFE3DFFC4503F859F2A7190F07CC643372F3B2748EBCC77AB5AD93F9E2C1A3C84C5821F5F4367B0FD3FD1E05808398D75C4BCD8D69B0F42D573609E1B1871365BEB68ED2364
5F75D878CAAC40722A934AEADF80AE6145A3B36844903CBD1B082D210B72F2638501C1537BBE084E051C0BE28EE589489A123309C89071DCC21042967D0574CCC55C946E2EB0F2CAD161C317C229712FD88BB7DF52FCF071963427B4F2520205115C89D5CAEB88AA0C7218403FCB720D72B7EE900540E89F0E6C6FEF1A0380EE643B4EE84CD7DA319034D452E3468337A54EA647AF1AC8721880C2FFB2D4CEB94DFBCAD4084DD6985451E48E7F8C55620EA005683004567A643C5E32163C68E9BB90A093366E27CB831C0D668D5C8EBF3BFC17A472E183237BBCF61B077476DE3A40200DCA000E1FC6CCC19ED348BC1CF2C2C6CF99052A09A8DE314C9938E57603B96EE9A52F2B37D48F89C16F8F28FAF9CC5F5655BC7BAEA98088853957156AFF3D10C81456415FB8FA584BCFA86519435D7AB2119C2EC968683857B604264D798627EC56B1F68DA21F9CC4FAB4E35970626185A60A4ED690F77ECA24734B652C51B0683AE397988D648C652E4A8323D1912613BE403E1A7F775075F9BC8804BFF23E5077C372663EABF0EA1B8884CBDF6FEF70436796EB7A97575250CC4DB9580A7266CC0C99E3061ACB9A37886DFA5D0E68C447B2D81F7E72337367BB8AF51C9AEC0D5659F196BAE1510A126DD2D8D3087734F21BA4DCCBE7E6DA79D8C722015CEBAA3C1C14549FA9FA384DF76A4B7A252CDDC3A7DBFAB8BE3CBCDF9C28A9F3637957B40949068D7184E9DDCE545B9EAA555BA94FDFFB94538C21D19C489B42ECB437ADA6F1B64318C8CE0167FE0950C1706D95FAA6C1661090434DBA6F8E1486143E76A6C06B0705D6CC85CD5366C58F0CE1AA31B121F1C5AC32BD65B2D46D5F53B4B6AB0B5EA8556230DB5EDDC69BF37D0DD919CEE40543BBB29C11FE6DB2A7873BFF976B3F3DEB95AA26F42504EDEC1FC793BC58E01DB60C966407764F259345C125230A3B1002DA094BAB0F101182C14AD0B2B801D775518D3B772E9C791F5F8DDC6BDEED7A7DA1D7F0A561BEFD8C1D2891771DB57F38254707A7D54CD57945D43C75790C25DCC2D1E45A1C98993C5A3E4EF0FD44BEA2860E97298C0F02427E5348FCE051706A54B44CC4A4CE8F4B397E472C7AAF8BFC5600912559DA30223EDAE65AB927CA3D467A09414670A8BD60ABE00F029C1512302027961B1E0BF8784A6CAA78B62EBDD102D06E04666E262EAFFD3F2700C2193F27A43B50B4773CC0B5184AEEF2561852997BAA75C3081111C3627629202FD1151F8F4296175D5C2F892DA7C140C4AC4E315F48F2C70A564A11245D61F4F10A1C933A4E67B341C946D99FF6B448B672435B39D377CF6AA2A5B4423C419BA2470A39912E0B35045AAF80DF3BC0EE7A971031C84B4A4202C501EDAB24CCBD4740623920750C6157F183245A107099
2812F6A8A8115994BCFEE0D830FA2AAD7A7EB9DB1016C051CF3E7BA59ECBDEE39303642ACDDAD1327CFF02CA309089388620CCAD00C30AD60CD93DCE6A9BB43191232D0B75B7E1919238BAD92FDA382B8A01F22DC62CAB6CC86A3B57896DF669B1413102A2073261D5BA44FC19499A4E73513697FA16FA459EBFCD195F74EA3358ACC61996E6959331CCD8565C402583EE89C5F5C19BC94B66B239956FB1B727411EFC6632C46D7AA04C1405A1C64E0C8E6804A21E55E38855D9F56E268B0CB759229B9671C1CE3F099FFA70F727F11750750601D05B5302D252DE9F4585B9DB3AC499227FD64F0AA95760F9547C1DB5DE3974EB12CD24B596EC2927C52AE20910E87CF173780112CFF71AE4CC7D2B6F942E00C6E29ACCD0CB922E8241B7EFE1D579AD97CBA5DACFC76B2A97D4256AC5F86F05C4F38035930FFB1A7174F2B35BC19D97F8B0A25939BCD7DB31382B73123D1CAE9ABA6CEE945F46250C403E8D1D498DF461DF0B26705BF97AD1515F2EF495472FDD7D79513CD46E2DBDE020983439D51ED9595125A024B4ED280EF8D5D749AD9795AD90CFC99A7941F28722A5EDFE8716495597BF867A506601B89D6B8BA62E488681561B45B733596D4B77AFBF7947C4ABC756D48087F452E23C57E90C3AA1197B7543D7D7D1E645EA18F892E43CF656C60DA3FA1F0D574C89637875CD6178AE2DD35FF2CD22D01D93F4762DC2B60B0AF1E071EB71DB0C9CFEBBF7174A9AAB12764E89410CDC985F34DFBD283B7F9A9007694031A090B9CFE10FF42A6A80ABE3EDA818CA84589F02CE63EC08169D28A1E7F7FEE76CD2AB2836F2751F0BD2060D58CAF80B0389756CCA79E3D29098CC7FB37E82D94DAF337FB7F45D22812C66617D0DC046A19590A12455B405630F0838DEC509B4EECA3A61263555967062FEDDF475E627E149F9F5A4790621DC25D5AB637E09F6428D1240078467E7302F5386CD251A6C7685F121712B649B78B3B88DF801FC5D943D5EF968E9C0197F9B30C8FD199C6178DC19572FDB83F84DC88EC1E10103B9D58223D8BCCA95FDE199EA3F3D11862CE64C3F622E324AF3AF4352775FA2ACE2AC32B30C9632D30834CB90BDA8A2C635C69236892CE254847D00CDDDA467FA9B52FB1E13F397F9BA6B68E6A5CCFE16A9824FD3B73862BAF10CD43AD3E097258D5B9BDA93E87828A1DCE2EFDF969505E0E4F7E2D144984674D1BDD790FEA906D3B72B6A2A6325E040F5759A68120D2441095D6986B33D333892 +sig_signature: 
1C3B7FB47134FC1211A4F8FA16D8F84D642DB8182BBF70D549362A3182E2D313885BE701B5FBA7B2A7EE0B19B04A8F4E1D4B689028B10EB196297F56E75A5D037C89B141FC1CB41436159E014D096B8BDD86CD0946F4D51A02950FAAE06AA28B608F86A3AFE6A423DAB187CAB362176E44C7CF5C33E872D67126D4F7C442EBF493B685C5CEB7DE5C070E2D0656E16455D2A69A750EC29B50196ACA08DA2BA2D4048649FFF5E2923623561456897543BD1CCFEDBF854ACD1B4334C49C67DAA0456B1089B95D83CA8CBFDC1012ED3253AE6A97FEB8E8185D7A5A034F9B6F17ABFEA994AB6BA519FCF7632284056BFC41B69E7763A644D51D5936B235D8797A38657644AD0CDF1FDD05A0F7F36841ABC2BAB1EF0091B9A50B23DC37ADEA38D8791EE9C4728C612FADD4092A2EFEA38AECD6FEA2827F7D32856C6D1D43B0D30DA9DB79F192AD3F5AACF71CFD814E41D325D8D843D0F8252FDCA1279FC4A286063A69305F1342A46148D39580D290B22609B1B5499F1A5A698F62C1B0C1085A08E285E269EC4348494A188FE0F7C761A8C2D33D74831DF8969AD5464B658E9F059C70DC50F4D9FAA4119C69526446D59A46A0482C6ADF363E03887B320CD7502136712806E02067F4D4C2637EDF8F77148F6DB7EAD6EF797D7BDFDFC3438570B7FD144A4646D5FDA70C55352FD4789670F57156A3FAF6BFEEE142633C626E783BCB97AF8350833B159ECF162512CD5851F91332B8477E82D14174F6E52585B5EEC943AE99044FDE73E7D8035FC19D56DE56560DD541AF03999B415A927FC3036D80B4D2C64D03BCE42B7CE8D9863443D0600060DE7DECFF046626095F44A7BFC258DF7CD0C7570F5E06A158A6100CA60CDFB3C6C84029F3CFD1A1F7D355A537EF2F5A8C6FA5D1D8ED7D2FFE100EE943B8E20215F4DB23D5CCF782796BBE524A9A83904B4CB0F74FDAE0444B6D19D91114FC5C8A186EFF80880F7840E53788D334965EFB504695C6E9FECD2CAC50A3ADA9B2150BD88CACD80759066ABCE3868529F475A915D1341062A32C9D7713C8D8B76E9081EE715DE298E6A28EFACCE23B1916667396C4B0DFA3E12D11A5947B38D67B0BEFD14244B4C847FDB72226B3DB2C7AF3FBC5468768DD7849FDF20DEC9E8101A68F9304CABDA527799EDC0F994DE669B1A57E6E1D8D60893FE37157D72B0430530E362EF790CADADD5750FF38A146DB00B7553A2357FDC5E7454068B7694FAEED62454E5CFAEF69024783B5173D5BB761CDA8986A874F02098D93EC135B03164C2219FB39E5712408679147FF71F06B9C5720AFD40AD2561CC88354997066366A464454924B49DB8703EC24F807C5078FF68D9510587B2DB6BC82A6D597C9E2941E92444A7E3B85AA4766605E4C3D726589B35E2CD5DA4EC4
288C615D1CF3582EBD5E63C2BBC60F7E34849DBDF734FBA6D4D8F4EF8121DAFD049AF1AAAABE4769FB9CC591D2206051BDCB9C102141E540BE5A4E4242FED93E96F620A02CF02A07E12A5AD8F3C7B6312198AEF2507D065D3614830F5C5441F9C53238EA649366A0EEA00350FA7414C7544FAD497EBD92D512E5CDCBF8C953724935D97C0E1A3079D45A40A54EDD9910E52F88175D0E4FEEFA0EA779404A24AAE32979F0AF3EBADB745B51B09FEC972145AE3EA4E485428EEB4C610120F29E9D94B3E4BB9B468582DAD685CDE5B5A900204C16863C21E1B5059487D772955CC1A74917FF8C05FDF4BBB8CC54CA2D59BD052B23D5ED7AE1A0671AC9713C43D1D7ED55087CE75245B496CA21011C08649471304420DB1142C2733FA0CC8E00AC670514882918A0B93E6969B0FE214B63E0A2F2A9C092E25D5E6870CD9BD98EAAFEC1ED886F64C9798834CABFA10DC8F45A86E89A0CDC301620A6202BED8C6B8F28BDA260DE8FD6CF502EFFF40F88E1F44786C2A062BDE1E44B237BF8BF5206E7890591A9E09B6AE196511CA444717E6A76A005A55F5866F010D79110A8F98217BAC2A2055883392ECE9E5B5F423F6EE28FE1F88370DCEDE6B3EFFD93A427F31AE8AECD8FBA6559ECC8E23C595147AF5DEE50E4C938B6D3D19E02181260800D5A12DD883F186C94061068E3BBC1D7366259664ACBCE927587A20F4BD80A67F72BDF405DD5E4F04679D916D5BA984CA51461EB33E0626CBA8FA7FDD6B3DBE15B21B776B18F6C7B82328486CC2644E9C6040B6F8DD97772D06944B7DA8B86E3876C2FF14C5E995D61F19FECCDFD79BCBC159986003C77961DE9487FDEFF065D22C639417C10DA0ACA253B9025663E54135DE4AFA0DB7A0D5AD1189D94DF24D6A71CD5A7858D48C242A7A7E3AEA8D67174B444791E2F43A4A081A99F9AEE0CC2859C0855326E2036B1FDB89D8CCF00AF787B459AA6049BC67FB54D82243E4F58CEE58E9255DEA3D5C2254378AFEB290FE78630A9065937873498B350526943DE231F5384EA83E98E2317E013C487D57EC50D1C850F83B272437BE365CA11F1C94C36F6A36DD99418287D0573440447AE18F6327AB2CF8F81CF93AFB3D09F6D814BF007740307BF22A54FA24B3D9155C95735B22FBD66B6A63DF6771904EA8DA54B2D4BA8477534065BB20B5653015BB67EE514EF0EF4EE07458B4196884C1132927540C3EB78C76E795F433EC0B43D544FB6F28006E9E49C79FDB9181AD5761E111346657E89BE1E2F89A3972C20101BA1F7F63C92FB9F299597AAAAD2C6108BC5D314927D4EC1D395F1B3BA859453A7E292CA8F0D50EC63A53A9E40248CF779CCDDB1A17710F98041FA9DC793E0EC53AC6CEC4286B891AFBD6D41D67AA630DF94818E56AC934FB14EE8C4
3C2B3893D01A08BD85CC69230FF416BA50CAFD3FD7BCE3E7A16379CD0F7E5AECD4B39B856D4720E8A0410376DAA35ACEF23A13043DEFD5EFB8FB210994B62E8088294D2C7497F942ACF47F1A1019D9F12041BF3AD086115282C1989DF5A60267647E8747BAA2F27BC8A48DD730199831F5AA61B954F01117141CB75E37C4EFA3377A86B8AB916874F348C9987E590D91B908E04387B7B654A6103D2E6DE8647F3953F0D4B9E977FAFC3F1087EE529E31C545994D79FA3DED540185A8FB1CC87ABD5D570959B061C23B3D98628A53E9C6D5462AB3E53E773202B38370EA2742E45161400A5FBB8C86C1EC5F55B30CE923ACD4EDAD4913527D1C9DF4426A851101899B0A255B6688D0A7134978139C56B5D25B8CC389DD3972E38260521FE85BB9FC0F319AE57D18D1CDE432C7F34C1BFBC38A759882132B0CE158023DA883298051E51743390CCA45F525DEE3C6430948C906876664EC00A259B70E1D0AB9CBD416950653BD507CC0BE87CA531ADE170917F4856CD4EAEE9E040EFF1FC4525F38C6C032C03DB831AB978BFA048FA8D38F5213A987E5780AAB8D989516FD7AC235A9B4C17A7531FBC22492034969D2ABB1182029804F1C6F40F9B0F39A980E6506920A6EE84847F46904EDF22A1C78DA1BE49DA78D681F049D123DB37798B189905EA20583E4FADE7B153ECFE3228D0ED92927B2E50AA74604A9759232B546C4DA752C66B0616790CA1AE90FBE40E25B2B5C9B5E261BECAB2F7B9E4F1541F5023631DDA694B12DA8567D7B62B755768F76CE749DC95541D49BE856121050C6BBC59AF3EF47B2F892F7FD01E4F192756E5AE1EE01F17E8A96B2915EA1C40C02466214D38E7F8AE8749CC9C00F7EE6DACC3C26831435F3913372AE00EAB2398A44A12DABB865CC5F6C345190A1DBC2BFE8497B3FF1460F49F04A9086A8250931B239322C18A08C28175A6B32FCF602F62A1C7D2317C2B83782A293594E4A02DADE59FDCA0EF3F256EF254485957FE5B0BF0452B40616FCFB7836138C9EF26109D9CE08E99421CCC07B0B57C90D51583C4DE64D8443359C96BE5C94CE11F536694FAE8463B6D04DCDA912B7330CA4664D179E824DD6BC6377DFBE1EFBBDC2F9C5497B49343BC6439F88C9CB041038971057826845BD39EBC8C7735AC7C062DD7EB67E20D7391726C998543470153D841026F53805216DB0C4D90B4BE069956569F00D416B9CAEE4BFC1BC515C20CD7FB2DC1DC58832A5D2EB50348A86927D78D48632D4C69C58D58FD39C449F1E06D1D41EA1A01FE0F28C68188C85CD76A56E3928A3BA2E2A028DC6F445C446123D69CADABAAC9698C3DF1F2102D1FC318D6E0D069D8BC31E73588D3999BA04B5A6DA47F62E7C707179771E0A8C4982A858DBAA9A4A65675E098B644468
A0A3C8CB4248ECF871A3BF828874BEA55BF181BB265444AF1C77166EC6CB47FBB39D21A8110FD6E6458582C6FCE348475619618540E4535CD27C195FCD21439FAEE1268BC52EC6DB9AEF6AC082CEFC1BA389D8AFBF071987D53A39ACF5599F2E06F7887EB63A312CE4C3D07701CA62ED2D486F64860BB32CF227929F9737D6C80354D33407A98DBF303C1E87698F18FE626D8A702F6AC095EDE418CE6A88144AC74CBB5740FA53A6FC3EBD3E27C6C68F04914789AB5BC1241609F0E9FCBE0360FDB809528F7457625D137237045FC46DB5CA5A28088ACAD2CC8D9A1EB90A1F271ECCCBCDD4196619E1A3E911BBCA9430A2AC60E529351A74EED7B4EC0CCF6FADA40A4D918C54330DFC85FAAE4F5D872D02C0B3E074A9958E8EBF433A8BBB08466C90B1D1CCA2E194277F22D3265F624786ABC21FD893422313D7AC4FE43866A6768652533A6140CA1F382FE556EABF7C53BE4B72C7F3821B26AE6CAE3CDDF595CD0F77A99C2360F98A9092780D4635311A286FB16A2429155BC57A03CAFBD1EB23EF574E9C93DDB8C11A811BA4CB025583DD53F7B8ABD9B4813177894798C9E9F5E2289855E586046727BE7D8C388F6443B0739AC2410EF8B980FC01A2F4AA0A2065D89DCA877C263A2CFF9F384A0750CE58757120D45BAF7522EB532EB7F5FDCF3111DE2291396EC9403618BA4C993898815E36CF07BCCFA857867AF949888118B3C603A76C0AA25B349CADF9BC29F7DE9DE518B6A1B5122ECE31EBEDD5EEA10F4A9A80B3949374DEE28D5793258AC1C73495B91695ADF6512B8937522530D60B74CEB0FE97D16DD642E094E51F18C875420ECF75FF23A544435363046B2C74B89EE4B13231B0C674A74F3104E0A159392836A7BD696E75C1173F20D525976AA1D68E21288EF072B3BA58574FE08AFD0B8F5407ED7CFB2831D44C5BD55ABA143AC446D5D2A906F0EF89A68E231D8BB2670AEF18CB47C45A632073715A6143B0759A24C9C97A29D8F8CD69473795096115118E1847329BF98A4DE168658FCBA2CEC9CA8BAC487B9F155EBBAEF2DA4DFA5DA707CDDC00215F8EDF7308F8FA12A722963573594281D528696DC9E36BA464D107031D973E84DFD1415485BF0E800A56080B040BD58EB40EED553833B3BEB8BA9E5F9F815C01C378A8E9FD009558F707078BB3E08C4350DC0BEE5F4AB3665D2A71E70531B3DBAA65CE945C163D3AE88DFF91AB2646D5354191BC01F405984528D4CFFB69A8C86A25DEFD81B40EC75832F0A02FBDC1628E3BBDE95D83A5FE6B0893FAC48E62BA3A2E920B99BD0FFB0355A3F3E86B466D561214892F7B6BBC15AF8B71E279278FE563C8B7B9AA331931C9F2C557E2620AD4ED6FC21166CC90BE76029588677FE706AC280F0070A27606F956530F40BEC262
6407AEA8CA7F3330F45D1AB52E6F1673FA6EADF812BA5FA55BF9DC67BFA84E22CFF9615917B6CEA9893A193751CD917B438BD1BD48B4BA1E9F1AFA6436F5625A89122B28066EDAD6284D07B123C29017DEC63482976ACB6CFE7340B2CAB9098919C63E1477EEA8958F318D106F97AAF17D18A3E4D8CA72FDFA8593F4F568174A3ED97ED7D7023D0B80660A0EDDF5DA2534AC8825FFEDF9412EB33E4E0DFDF71865C4C0A0049DA2D77A80C1C2CB1E07C1816849D12131D8170AF08FFB8CA0D0FC69099305E35DF7259606F5B31FB9C43EEEF984ACFFC7DD5944EAC643B6FFBE7051CDE6A67A3BD8B617611B2D759EBD34CA9BF12DFE45DB2B95F3AB65BB5226AF7975ED4B352EFFB9E833020085660B951541F39CDB43A7C7A6AFFBD0600764BF3B03C400FF1B55B6D27B626DA06F7EA130910F64076538DC05949B20D7AF51CCF4E0FC688439DCAECB5BF76556D89610E03D0E138088B9CD6F1C8C5B81EE3AE7CF1751BB2B393DFC086ECE19FBA8920E8250FAC575907133D7DCD9084F2B63AC5AA9F770E885CD6E401763E1CAD8912F3C709DD8866853ADC149A85BB2926FB17FA4DFF9B1D6EB8DC84C5424D0F4A54E99C11EEB2830505ED3BAC853E7AD8C377E849110189E5746E8A8677E0A8A399BB35487EF22658CDAC854BDE55325D24DC1385517676DE8102D2B2AD79C4FA3C3AFEBFDEC78FD0950A76E2C0BD3C3C369AA967EB2E622DDF94B348502632C4230A36DBECCBA333A8CAC5E5283A3C12587182248535F686D70D9082F37383D505D768BC4DB030D346797B0B4E932365365AAADB1F0385796B2CA2451626B737786D3DDEA1A5CDD114EA1ADBCF400000000000000000000000000000009141C242933363C diff --git a/tests/PQC_Intermediate_Values/ML-KEM-1024.txt b/tests/PQC_Intermediate_Values/ML-KEM-1024.txt new file mode 100644 index 0000000000..c605073df6 --- /dev/null +++ b/tests/PQC_Intermediate_Values/ML-KEM-1024.txt @@ -0,0 +1,12 @@ +Origin of the values: https://csrc.nist.gov/csrc/media/Projects/post-quantum-cryptography/documents/example-files/PQC%20Intermediate%20Values.zip +keygen_z: 7AF65022E0A472ED6388638EA29D82DA68B4CF9FFDF2B67CD708EA5A370C6A7C +keygen_d: 7AF65022E0A472ED6388638EA29D82DA68B4CF9FFDF2B67CD708EA5A370C6A7C +encaps_m: 034FF14A56249C2521D4279EBA3D04931CC892BBC45002B5B33D9F0188ACBAF6 +encaps_ek: 
27669A667667B8D5466858602260115B6209BC2C45DF7A4E64932B75C78B9F7083F131BCD4E20EFF8CCF69736BDBC88406F9B69AD3CE356A0F5E676DD0A7C4ABB1A1C9D62021BB384A4014FB04CD2F821890D90427C49F4A628ECEC2731FAC025237360D582CD06647B1109AA6C2AC5D433758C1CAA53555FFF577EBB521FBE32D10F790604C53C2F82C17B08EF3625674214844906DB3FB9520031422A13BD7612D4201C27D15B9D194830CC3669BB8BA34C2523764413971C40D84AEE65675D5215309DA8367F001497546ECE07CBF002D781B830682484080AD6F9558B36B6BF610917130B7419B39F85029621264CF2C8AE4D808387B20CC5AA0B969C39BC80E6CB9CA0351A3F60ACEAF12BD41FA0996E39906A9B61697B747C2031C760288364457425BBBB40F4898AD085876608A77A5EB9D124BC9922651B76395881558CAD06F3C4BCF08E45B67BA516038A364B7740E9740EE2B93C5C65F49020AD42B3C0AEA5BF242A4F1B089B5A3458BE8A371CA1F293C53F2780ECE281293D991E6E579042BABC169724F10681FD1C7D2FB1648B0BF80818A7DD3B709734D38972E3E44875AF0927A9AADE82613FCA05EE5B3210647A5632AA170D09E70B56A2F04337A337EE952383A1A8AEEA6CDB90CCD86A818D1BB39465BA313D266BBB10581FA187D926AC3A8B749F64445FAB56C9927555793FB4ACFB039B1AA543B1B87AE6A49AB562933C4C97BD74C07BF29851A469851A982595596FE7ACAE0DB23533028AA34676F7A9B29263E7AA27900104B1BA1B5674739B2FC4ED8A330BBA5A0B6247C63F1153DA01DC8F616F10483A693A634C1BA6AE1AB2F163400BB5771E70171FCB54155ABFCB2044FCB30BAD67F742183861819EDB1AA6C771FC8E11A92E08B71F40D036C15D2896A204725BA90A03B478D98C49084382F1D223FE12980E947A415E55FE67B85DA40441342445B46C2FC42020D04769A2A1C64641F0C36636BA6C4652B267A4B9219E333A06817B5817B6E6CC485E352614169ABC20E1891B7A000C52AF15A7B904C976C1BFD3A2377EB76B55033C7C4C69E7174AAF27715756316CACCCE63A5A22435C7D1020443AA71693BF062303D13331F795424C20D266C1D90305FC8C2536684A93D506DE6329B6162405999BD5CAA7DDB9613C8238CC6D335A1EB4082E7710D079F87A4BFF6478B5F0C587786AF427192D9A34A4FA33BF0D3CC58FB463B4838CA2C337E65397DA15690C52AC0E5468BDC03DF5A62F7020934E267E0F7CF95599435F952FAB74CFEB4308B173F12E073F7F040DB4C63C1C48A7B7A41F4779A6B57A922C970771180008493D4C76805400B7C664D0B92B22C49551B1247E62C85E1E540C82093371013C4676CEAD77C5F3064A37349C7165EB3AA7DEF
8731E9D66A56368F195C045B2A50E59786161A630D280089801298C130E4483150CA9152C2A0F247750C062259B84C28236C3FB54625D5CDBECC68DBA22FB1558055FB9B243501C75851E76ABE4847B9B972A73411A6B4282BF5983A82DA7413E54BA35BAB37A9B3C62884B643C134165C9870C6BB390F6B7A1E5745158FB251D6909433551FEBD30BA575A1E2F10958498D9F147ED9531322A16097F55D811795457912912B1C65F38025429B3E764A2E1ABC4E30C288082742995590981C43DBB365966BCB9720B178C5EB963B82934C02814B7525546DB7C96D65822E4942E4A4AC13C99490E7AB4A702371F21316A57906B19258428801192567C2045BF8775CF58C5DB28BA1B05E042A1859E64286B5B114F39FCACC127BE63DFF590BC184B83B168C30199890374100E40D2FC7752B1430355022F3D58925D1991BF3B98A90395F8579646C8413BAB3C0C0707A238A27D09FA57A32FF85392FD08C2F2286ABDB2B6936B9D3503802C6B51E415B81673CC78054F1B2C4BDFA733E5264C55A7C4DA5B73944402462033D08AE620BD05644B477AB315E936D3F25B5BA7AC19EB559A5C1195F568B313C2675092E6DF58FF399C42CAB6363AA033691CB8CE06699E701F2B92597CB8FC23516E9F40CE75B7BC1E0520A5A3895EB7D8D474009A0CB0ADC2DF476B5164112C3B600B6776DAB49B20381A4014691652A3C3161AAC6616CFAA265638C6C665A8454F36780B789CFA35D2AF49E6D5F482BFA3C864B0EF29E18D2EFFF92DB1876A22076AB1AAC0A7393ED9E5A48 +encaps_K: 46C200F3F6EE8E11D47653801E3482241CB783B9D794EB116A4BDA085AEB6BB7 +decaps_dk: 
0FEA26C4A544A514444A971B5C5A825827C09D42469E59344CF2AC06A28D33E9A012CAA3717B2C3B290A0715821109C4CCEAC49F341DADD377D42A37261916AC7BB9E41C096CA8181CF58350573F605684A1BCA53D88257453C535165C4ED72A9FF05645712901F66C10D04F5EB4A2EC3772E9498E9DC44BBDAB71BBDBBCFC85B801363089EA60EFE586E1E2180C38B2E7B4A63ED607490BC5BA7A58AC3B1C0E43967200C7980290EBF411828439EE8C8E6129B258E13D127CB15A00CB7B468D4023B5097B9B2E509B50E890B63B4707487961A29E18656DD2D09E6A3B8843E2843CB4854F18116E717DDB0355A75135B2026A752C8E7FF18E0F4A391CA37F5B2BCC88C999B4E47750C46547EC076AC21530722CFAF9679961C98688C3562B17CC808146A12572C9B5FF151AAB54410901840E26423987C5E0D28EF2EA53EAE5951E62AC7BD518B9830A4DBCCE6A936591EA8EF275078A0973852A4D130495D00B3F21851599901CFDF9368344C810422FFEA08AEDCB1A7FD3625F26B034812FA307AB2C20945465546D31A341A4013D8189B4F50FE860A668DAC7B103441E961FCEB0C5B1F34DF2E598C6D8CF60B864150C703D2BBEAC9B001AA2108147AE6B8AAE2C7791DBE956C1F9B2047A1576094387064C3A801B0D89C996A5CFA3B012C14438B9F3530C0C5FA9389F10FB3EF1E2013338415F7B1DB411ADF91C73B6456B68AB7CFC7BC929E44E58EB34CA10AE31F03B2C3BA6CCA27EB35CB1379A130AAC87E3B875CFE253AF03C4BD783F18C5A2F8492BBF7C56875598B1B63FE6CB0694D0480CA1C8F8867C11B8BF33A32C20B79F9CA486858610B19783BEF784BF6B0F858C1A791130DA6957F212234EC98679814BE839BF110B45C1C883ECDC3DB3F822A4F7C125566ED1663568C8413CD01C22467AD5201A0ADC763435A2CB05CDC47072A94370F5B434F75C078B415993E854DDE17BBF86C0C6C9A3248532D9C2139EF3C75A9BC693781060DCAE2FFA58D9CC548F19C1CE5364880C7FB50CC7BE405312D6CC94037618F388C490AF8F61B9B4044CF75A5CD71A15853B5FD6224C6B9590E58501D2814200C919F283CC2B49AD8BFA5BAAA2977F03823F609EFB2426F936C30287097BD6B7BDC67862858883DB5954080429B9CD02CA96BC1CCBDB5121DFF805B0824AEE999E2BBB2D82353E6D3A300792781058C56EF7098AB3584EA0621E20337D3A975D93CF32586D6A71A2C4BBB202B853FF09C407B43B1C19B1C4CCB821482DDD27378177AA7F6178497C3FBA797153848C5D0B1F40B54E9D5193904A303F725F0CCC66C6CCB158850605346DB42B877DD9CEA5F69C12B221C7EC5100F76587B9834BC0C641538F83E85BB3090DBAFBCB0B7118FF7C97E95263157041F8AC4052
D0403500CC4F689455974CEB5B076790A050E0B3F6772A7767541FF6B67B2A1D5407820647688F360A2B01473767712909B227658BE6457848C440757168061888589CB05A999E55496791B11AF2066BB8CA746051C4680A0BC07382412AB8B8A319DBC794DDC694BFDB813F80B58B72218DD64DFCDBA1AB48A94F7A8DCA9266CD15A42D9BA5FB6767A955526C050DE2598B112A2B103AA2D1F0606FE68A55191EF53B302F7C1922C301CEEA989A62134090A86076776FA44627B7316386576A678175B218E6F482B52BC6027BBEB34698B9802FD67634C1A94DD4C5CD49EC6E2D665F727781D1EC10AAF66AD8279B9BF24C99E875EC94352D9605FA30CB3D8B2686B03971A760B3053B34346D0D71B44D8B7D2EA61A5C10A933D38BA48336711174546147D44B2914F85689D9C1BF0037C7F7377CD930CFF60F84B0A2005D3EFE55C7311B1B6132768B5290D836B82BC443C32B4FEC960219DB2132F7990AD684A3729F3D1A2CEA3A1FE4B12675C489EF33198F01A106806EFCE8921DC46E971C0A0A564AF9E56CA727A7641C568C95AA5956910B288429F80EE7226E9DC4067E34944F06926D44B2CF8764F713593B4429F82B8FCC607798916B815B9098330EC334290DB8C04B083DF3CA10CE3575073028E994A25BE72878492FE1B696BA5CB1A773193A3B28A4F440AE582DC7C24FE7451D6676232BB961C5040C9E5201AAF3CD4DE40AD5A9578AF52810B593E9815E23F63F564061A48407213AA1B0908F4B174F86D573FA04386498BE68398E8D720D278111D8B17303602A96E35F56FB25173C4F4A03CA2AC9BF79DCAB764BCE4410401E1013E6528CCC5113358577DA8375E02343108C2924D2551E5CC5A1B04DEF88324D854FC92C4ADF7C2301337E4520BFC365566F66092E367AE60612744653C1EB47F0820951A2A14C425909340D8727188EAA08E48678984876D0008DAE99015B3663FDCB725741530BC3895B11620CE3B417A320E18813B99C235AC06F55600F983882BFF00236107B5042545B6B775868AEFB79B595596902C69B9ECA3D358C61FEE036D218AC43BA3F52C06A8F881A7ED70386142CBAC5CC04FCC31E16277651CE2DCC5014F6BA5A915C1338834EF474B6715913BC7A4E593C688766ADD70698B37E06E53915F385388C25C4265E1CB44FE3D019D121AE4C32434F37B0A4CB69C7CC95707350C3493D0FB11CD4D09F29DC56C07BC8EB0BD0082B41442145663C21AB433467B95EC2478423C18BF2EC703EFBA28CDABD42B7B833150D6DA25EB00A8328902E2D089B55D69AAD9A94D818264C54B04D614D147A30ABFC03D9929D96BA7F81865DA353C454BA7AA7881AB974C1B8F0831E79C4418664E953A54DE93213697281341D37F508E8CBAE3D81850545
67DEFC8E3BBCAA4247907C483B8F1B84B324C1A7CA8442DB6B7B128C8313BE1FE25791209B864A3E1A618D56D710D6F3BF559510167C464C6B9B8BC490B8E03925D03D0EEB5D78179428BB80D3FB148840709C41147A686FC9BCBDCDF7C7EA7C30FB640FF05B7539ABAB70892908E93CC9C347F8AC889E56468A135B99754738E15F4E677DF375BF1B43606A2C47380B10A0C14C28583C83311A2854B2A9931FD66086C10749F334577FD70B51B95060075199319B3F7CB5B237302C370A23175E4E013C56281BAFE2BE9F825A3066AB8BBA5793E21E7A48978CF60C091B1F80C0C23814A30F7760601ACEABB1215200940FFA152272096D458D00DD039F236B2727B588C62204E79C451681DFE410EEC42B74945AEC0313A391942AE1B122174DBE59AB1E390CD64941436C75A9323C69A641880870FBB280B3B37B3BD982B82955620B0783B82E8961A4043BC7F66C0EF25A5ED15326F8816E5EA4167EE8BF6666451D315B2C751441172C278300268261C78C6F0C46562779B3A1196F87835F79FCB7E0CBA15336CC83E156C5022887A80986B49C1B576594A23142624ABF524822418C6101905262806572494D3753C06281E7F17E0D796CD7767FDCE901FE1712A00A3D36EB423E29868846932A9431B8CA660FC1975E23A75B4A51DE1069D3A59F6EEB2A5CE72A8916B5E863476E6AC572929F2C29BC5627BA994163CED35AB7031C00490724555ACDE613AEB4C3E99981C62B5DC6A9B35BA79220243689E0594996857C045D67193D9E411B4FF39D0F8C3C0A70ADB72A7021E36D64FB294D932B24E1A2BC0BC41C4AA3B5EC3CF0E672DE140F484733FD82BF082934B540A635C44898E8AB8E0645705AA581718B4132C427927FAE75BF9616A5424C2020EBC5CFC1BC0ED1653AE5005A1754181620B7F06D716313033BB72A40647ADB2E667370F2C74FDB94420DA48DD1379DBA59AA22F857E231C5C083290066C548761BDF385F2F85817B212066D39F03B77F8EF41219E4BFB9C12E4FC98800571D223AA92A32C7A3C2A7CF9C995AE0A7B59391FE9A4F0D633BFB798C34B72BBA6A9F16C4132E88B570758BD551C91BD2ADEB53A72AC6AA03689DD64B035709A8AF468543CB1736DBC9C72B529E70596D18B19CA68E617A147C189D283A77688CAF94DA5A0E9B63181A40BBE7BD4168A24D274319A993BCEA8ABF505FE862129692B5BDE849F36AC92F7171E53859313604EAC10BE2786FF385B9C718154818772FA7B899C04EFD18A8019A79B6F64D5B9A2C55E784CB47CA294856689AA6A70CC27B6C20D4D1C729C409D0B925C40C30C0777815077749488B8DF0390695ABDB048C7CE1853602A54D153CF2A51617847B11E63C4C761966D5AD93350DBADA4A15C124BD808871993F
C775B6E410C386590F730A8EC9475EEE915039E91B6FE425B90668C6AC5258B7AF103B9F5E230B719BBB09871DC1621517BA2A839C96AAA6440A875EAC90B298D61BD3F3AC89B405DB394232686A2BE0F3C75F15E64E61F070791EB4BB97B7019825F117C7D73A12FD3DCC22D581B0E41B786374A461EA0D88DAA89B659F0DC82443423515B633B005C958EC26561B6DB818F4B8CB2E28990E748417587FEC38A1284BBB4FF9E478 +decaps_c: 61FF1A8B6117EF118328E88B3227993014DCD075B8A1A7F9801893EEE6405BB960B6B7F6A1A27518A3409139A48B859681CC758F2BCC3EEFB04394A375A5CD71316490938ABFD194B20BCD31B3980261C9ED69BF9B1D7D7659A8040DB1E25D2BA6F703486624B73CACDCA27DB0F7E2408C9448E38873280F5E9950D7CCE252A647580C19904FAD62AEC300BC8E38F05948B63BAD5CE7C90E40C4BC65117761F5F8868F8025D6CEB2C5DF60DE38C3232922087EFCF2CD95DE5E87B6888B88C86CC78315585B2CC688A71B477BFA388DC2334DFA8AA95503D5397E2AE0352903EA6A0AE8B649A914B3525FE58F564BF19CC09F54E105D19BD81054E57001F70BBDD7719449687E9A53B16CA5366A19105A8BA08589AD08DF1300EF4F923BA9E762A82FB09B76E125F2F274D617BF30EAB465ECF24D3707AD300D9AFC1CF1DC40EE7D4EEA6D150E6F0A31DB9F8F92BA8EEEB35D7445589B046BA79EFE231106CF0A75712AB392724C53EFF9F5733BEE0D6A44D0B6F515D0F5E40B1B1E17E67AED3C81D00AC468A28F8453D4B0DA809E57D823F28D61ED0B59A08C622972D99179DA8636C45F1CE8F6252AC86D91B5E92997014E3F5089E68BC52CED5DAE6D5B175FE2D61928465059724C835902D7612CDB69CDAC664FC1C9CB11203A8C7B71486E97B7D1BC6A98F493DCBEC8E629558ED361091293D1B5D2096CEB9FC7AFEE71DB7CCFE482B68A196429FF04D15903E7A75C7BB5F622C36971694559FF07DFAA79E41C362B22643CD39BD9E1D3D6C2A306B5F1102C266EEE67DCDACF36697A836F203838EC110308C90A3D01570CB3668ABA50340E40F54CFA6A9E8862532F5F19848AA11FD34FC86B7FCB1637F4E5A1D03AFCE44124E4E460B84C63496ADED55801DF2517A90AB061C8E63AB6B14BE1694D6F389DD85F5639C5783AFCA0146E6A1EB0C40563C137010DB60BBC3D6374D6F3A892DEBC064701C64BECCB8E2C33B740CC7ED49D108A8C4656818DF5F7D91EAAA446AC6CCDE30C6D3D1BF66E4E3B7B6B81E3CB17227F80DB0096E6BE7D859C09713749FCA21530FE1A716EBE325504319BD0EA2A7D7713607CB679B0A0B2268D493B67C0481872177FFD2593F3ACF691CEE99A36ECA722579EFAA59ACC59EF8CEA9108E620B06056C1
9D3C1EB91E8634DE4957706DFA8F9D0A9E0CD4094F6B95A83F118A513EBFE5E99AEB88A268E0097FCC3C7AE250B681933BBC2A8F5381F94D156434A87E9EE37E78C27A0CDAEEA9814BCB43DF538DBE628C802C1A94E0CDDCD0CD5A0F8220DA97C2383936A33919FCDC11D70ED4437DD2D7C73CD0C3BB90CA7070228FE8D64A1C9D56E6B34830EF300B5AA6EC6C78A5425AE6F7AD0EFDD527CF0AF8E09B56E495BE66F665C64B0A42C5C4B24680480AD2E5C11D991F7E3DA759AEC802F176DDF11EF71469DC13B3A3E03699519858AC6FC65C27FA4CEFDA09C82E8F958E018DD5255CA2F628E0DA7391ABED6D37705528AB22EC71DC8836D7FD4645944703A51CC74D297092FCE139E8976F8BE9C5F86390B74D401A8C8153112201133D0C517C6CE7A38C086069CE3971F1AD28F3E5D01B56A480B417A016AEA46394CDF764812918D8AB0501D5D18CE13FBD3DE91F504215CCD0E2D17B7E963C867F6F132114E36459FC5AF7CEE99B789673E524131F7DC71360951A997A9CE50DD5FAFC4521144441C06BB41C79E8ED53285D137D54F325A6C2F2EF74E34C0F877A614CE45DC0AEDDF95A0E2E4EDAE29AF411C9CC2AF95C9EA9A94A7961C8246E654FA28F3D568D5FEE93352C2E0D60CCAF5B00090AB6E7A53AA06A8CD3737EBF1B65D625BCF220F74DE22D9871EFC376BF082D4B872A303C32427A0C98BECF58959C9F9E2E887DBC42AAB1656AD15637A6A8F4BF9634095491F8C99242913891437E6C5B50A213DDE80D2196BE12C3937FE3239BF6759ABB8C1C9466F42FBD53894AE52FB533321429FCE4FEC1DB352C49583A7D817EAF62000888ECB0EBFFEF69FF8E590CFA25BEAB21605B635ABC2CA23680789725CF700F553C88352F31616154873D18B6C6EB519FC639B070FD67F86AAB62349DBFFA89F93051A7C7B7BD161FCD73672CEEF59A9BB7F571EABE2570C5BF31ECAA1F9CA7A9C6D31EA5FB7C979CDD2613897E7D1503FB0C19ADDCFB3A63E2185FC4101838DA66CCE2D3D9FFB47746C2003EDD86C2F8C3 +decaps_KPrime: C61F73D2BFB18594E1BA5D3B58B4C934206D3A6F8EC91395AB7779C61FA1DD6F +keygen_ek: 
70E13F301517B5A40D70361F6309416067646D2B7136626BCCCC170C66CED490C735344B6277097CA914212A292DD122FBB69FDECA47FAB4532B8C80CEB77F9C543E0BF1536D1C0CAE077E2CA7862B45A410469CC5B706BAE0051CB2961DB7270B75B711698D2B807040D5628129436FBB58F1203F75561465F54257E44D33F512D633431D00A2FB0230C9BB9CDDFC83BD65C97445302186A17223AD21332803B909E5E5671970BBB0F1C4837BB84273BA675AC074C5290B411C2500657059339DE392F9CA308952A2201A588767ADC035BDF33024EA3B9A83C5A0B9C5425D14070C81AADA26BAC3FBB8D4B7CFEE0392375C68427351DFEC63609BBB50B463E04092857009D1E5B81D707D14B833CD4A0B551BAA13EC488A1503B0467EE4023C3FE032C78225063886E2468E00F700072A2EC8DA6AFB206C91904433BBCCB0E76F42468C40EB5F59CB9AE1B035E521510BF216A1ABCB19033B7A658897C65874D5135183149F979E553CCFBFA3900CDA6F01960B75157F5453AA6E73B3ED902F7D7C9305971BDF722E2937169A1BC0FAEB6C92F7150D2330877C5DC5249AAE20302634C5C5B23053521028122542F485A0EAC869223720633651F5B247C662B31A10538CA7491B1437AA74F4282D12974D9C934DF214785B6418468B92E52528C8447A1CA422FA6CC88E28B059F04B23597323F72F3E2336F87C47905CBA655BB73FC32E18D4B78705C782EBCB43E2785C82C5AF24B0E1699CFBC0257475799A539B11A50F4DF2B7FAA20BD8827515CA370F89C0D4C60902F6567CD60B0860A55BC8572C436C246AC276644E7D602AA57C0166201814991C1BD75C7C47C348B67D77613386908144EA83FF721F9A50076C510164D18E05D05D9884C44146A07CCACF890498ED1A19B2A15431729DC1F12B7EA10F9F928062D1454B4B9F68E59990290BE3728B3289569363AB1005131B2381A08CC2BF943E95D5B21BC6AABC2273348BC72BD093B7B5617AE87F602BB989E6AFC44B81512076A3A876E0E25F9762B462081985502F26B287A2936D5B1ACFFCEC4EEE77A9CBA980EB9B5FDE75539F650904677DBE29AB8BB918A3494803ECA59A2C32E5B5C83B0B80B1102CD7D9482B459B6B74491EC30C4BE77C2B524AF7B3AD1F71341DF0A76F255C2903C88208079379930A9513F390126E732A2BB094BFA6BF0A432BCD657DAFCB25C8BB15E0955D099B74FF1A4DE6559CD6797C38C48C1134CA2C979243F3152AF4BBE4D7A6BC09872133920CD23B3EF9848CCC6845D647B5387557736513D58560845192F9265159932E572A88C44E6566760C061C67FCB5BF210095E214DA745357E36996D8C066311BBC761A1FD25273D21EAB50010563CD6468A4EA836B6D64BD2BD76DBE3582D5736A60
5A5509FC28789B56B884AE9A60415F55674BE601576C7CEE58143BF054806ABCB345A256CBC454E343F3CC7ADE65562FD29EB259737BB3CF9649BDEA283FB07265677C9808D13119C0A2ADF745DE6975F4562CD61557B3965D2B072F000AA7E0A357E1253EAFEA7FDFCC92FA87630DD2276CE42E820B69D1FC2E47D5C498A55B3B29C34E64903D047AB1C04024958F701195F5D13EC6706B8448503A549922A58A24B67C93632756B77D225407316171DEEC56714435CF94CCF4599E00D10E569622BADA820C452F2542ADF08765CA93AE38EB025DE31CFF7974549A7825A831DD054E87B84C5F2547FF47B46F88C99F1548E933A6F4D87F1A4A1B00E39E02D60E51EB603C1C0D807ACDAB08BAA2B99869B75CA2C4B96368B51780BD1EC75B110B9FA66556876C5F48797D090138F754AE30533D36AA44B9B1702A6A8A56626BF0451A37A7AC1A337076E51E0A6B0300C2C790A4437EA28D7EC98C419B37D6AA970417435F91BEDC2B1F4BC8158A51B1F471516FE824287C896B891B49F254DD36359B89C824EB3F6248027FBBAD4CF29118CB50EBB625A37C537A0223F0EB7085B5C7EC607570DB9185D59902BC26C654A2804C0D946793D8A21482AC4F05E9016260331DCC58BC66AF3CA7585440216AA0263B2A725E080F6F9C5B6A9C9DA29355189B4B95B137D1225F252AC797B0646CAC52164B5972A99265D347FC7C3591D15FFE681C06D438CCEB60BB6310B7953289720E2C728730052337ACA7C8521AB44F1E2A049B83E0774C96CD8C876FA675D0923977271B +keygen_dk: 
8AD0B5F09A25AA935DD9DA34AB82CA75A12D66E99CF48BCA45B9B2DB441BC2971BDC9922B5F8BC3C0678546759073CB88E26BAD1B1B3A4646A6529C632EAA347734A3BE583D47178094C4A670CBC41EC0689765668542E6F15A7D586C9E26A6A03C71469C2C53F7B141B232D86216A25C7A8F36852858C07A9524EE17BA6340AA2A215C1EA852167B6891CC166C2FA13A0270A22983413E0ACC444BF40E28C45E14E07404F62996369597F10FCC180ECACAD1A6719AB9F1B447AE19A2CB02A7D04206172168C4F0A99BAFA932D6649E894A8F0577B81C66483C5B5CF60AE75A444526A9B3674325FBA38F53296421A785011C1DDB3A6997745DB83CD583C0C4177C797D40A4F699F1F40C5413AC4E42373492B6A2C6A406D437F42570B5E949EF4350DEA790CFEB72D1287517FE3273D3CA65A13CA6E23C57BF07DA04B851CF3AFA18BAF5EF020792857A9E721F01B9FEA7B612E4C6E29079366B0228688BE2A067FBE92842DD280B3C74DFAB761E613A8604C476E15466685C695AC35791A9159942F60170CA214C7C09B1A4B1BCC4F4CC60DF01A101915A9A2BC553119665032DCD9476FBA7BB07157D33C9C8EFA6BD0AC38C1AC265FB51857D01517615326CA0E08650BA6FA40832C7B4C41B644716022B652B1927D55C9B37FE25F1AB67A9A03C7008C84B07C4926B6381E40CFD441041235187416CEC366CA6FB76FA0AB6E328A2641FC47DCD76E91CA9431E19BFF02CE6228C2336382F8A10E9EE2C8F1759390A20024A15B3B090C1390CA0343797284246BD8943507B7A6B71FC33A03B7A88366E4AFED515739E5C69F8A266E4A1F53D73930E9875569312B27037E5C7F852100C2BA3648B1B9C1B149F6250E0A6B065213134F302569755B8C5C4FFC680BF7811845340035F170B068BA67A4C3B0166D03CC8261840190A20F9A3B1EF465C2F2182DA8DA8D3B3C8CB12915F7D93E04D8840C3567255A7BD6D433CF1068D88452CFC11F991B7CE37927D6CAAE8810742F42148B896EC4EBB5340386315B2C1E2B43915C04549CC8C19AB40E3B7C311B426110A9BBB18D3B992A42C0189290BE673A397C4090443B88C5D5C565A10FEA05603D36244A4AA8E9255CF184AE69535A8399C1C6F76CF2342ADFEA6A447BB4501B9A6C44593EB043E7A5502F586CF3407DEB7A0FC32B3F46F1245C5596E0F1BED937207C4509E1D8985BE745FD69BF448092433028BE2595903311479586A34B2D49107410BC4BD2965317FC76352B638DF3B3A315325026809E3BC4608C0B2CB84DF0C95BC052707FC1A377B2B465EB7A5D644AB4278DDCE5B61E2BB3A71052555CB3BAC693EF025FF00353FB76945B8AA3E9950F9273818791CCAD56884658142A2B4DF3C57ECA13AD44B49B6346C63EE89078
589E9EB9A9804A03BF7A276F86B9676C58D3E71D2C8770804A61592178B449C7955BBE8CF42F316725E3B16D55B527CFB232681B21B2CB2F30AC76015BB5416A0411C1745892412E683A98D736ED1A4CD980617D0821C2AB0282070A611A11D19701FBD55A2127B324E6901D84986C0464DE7120AF4510AF591DD9BB79479C5FA88714C6A99714F76A1E402C8F384A4EE6BCD41500724CC1793ABFD8D3C2F320397134B00F762DBA85A23AF155E6CC037380C1DD64A973DB35B74470448B24212062764787E5A93A48807171A7715CFC89BCBC9E141886F807A1F9D684C8426F0122887D9C4C27EA69CC151B4D49B51E5A4EAAA5AD06ABA86DF942E986A5D5792080FC480396B3948668FB382CC8FC154748CB30B7641F0270C83438B4FC3D1901265880405177BC7F44788251ABC474273531212A66279E70337A2309FDF49E059BBDAF4973A5377A4D517BA755702C37CC355685404C952FB67E0419C78D1584D0949254D04952F7243BF1402803C9FC737325881378CA77EEF5C415FB037D689A5854A1D24B6527A59B9B16959384358C423C79645CCF3133E21B4B64957B14F63F2AA26357B1C262F2A90F7CCC2A15936999B0A1B498AB3B32433032C9CA23081C55D31CAD36E90C1CE0B5FC247CE8C843F2883524B664FAC1B20BE602A10AAF65738680BB10254426F9CB09A1954DB7655622308FAEF505ACB497554A8FCFA96A85255AD846542013B8415951BDD45C01931EBE583E70E13F301517B5A40D70361F6309416067646D2B7136626BCCCC170C66CED490C735344B6277097CA914212A292DD122FBB69FDECA47FAB4532B8C80CEB77F9C543E0BF1536D1C0CAE077E2CA7862B45A410469CC5B706BAE0051CB2961DB7270B75B711698D2B807040D5628129436FBB58F1203F75561465F54257E44D33F512D633431D00A2FB0230C9BB9CDDFC83BD65C97445302186A17223AD21332803B909E5E5671970BBB0F1C4837BB84273BA675AC074C5290B411C2500657059339DE392F9CA308952A2201A588767ADC035BDF33024EA3B9A83C5A0B9C5425D14070C81AADA26BAC3FBB8D4B7CFEE0392375C68427351DFEC63609BBB50B463E04092857009D1E5B81D707D14B833CD4A0B551BAA13EC488A1503B0467EE4023C3FE032C78225063886E2468E00F700072A2EC8DA6AFB206C91904433BBCCB0E76F42468C40EB5F59CB9AE1B035E521510BF216A1ABCB19033B7A658897C65874D5135183149F979E553CCFBFA3900CDA6F01960B75157F5453AA6E73B3ED902F7D7C9305971BDF722E2937169A1BC0FAEB6C92F7150D2330877C5DC5249AAE20302634C5C5B23053521028122542F485A0EAC869223720633651F5B247C662B31A10538CA7491B1437AA74F4282D1297
4D9C934DF214785B6418468B92E52528C8447A1CA422FA6CC88E28B059F04B23597323F72F3E2336F87C47905CBA655BB73FC32E18D4B78705C782EBCB43E2785C82C5AF24B0E1699CFBC0257475799A539B11A50F4DF2B7FAA20BD8827515CA370F89C0D4C60902F6567CD60B0860A55BC8572C436C246AC276644E7D602AA57C0166201814991C1BD75C7C47C348B67D77613386908144EA83FF721F9A50076C510164D18E05D05D9884C44146A07CCACF890498ED1A19B2A15431729DC1F12B7EA10F9F928062D1454B4B9F68E59990290BE3728B3289569363AB1005131B2381A08CC2BF943E95D5B21BC6AABC2273348BC72BD093B7B5617AE87F602BB989E6AFC44B81512076A3A876E0E25F9762B462081985502F26B287A2936D5B1ACFFCEC4EEE77A9CBA980EB9B5FDE75539F650904677DBE29AB8BB918A3494803ECA59A2C32E5B5C83B0B80B1102CD7D9482B459B6B74491EC30C4BE77C2B524AF7B3AD1F71341DF0A76F255C2903C88208079379930A9513F390126E732A2BB094BFA6BF0A432BCD657DAFCB25C8BB15E0955D099B74FF1A4DE6559CD6797C38C48C1134CA2C979243F3152AF4BBE4D7A6BC09872133920CD23B3EF9848CCC6845D647B5387557736513D58560845192F9265159932E572A88C44E6566760C061C67FCB5BF210095E214DA745357E36996D8C066311BBC761A1FD25273D21EAB50010563CD6468A4EA836B6D64BD2BD76DBE3582D5736A605A5509FC28789B56B884AE9A60415F55674BE601576C7CEE58143BF054806ABCB345A256CBC454E343F3CC7ADE65562FD29EB259737BB3CF9649BDEA283FB07265677C9808D13119C0A2ADF745DE6975F4562CD61557B3965D2B072F000AA7E0A357E1253EAFEA7FDFCC92FA87630DD2276CE42E820B69D1FC2E47D5C498A55B3B29C34E64903D047AB1C04024958F701195F5D13EC6706B8448503A549922A58A24B67C93632756B77D225407316171DEEC56714435CF94CCF4599E00D10E569622BADA820C452F2542ADF08765CA93AE38EB025DE31CFF7974549A7825A831DD054E87B84C5F2547FF47B46F88C99F1548E933A6F4D87F1A4A1B00E39E02D60E51EB603C1C0D807ACDAB08BAA2B99869B75CA2C4B96368B51780BD1EC75B110B9FA66556876C5F48797D090138F754AE30533D36AA44B9B1702A6A8A56626BF0451A37A7AC1A337076E51E0A6B0300C2C790A4437EA28D7EC98C419B37D6AA970417435F91BEDC2B1F4BC8158A51B1F471516FE824287C896B891B49F254DD36359B89C824EB3F6248027FBBAD4CF29118CB50EBB625A37C537A0223F0EB7085B5C7EC607570DB9185D59902BC26C654A2804C0D946793D8A21482AC4F05E9016260331DCC58BC66AF3CA758544021
6AA0263B2A725E080F6F9C5B6A9C9DA29355189B4B95B137D1225F252AC797B0646CAC52164B5972A99265D347FC7C3591D15FFE681C06D438CCEB60BB6310B7953289720E2C728730052337ACA7C8521AB44F1E2A049B83E0774C96CD8C876FA675D0923977271BE6E832F2498CA5A3431F40D3187B1ED965FDD6693B37F6EB408A99977AE496447AF65022E0A472ED6388638EA29D82DA68B4CF9FFDF2B67CD708EA5A370C6A7C +encaps_c: 8D4E2CB39FFDE4311AEEDB2338BF58CE11FADABDC9813A321930F46756DD13A8E7919FAC4F59CC9F8B91C833B3B3F91ADC6F9FBDBDE2F7DAE8841BE5238B9850A5EEBE675DDEF42A9314F690595D51523E8117F22266034F09B77D991EE575802AFE446374EB3D9E1BEB8F25049C6EFA96327366C024CDFBE8DC27EF56492C90409E87139C6088488E17B82D1556C25131ACEE7DAFFE2D437CEC3441BBBBAB80C4BF177E653AE0831C9B4CEB70505727D63C4D474FEDC52019BE411C9A43B87170F5893F06ECD8D782063DF893A1B682246D1C64F8F5A8C6FCDF07927F4D5B7A397FBCBD075045DF2C4A36F5304C95F44AF927AE9166420B39448794F5B3C35227C3C9DF925602A1AC98F851AADB65C93FDD6327AED8AE4129724436A33AA08AA56608855FF80AAA42ACA4562B2D78DBBD2F91AEF251566B8C6F98213784C99DD7D71F495564C908501E35E3BFBB675CCB66635287CB6466E6E38EA8AB11CE7EC60BED8620B3DCD6943D1279A41F93A87FA359E513C81DE918DA88322B1B088140E074BE39BC17E3C51AB719DF6E426D64FF94B8662B9DD26A32A3C3687BF9294C537A2268F9DED380CC8A0F1127EE5A322B4DF24D87FBCE76F560B037C659B6FB15C156071AEDC26EF11140DE88D08D463EA0EAF080A0B2E627D9FF1D56C502335524269727A032DACD16543ADA8342CD6CB40E7228592C3574D982E0B9145EB865DB2EE7810726A916B837CA4F14C2CB9E951BDE76BE16B8B1CDC2EECDC06949B8BEB11786B8F25F4C9AFA5597CEB1D85FC9B9C91DC61966F396091E54C96C97A4300E99FD9F752C0BEF5D88CAFBDCB3993FCF6C7A8C5519FCECB6A79117E9B521680197D8A91AB75F1814DBC58075EF4F07987ABC56A75DA4416EDB9D6F3D771AD340D5CBCFC0E571FA70AAC1C7DBBB5F5C5E1D8B1036F5A6FCFD0625AB5BBDA571839C5835DD6979778F59D348684FA6CFC2A62535B47FAD7F97B5218872D52DCACE9D3C1B11628D352AD821900F44E14B647F6BFA70F646B5C7AF5313177A10954944229153A449FCF89A6263BDBF8556E981E5D6251340F9F43C6692030FB9605BB99F33E96F06D1E4E6ABBE65E14696D530F1B525FFF87D54C1AC2F5E964D46EE37F4045B54E6098F76B28EAF69E998888D25E021A538
FD1956A7FC30AE83F8BA9947F864FD59731A6FBB402AF2990E1ED2D56BF62AA6CEAE6F769D2D0C6C313D7AAF974E69DC02CC4318B9457B8CC40656AB7B6134DE3F9801CE019699CE855EBE9C6C02FD08506F004A4EED2CA166C954C7DB8810700CA671EF372A290B00E1BFBB97E3E674D3DCCC57CE59F465B1488FF76F6239008BE3E761EF9C113DF0107B8EEAE3FEBA55B35E4C1DA3B6C87A8D20110E1CD771CCBC30DFF761E603D488E55B853AAE7DAADF2A007B8393DF08AF534F9F53A73757BABE21C86426CF058ECA817EF237BFC58AC298FBF2A1481C4D12DCF1B737FD639769A2531EF931A362A44456EE2CA48598B46259FCC977076C59FA4E2954E9967DA45DA7CBF78633EC59C463FE48A83B801A54DB3FEAB445A357E418B0653F2940B2B71381B2DF9ECF8100848E2912F4BD503AF075AAAF36C136A413C95BE2F25A6D291976CD66A27643537E35E1DF89B1E494B36B08F3D0196CD7E90BA5BB21009F37A843199E08DD95CA4948C533CB263B5D405AF2FA119981A8536EB71C88226C41534C2687BF1EED3475E8488BDE909A93D4DB55B6E834B5E7860AA98FD8BCB13AB077B7BFD75B35FA393E93E3BFB4B9BA1DAA7465FD5B23A5B4CD1716D4BDF7B8D5574B156DB87D8DE1E526C97F8EB287BD97EEEEEF074DBCB2C4DB51A4EFF1FA7FFF328A572D7270017108ACE2ED25093DA535C7A26D3B912AA57FB322E53BB222E94E7CF68CD8A21AD7C06A4AF978ED1DEB10E3F2412AC6543C182068EFFBD87F31765F5AE681EE8B2E9AEB5BC940A94EC0EEF5BEF74874169EABECF1512565C51EA58721DD3AF1690365DB22E1877F2A5C01723F69B7725277AE4E9EFACD3AFA5ADCAF385777E7CE10F956B4642C6FC1C97808993EFD994CA65C75F459AC5872F82488C57FB7AF9AB969D5E369C16D0B2BF7800B938D6784C7F64D0C55CA7794654938949E14217055D34101F9417D370A8ADD72FC0B5766EC1D8ADDD702334A2AC27709C5AC5AE5601DBA952BE258D9336DF3E0F65878A858613258FB5E47941B diff --git a/tests/PQC_Intermediate_Values/ML-KEM-512.txt b/tests/PQC_Intermediate_Values/ML-KEM-512.txt new file mode 100644 index 0000000000..08b8715f0f --- /dev/null +++ b/tests/PQC_Intermediate_Values/ML-KEM-512.txt @@ -0,0 +1,12 @@ +Origin of the values: https://csrc.nist.gov/csrc/media/Projects/post-quantum-cryptography/documents/example-files/PQC%20Intermediate%20Values.zip +keygen_z: CD119AFDC8559442424A87C13EA101E29FCA11881869077E4092E751BEDCA8BC +keygen_d: 
CD119AFDC8559442424A87C13EA101E29FCA11881869077E4092E751BEDCA8BC +encaps_m: 109A248FE8052F84271FF57BAC156B1BA6A509CDCDBCC96CCDB1CCB85CA49315 +encaps_ek: A5409718CB72F2438A3555A3C8F18F2671A1F81403DF7B5A4659A51F50827BA6577AA70800D78D8BC5AA86B89E08B58F3480A89E104DC6922EDBC12D06F891027C654E994A22F91A2AF63404CA98D7B67EEA25911B24C70DEB8146A0821F34A302551F2D510C0588C8BCA74EB4DC0CFA4603C1C5A3C5537061789068682C4CC3143FBA9BB5542F9778BDF23B3652F2A7524756FA73909DDAC7E532522659218CBA25F33B6B0458CB03DA7935BA59111955312B15CCE2C0F73466A8006283A2AA7CBB61022ABBC2D19F2920BC302472DC97C4A1788C9BD3BBEDC9122B827B279C074C80443141119F4B1629F62F10D4CE2BE3BB343816CAD16A1C87582F2B70E26635B08BB390C13398FCCDA7E9BB3D9B0B7803750C955C57A028A5D26C270316BB2B815C3B972BA6782DAB02F306821E61285BB072BF79781CABC386142A50C7AAAE66A947585BB0D8288DBCAF4B3B85BB7926987BAF7643AAB5FB02210580A0264352E69C6098989CFB87483395960A3A4F31BEFDA80B5F286ECFDAA555D4390AF6B55D313920929093449CD6729D00218E2D86570ADC0C4F6545FFB5632EFB3AAE2625A6982670FACE8D16126FA607E6D0A1FF616A46ECA642CC6AAC554DBBC43DFCF57F364C190CEA5776C1CEB58B7007505FD79C5F005A4BA218CF0693B058B510A4CA204324602F59BB8F2281C4D7B0BC8625E7881650F57C89E32CF4809144775C9073B673E39412A27C914321CCB6A7CF7C37C5BCBE7CA51BE0C928466A458EB778D6466A892A0ACBC09638784A27739C970CA58BC2595AD6BFA4E52EB438AC97C41623802248E110B074838F31A6E7503737704E7AE4AD91299572A8C13603500F3609B625B4E24CAE332B0D7A5BB47A038512A081BC27CDF0F2923CD3479F5307020B77F149584564060E5083CED55312B6A6A465A82B4577D63A4B49C80B07A9367E39778AF76FA8EC2CF528722856CE7813401A8383BDB7151B9B6D2DD6BFF55401D28AC612818C88C9287347B098A966EB9C0A2DB71F0A75555E1757D3AC4E3D802C8DC6A261521255186ABB98C2480301B8C6B31228B54461BC44EA3C2CF94B86C7A5B82C55167A7606CA9DC8253B7604E44A07F3ED55CD5B5E +encaps_K: 4DDD304E274899BD82971856824B587130927952060121858F9ADEB96AB7F571 +decaps_dk: 
174313EFA93520E28A7076C888096E02B0BDD86830497B61FDEAB6209C6CF71C625C4680775C3477581C427A6FE1B0356EAB048BCA434F83B542C8B860010696A57299BB262268891FFC72142CA1A866185CA82D05406695BA57D4C930F9C17D6223523CF5A4F2A433A364459AC0ACDE7254481329288B1BE187CC25219F48C2443C532199859355320D04F0B80DE969F169A3D2BA3411B4ADBC01B66271824CD9543C78BA4804AE81F3AF00336C5CC3698354C0E01873A2A17D6A95A312689A99DC89084150A8D52BB31C3FF3D4215FA3C4111B401992866E513E5128A20ED95FDEE61485DC937E099D76F79B92734DC4CBB9A7A413FEA6285BC0C27C961E47D1983644C4BF913D72F4B030D34738427263E87AB4C0B7DF0B72CA8AA0BAA67B079939D587801D60C87A20405E5C52603C072FDB63E2E1C2A95CC26F5ABEF6088333800886D093CA01A76F57005E053569542E0A076B98736D4D39B00FC1653FBC2D12EA32A94B9B92C68BA4B68A4E7B370A23B03FE8221639B01244806C27067A58031DB80D2D03661A017BB46BB3711ACB568A4FABEBAFC5FA06F7CA0E4D962E3170CB11C0A8D18A09CE27A6A9763E123885450224DE07CC17546C17951FDE476E083583EF10BF76A98AFFF9B12DB5401CD3673495392D741291C3AA78420C8A7CB5FFE65012997C4DA4322EA90B5014B5B4D0180100247047341E4C24B96B8D7C0020524B7C1D66C3E08CB299EB4EC6FA0EE8EA05FD430F57605E892B232D2047CA9B4ECAD9BDD09C9951196916525D1EC921B6E3CE0EE692EBA728B4DB10F3381FBF584ABB7B6A9210C7C424CE4A369370CB48D608634ABA0BFF91C5620A1189D0CA97421D423429FB663952DC1231B4362B7162FE3A42111C91D76A964CB4154194209EDBAA1F481BD126C325D15678E39BCCE4C704EA487246648A6C6C2540B5F680A35EE2824246450A7293F21A90CFD14EFAF78FA3D7322251C641A50E95BB5EC5CA0B60E89D7C18B7A44A0FAFB4BCADE9B588D1B7FCF12BA1E1084D56B197EA90A79A3D83927A2307603BC211C0830CB7062C04254824575B226CAD9A27C2A45519AE39546467690485498A320AD56993B15A9D22C6191446CB40AA7547401681DCC7E36596B10C07FA2A20B43C4B0124401F8A0E744878C7296623C7395B6994D18C4787A289DBB05CB1827451D83F072904537594F515CA1017991620A33E096EE0DC091AE4CA960603B101B5B4E23E9A5B65E1F6C2A8CC89341383B706725ED5B3485769181B8F76439C05636A0C3436FFBA8B86A5306FA111F6FC71EB779B25707CFAE0A6DA7B0AD5D94B10F21E4FCA92893B9FFE73210763401377837A10CA9625346C42ADC705BD92DB3426D926CE4B5EC24A5CDF27CB91E5A7E7164D1BDC99D7
5679FBC93A58F647DAC1086CE931BC089233E9487E0867BC58472B01BF2895C323B64DBE4A17A9E841B053CADB5C76D035724C321BBC13666F0A35DFDA0721E8987623256A994D95FA1C05F57C1E15A30C4A0C8318A0D83C410C362862E817DD6ABBAA4BBE75B736CCCBB4AF2A188402BD4CE597932008862865332562F324C7A424151FB59D0AE1821F2864C7E698127AAD92C33B313988C29A09E260449BCA7BEE360862314E47519EF3918DDDE403E7B92AC9908F93C6369CC5C47B8CB1DC3A3479C762F62A18FE05A9B0645A5311A01828723AEB51FA505E96B29E3D2B6E5B1327DE3A61AB0C50BE0124B64B33314B32D6122510E46445857AA0E2C4B0D256955620A8681D1E555126D00509E35BF59683DDAA40E82C519B855852C366CB54452BF910B001692330345708653F511800B10E009D9F7D10A53B8B30BF13B06F254EC8A6BA539700F6358DE0463A019540C9873F3F4680E2113A7CCC55FF754D85AA67E9E55F887424E0B2625682A5DDA218F03C3C10A246CDB0CC91D19D8F024DB9B1415F50ACD8F65DE2787B9103C575B687765572CFFA59026C2BCEE77423BCAFD3054BF8E2713FB85B0BF6A46E716152F5C9A3011EC90114C76B01516799BD5911415B704544077F188806755EEC4131E55556DB903F4284C1F90086FF431B68F51F629812F320B55F219D72A1928F38C9A1EC823BA198BA9ABBACF62902B3CA0AFC95EA8AC303FB8BDD29BB9D18A03BA44E58B1B0B85A2A1662E6A31DA7545511A478A18177889061EF76631264239ADEBD04A8C52B72E2B1F3A2DFBBD8C054E70CC2A742E7B7D417DFED314422187DE1B2954481195755EC04BB7671C4331446BBE8952514905321A2176E935B5420C0D5EA4465 +decaps_c: 
84A188A072E4D4F449A4BE170274DD2A5F3E356E95B96E40AD3FF1455E36C6A71E909DD2C0DFF8AD2C9F503BAC9065716248083BDA40CECB38E3B3058BAF51A7572384FF8406A8136A4FC6D912A54B2EB5B9D598FB689E72ED3DEFD2FF8355ED9E9CCA53E82C0886E094C592C392311F04FEC68F9A1C531CF3419030892B5BDCACEEF6A0E7F1BD44903F49DE8E37B02BA3FC5121D99F8CC3040F66832F77021B4CA35F7A4825038936564CA2E673FF9CC0519C25F6A52D87EDD965B2464AA365D2BF068B72FC68B65E88515E2C832BBDB27D61BF512B5FC2D8590FB35F49500CAFE70E7D0776B5C4E4503A7189ADBAFF5D5B515CC68B2F81D993C6D7FA7D3D1D90EBFF51DA3FBBB4430E5BBEDBCA8DA078DCE8EC815B168BFC09AB4A20678870F4868B1FAE28D209C75368A799317DFA08C2B651FAC72DCA2A1B4CBB75E873F15C51B6D0B5E6F5E60E2AF6C40D2CABCBF3588F44BCEA6D72D359F40F9CF5E0EC40A5215E5ACEEAF0DA00D923D4CEFF5C3A3AB1E46C754F4AE052C2BC49FDB4521AE44DF634D56E433DAD3DF3C07115406FF8BFD0D7C93B4941D0F09213C1681CFD5C8663DF02041A3CBD162F5C4D80CB1DC7D4A501AD06FE96EB348B6E331C8296FE904EB97C087456328D703B85BDAC2FB43C728D0B05FC54B8C155C010EF0DB14CC668D1B1BC727AF8864076736B898BABA1C81DCA2053F58587D3C4E33C694A264BE2897E7D2EEFADDA9FF88D70BF3731F1228CB3E131EB0CB76FDBD2CCB1CBC18D1450AC7A16349E7129CAB720D5CB70B56E855E8305DCDA730BBD0EA33EF0815D02190BB98E30F73BF7789CDD673C613B0C57CB2EF32E670A98D2D630670773C59D8A6A2CFCFF1C7CA1BB55C17A32CB65A2EA19C7B8E295C6898CF32FEE1DEB01472BE76C3A78CB242EDFE21D961FCB85C3CF6CEE218986C1BD932BF97BC6DECAABF8C62940C0A58E87C6EDDCD74B7F715D8C22520546239F3AAA10A435820103B4E3295311D992C9C8771A3CE849868F36F31214F9639C028F4A5F4945F2BEC9585077BF2F637D2549F8348C00ECBF19C470DF255EFF6232813429F853 +decaps_KPrime: 224B9C051213EF46549243796532282973FA7CF97E8913C339C1940AC17E05E0 +keygen_ek: 
C65A1D9D479777E6905A91A5CB24551C8B1E52A3C77B63313FFC8B5817815259A6ADB59645DC4BB1436D51E62A096834AF43772510C4EDF34CDE0A5B57C145E687CB87162F001C21C9E1934AC11AAFA70FF810732650B32A3018A7C50CD736796222C8AB821A9283BE1CC204C3F1630D3CCCDB0A9A3D17552B9158C0664E5D6A04B0FA36DE45862A46A39EC597AE42C311C4AC224A72D6F253BB5235F7A2B8B0F24D1376AF588746F3BB8E0365078761CAB983A4A6A940A3D997047A8F36A731E8965236C37BF200082F821DCA7716C444A90BEC53074BBA58C132BFB9A2ACE2CEC9AA658EAC1232CCCA3C817A92C1195C05C0E1D6639FD2ADE531607D488B74A747CFF47FCA5C8B2163CA03C545ED103278430C60B2381A09427FD130F859BF5DB776DA095DCA5804FA63B0D7D87FA9415C72FB51872A989F466C984BC74C29B8632019CA040C9CA35E22608DAA70357AE2C3AD83631FAA174E0ACDF5DBBF3CF68A05B6543AB6268E1A51B0932C17B00A1371B2DAB241F92A43FFB456D0A8C8860A8E28A61A21307CC0456DA4242905CB1D3D0BBD81BB8EE274A43C76C310019515FCC140467C33370C86808ECAA58E3BA93A2C1190461C1DFA11302001BBAB4CB1E3642EF8CB26309B60523BC21887B07F898CE562A6CA778EA01505851378CEA8BB7FC09D11961B6C596F93542A9904864EB10CD0A703DBA98921861A87B056525C71A843553E6400777437C95CCC8085CC0C477D665A4479019D4CD442F74A3CD8169F4262B8271B5D5A67C8C1611AAE7B3D0534C0859716FDF0BB68949094C06A1B73C9AA1CBDF331543DE002A8C06F94E8810A5CB373832745D720683B574875A666946D0296893F2B59E907488D8C8489D474D929A05A573ED667490371A46D4556CBB68AAA79CC3EC6653413576C228E379A14CB90B7B7591B19A7BD37A1C4D37859892219442BB0B9B9BA67BA3BC0D095C8803CEBE97AFF0B1C153578A130CD8157CF745946C2F5726D9C11273575505291346528EE0BAC047CC984538B97BBABFCC357DCB8A98FB857C9C52D1B786749CA61892B09759980520091B9B477C70E6C46586B1CCEBE87BCF6DF03C2B27CB09FA03F63160958383BE636 +keygen_dk: 
37EC477E217BFB40384C850E51C1837158BDBC23A31832BC25C91B3121444AD4533733BAFF07CA817B64B2CA4299AA26454CBAFB35B6ABE1185CB47C4CD61AF98383C4814B20AB8754FC514F23074114C3E5A810A453B855AA7F1310C74B0B01E5AAB2E871738FAC2786C7A05D6B3B32A050D0FB223956C95CA0C2C1D54154A77BD33737A49A0065D1424A2ABAFD52AA934C9804939208F05CCF8B8B8086316E0943A08710500C918A2B218D37B85AE28022CB0134FB49F5C45D98D3C04B755A60880422668E2B301B18D5194DE991B265BF94697E6A4B8150C8B852033915635E30665BDA2191DAA505D43344FD29C9FCC1C507691D475B617C948FCC84B1B08A1C638C3E13580CE359789A9860E5469CC754B08EE33F0921BDEF15A906969F2DC57A25E80CE4C45F11E04A519AB08B9B927C3A13A081CFFA110FACCC5E8DC29495978B5553104D473A175918AD5B5487BBA69712AE93F615C60A8D387BCE3F651E56880A522B2DB86351CAB65D13B4693DB0B2C80936FAD1CE67925E6BB7C110C43E83247D22608D8C1023431CB69290A4F8A9593BF1241D737C0CD16D75EB50C6842CE0A21DCE494036824CE63252E9325F05B734452B129132B196084A3788BBB1F20A37D2C2B3F90E0DD7A274C9B1A9F02EC7E721F4A43D409A25FBC99A44D4763107C787620941761ED48C932924BA620986CF277A23471C7B13333D936C0DD49E0FF34CA3AB8234C42AEBE459C612052B9716E96B20BEC718126040A9091F6BA9445F45806AEB6E3816710F7CBFED1101461284DD962B7B12047C0A0A906A0589B4A9A426469BDA3946091A375B1952A91C231C0FE6B57F7CC97EFED0BC1001367823BE1886308B3A21452B7E455066719CCCEAF6A726FC22BC8399F54BBFCAF7CA63BA73173C7AA8619A3F485C3E330421006766746F4EF6653E440E5CDC59534018C352C023584CBB374EB7A9B7836832BE53AF272A069755CE2FF29CD8B394C52422B3470E27415F41B397535959F160003B452CF49697B7A53689852BBE6CCFDFB40B48E9328DE11522D0A431B115A5C0C2F4307D9862C0DD1B40C65A1D9D479777E6905A91A5CB24551C8B1E52A3C77B63313FFC8B5817815259A6ADB59645DC4BB1436D51E62A096834AF43772510C4EDF34CDE0A5B57C145E687CB87162F001C21C9E1934AC11AAFA70FF810732650B32A3018A7C50CD736796222C8AB821A9283BE1CC204C3F1630D3CCCDB0A9A3D17552B9158C0664E5D6A04B0FA36DE45862A46A39EC597AE42C311C4AC224A72D6F253BB5235F7A2B8B0F24D1376AF588746F3BB8E0365078761CAB983A4A6A940A3D997047A8F36A731E8965236C37BF200082F821DCA7716C444A90BEC53074BBA58C132BFB9A2ACE2CEC9AA658EAC1232
CCCA3C817A92C1195C05C0E1D6639FD2ADE531607D488B74A747CFF47FCA5C8B2163CA03C545ED103278430C60B2381A09427FD130F859BF5DB776DA095DCA5804FA63B0D7D87FA9415C72FB51872A989F466C984BC74C29B8632019CA040C9CA35E22608DAA70357AE2C3AD83631FAA174E0ACDF5DBBF3CF68A05B6543AB6268E1A51B0932C17B00A1371B2DAB241F92A43FFB456D0A8C8860A8E28A61A21307CC0456DA4242905CB1D3D0BBD81BB8EE274A43C76C310019515FCC140467C33370C86808ECAA58E3BA93A2C1190461C1DFA11302001BBAB4CB1E3642EF8CB26309B60523BC21887B07F898CE562A6CA778EA01505851378CEA8BB7FC09D11961B6C596F93542A9904864EB10CD0A703DBA98921861A87B056525C71A843553E6400777437C95CCC8085CC0C477D665A4479019D4CD442F74A3CD8169F4262B8271B5D5A67C8C1611AAE7B3D0534C0859716FDF0BB68949094C06A1B73C9AA1CBDF331543DE002A8C06F94E8810A5CB373832745D720683B574875A666946D0296893F2B59E907488D8C8489D474D929A05A573ED667490371A46D4556CBB68AAA79CC3EC6653413576C228E379A14CB90B7B7591B19A7BD37A1C4D37859892219442BB0B9B9BA67BA3BC0D095C8803CEBE97AFF0B1C153578A130CD8157CF745946C2F5726D9C11273575505291346528EE0BAC047CC984538B97BBABFCC357DCB8A98FB857C9C52D1B786749CA61892B09759980520091B9B477C70E6C46586B1CCEBE87BCF6DF03C2B27CB09FA03F63160958383BE636C0ECC8DDAE8B594A14037868BEC0B22300DEFDFAA1D973AC5CEC84AE4386B8FBCD119AFDC8559442424A87C13EA101E29FCA11881869077E4092E751BEDCA8BC +encaps_c: 
597A06DEB88172BA8D7CDE8D82CAA234B8112AF8A72F1AB4CEA1EFCB2D868D53D212E303B70E7E521AB0F4B5DB4F51159248BFB275361BEF883752C78B8D4712275385536A4B0A96E3C23EA6C17EA92B602616E5821E5753A4736C4039C20C923CCECB579805587C0CE72218BB1AB12452F8E154CB8643328142F9B340A641C6F295E5ECF2E048BC7FC79BC5B94277C868D8E536B50425809DCFA024A3905CBA550AD3BB52B459AC38FABC9BC00EBA03EC0906725B4FE4E976F174320047B31D15891365BA482388F0FB973B85224FB00BA865AFAB3C9A1B7D489F7B982D0BD470EF948ECB5B3920AF89035960123B1F8630D763681BFD671567EFBB1E6276AA4FB2DFA9C3948DB7F083F28383B77BC514AF9D68D22E2487C20163C02B0BBF23BBCE0650F84FF8CE02C74E9E11D6F30EC5FA8A012ADC3B89627C7DE855C1FBBEB5DCDE84D05E36C5566E5551B58750A411642639B27864F7E005978FFE256B757D13DA663FC3BB0794A27CF7585D12F22D953B285459FDC9BCDFCDCCB7BF3E4E362D2891D583855F5D9487E6FB217E2E45EE0BD9AFC289F4D564581209A3ACA31795A124BD1BBAEA846755C8EA7810EAA73060E86FB5FDF3FBE72F806BB1BFBFBAC0C7B16BFE74250277ECF5F541571B8A975050917FDF781FEA17B585E3C6DBFE77B1E48A16504C3A38901156100CAFEC2ED939AE9A9EDFC9C0F8C7F55CC93E5DDD0B3DE1C6EDAE2B7EE34C6101F011B5904F693D286356B54C86CE8BCFEA9DBFEC21C1EF0ECC9105005BAA377D829DCA2CBF5EA5F31B71D446B833E00619819D7FC6024052499757A2765F19CD2B36C2488599DC5247494FABE81EEBEFD3BE75C4780E43A50418C5DB2FF359C5A6DE286EF5951E2709486EDC9CC49D0724ECA3F2C0B75F8A36CE862388F00B3C593D1C8C6AC45D73A72FF6B4F805B131ED4EAF5601D7B73B0E3724E75D58DD50F5871C54A37C1481331759F4BE86FB58A2EE003130F66E187C8BA5015BE713296589ACAFBF6596897E03D4920C91F26333B7BF1798AF815C93D4DF55BD47A08249BF113063FBB39503E9B6D43EAC7B0C305A diff --git a/tests/PQC_Intermediate_Values/ML-KEM-768.txt b/tests/PQC_Intermediate_Values/ML-KEM-768.txt new file mode 100644 index 0000000000..09cbbeb55f --- /dev/null +++ b/tests/PQC_Intermediate_Values/ML-KEM-768.txt @@ -0,0 +1,12 @@ +Origin of the values: https://csrc.nist.gov/csrc/media/Projects/post-quantum-cryptography/documents/example-files/PQC%20Intermediate%20Values.zip +keygen_z: 92AC7D1F83BAFAE6EE86FE00F95D813375772434860F5FF7D54FFC37399BC4CC 
+keygen_d: 92AC7D1F83BAFAE6EE86FE00F95D813375772434860F5FF7D54FFC37399BC4CC +encaps_m: 40BE9DCAC16E9CA73D49D0C83F9D3D89BB71574A4219A0F393DFECE2988394C4 +encaps_ek: 1456A2EE8C3556054ABC79B4882C3190E5CA726AB402E5B09728C0F4F79C9FC2ADD828ABE432B1501B60F46CCBC86A3378C34895708A13671B20B389479AAA01C69D6B3B7D07D1C3AB54B91C580F5A336B30069A4F134FFD3764CE73A047E2844771742BF4710B972D4F6590A1C53A975368C271B670F1A4036441054A66E8815997512288552FD7149FFB705AAE133F8414060D0092FA8A1627D78AB2ABC6696288BAF5C60EF370827A7EFA72AE5C6741A5DA043D5940F121485372A98F472D60F05F74D95F01A1991E73A3E0A9536467A4738AB4CF385BA772827EB8CC058B3572E40B598444C181C7F6D9B760A7B907092E9C3351EA234E4449BD9B61A134654E2DA191FF0793961569D3594448BBC2586999A6671EFCA957F3A6699A4A1B2F4707ABA0B2DB20114FE68A4E2815AF3AAC4B8C6BE5648C50CC35C27C57288028D361708D302EEBB860BEE691F656A2550CB321E9293D7516C599817B766BA928B108779A1C8712E74C76841AC58B8C515BF4749BF715984445B2B53063384001E55F68867B1AF46CA70CA8EA74172DB80B5218BDE4F00A0E658DB5A18D94E1427AF7AE358CCEB238772FCC83F10828A4A367D42C4CB6933FDD1C1C7B86AD8B009657A96222D7BA92F527AF877970A83247F47A23FC2285118B57717715204674DA9C94B62BC7838CF87200156B26BA4671159931C49322D80671A0F332EAA2BBF893BE408B9EAC6A505483AA9075BD1368B51F99211F480A9C542A75B5BE08E43ADAF301DD729A85954010E64892A2AA4F15C0BD70B3D856494FF9BA0FE4CE12991CA06B5E3D0B2AF1F797B7A2B760910AE9F833D0D4267A58052C2990F161B886E251711C09D085C3D958B144192C9CC3224A460715B6784EB0B26F237187507D85C5110ACC71CE47198F254553356DAB448C38D243A7C02BE40C908C828D05C081DFAB8FC6B5CFE7D56E7317157DC053B2B3489986B081288871818585E09931095E3274A084115BE276438254A796270A7B4306F08B98D9C2AAECF7065E74446B7C696DBAAF8B4625A10B07827B4A8BABAB09B64AE1C375BB785441F319FB9AC2F14C95FFB252ABBB809C6909CD97706E40691CBA61C9252BD38A04311CA5BB2CA79578347505D0888851E082648BD003BE97C0F8F66759EC96A96A081C6822C4510559537042FC15F069A649B74A10961B354A1F625B04E25B293CF65FB4F53A80CC733D7A175775BF8A9ABB9201620E83A7F3E724D1287DBC44BDD5D85FC71545A927BEEDE537A7768735CC1486C7C3F31104
DB67343F435D2D45554BAAC9CDB5822E8422AE8321C78ABE9F261FD4810A79E33E94E63B3341872C92253521997C084FBC060B8B125CCC88AC85AC5FE3168ACB059B3F119C4E050A20732F501BB9B3E687C846B5C2653F8886373E1004A2AB8D1BB970A7E571D8A46EE81B782F26942DD394FDD9A5E4C5631D985528604B1CC976275B6AC8A67CEEC10FFACBBA3D3BB141321DFC3C9231FC96E448B9AB847021E2C8D90C6BCAF2B1240783B62C79DEDC072A5763E660AF2C27C3F0C3C09207CAD990BB41A7BFCEC99F51596A0E83778F85C006AC6D1FE981B4C4BA1CB575A7D07AE2D31BA760095F74BC163841CF8FF77F894ABC6D261ED87A4530363B949C4AD24EFB3A56809478DDA2 +encaps_K: 616E0B753A3B7F40FEF9A389F58F16BFBB04622941D2464BDAE767820DFAC38E +decaps_dk: 3456859BF707E672AC712B7E70F5427574597502B81DE8931C92A9C0D22A8E1773CB87472205A31C32206BA4BCF42259533CB3A19C0200860244A6C3F6921845B0A05850187A4310B3D5223AAAA0C79B9BBCFCCB3F751214EB0CFAC1A29ED8848A5A49BA84BA68E6B6F5057D493105FF38A9F44B4E7F6CBE7D216408F7B48605B270B253B001A5401C0C9127CC185B1B0CF92B99FBA0D95A295F873515520C86321B8C966C837AAB34B2BFFAB2A2A4301B356B26CDC4563802901B4762F284281A382E5F762BEF47B519A81A108657EBE962BE120B5FB3B9ED338CCF47B3A03952A16633F6E6B534E6B63D05706EFA0F94C03A2B856AE551422F9011F2589A41B96A2CD213C6999B09E91FF423CB106A1A920B84B811469497154223987F005C72F8AF388B090C639F8C774FC5A294C74A212C91A86C328AEBEA558AB43F8B873534FA2EF9E66CEF3C52CD471AB78375E745B9D0AA65D2278B9275AE5348B16CF62AC8065734E4BD77B80CCF897605EB76F485AF8A0B466557A83C0292CCF903EE7AA57C3B51AD660189B86139E380425B31A92689DF2431BFA7B69EAB1727451B29DA8B8BF851E1BC2D3A63134CA9663C57AEC6985CEBD56DB0447B136B017A974761C3C67D33772F9964E5434D643504332A3027294A078C599CB29163109CE3B56CE698B4D3F59E2956A1F03A4B955593F2D2457FFAAE9624A0711045B3F55292F20CC9D0CD791A21597B0F2CD980F3510F0B0239022000D735586EE6A73F3A3DCBD6BD1A85C86512ABF3C51CE00A0331F65360462C022329597A81C3F92FC17938C9138F4111387979C28F0334F90119221374DAB045929B49E43A9646A243F4464DAF811AB00630C75961BCD4AF5D99115A3749191BA8FD41CE0B3C89A695B4BB85064FD3AF95C9B4AEE09AC7B0CC69ECA36A004B6CD662A6D32795053EF0A03ADA3B98BFE3B46A79723E3A45AB3C3
1950669AD77072062CC3B504DF1334FD6909EAC7915F1D5AD16639F5FB564416454259134D565882CB381CBA58B76880767B50AC1B85795D7268433B371230ED4C72F99AB1AD1E595A459CF0A2334AA1463ADE4BDC9249605381857BB98095B41132946CA2457DFAA9149582AA19927B63689E2929AA41027BEF4921970BAD4A55490D91ABE251DEF4552CA88034106A02CE4B058F8B59624B67E063BF178B015E4281EB114A2BC2454943A4B4647122C42CBEA4E94154FD3E4B791F6290B782994206853D67000A633F320A8A374CA5D4038F9CA4244DCB02E9A84E1F7C8A821132B32B9A840557B34780665301724BA2606681D945E34D7CF941B8963CAA1001A491B8B2E43570E9AB95C0A57C503F0AB960B4856D0251574710FE5CB474284FC1049AA2A7B03694A1C763E99DAC6AD0BA8038B138A64432E349116A031E8C792781751BA473CBDF55720005ABDAA13D50182F0E633776BB0675C40472BAD1F9672769183D0CCC810BC25A8573220569F6AC4BAC22A1354D8B36C0580D0E5299E629C506CC7655546FF27810C97B51BA056BBF86ED9CB7C0A537F72D0CF9AD2C231E29EBF553F613CBB15B3721A20077E505FD390CB19F6488A107DEE1CAC58AB7034BA690300219595B3695C1234E8B57E33C8D3A048454A616DF3C9B56A6FF2026AF997725FC95579043BAE9399B6790D637B4FA820B0B2D2CAB607BAF6A372734C31EE0026F3C076D14A8E3EE66AAD8BBBCCEB9DC70C7B6BB0BB76C200C231601CA0873EC8710F4B18D57290B033727C601EDB71C2B0F0C21D553E0E7A4F77716839C7C8448ABB9F66A54E8A4B08A79D9A392CA1270031388BAD56217E32AEF55411974906A245C00712B3CBB1170685193FE25ACD7AC13D32073F3879A5D78375F0052CF79175BAB46D22370597BD06789EDD0711CC4243507A02B4FAADBB62250CC997AE0327AEB00DEB529192A64B1096A86B19674D0B0AF05C4AAE178C2C9A6442E94ED0A56033A11EE42632C0B4AA51D42150790F41062B77253C25BA4DE559761F0A90068389728BC977F70CF7BCCFBD883DF13C79F5F2C34312CB1D5A55D78C1B242096A8C0593CFB2753460BD30ABA306C74173995748385D00B3670E61324D87DE8A14450DC493768777FF0CE6810937A711229561A5EF2BB69861074E00BD93266E4B86269E18EEA2CAACB60A1358636CD7A7CA6BB682130241784B101EA5BFD6C3A07158621614736F6996D5A4E14963A12D836E533A0C8912DB7E11685A4A53D8285F08750DFF66DA27C23B97542DEFB99E470ACD5E647C940CB57301B43CC3E68E64E28B06770695EF609265E06C60F22CB875849E62BAB88CC10ECF622C379CB54F13D8B2BAC902B9AB02BB330B45AC8B741C2647AC45B5BF48A6D3FE0399
86CC940C60A94E66CF644531016A5272450824314B5662A0A909ABFB46FD27BAED3ABA8259361596882B08B2AC7233930FC3786738ED2F81EE638C45C3B9CFD1951DB5BCC1445C2C1625D57D57B53904B6A1AB681580755E89FA79775A657CD62B4426304BC0C711E2807A2C9E852D4B4359EE6B53E4675F523C90782572DC7368FB400C328C70FC846B5E98A4330BBB627BDD784B4DAF0B1F645944942B4C2B6225C8B31E989545522BA6F10396034CB1CA745977844D570894C611A5608A757416D6DE59963C32798C493EFD2264C231910E9A30090CA7B5384F231B89BA68A238190EF1A2A43CB01703470A0F061A70738944BCD9B7004F24797AECB88B1091CFED0590B0415453C39B6EC45B66305FAEA6B55A4B7967505FE3862A267ADBFE05B9181A06501893391650EAAA4A6D16853349276F98E0F44CD726615C61C16713094D8AB093CAC71F2803E7D39109EF5009C9C2CDAF7B7A6B37A33A49881F4BB5D7245A14C5042280C76A84E63F49D0D619D46D723BAA747A3BA90A6FB637A9A1DC02268FD5C043D18CBA1528AC8E225C1F923D1CC84F2E78E25DC3CCE9353C9DAC2AD726A79F64940801DD5701EFBDCB80A98A25993CD7F80591320B63172718647B976A98A771686F0120A053B0C4474604305890FECAF23475DDCC11BC08A9C5F592ABB1A153DB1B883C0507EB68F78E0A14DEBBFEEC621E10A69B6DAAFAA916B539533E508007C4188CE05C862D101D4DB1DF3C4502B8C8AE1457488A36EAD2665BFACB321760281DB9CA72C7614363404A0A8EABC058A23A346875FA96BB18AC2CCF093B8A855673811CED47CBE1EE81D2CF07E43FC4872090853743108865F02C5612AA87166707EE90FFD5B8021F0AA016E5DBCD91F57B3562D3A2BCFA20A4C03010B8AA144E6482804B474FEC1F5E138BE632A3B9C82483DC6890A13B1E8EE6AF714EC5EFAC3B1976B29DADB605B14D3732B5DE118596516858117E2634C4EA0CC +decaps_c: 
DFA6B9D72A63B420B89DDE50F7E0D56ECF876BFEF991FCE91C8D286FA6EABAC1730FD87741FE4AD717B282A21E235A55C3757D88D4CE62F414EB77EB9D357EE29D00087BF8110E5BBBC7C90419072EAE044BF7E183D43A94B2632AA14649619B70649521BC19370942EF70F36C34C8C23591EE0CA71A12D279E0F52D39ED0F913F8C262621FB242E680DEB307B0749C6B393A8EF66F8B04AAFA877B951AB93F598B4B2FAB04F88AC803984FF37E3FE74F3A616D5314EB3A826F874F8ECD3A5647D04942A57EFC09638470DC0A9DF40B317571D3984A78CF7D11751090722B3059E07591CC4A2ED9BA0DCE99BE9E5EE5DB8D698CDEB5814759BA977C90079CF2AFDE478069C513A60091A3A5D0111E22DE06CB145C14E22A214CB278C8152B0681BCAFF54D552B54A671C0DFEF775E7C54FEFC4853868C955971ABDAC2A76292CCCD4FD1C706B7D3614159673E9D7B29A2D3F63363129E7A21E803A460F2714E3E25922780AF38257CD1495ACD1E01980638DF58A153DAB07EFB5C7E78ADACF631956D69CCDA070459568BD9D11A2934BCF1643BC99468238910B1F742EBB3C03D39FD45CFB85BA309E29DD9B5CD560819EC729FCAC8B9D725E3E8ABEDE4B5298A8658EE3F781B0CE683CBB7335CD57EFE2204A8F197446D7314CDBF4C5D08CCC41F80857CC9571FBFB906060F7E17C8CEF0F274AFF83E393B15F2F9589A13AF4BC78E16CDDE62361D63B8DC903B70C01A43419CD2052150BD28719F61FF31F4A9BEC4DDBCEC1F8FB2EFBF37DFFFA4C7FECA8CE6D626BFDA16EE708D9206814A2EF988525615D4AC9BE608C4B03ABEE95B32A5DB74A96119A7E159AF99CD98E88EAF09F0D780E7C7E814B8E88B4F4E15FA54995D0ECBAD3EF046A4947F3E8B9E744241489B806FE9401E78BAFC8E882E9D6D0700F720C0024E7DA49061C5D18A62074040ABC0003200ED465231797930A2E2AA501F64862DDA13014A99F9D3270AA907EEB3FDBFF291600DF1F6B39684B11E396B70D86F90492E82B09BA25607B0C286FBC070182AC76FA7C859AAFEA87016AED22C3605A2789A1D439FD8D933342DAB745A3E550E7D77C01A6234BDA7D6BB19D495E6560FCE8396FC3C6E088ED60F5F2771416EA3BE5BE472B6404906C91E71D9A8672F390083655AB7D0EC6EDFE86789CE20BE2EA90CA5CC31416FB24CBAF94DA1468FE696BCDF5247CF117CBE9334076CA6896B2F6A016B1F7C73728807898D8B199756C2B0AA2457E1B4F7754C4576CE5645614EA15C1AE28B094EB217C7A7A41239576CBDA380EE68783432730AD5EBE7F51D6BE7FB02AB37BE0C96AAC9F3C790A18D159E6BABA71EC88C110FD84C336DF630F271CF79328B6C879DF7CDE0F70712220B1FBB9ACB48248D91F0E2B6E3BE40C2B
221E626E7E330D9D83CC0668F7308591E14C7D72B841A6F05F3FDC139EECC1536765650B55A9CEC6BBF54CCEC5C3AC9A0E39F48F237BD4C660CB1A8D250BB6C8C010FEC34CC3D91599271C7531330F12A3E44FAFD905D2C6 +decaps_KPrime: BD7256B242F404869D662F80BF677A16C0C6FC1568CCA5B64582A01A6A142D71 +keygen_ek: D2E69A05534A7232C5F1B766E93A5EE2EA1B26E860A3441ADEA91EDB782CABC8A5D011A21BC388E7F486F0B7993079AE3F1A7C85D27D0F492184D59062142B76A43734A90D556A95DC483DD82104ED58CA1571C39685827951434CC1001AA4C813261E4F93028E14CD08F768A454310C3B010C83B74D04A57BB977B3D8BCF3AAA78CA12B78F010D95134928A5E5D96A029B442A41888038B29C2F122B0B6B3AF121AEA29A05553BDF1DB607AFB17001860AF1823BCF03DB3B441DA163A28C523A5FB4669A64234A4BCD1217FF2635BD97680FF938DBCF10E9532A9A79A5B073A9E8DB2123D210FAEA200B664838E80071F2BA254AAC890A46E28EC342D92812B01593071657E7A3A4A75CB3D5279CE88405AC5ADACB2051E022EE0AC9BBFE32DEF98667ED347ADCB3930F3CAD031391B709A4E61B8DD4B3FB741B5BD60BF304015EE7546A24B59EADCA137C7125074726B7686EC551B7BC26BBDB20FC3783534E34EE1F1BC6B77AB49A6667846975778C3C536830450A3FA910259722F3F806E6EB4B9346763FEF0922BC4B6EB3826AFF24EADC6CF6E477C2E055CFB7A90A55C06D0B2A2F5116069E64A5B5078C0577BC8E7900EA71C341C02AD854EA5A01AF2A605CB2068D52438CDDC60B03882CC024D13045F2BA6B0F446AAA5958760617945371FD78C28A40677A6E72F513B9E0667A9BAF446C1BA931BA81834234792A2A2B2B3701F31B7CF467C80F1981141BB457793E1307091C48B5914646A60CE1A301543779D7C3342AD179796C2C440D99DF9D41B52E32625A82AA5F579A9920BFFBA964FA70DB259C85E68C813817B1347BF19814DA5E9364A4645E621923D955C211A55D355C816DA04730AA324085E622B51D6109B49F673ADD00E414755C8024AA0164F24556DED963D61143856CB4FF0567E3320730DBCBF12F66E2B70B20054A6DEA42614B50EF72B156F5149FC263DD7E039C55A3EE9827DF92C565D24C55E0A81C6494695344D948748AFBA9F762C0EA90BB724897902000775613949602C48C78A9440678C24086D326D79643BAF7036C66C7E026AAEFDA2807A60BD7FC91363BB0234A590984AA011F11D40268218A1588377B3D7671B8B99789919B86EE82B18EC22D4E80A1F27853D889419D460DEF7567AA4567969C43048C32B8462A9C9386EB3152A6976AA783CDD1A8C57A9B6BBD837A00624B58B4BA3DBB63B
B8200E7BC88881BEBDA925BCA028E291AA1C22539CD04F90090D7F74108C32B8022C1591C881E76304E2408190E20F09A54FC23420E2620E9D87A3108A94FEEA72D5AB7FCFB972E6561B1A7B062F1A682E020AA2562812B296547B917824CDB88C582B5A6890177BC70C91ACAC9ABE290AEB2C34A7E2368955CB456A345368ABE3B91B47FC30B0233A09BA79FB11238AC508CCE61095F854C23204A8D36BFC2C6E05A72AF5244B17C12101E01451570EB110567E850E79C000142441FE4160027545F6290E85451B80234A9406C390B0CEA3C8335D4C6F8550B544C9343E61BA1C8489D1B0399739168AF740A481B0F5C3372530CA06B508ECE838AB78BEE1E597A9B14F6AEC7A3BD1AA8D10BAC23B9802902CD529AB6EF54DB3110CFB561E7E6948E65281250416C349C8100B3B4D3D0F62ACAD8D161175B134F7564937CD +keygen_dk: 19D74AD5472A8B2BAAD2A56702C9B3B5510EF3924858061D57F90DD9A1A01FEC2F57C51A888805341B617C515539597750835C3ED7A033B039D72491332C5DF4A69B6DF26171877AD1E50AC50100BE4728786685DA7A739E843FF0D45922D7281E210D5E82B944652F4862CFB3D902DE60AFD0A164471B26144A1D7A38096503095911762EBA7962C4511D05A128F2781ECB3D1F5BB1244237611ABAB924991F8A2732E27032357920F197C7692D60A9444472258CB457C1B71B77995469F3A962F3ABA6699614FCCCEA741E21C600C4357BBFAB452927C3D441BF8ED73152F75C08F540E186ACCA3326F422C84B988D77E61AE61859CF8541F89209E4983040C5617654808852B649B899A399AEC2C8BBA8A542F345ABF2813F65E9A791D32CC2D76026FB8D0C94B657489ABB487DA4A2C0E3868D3CF47F1CBB2FA79C53CFF6264777C09B177C91315484D2B30B0CA21F55ADD23C57E1911C3F086BCAD21798486EB47B7C58577381C09F5252582D1B27A7D5B8E060CE78209CC82BAE4DA606800C8DB1268F7AD2B793A44F34612CCEA31CE7D796A65A2691D61500625F83E7BE57077EE9C1B8C1CAA137CC4B6573308C19668B24B01E966903ABBCB79B67BE0A3E3E058AADA189B9EA80359AC26F4C5C53735FE4FC35247337760CCA3529B8D266BB6C48010654CDBC5A3E9757524675ABC413130CC2701F28933EABB8392B0D6D059CFC3A30326C4FCC810B37A4748C1C53928A4913E48B186697162C33FFFB06DD5161C8639DB195C6CA64829B2B3A2E4C9683B66DF7FB1909904E00020DBA134E02A168D76AC076BB77D4DC8496B4BBE7B4690BA29B62A91ABE72BEF323A44C8903E482B60D99BA61D1BBCF9CB9673534C1D647662374EE2C7C5F0081BAD149F44206717684D9746B2048633AF7A68C6865FB590358D8CF821458369B0C31EB5
97CF5BE78EB480EA04E35FACC380372C8C0A04DE276B1A72121E596CBB25EF7536AD3804184A87BDFB5A769160BFBB0CA3C360790E5562BB78EFE0069C77483AD35CAC237C61DE78A7DB46FC917124CA17510DB7DA218890F448EF6318613A1C97C928E2B7B6A54617BCCB6CDF278AE542B56AD7BB5ECD8C46A66C4FA0950CE41352CB85711890458F299BF40BA6FF2C0713862268B5F08E49845B09443997AB29A62073C0D9818C020167D4749231C059E6F483F976817C90C20A9C937079C2D4BE30DA974A97E4BC53ED96A55169F4A23A3EA24BD8E01B8FAEB95D4E53FFFECB60802C388A40F4660540B1B1F8176C9811BB26A683CA789564A2940FCEB2CE6A92A1EE45EE4C31857C9B9B8B56A79D95A46CB393A31A2737BAFEA6C81066A672B34C10AA98957C91766B730036A56D940AA4EBCB758B08351E2C4FD19453BF3A6292A993D67C7ECC72F42F782E9EBAA1A8B3B0F567AB39421F6A67A6B8410FD94A721D365F1639E9DDABFD0A6CE1A4605BD2B1C9B977BD1EA32867368D6E639D019AC101853BC153C86F85280FC763BA24FB57A296CB12D32E08AB32C551D5A45A4A28F9ADC28F7A2900E25A40B5190B22AB19DFB246F42B24F97CCA9B09BEAD246E1734F446677B38B7522B780727C117440C9F1A024520C141A69CDD2E69A05534A7232C5F1B766E93A5EE2EA1B26E860A3441ADEA91EDB782CABC8A5D011A21BC388E7F486F0B7993079AE3F1A7C85D27D0F492184D59062142B76A43734A90D556A95DC483DD82104ED58CA1571C39685827951434CC1001AA4C813261E4F93028E14CD08F768A454310C3B010C83B74D04A57BB977B3D8BCF3AAA78CA12B78F010D95134928A5E5D96A029B442A41888038B29C2F122B0B6B3AF121AEA29A05553BDF1DB607AFB17001860AF1823BCF03DB3B441DA163A28C523A5FB4669A64234A4BCD1217FF2635BD97680FF938DBCF10E9532A9A79A5B073A9E8DB2123D210FAEA200B664838E80071F2BA254AAC890A46E28EC342D92812B01593071657E7A3A4A75CB3D5279CE88405AC5ADACB2051E022EE0AC9BBFE32DEF98667ED347ADCB3930F3CAD031391B709A4E61B8DD4B3FB741B5BD60BF304015EE7546A24B59EADCA137C7125074726B7686EC551B7BC26BBDB20FC3783534E34EE1F1BC6B77AB49A6667846975778C3C536830450A3FA910259722F3F806E6EB4B9346763FEF0922BC4B6EB3826AFF24EADC6CF6E477C2E055CFB7A90A55C06D0B2A2F5116069E64A5B5078C0577BC8E7900EA71C341C02AD854EA5A01AF2A605CB2068D52438CDDC60B03882CC024D13045F2BA6B0F446AAA5958760617945371FD78C28A40677A6E72F513B9E0667A9BAF446C1BA931BA81834234792A2A2B2B3701F31B7CF467C8
0F1981141BB457793E1307091C48B5914646A60CE1A301543779D7C3342AD179796C2C440D99DF9D41B52E32625A82AA5F579A9920BFFBA964FA70DB259C85E68C813817B1347BF19814DA5E9364A4645E621923D955C211A55D355C816DA04730AA324085E622B51D6109B49F673ADD00E414755C8024AA0164F24556DED963D61143856CB4FF0567E3320730DBCBF12F66E2B70B20054A6DEA42614B50EF72B156F5149FC263DD7E039C55A3EE9827DF92C565D24C55E0A81C6494695344D948748AFBA9F762C0EA90BB724897902000775613949602C48C78A9440678C24086D326D79643BAF7036C66C7E026AAEFDA2807A60BD7FC91363BB0234A590984AA011F11D40268218A1588377B3D7671B8B99789919B86EE82B18EC22D4E80A1F27853D889419D460DEF7567AA4567969C43048C32B8462A9C9386EB3152A6976AA783CDD1A8C57A9B6BBD837A00624B58B4BA3DBB63BB8200E7BC88881BEBDA925BCA028E291AA1C22539CD04F90090D7F74108C32B8022C1591C881E76304E2408190E20F09A54FC23420E2620E9D87A3108A94FEEA72D5AB7FCFB972E6561B1A7B062F1A682E020AA2562812B296547B917824CDB88C582B5A6890177BC70C91ACAC9ABE290AEB2C34A7E2368955CB456A345368ABE3B91B47FC30B0233A09BA79FB11238AC508CCE61095F854C23204A8D36BFC2C6E05A72AF5244B17C12101E01451570EB110567E850E79C000142441FE4160027545F6290E85451B80234A9406C390B0CEA3C8335D4C6F8550B544C9343E61BA1C8489D1B0399739168AF740A481B0F5C3372530CA06B508ECE838AB78BEE1E597A9B14F6AEC7A3BD1AA8D10BAC23B9802902CD529AB6EF54DB3110CFB561E7E6948E65281250416C349C8100B3B4D3D0F62ACAD8D161175B134F7564937CDECE9E246AAD11021A67B20EB8F7765AC2823A9D18C93EC282D6DBC53CD6DF57592AC7D1F83BAFAE6EE86FE00F95D813375772434860F5FF7D54FFC37399BC4CC +encaps_c: 
778D6B03791ACAF56CAAFCC78CEE5CBCA1DE8737E9C7FF4AE5F384D344E08223C74C824CB5848520517C7F0EA0645EB6F889517AE5216B0CF41DDC3F0D1DF9BC6E4DECB236A5EA8B214F64266D3CDE08E0CB00E5D91F586706B1EE533D20476F4423B78F916B1726EEEA959FFB9AC634D04A94D09923CB0D4E730CCA4144E7C4884921652DA4928C68E644F673CFC57D3E87CF5BE581A89F9CB8F0FCE2782D681E5CE88AF58458C3D63D807572DE5AA8E1FAF2DCD14EDB7349565B7D3271DDBEB0B6CC7AFE08635784311159733C46E5FDC5E0CD36CE5685ACFB1AFE50ABB46F447521E60D9C8F0E4CA28C190ABB40C365F412471E95A8EA396D4BD8070EEB1F02B07C825367AA1EC0F10C3862416BB21AD6CA748A86E9829EFC1A0499093C85176D37F574C75CF5EDFA8D920D3268CB34C6A4BB0002869BC05D7C8FCC0658D4A01EACD74557A37D98A763074752DFDD6429881CAFF577D3A048031BD52C4E9726398590F9519FD59405D6B3C307AFCB168A985785D954A6D1DC1EA92E1EB6F946A4D99DD6CA307ABFD8362FABA98BB264C69C5F555D60883CC56019FEB4E8000C48B7E68CD667F00B5250CEF293A4A9E778726E62F120361E21AB3140464CDC6ABDE9EA05198D8B3BB671B9111A2F317582847CA5015664F22CDB08C143187BDE2129B54F34160295D75FE9A494FD7E67AAA76B57AAFFD89D01A71DF5C8158620298D582BBEFA6D09AC412A99AA3BE9C383504948C43DD5AF4127B1435804F44BAFA142BFC2A95D95FB2EF0641ABE71064DE51D6B9EC50857B8EEF7F48036313D0E936763B8F7BDE69B064DD5761D80EA6F1A8B37565753C579BBB895EFB9FCB3FC5FA3362E3774F0F77140B973CAE587BAD2F3B566A9C25A969347E5C54F87F1105E9C074867D94077CCAE3ABEA54520EDB51D9DAABE7848E78FDF66E07E2E22B30251931E890BAF1F5E177D4D9CEC9E4969481FD7C1335A0ED5879F34EF4BB4F66C28803CEA162BA461506D52EB3AE16951922B06825186C3D4CE1B51F3C92F3C52F2D04D1F13B2B17C9EEB882CCE0EB88B7EA9A1CE4E37415CC84C7BC436A4628386CC77D9AFD207911BD9BFD8A7FA05C275BE0C4C6A8FC0A61BDA1D67AE33B5310BE1290DC71C1418EB5744BF2842C1652173A49A692E71FE43258A205B3CAAB90C0304A51E77D01B404A01FAE2F83AB80C5DBF6CF518C001F46A633FA169B1BDB77A9D0B1E0C007835C09F6ABBA96F3F53564DA508EE8861A483A81749D4A44672B1EF1605F29D168B74B736B4F13501D7AD1213118A7832E666A50BE8010D54322A526CF7A4E543A79D0D98E004FBEC76EA3F7E887BDBAF50DADFDDDF3FFECF6D3F77EA4B9B16DC754F4A68E5EF32F6A137E7C9E3C3E8C2E236C7EBC45D46EC1677A5A8BB
2668443B0BE8693DC257F13D8B9A90100B92B4D1761B819673832C32020671BFB3D0220A363E4BED6D649D3F7368CFE081E196A43D4708798E31BB2A2F61824674ABA2FC9DCD05DB84B8627AE11488886F921BC79AE1FD03 diff --git a/tests/PQC_Intermediate_Values/fetch_values.sh b/tests/PQC_Intermediate_Values/fetch_values.sh new file mode 100755 index 0000000000..ced9d554b4 --- /dev/null +++ b/tests/PQC_Intermediate_Values/fetch_values.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# SPDX-License-Identifier: MIT + +# This script fetches the NIST vectors for the ML-KEM-ipd and ML-DSA-ipd and extracts the values we use for testing + +wget https://csrc.nist.gov/csrc/media/Projects/post-quantum-cryptography/documents/example-files/PQC%20Intermediate%20Values.zip +unzip PQC_Intermediate_Values.zip + +file_keygen="PQC_Intermediate_Values/Key Generation -- " +file_signature="PQC_Intermediate_Values/Signature Generation -- " +file_verification="PQC_Intermediate_Values/Signature Verification -- " + +for VARIANT in "ML-DSA-44" "ML-DSA-65" "ML-DSA-87" +do + echo "Origin of the values: https://csrc.nist.gov/csrc/media/Projects/post-quantum-cryptography/documents/example-files/PQC%20Intermediate%20Values.zip" >> "$VARIANT.txt" + grep "seed: " "$file_keygen$VARIANT.txt" >> "$VARIANT.txt" + grep "message: " "$file_signature$VARIANT.txt" | sed 's/message: /sig_message: /g' >> "$VARIANT.txt" + grep "sk: " "$file_signature$VARIANT.txt" | sed 's/sk: /sig_sk: /g' >> "$VARIANT.txt" + grep "rnd: " "$file_signature$VARIANT.txt" >> "$VARIANT.txt" + + grep "signature: " "$file_verification$VARIANT.txt" | sed "s/signature: /verif_signature: /g" >> "$VARIANT.txt" + grep "pk: " "$file_verification$VARIANT.txt" | sed "s/pk: /verif_pk: /g" >> "$VARIANT.txt" + grep "message: " "$file_verification$VARIANT.txt" | sed "s/message: /verif_message: /g" >> "$VARIANT.txt" + + grep "pk: " "$file_keygen$VARIANT.txt" | sed "s/pk: /keygen_pk: /g" >> "$VARIANT.txt" + grep "sk: " "$file_keygen$VARIANT.txt" | sed 's/sk: /keygen_sk: /g' >> "$VARIANT.txt" + grep 
"signature: " "$file_signature$VARIANT.txt" | sed "s/signature: /sig_signature: /g" >> "$VARIANT.txt" +done + +file_keygen="PQC_Intermediate_Values/Key Generation -- " +file_encaps="PQC_Intermediate_Values/Encapsulation -- " +file_decaps="PQC_Intermediate_Values/Decapsulation -- " + +for VARIANT in "ML-KEM-512" "ML-KEM-768" "ML-KEM-1024" +do + echo "Origin of the values: https://csrc.nist.gov/csrc/media/Projects/post-quantum-cryptography/documents/example-files/PQC%20Intermediate%20Values.zip" >> "$VARIANT.txt" + grep "z: " "$file_keygen$VARIANT.txt" | sed 's/z: /keygen_z: /g' >> "$VARIANT.txt" + grep "d: " "$file_keygen$VARIANT.txt" | sed 's/d: /keygen_d: /g' >> "$VARIANT.txt" + + grep "m: " "$file_encaps$VARIANT.txt" | sed 's/m: /encaps_m: /g' >> "$VARIANT.txt" + grep "ek: " "$file_encaps$VARIANT.txt" | sed 's/ek: /encaps_ek: /g' >> "$VARIANT.txt" + grep "K: " "$file_encaps$VARIANT.txt" | sed 's/K: /encaps_K: /g' >> "$VARIANT.txt" + + grep "dk: " "$file_decaps$VARIANT.txt" | sed 's/dk: /decaps_dk: /g' >> "$VARIANT.txt" + grep "c: " "$file_decaps$VARIANT.txt" | sed 's/c: /decaps_c: /g' >> "$VARIANT.txt" + grep "KPrime: " "$file_decaps$VARIANT.txt" | sed 's/KPrime: /decaps_KPrime: /g' >> "$VARIANT.txt" + + grep "ek: " "$file_keygen$VARIANT.txt" | sed 's/ek: /keygen_ek: /g' >> "$VARIANT.txt" + grep "dk: " "$file_keygen$VARIANT.txt" | sed 's/dk: /keygen_dk: /g' >> "$VARIANT.txt" + + grep "c: " "$file_encaps$VARIANT.txt" | sed 's/c: /encaps_c: /g' >> "$VARIANT.txt" + +done \ No newline at end of file diff --git a/tests/constant_time/kem/issues.json b/tests/constant_time/kem/issues.json index ae0a6a065e..4431f4746f 100644 --- a/tests/constant_time/kem/issues.json +++ b/tests/constant_time/kem/issues.json @@ -24,5 +24,11 @@ "Kyber1024": [], "Kyber512": [], "Kyber768": [], + "ML-KEM-512-ipd": [], + "ML-KEM-768-ipd": [], + "ML-KEM-1024-ipd": [], + "ML-KEM-512": [], + "ML-KEM-768": [], + "ML-KEM-1024": [], "sntrup761": [] } diff --git a/tests/constant_time/kem/passes.json 
b/tests/constant_time/kem/passes.json index 824f1dae60..64f56c8ead 100644 --- a/tests/constant_time/kem/passes.json +++ b/tests/constant_time/kem/passes.json @@ -24,5 +24,11 @@ "Kyber1024": ["kyber"], "Kyber512": ["kyber"], "Kyber768": ["kyber"], + "ML-KEM-512-ipd": ["ml_kem"], + "ML-KEM-768-ipd": ["ml_kem"], + "ML-KEM-1024-ipd": ["ml_kem"], + "ML-KEM-512": ["ml_kem"], + "ML-KEM-768": ["ml_kem"], + "ML-KEM-1024": ["ml_kem"], "sntrup761": ["sntrup"] } diff --git a/tests/constant_time/kem/passes/ml_kem b/tests/constant_time/kem/passes/ml_kem new file mode 100644 index 0000000000..262ec9381b --- /dev/null +++ b/tests/constant_time/kem/passes/ml_kem @@ -0,0 +1,21 @@ +{ + Rejection sampling to produce public "A" matrix + Memcheck:Cond + fun:rej_uniform + fun:pqcrystals_ml_kem*_ref_gen_matrix + fun:pqcrystals_ml_kem*_ref_indcpa_* +} +{ + Rejection sampling to produce public "A" matrix + Memcheck:Cond + ... + fun:pqcrystals_ml_kem*_avx2_gen_matrix + fun:pqcrystals_ml_kem*_avx2_indcpa_* +} +{ + Rejection sampling to produce public "A" matrix + Memcheck:Value8 + ... 
+ fun:pqcrystals_ml_kem*_avx2_gen_matrix + fun:pqcrystals_ml_kem*_avx2_indcpa_* +} diff --git a/tests/constant_time/sig/issues.json b/tests/constant_time/sig/issues.json index 37dc988271..2cb9f200bf 100644 --- a/tests/constant_time/sig/issues.json +++ b/tests/constant_time/sig/issues.json @@ -5,28 +5,34 @@ "Dilithium5": [], "Falcon-1024": ["falcon"], "Falcon-512": ["falcon"], - "SPHINCS+-SHA2-128f-robust": ["sphincs"], - "SPHINCS+-SHA2-128f-simple": ["sphincs"], - "SPHINCS+-SHA2-128s-robust": ["sphincs"], - "SPHINCS+-SHA2-128s-simple": ["sphincs"], - "SPHINCS+-SHA2-192f-robust": ["sphincs"], - "SPHINCS+-SHA2-192f-simple": ["sphincs"], - "SPHINCS+-SHA2-192s-robust": ["sphincs"], - "SPHINCS+-SHA2-192s-simple": ["sphincs"], - "SPHINCS+-SHA2-256f-robust": ["sphincs"], - "SPHINCS+-SHA2-256f-simple": ["sphincs"], - "SPHINCS+-SHA2-256s-robust": ["sphincs"], - "SPHINCS+-SHA2-256s-simple": ["sphincs"], - "SPHINCS+-SHAKE-128f-robust": ["sphincs"], - "SPHINCS+-SHAKE-128f-simple": ["sphincs"], - "SPHINCS+-SHAKE-128s-robust": ["sphincs"], - "SPHINCS+-SHAKE-128s-simple": ["sphincs"], - "SPHINCS+-SHAKE-192f-robust": ["sphincs"], - "SPHINCS+-SHAKE-192f-simple": ["sphincs"], - "SPHINCS+-SHAKE-192s-robust": ["sphincs"], - "SPHINCS+-SHAKE-192s-simple": ["sphincs"], - "SPHINCS+-SHAKE-256f-robust": ["sphincs"], - "SPHINCS+-SHAKE-256f-simple": ["sphincs"], - "SPHINCS+-SHAKE-256s-robust": ["sphincs"], - "SPHINCS+-SHAKE-256s-simple": ["sphincs"] + "ML-DSA-44-ipd": [], + "ML-DSA-65-ipd": [], + "ML-DSA-87-ipd": [], + "ML-DSA-44": [], + "ML-DSA-65": [], + "ML-DSA-87": [], + "SPHINCS+-SHA256-128f-robust": ["sphincs"], + "SPHINCS+-SHA256-128f-simple": ["sphincs"], + "SPHINCS+-SHA256-128s-robust": ["sphincs"], + "SPHINCS+-SHA256-128s-simple": ["sphincs"], + "SPHINCS+-SHA256-192f-robust": ["sphincs"], + "SPHINCS+-SHA256-192f-simple": ["sphincs"], + "SPHINCS+-SHA256-192s-robust": ["sphincs"], + "SPHINCS+-SHA256-192s-simple": ["sphincs"], + "SPHINCS+-SHA256-256f-robust": ["sphincs"], + 
"SPHINCS+-SHA256-256f-simple": ["sphincs"], + "SPHINCS+-SHA256-256s-robust": ["sphincs"], + "SPHINCS+-SHA256-256s-simple": ["sphincs"], + "SPHINCS+-SHAKE256-128f-robust": ["sphincs"], + "SPHINCS+-SHAKE256-128f-simple": ["sphincs"], + "SPHINCS+-SHAKE256-128s-robust": ["sphincs"], + "SPHINCS+-SHAKE256-128s-simple": ["sphincs"], + "SPHINCS+-SHAKE256-192f-robust": ["sphincs"], + "SPHINCS+-SHAKE256-192f-simple": ["sphincs"], + "SPHINCS+-SHAKE256-192s-robust": ["sphincs"], + "SPHINCS+-SHAKE256-192s-simple": ["sphincs"], + "SPHINCS+-SHAKE256-256f-robust": ["sphincs"], + "SPHINCS+-SHAKE256-256f-simple": ["sphincs"], + "SPHINCS+-SHAKE256-256s-robust": ["sphincs"], + "SPHINCS+-SHAKE256-256s-simple": ["sphincs"] } diff --git a/tests/constant_time/sig/passes.json b/tests/constant_time/sig/passes.json index 9d4ea8ba93..fee99dcfcb 100644 --- a/tests/constant_time/sig/passes.json +++ b/tests/constant_time/sig/passes.json @@ -5,6 +5,12 @@ "Dilithium5": ["dilithium", "dilithium-avx2", "dilithium-aarch64"], "Falcon-1024": ["falcon_keygen", "falcon_sign"], "Falcon-512": ["falcon_keygen", "falcon_sign"], + "ML-DSA-44-ipd": ["ml_dsa", "ml_dsa-avx2"], + "ML-DSA-65-ipd": ["ml_dsa", "ml_dsa-avx2"], + "ML-DSA-87-ipd": ["ml_dsa", "ml_dsa-avx2"], + "ML-DSA-44": ["ml_dsa", "ml_dsa-avx2"], + "ML-DSA-65": ["ml_dsa", "ml_dsa-avx2"], + "ML-DSA-87": ["ml_dsa", "ml_dsa-avx2"], "SPHINCS+-SHA2-128f-robust": ["sphincs", "sphincs-sha2-avx2"], "SPHINCS+-SHA2-128f-simple": ["sphincs", "sphincs-sha2-avx2"], "SPHINCS+-SHA2-128s-robust": ["sphincs", "sphincs-sha2-avx2"], diff --git a/tests/constant_time/sig/passes/ml_dsa b/tests/constant_time/sig/passes/ml_dsa new file mode 100644 index 0000000000..f38940135b --- /dev/null +++ b/tests/constant_time/sig/passes/ml_dsa @@ -0,0 +1,80 @@ +{ + Rejection sampling for uniformly distributed public A matrix + Memcheck:Cond + fun:rej_uniform + fun:pqcrystals_ml_dsa*_ref_poly_uniform + fun:pqcrystals_ml_dsa*_ref_polyvec_matrix_expand +} +{ + Rejection sampling for s1 
and s2 + Memcheck:Cond + fun:rej_eta + fun:pqcrystals_ml_dsa*_ref_poly_uniform_eta + fun:pqcrystals_ml_dsa*_ref_polyvec*_uniform_eta + fun:pqcrystals_ml_dsa*_ref_keypair +} +{ + Rejection sampling for y + Memcheck:Cond + fun:rej_gamma1m1 + fun:pqcrystals_ml_dsa*_ref_poly_uniform_gamma1m1 + fun:pqcrystals_ml_dsa*_ref_signature +} +{ + Rejection sampling for challenge + Memcheck:Cond + fun:pqcrystals_ml_dsa*_ref_poly_challenge + fun:pqcrystals_ml_dsa*_ref_signature +} +{ + Rejection sampling for challenge + Memcheck:Value8 + fun:pqcrystals_ml_dsa*_ref_poly_challenge + fun:pqcrystals_ml_dsa*_ref_signature +} +{ + Rejection sampling for signature distribution + Memcheck:Cond + ... + src:sign.c:154 # Call to polyvecl_chknorm + # fun:pqcrystals_ml_dsa*_ref_signature +} +{ + Rejection sampling for signature distribution + Memcheck:Cond + ... + src:sign.c:163 # Call to polyveck_chknorm + # fun:pqcrystals_ml_dsa*_ref_signature +} +{ + Rejection sampling for signature distribution + Memcheck:Cond + ... + src:sign.c:170 # Call to polyveck_chknorm + # fun:pqcrystals_ml_dsa*_ref_signature +} +{ + Hint does not need to be computed in constant time + Memcheck:Cond + ... + src:sign.c:174 # Call to polyveck_make_hint + # fun:pqcrystals_ml_dsa*_ref_signature +} +{ + Rejection sampling for hint + Memcheck:Cond + ... + src:sign.c:175 # Checking number of 1 bits in hint + # fun:pqcrystals_ml_dsa*_ref_signature +} +{ + Packing routines do not need to be constant time + Memcheck:Cond + fun:pqcrystals_ml_dsa*_ref_pack_sig + fun:pqcrystals_ml_dsa*_ref_signature +} +{ + Verification is not done in constant time + Memcheck:Cond + fun:pqcrystals_ml_dsa*_ref_verify +} diff --git a/tests/constant_time/sig/passes/ml_dsa-avx2 b/tests/constant_time/sig/passes/ml_dsa-avx2 new file mode 100644 index 0000000000..a9ad9fb3d1 --- /dev/null +++ b/tests/constant_time/sig/passes/ml_dsa-avx2 @@ -0,0 +1,155 @@ +{ + Rejection sampling for uniformly distributed public A matrix + Memcheck:Cond + ... 
+ fun:pqcrystals_ml_dsa*_avx2_poly_uniform_4x + fun:pqcrystals_ml_dsa*_avx2_polyvec_matrix_expand_row* +} +{ + Rejection sampling for uniformly distributed public A matrix + Memcheck:Value8 + ... + fun:pqcrystals_ml_dsa*_avx2_poly_uniform_4x + fun:pqcrystals_ml_dsa*_avx2_polyvec_matrix_expand_row* +} + + +{ + Rejection sampling for s1 and s2 + Memcheck:Cond + ... + fun:pqcrystals_ml_dsa*_avx2_poly_uniform_eta_4x + fun:pqcrystals_ml_dsa*_avx2_keypair +} +{ + Rejection sampling for s1 and s2 + Memcheck:Value8 + ... + fun:pqcrystals_ml_dsa*_avx2_poly_uniform_eta_4x + fun:pqcrystals_ml_dsa*_avx2_keypair +} + +{ + Rejection sampling for y + Memcheck:Cond + ... + fun:pqcrystals_ml_dsa*_avx2_poly_uniform_gamma1m1_4x + fun:pqcrystals_ml_dsa*_avx2_signature +} +{ + Rejection sampling for y + Memcheck:Value8 + ... + fun:pqcrystals_ml_dsa*_avx2_poly_uniform_gamma1m1_4x + fun:pqcrystals_ml_dsa*_avx2_signature +} +{ + Rejection sampling for s1 and s2 + Memcheck:Cond + ... + fun:pqcrystals_ml_dsa*_avx2_poly_uniform_eta_preinit + fun:pqcrystals_ml_dsa*_avx2_poly_uniform_eta + fun:pqcrystals_ml_dsa*_avx2_keypair +} +{ + Rejection sampling for s1 and s2 + Memcheck:Value8 + ... + fun:pqcrystals_ml_dsa*_avx2_poly_uniform_eta_preinit + fun:pqcrystals_ml_dsa*_avx2_poly_uniform_eta + fun:pqcrystals_ml_dsa*_avx2_keypair +} +{ + Rejection sampling for y + Memcheck:Cond + ... + fun:pqcrystals_ml_dsa*_avx2_poly_uniform_gamma1m1_preinit + fun:pqcrystals_ml_dsa*_avx2_poly_uniform_gamma1m1 + fun:pqcrystals_ml_dsa*_avx2_signature +} +{ + Rejection sampling for y + Memcheck:Value8 + ... 
+ fun:pqcrystals_ml_dsa*_avx2_poly_uniform_gamma1m1_preinit + fun:pqcrystals_ml_dsa*_avx2_poly_uniform_gamma1m1 + fun:pqcrystals_ml_dsa*_avx2_signature +} +{ + Rejection sampling for challenge + Memcheck:Cond + fun:pqcrystals_ml_dsa*_avx2_poly_challenge +} +{ + Rejection sampling for challenge + Memcheck:Value8 + fun:pqcrystals_ml_dsa*_avx2_poly_challenge +} +{ + Rejection sampling for signature distribution + Memcheck:Cond + ... + src:sign.c:240 # Call to poly_chknorm + # fun:pqcrystals_ml_dsa*_avx2_signature +} +{ + Rejection sampling for signature distribution + Memcheck:Cond + ... + src:sign.c:255 # Call to poly_chknorm + # fun:pqcrystals_ml_dsa*_avx2_signature +} +{ + Rejection sampling for signature distribution + Memcheck:Cond + ... + src:sign.c:262 # Call to poly_chknorm + # fun:pqcrystals_ml_dsa*_avx2_signature +} +{ + Hint does not need to be computed in constant time + Memcheck:Cond + ... + fun:pqcrystals_ml_dsa*_avx2_poly_make_hint + src:sign.c:266 # fun:pqcrystals_ml_dsa*_ref_signature +} +{ + Hint does not need to be computed in constant time + Memcheck:Value8 + ... + fun:pqcrystals_ml_dsa*_avx2_poly_make_hint + src:sign.c:266 # fun:pqcrystals_ml_dsa*_ref_signature +} +{ + Rejection sampling for hint + Memcheck:Cond + ... + src:sign.c:267 # Checking number of 1 bits in hint + # fun:pqcrystals_ml_dsa*_avx2_signature +} +{ + Hint positions are not secret + Memcheck:Cond + ... + src:sign.c:271 # memcpy + # fun:pqcrystals_ml_dsa*_avx2_signature +} +{ + Hint positions are not secret + Memcheck:Value8 + ... 
+ src:sign.c:271 # memcpy + # fun:pqcrystals_ml_dsa*_avx2_signature +} +{ + Packing routines do not need to be constant time + Memcheck:Cond + fun:pqcrystals_ml_dsa*_avx2_pack_sig + fun:pqcrystals_ml_dsa*_avx2_signature +} +{ + Verification is not done in constant time + Memcheck:Cond + fun:pqcrystals_ml_dsa*_avx2_verify +} + diff --git a/tests/kat_sig.c b/tests/kat_sig.c index 9a3542cf49..db70d1dd33 100644 --- a/tests/kat_sig.c +++ b/tests/kat_sig.c @@ -62,6 +62,36 @@ OQS_STATUS combine_message_signature(uint8_t **signed_msg, size_t *signed_msg_le memcpy(*signed_msg, signature, signature_len); memcpy(*signed_msg + signature_len, msg, msg_len); return OQS_SUCCESS; + } else if (0 == strcmp(sig->method_name, "ML-DSA-44-ipd") || 0 == strcmp(sig->method_name, "ML-DSA-44")) { + // signed_msg = signature || msg + *signed_msg_len = signature_len + msg_len; + *signed_msg = malloc(*signed_msg_len); + if (*signed_msg == NULL) { + return OQS_ERROR; + } + memcpy(*signed_msg, signature, signature_len); + memcpy(*signed_msg + signature_len, msg, msg_len); + return OQS_SUCCESS; + } else if (0 == strcmp(sig->method_name, "ML-DSA-65-ipd") || 0 == strcmp(sig->method_name, "ML-DSA-65")) { + // signed_msg = signature || msg + *signed_msg_len = signature_len + msg_len; + *signed_msg = malloc(*signed_msg_len); + if (*signed_msg == NULL) { + return OQS_ERROR; + } + memcpy(*signed_msg, signature, signature_len); + memcpy(*signed_msg + signature_len, msg, msg_len); + return OQS_SUCCESS; + } else if (0 == strcmp(sig->method_name, "ML-DSA-87-ipd") || 0 == strcmp(sig->method_name, "ML-DSA-87")) { + // signed_msg = signature || msg + *signed_msg_len = signature_len + msg_len; + *signed_msg = malloc(*signed_msg_len); + if (*signed_msg == NULL) { + return OQS_ERROR; + } + memcpy(*signed_msg, signature, signature_len); + memcpy(*signed_msg + signature_len, msg, msg_len); + return OQS_SUCCESS; } else if (0 == strcmp(sig->method_name, "Falcon-512")) { // signed_msg = sig_len (2 bytes, big endian) 
|| nonce (40 bytes) || msg || 0x29 || sig
 		const uint16_t signature_len_uint16 = (uint16_t)signature_len;
diff --git a/tests/test_alg_info.py b/tests/test_alg_info.py
index 09b57fe808..bbe30c4b39 100644
--- a/tests/test_alg_info.py
+++ b/tests/test_alg_info.py
@@ -23,7 +23,7 @@ def test_alg_info_kem(kem_name):
     # find the parameter set in the datasheet
     foundit = False
     for parameter_set in datasheet['parameter-sets']:
-        if parameter_set['name'] == kem_name:
+        if parameter_set['name'] == kem_name or ('alias' in parameter_set and parameter_set['alias'] == kem_name):
             foundit = True
             # check that the values match
             assert(alg_info['claimed-nist-level'] == parameter_set['claimed-nist-level'])
@@ -52,7 +52,7 @@ def test_alg_info_sig(sig_name):
     # find the parameter set in the datasheet
     foundit = False
     for parameter_set in datasheet['parameter-sets']:
-        if parameter_set['name'] == sig_name:
+        if parameter_set['name'] == sig_name or ('alias' in parameter_set and parameter_set['alias'] == sig_name):
             foundit = True
             # check that the values match
             assert(alg_info['claimed-nist-level'] == parameter_set['claimed-nist-level'])
diff --git a/tests/test_kem_vectors.sh b/tests/test_kem_vectors.sh
new file mode 100644
index 0000000000..0e64ade01f
--- /dev/null
+++ b/tests/test_kem_vectors.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# SPDX-License-Identifier: MIT
+
+# Checks one KEM against a test-vector file by running the vectors_kem binary.
+# Usage: test_kem_vectors.sh <algorithm name> <vector file> <build directory>
+
+file=$2
+build_dir=$3
+
+# input part
+keygen_z=$(grep "keygen_z: " "$file" | sed 's/keygen_z: //g')
+keygen_d=$(grep "keygen_d: " "$file" | sed 's/keygen_d: //g')
+
+encaps_m=$(grep "encaps_m: " "$file" | sed 's/encaps_m: //g')
+encaps_ek=$(grep "encaps_ek: " "$file" | sed 's/encaps_ek: //g')
+encaps_k=$(grep "encaps_K: " "$file" | sed 's/encaps_K: //g')
+
+decaps_dk=$(grep "decaps_dk: " "$file" | sed 's/decaps_dk: //g')
+decaps_c=$(grep "decaps_c: " "$file" | sed 's/decaps_c: //g')
+decaps_kprime=$(grep "decaps_KPrime: " "$file" | sed 's/decaps_KPrime: //g')
+
+# KAT part
+keygen_pk=$(grep "keygen_ek: " "$file")
+keygen_sk=$(grep "keygen_dk: " "$file")
+
+encaps_c=$(grep "encaps_c: " "$file")
+encaps_K=$(grep "encaps_K: " "$file")
+
+# The RNG stream handed to vectors_kem is z || d || m.
+output=$("$build_dir"/tests/vectors_kem "$1" "$keygen_z$keygen_d$encaps_m" "$encaps_ek" "$encaps_k" "$decaps_dk" "$decaps_c" "$decaps_kprime")
+if [ $? != 0 ]; then
+    echo "$output"
+    exit 1
+fi
+
+# Parse output: ek, dk, c, K
+output_pk=$(echo "$output" | grep "ek: " | sed 's/ek: /keygen_ek: /g')
+output_sk=$(echo "$output" | grep "dk: " | sed 's/dk: /keygen_dk: /g')
+output_c=$(echo "$output" | grep "c: " | sed 's/c: /encaps_c: /g')
+output_K=$(echo "$output" | grep "K: " | sed 's/K: /encaps_K: /g')
+
+if [ "$keygen_pk" != "$output_pk" ]; then
+    echo "keygen_pk mismatch for $1"
+    printf '%s\n%s\n' "$keygen_pk" "$output_pk"
+    exit 1
+elif [ "$keygen_sk" != "$output_sk" ]; then
+    echo "keygen_sk mismatch for $1"
+    exit 1
+elif [ "$encaps_c" != "$output_c" ]; then
+    echo "encaps_c mismatch for $1"
+    printf '%s\n%s\n' "$encaps_c" "$output_c"
+    exit 1
+elif [ "$encaps_K" != "$output_K" ]; then
+    echo "encaps_K mismatch for $1"
+    exit 1
+else
+    echo "Vector tests succeeded for $1"
+fi
diff --git a/tests/test_leaks.py b/tests/test_leaks.py
index 9522cce64d..e0e8f395d3 100644
--- a/tests/test_leaks.py
+++ b/tests/test_leaks.py
@@ -3,6 +3,7 @@
 import helpers
 import os
 import pytest
+import re
 import sys
 
 @helpers.filtered_test
diff --git a/tests/test_sig_vectors.sh b/tests/test_sig_vectors.sh
new file mode 100644
index 0000000000..4e859394f3
--- /dev/null
+++ b/tests/test_sig_vectors.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+# SPDX-License-Identifier: MIT
+
+# Checks one signature scheme against a test-vector file by running the vectors_sig binary.
+# Usage: test_sig_vectors.sh <algorithm name> <vector file> <build directory>
+
+file=$2
+build_dir=$3
+
+# input part
+prng_output_stream=$(grep "seed: " "$file" | sed 's/seed: //g')
+
+sig_msg=$(grep "sig_message: " "$file" | sed 's/sig_message: //g')
+sig_sk=$(grep "sig_sk: " "$file" | sed 's/sig_sk: //g')
+sig_rnd=$(grep "rnd: " "$file" | sed "s/rnd: //g")
+
+verif_sig=$(grep "verif_signature: " "$file" | sed "s/verif_signature: //g")
+verif_pk=$(grep "verif_pk: " "$file" | sed "s/verif_pk: //g")
+verif_msg=$(grep "verif_message: " "$file" | sed "s/verif_message: //g")
+
+# KAT part
+keygen_pk=$(grep "keygen_pk: " "$file")
+keygen_sk=$(grep "keygen_sk: " "$file")
+sig_signature=$(grep "sig_signature: " "$file")
+
+# The RNG stream handed to vectors_sig is seed || rnd.
+output=$("$build_dir"/tests/vectors_sig "$1" "$prng_output_stream$sig_rnd" "$sig_msg" "$sig_sk" "$verif_sig" "$verif_pk" "$verif_msg")
+if [ $? != 0 ]; then
+    echo "$output"
+    exit 1
+fi
+
+# Parse output: pk, sk, signature
+output_pk=$(echo "$output" | grep "pk: " | sed "s/pk: /keygen_pk: /g")
+output_sk=$(echo "$output" | grep "sk: " | sed "s/sk: /keygen_sk: /g")
+output_signature=$(echo "$output" | grep "signature: " | sed "s/signature: /sig_signature: /g")
+
+if [ "$keygen_pk" != "$output_pk" ]; then
+    echo "keygen_pk mismatch for $1"
+    exit 1
+elif [ "$keygen_sk" != "$output_sk" ]; then
+    echo "keygen_sk mismatch for $1"
+    exit 1
+elif [ "$sig_signature" != "$output_signature" ]; then
+    echo "sig_signature mismatch for $1"
+    exit 1
+else
+    echo "Vector tests succeeded for $1"
+fi
diff --git a/tests/test_vectors.py b/tests/test_vectors.py
new file mode 100644
index 0000000000..2e3cc14b82
--- /dev/null
+++ b/tests/test_vectors.py
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: MIT
+
+import helpers
+import os
+import pytest
+import re
+import sys
+
+@helpers.filtered_test
+@pytest.mark.skipif(sys.platform.startswith("win"), reason="Not needed on Windows")
+@pytest.mark.parametrize('kem_name', helpers.available_kems_by_name())
+def test_vectors_kem(kem_name):
+    """Run tests/test_vectors.sh for one KEM; skip when the script reports the name as not supported."""
+    if not(helpers.is_kem_enabled_by_name(kem_name)): pytest.skip('Not enabled')
+    result = helpers.run_subprocess(
+        ['tests/test_vectors.sh', kem_name],
+    )
+    if kem_name + " not supported" in result: pytest.skip("Not supported")
+
+@helpers.filtered_test
+@pytest.mark.skipif(sys.platform.startswith("win"), reason="Not needed on Windows")
+@pytest.mark.parametrize('sig_name', helpers.available_sigs_by_name())
+def test_vectors_sig(sig_name):
+    """Run tests/test_vectors.sh for one signature scheme; skip when the script reports the name as not supported."""
+    if not(helpers.is_sig_enabled_by_name(sig_name)): pytest.skip('Not enabled')
+    result = helpers.run_subprocess(
+        ['tests/test_vectors.sh', sig_name],
+    )
+    if sig_name + " not supported" in result: pytest.skip("Not supported")
+
+if __name__ == "__main__":
+    import sys
+    pytest.main(sys.argv)
diff --git a/tests/test_vectors.sh b/tests/test_vectors.sh
new file mode 100755
index 0000000000..e74523457d
--- /dev/null
+++ b/tests/test_vectors.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# SPDX-License-Identifier: MIT
+
+# Runs the vector test for one algorithm name (either the "-ipd" name or its alias).
+# Usage: tests/test_vectors.sh <algorithm name>
+# The build directory is taken from OQS_BUILD_DIR and defaults to "build".
+
+build_dir="${OQS_BUILD_DIR:-build}"
+
+case "$1" in
+    ML-DSA-44-ipd|ML-DSA-44|ML-DSA-65-ipd|ML-DSA-65|ML-DSA-87-ipd|ML-DSA-87)
+        runner=tests/test_sig_vectors.sh
+        ;;
+    ML-KEM-512-ipd|ML-KEM-512|ML-KEM-768-ipd|ML-KEM-768|ML-KEM-1024-ipd|ML-KEM-1024)
+        runner=tests/test_kem_vectors.sh
+        ;;
+    *)
+        # tests/test_vectors.py searches the output for this message to skip the test.
+        echo "$1 not supported"
+        exit 0
+        ;;
+esac
+
+# Aliases share the vector file and are run through the "-ipd" implementation.
+param_set="${1%-ipd}"
+scheme_name="${param_set}-ipd"
+file="tests/PQC_Intermediate_Values/${param_set}.txt"
+
+sh "$runner" "$scheme_name" "$file" "$build_dir" || exit 1
diff --git a/tests/vectors_kem.c b/tests/vectors_kem.c
new file mode 100644
index 0000000000..638652928c
--- /dev/null
+++ b/tests/vectors_kem.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: MIT
+
+// This tests the test vectors published by NIST CAVP
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <oqs/oqs.h>
+
+#include "system_info.c"
+
+/* Read cursor into the caller-supplied byte stream that stands in for the RNG. */
+struct {
+	const uint8_t *pos;
+} prng_state = {
+	.pos = 0
+};
+
+/* Displays hexadecimal strings */
+static void OQS_print_hex_string(const char *label, const uint8_t *str, size_t len) {
+	printf("%-20s (%4zu bytes): ", label, len);
+	for (size_t i = 0; i < (len); i++) {
+		printf("%02X", str[i]);
+	}
+	printf("\n");
+}
+
+/* Writes S followed by the L bytes of A as upper-case hex ("00" when L is 0) and a newline. */
+static void fprintBstr(FILE *fp, const char *S, const uint8_t *A, size_t L) {
+	size_t i;
+	fprintf(fp, "%s", S);
+	for (i = 0; i < L; i++) {
+		fprintf(fp, "%02X", A[i]);
+	}
+	if (L == 0) {
+		fprintf(fp, "00");
+	}
+	fprintf(fp, "\n");
+}
+
+/* Converts one hex digit to its value; a non-hex character is reported on stderr and yields 0. */
+static uint8_t hexCharToDecimal(char c) {
+	if (c >= '0' && c <= '9') {
+		return (uint8_t) (c - '0');
+	} else if (c >= 'a' && c <= 'f') {
+		return (uint8_t) (c - 'a' + 10);
+	} else if (c >= 'A' && c <= 'F') {
+		return (uint8_t) (c - 'A' + 10);
+	} else {
+		fprintf(stderr, "Invalid hex character: %c\n", c);
+		return 0;
+	}
+}
+
+/* Decodes hexString into byteArray (strlen / 2 bytes, caller-allocated); exits on odd length. */
+static void hexStringToByteArray(const char *hexString, uint8_t *byteArray) {
+	size_t len = strlen(hexString);
+
+	if (len % 2 != 0) {
+		fprintf(stderr, "Hex string must have an even number of characters\n");
+		exit(EXIT_FAILURE);
+	}
+
+	for (size_t i = 0, j = 0; i < len; i += 2, j++) {
+		byteArray[j] = (uint8_t) ((hexCharToDecimal(hexString[i]) << 4) | hexCharToDecimal(hexString[i + 1]));
+	}
+}
+
+/* ML-KEM-specific functions */
+static inline bool is_ml_kem(const char *method_name) {
+	return (0 == strcmp(method_name, OQS_KEM_alg_ml_kem_512_ipd))
+	       || (0 == strcmp(method_name, OQS_KEM_alg_ml_kem_768_ipd))
+	       || (0 == strcmp(method_name, OQS_KEM_alg_ml_kem_1024_ipd))
+	       || (0 == strcmp(method_name, OQS_KEM_alg_ml_kem_512))
+	       || (0 == strcmp(method_name, OQS_KEM_alg_ml_kem_768))
+	       || (0 == strcmp(method_name, OQS_KEM_alg_ml_kem_1024));
+}
+
+/* Points the replacement RNG at entropy_input; personalization_string is ignored. */
+static void MLKEM_randombytes_init(const uint8_t *entropy_input, const uint8_t *personalization_string) {
+	(void) personalization_string;
+	prng_state.pos = entropy_input;
+}
+
+/* Copies the next bytes_to_read bytes of the stream. No bounds check: the stream must be long enough. */
+static void MLKEM_randombytes(uint8_t *random_array, size_t bytes_to_read) {
+	memcpy(random_array, prng_state.pos, bytes_to_read);
+	prng_state.pos += bytes_to_read;
+}
+
+/* Detaches the replacement RNG from the stream. */
+static void MLKEM_randombytes_free(void) {
+	prng_state.pos = 0;
+}
+
+/*
+ * Runs key generation, encapsulation and decapsulation for method_name with
+ * the RNG replaced by prng_output_stream, printing "ek: ", "dk: ", "c: " and
+ * "K: " lines to stdout. Encapsulation against encaps_pk must reproduce
+ * encaps_K, and decapsulating decaps_ciphertext with decaps_sk must
+ * reproduce decaps_kprime. Returns OQS_SUCCESS on success or when the
+ * algorithm is not enabled, OQS_ERROR otherwise.
+ */
+OQS_STATUS kem_vector(const char *method_name,
+                      uint8_t *prng_output_stream,
+                      const uint8_t *encaps_pk, const uint8_t *encaps_K,
+                      const uint8_t *decaps_sk, const uint8_t *decaps_ciphertext, const uint8_t *decaps_kprime) {
+
+	uint8_t *entropy_input;
+	FILE *fh = NULL;
+	OQS_KEM *kem = NULL;
+	uint8_t *public_key = NULL;
+	uint8_t *secret_key = NULL;
+	uint8_t *ss_encaps = NULL;
+	uint8_t *ct_encaps = NULL;
+	uint8_t *ss_decaps = NULL;
+	OQS_STATUS rc, ret = OQS_ERROR;
+	int rv;
+
+	void (*randombytes_init)(const uint8_t *, const uint8_t *) = NULL;
+	void (*randombytes_free)(void) = NULL;
+
+	kem = OQS_KEM_new(method_name);
+	if (kem == NULL) {
+		printf("[vectors_kem] %s was not enabled at compile-time.\n", method_name);
+		goto algo_not_enabled;
+	}
+
+	/* Reject missing inputs before the RNG is pointed at them or anything is allocated. */
+	if ((prng_output_stream == NULL) || (encaps_pk == NULL) || (encaps_K == NULL) || (decaps_sk == NULL) || (decaps_ciphertext == NULL) || (decaps_kprime == NULL)) {
+		fprintf(stderr, "[vectors_kem] %s ERROR: inputs NULL!\n", method_name);
+		goto err;
+	}
+
+	if (is_ml_kem(method_name)) {
+		OQS_randombytes_custom_algorithm(&MLKEM_randombytes);
+		randombytes_init = &MLKEM_randombytes_init;
+		randombytes_free = &MLKEM_randombytes_free;
+		entropy_input = (uint8_t *) prng_output_stream;
+	} else {
+		// Only ML-KEM supported
+		goto err;
+	}
+
+	randombytes_init(entropy_input, NULL);
+
+	fh = stdout;
+
+	public_key = malloc(kem->length_public_key);
+	secret_key = malloc(kem->length_secret_key);
+	ss_encaps = malloc(kem->length_shared_secret);
+	ct_encaps = malloc(kem->length_ciphertext);
+	ss_decaps = malloc(kem->length_shared_secret);
+	if ((public_key == NULL) || (secret_key == NULL) || (ss_encaps == NULL) || (ct_encaps == NULL) || (ss_decaps == NULL)) {
+		fprintf(stderr, "[vectors_kem] %s ERROR: malloc failed!\n", method_name);
+		goto err;
+	}
+
+	rc = OQS_KEM_keypair(kem, public_key, secret_key);
+	if (rc != OQS_SUCCESS) {
+		fprintf(stderr, "[vectors_kem] %s ERROR: OQS_KEM_keypair failed!\n", method_name);
+		goto err;
+	}
+	fprintBstr(fh, "ek: ", public_key, kem->length_public_key);
+	fprintBstr(fh, "dk: ", secret_key, kem->length_secret_key);
+
+	rc = OQS_KEM_encaps(kem, ct_encaps, ss_encaps, encaps_pk);
+	if (rc != OQS_SUCCESS) {
+		fprintf(stderr, "[vectors_kem] %s ERROR: OQS_KEM_encaps failed!\n", method_name);
+		goto err;
+	}
+
+	fprintBstr(fh, "c: ", ct_encaps, kem->length_ciphertext);
+	fprintBstr(fh, "K: ", ss_encaps, kem->length_shared_secret);
+
+	/* Checked only after c and K are printed so that a mismatch still leaves the values in the output. */
+	rv = memcmp(ss_encaps, encaps_K, kem->length_shared_secret);
+	if (rv != 0) {
+		fprintf(stderr, "[vectors_kem] %s ERROR: encapsulated shared secret does not match the expected value\n", method_name);
+		goto err;
+	}
+
+	rc = OQS_KEM_decaps(kem, ss_decaps, decaps_ciphertext, decaps_sk);
+	if (rc != OQS_SUCCESS) {
+		fprintf(stderr, "[vectors_kem] %s ERROR: OQS_KEM_decaps failed!\n", method_name);
+		goto err;
+	}
+
+	rv = memcmp(ss_decaps, decaps_kprime, kem->length_shared_secret);
+	if (rv != 0) {
+		fprintf(stderr, "[vectors_kem] %s ERROR: shared secrets are not equal\n", method_name);
+		OQS_print_hex_string("ss_decaps", ss_decaps, kem->length_shared_secret);
+		goto err;
+	}
+
+	ret = OQS_SUCCESS;
+	goto cleanup;
+
+err:
+	ret = OQS_ERROR;
+	goto cleanup;
+
+algo_not_enabled:
+	ret = OQS_SUCCESS;
+
+cleanup:
+	if (kem != NULL) {
+		OQS_MEM_secure_free(secret_key, kem->length_secret_key);
+		OQS_MEM_secure_free(ss_encaps, kem->length_shared_secret);
+		OQS_MEM_secure_free(ss_decaps, kem->length_shared_secret);
+	}
+	if (randombytes_free != NULL) {
+		randombytes_free();
+	}
+	OQS_MEM_insecure_free(public_key);
+	OQS_MEM_insecure_free(ct_encaps);
+	OQS_KEM_free(kem);
+	return ret;
+}
+
+/* Decodes the hex-encoded command-line arguments and runs kem_vector on them. */
+int main(int argc, char **argv) {
+	OQS_STATUS rc;
+
+	OQS_init();
+
+	if (argc != 8) {
+		fprintf(stderr, "Usage: vectors_kem algname prng_output_stream encaps_pk encaps_K decaps_sk decaps_ciphertext decaps_kprime\n");
+		fprintf(stderr, " algname: ");
+		for (size_t i = 0; i < OQS_KEM_algs_length; i++) {
+			if (i > 0) {
+				fprintf(stderr, ", ");
+			}
+			fprintf(stderr, "%s", OQS_KEM_alg_identifier(i));
+		}
+		fprintf(stderr, "\n");
+		printf("\n");
+		print_system_info();
+		OQS_destroy();
+		return EXIT_FAILURE;
+	}
+
+	char *alg_name = argv[1];
+	char *prng_output_stream = argv[2]; // z || d || m
+
+	char *encaps_pk = argv[3];
+	char *encaps_K = argv[4];
+
+	char *decaps_sk = argv[5];
+	char *decaps_ciphertext = argv[6];
+	char *decaps_kprime = argv[7];
+
+	uint8_t *prng_output_stream_bytes = NULL;
+	uint8_t *encaps_pk_bytes = NULL;
+	uint8_t *encaps_K_bytes = NULL;
+	uint8_t *decaps_sk_bytes = NULL;
+	uint8_t *decaps_ciphertext_bytes = NULL;
+	uint8_t *decaps_kprime_bytes = NULL;
+
+	OQS_KEM *kem = OQS_KEM_new(alg_name);
+	if (kem == NULL) {
+		printf("[vectors_kem] %s was not enabled at compile-time.\n", alg_name);
+		rc = OQS_ERROR;
+		goto err;
+	}
+
+	if (strlen(prng_output_stream) % 2 != 0 ||
+	        strlen(encaps_pk) != 2 * kem->length_public_key ||
+	        strlen(encaps_K) != 2 * kem->length_shared_secret ||
+	        strlen(decaps_sk) != 2 * kem->length_secret_key ||
+	        strlen(decaps_ciphertext) != 2 * kem->length_ciphertext ||
+	        strlen(decaps_kprime) != 2 * kem->length_shared_secret ) {
+		fprintf(stderr, "[vectors_kem] ERROR: argument lengths do not match the sizes of %s\n", alg_name);
+		rc = OQS_ERROR;
+		goto err;
+	}
+
+	prng_output_stream_bytes = malloc(strlen(prng_output_stream) / 2);
+	encaps_pk_bytes = malloc(kem->length_public_key);
+	encaps_K_bytes = malloc(kem->length_shared_secret);
+	decaps_sk_bytes = malloc(kem->length_secret_key);
+	decaps_ciphertext_bytes = malloc(kem->length_ciphertext);
+	decaps_kprime_bytes = malloc(kem->length_shared_secret);
+
+	if ((prng_output_stream_bytes == NULL) || (encaps_pk_bytes == NULL) || (encaps_K_bytes == NULL) || (decaps_sk_bytes == NULL) || (decaps_ciphertext_bytes == NULL) || (decaps_kprime_bytes == NULL)) {
+		fprintf(stderr, "[vectors_kem] ERROR: malloc failed!\n");
+		rc = OQS_ERROR;
+		goto err;
+	}
+
+	hexStringToByteArray(prng_output_stream, prng_output_stream_bytes);
+	hexStringToByteArray(encaps_pk, encaps_pk_bytes);
+	hexStringToByteArray(encaps_K, encaps_K_bytes);
+	hexStringToByteArray(decaps_sk, decaps_sk_bytes);
+	hexStringToByteArray(decaps_ciphertext, decaps_ciphertext_bytes);
+	hexStringToByteArray(decaps_kprime, decaps_kprime_bytes);
+
+	rc = kem_vector(alg_name, prng_output_stream_bytes, encaps_pk_bytes, encaps_K_bytes, decaps_sk_bytes, decaps_ciphertext_bytes, decaps_kprime_bytes);
+
+err:
+	OQS_MEM_insecure_free(prng_output_stream_bytes);
+	OQS_MEM_insecure_free(encaps_pk_bytes);
+	OQS_MEM_insecure_free(encaps_K_bytes);
+	OQS_MEM_insecure_free(decaps_sk_bytes);
+	OQS_MEM_insecure_free(decaps_ciphertext_bytes);
+	OQS_MEM_insecure_free(decaps_kprime_bytes);
+
+	OQS_KEM_free(kem);
+
+	OQS_destroy();
+
+	if (rc != OQS_SUCCESS) {
+		return EXIT_FAILURE;
+	} else {
+		return EXIT_SUCCESS;
+	}
+}
diff --git a/tests/vectors_sig.c b/tests/vectors_sig.c
new file mode 100644
index 0000000000..6b1e646793
--- /dev/null
+++ b/tests/vectors_sig.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: MIT
+
+// This tests the test vectors published by NIST CAVP
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <oqs/oqs.h>
+
+#include "system_info.c"
+
+/* Read cursor into the caller-supplied byte stream that stands in for the RNG. */
+struct {
+	const uint8_t *pos;
+} prng_state = {
+	.pos = 0
+};
+
+/* Writes S followed by the L bytes of A as upper-case hex ("00" when L is 0) and a newline. */
+static void fprintBstr(FILE *fp, const char *S, const uint8_t *A, size_t L) {
+	size_t i;
+	fprintf(fp, "%s", S);
+	for (i = 0; i < L; i++) {
+		fprintf(fp, "%02X", A[i]);
+	}
+	if (L == 0) {
+		fprintf(fp, "00");
+	}
+	fprintf(fp, "\n");
+}
+
+/* Converts one hex digit to its value; a non-hex character is reported on stderr and yields 0. */
+static uint8_t hexCharToDecimal(char c) {
+	if (c >= '0' && c <= '9') {
+		return (uint8_t) (c - '0');
+	} else if (c >= 'a' && c <= 'f') {
+		return (uint8_t) (c - 'a' + 10);
+	} else if (c >= 'A' && c <= 'F') {
+		return (uint8_t) (c - 'A' + 10);
+	} else {
+		fprintf(stderr, "Invalid hex character: %c\n", c);
+		return 0;
+	}
+}
+
+/* Decodes hexString into byteArray (strlen / 2 bytes, caller-allocated); exits on odd length. */
+static void hexStringToByteArray(const char *hexString, uint8_t *byteArray) {
+	size_t len = strlen(hexString);
+
+	if (len % 2 != 0) {
+		fprintf(stderr, "Hex string must have an even number of characters\n");
+		exit(EXIT_FAILURE);
+	}
+
+	for (size_t i = 0, j = 0; i < len; i += 2, j++) {
+		byteArray[j] = (uint8_t) ((hexCharToDecimal(hexString[i]) << 4) | hexCharToDecimal(hexString[i + 1]));
+	}
+}
+
+/* ML-DSA-specific functions (only the "-ipd" algorithm names are recognised here) */
+static inline bool is_ml_dsa(const char *method_name) {
+	return (0 == strcmp(method_name, OQS_SIG_alg_ml_dsa_44_ipd))
+	       || (0 == strcmp(method_name, OQS_SIG_alg_ml_dsa_65_ipd))
+	       || (0 == strcmp(method_name, OQS_SIG_alg_ml_dsa_87_ipd));
+}
+
+/* Points the replacement RNG at entropy_input; personalization_string is ignored. */
+static void MLDSA_randombytes_init(const uint8_t *entropy_input, const uint8_t *personalization_string) {
+	(void) personalization_string;
+	prng_state.pos = entropy_input;
+}
+
+/* Copies the next bytes_to_read bytes of the stream. No bounds check: the stream must be long enough. */
+static void MLDSA_randombytes(uint8_t *random_array, size_t bytes_to_read) {
+	memcpy(random_array, prng_state.pos, bytes_to_read);
+	prng_state.pos += bytes_to_read;
+}
+
+/* Detaches the replacement RNG from the stream. */
+static void MLDSA_randombytes_free(void) {
+	prng_state.pos = 0;
+}
+
+/*
+ * Runs key generation and signing for method_name with the RNG replaced by
+ * prng_output_stream, printing "pk: ", "sk: " and "signature: " lines to
+ * stdout. sig_msg is signed with sig_sk; verif_sig is then verified over
+ * verif_msg with verif_pk and must be accepted. Returns OQS_SUCCESS on
+ * success or when the algorithm is not enabled, OQS_ERROR for any failure
+ * or for an algorithm other than ML-DSA-ipd.
+ */
+OQS_STATUS sig_vector(const char *method_name,
+                      uint8_t *prng_output_stream,
+                      const uint8_t *sig_msg, size_t sig_msg_len, const uint8_t *sig_sk,
+                      const uint8_t *verif_sig, const uint8_t *verif_pk, const uint8_t *verif_msg, size_t verif_msg_len) {
+
+	uint8_t *entropy_input;
+	FILE *fh = NULL;
+	OQS_SIG *sig = NULL;
+	uint8_t *public_key = NULL;
+	uint8_t *secret_key = NULL;
+	uint8_t *signature = NULL;
+	size_t signature_len = 0;
+	OQS_STATUS rc, ret = OQS_ERROR;
+
+	void (*randombytes_init)(const uint8_t *, const uint8_t *) = NULL;
+	void (*randombytes_free)(void) = NULL;
+
+	sig = OQS_SIG_new(method_name);
+	if (sig == NULL) {
+		printf("[vectors_sig] %s was not enabled at compile-time.\n", method_name);
+		goto algo_not_enabled;
+	}
+
+	/* Reject missing inputs before the RNG is pointed at them or anything is allocated. */
+	if ((prng_output_stream == NULL) || (sig_msg == NULL) || (sig_sk == NULL) || (verif_sig == NULL) || (verif_pk == NULL) || (verif_msg == NULL)) {
+		fprintf(stderr, "[vectors_sig] %s ERROR: inputs NULL!\n", method_name);
+		goto err;
+	}
+
+	if (is_ml_dsa(method_name)) {
+		OQS_randombytes_custom_algorithm(&MLDSA_randombytes);
+		randombytes_init = &MLDSA_randombytes_init;
+		randombytes_free = &MLDSA_randombytes_free;
+		entropy_input = (uint8_t *) prng_output_stream;
+	} else {
+		// Only ML-DSA-ipd supported
+		goto err;
+	}
+
+	randombytes_init(entropy_input, NULL);
+
+	fh = stdout;
+
+	public_key = malloc(sig->length_public_key);
+	secret_key = malloc(sig->length_secret_key);
+	signature = malloc(sig->length_signature);
+	if ((public_key == NULL) || (secret_key == NULL) || (signature == NULL)) {
+		fprintf(stderr, "[vectors_sig] %s ERROR: malloc failed!\n", method_name);
+		goto err;
+	}
+
+	rc = OQS_SIG_keypair(sig, public_key, secret_key);
+	if (rc != OQS_SUCCESS) {
+		fprintf(stderr, "[vectors_sig] %s ERROR: OQS_SIG_keypair failed!\n", method_name);
+		goto err;
+	}
+	fprintBstr(fh, "pk: ", public_key, sig->length_public_key);
+	fprintBstr(fh, "sk: ", secret_key, sig->length_secret_key);
+
+	rc = OQS_SIG_sign(sig, signature, &signature_len, sig_msg, sig_msg_len, sig_sk);
+	if (rc != OQS_SUCCESS) {
+		fprintf(stderr, "[vectors_sig] %s ERROR: OQS_SIG_sign failed!\n", method_name);
+		goto err;
+	}
+
+	fprintBstr(fh, "signature: ", signature, signature_len);
+
+	/* verif_sig holds sig->length_signature bytes (enforced in main), independent of the signature produced above. */
+	rc = OQS_SIG_verify(sig, verif_msg, verif_msg_len, verif_sig, sig->length_signature, verif_pk);
+	if (rc != OQS_SUCCESS) {
+		fprintf(stderr, "[vectors_sig] %s ERROR: OQS_SIG_verify failed!\n", method_name);
+		goto err;
+	}
+
+	ret = OQS_SUCCESS;
+	goto cleanup;
+
+err:
+	ret = OQS_ERROR;
+	goto cleanup;
+
+algo_not_enabled:
+	ret = OQS_SUCCESS;
+
+cleanup:
+	if (sig != NULL) {
+		OQS_MEM_secure_free(secret_key, sig->length_secret_key);
+	}
+	if (randombytes_free != NULL) {
+		randombytes_free();
+	}
+	OQS_MEM_insecure_free(public_key);
+	OQS_MEM_insecure_free(signature);
+	OQS_SIG_free(sig);
+	return ret;
+}
+
+/* Decodes the hex-encoded command-line arguments and runs sig_vector on them. */
+int main(int argc, char **argv) {
+	OQS_STATUS rc;
+
+	OQS_init();
+
+	if (argc != 8) {
+		fprintf(stderr, "Usage: vectors_sig algname prng_output_stream sig_msg sig_sk verif_sig verif_pk verif_msg\n");
+		fprintf(stderr, " algname: ");
+		for (size_t i = 0; i < OQS_SIG_algs_length; i++) {
+			if (i > 0) {
+				fprintf(stderr, ", ");
+			}
+			fprintf(stderr, "%s", OQS_SIG_alg_identifier(i));
+		}
+		fprintf(stderr, "\n");
+		printf("\n");
+		print_system_info();
+		OQS_destroy();
+		return EXIT_FAILURE;
+	}
+
+	char *alg_name = argv[1];
+	char *prng_output_stream = argv[2];
+	char *sig_msg = argv[3];
+	size_t sig_msg_len = strlen(sig_msg) / 2;
+	char *sig_sk = argv[4];
+	char *verif_sig = argv[5];
+	char *verif_pk = argv[6];
+	char *verif_msg = argv[7];
+	size_t verif_msg_len = strlen(verif_msg) / 2;
+
+	uint8_t *prng_output_stream_bytes = NULL;
+	uint8_t *sig_msg_bytes = NULL;
+	uint8_t *sig_sk_bytes = NULL;
+	uint8_t *verif_sig_bytes = NULL;
+	uint8_t *verif_pk_bytes = NULL;
+	uint8_t *verif_msg_bytes = NULL;
+
+	OQS_SIG *sig = OQS_SIG_new(alg_name);
+	if (sig == NULL) {
+		printf("[vectors_sig] %s was not enabled at compile-time.\n", alg_name);
+		rc = OQS_ERROR;
+		goto err;
+	}
+
+	if (strlen(prng_output_stream) % 2 != 0 ||
+	        strlen(sig_msg) % 2 != 0 || // variable length
+	        strlen(sig_sk) != 2 * sig->length_secret_key ||
+	        strlen(verif_sig) != 2 * sig->length_signature ||
+	        strlen(verif_pk) != 2 * sig->length_public_key ||
+	        strlen(verif_msg) % 2 != 0) { // variable length
+		fprintf(stderr, "[vectors_sig] ERROR: argument lengths do not match the sizes of %s\n", alg_name);
+		rc = OQS_ERROR;
+		goto err;
+	}
+
+	prng_output_stream_bytes = malloc(strlen(prng_output_stream) / 2);
+	sig_msg_bytes = malloc(strlen(sig_msg) / 2);
+	sig_sk_bytes = malloc(sig->length_secret_key);
+	verif_sig_bytes = malloc(sig->length_signature);
+	verif_pk_bytes = malloc(sig->length_public_key);
+	verif_msg_bytes = malloc(strlen(verif_msg) / 2);
+
+	if ((prng_output_stream_bytes == NULL) || (sig_msg_bytes == NULL) || (sig_sk_bytes == NULL) || (verif_sig_bytes == NULL) || (verif_pk_bytes == NULL) || (verif_msg_bytes == NULL)) {
+		fprintf(stderr, "[vectors_sig] ERROR: malloc failed!\n");
+		rc = OQS_ERROR;
+		goto err;
+	}
+
+	hexStringToByteArray(prng_output_stream, prng_output_stream_bytes);
+	hexStringToByteArray(sig_msg, sig_msg_bytes);
+	hexStringToByteArray(sig_sk, sig_sk_bytes);
+	hexStringToByteArray(verif_sig, verif_sig_bytes);
+	hexStringToByteArray(verif_pk, verif_pk_bytes);
+	hexStringToByteArray(verif_msg, verif_msg_bytes);
+
+	rc = sig_vector(alg_name, prng_output_stream_bytes, sig_msg_bytes, sig_msg_len, sig_sk_bytes, verif_sig_bytes, verif_pk_bytes, verif_msg_bytes, verif_msg_len);
+
+err:
+	OQS_MEM_insecure_free(prng_output_stream_bytes);
+	OQS_MEM_insecure_free(sig_msg_bytes);
+	OQS_MEM_insecure_free(sig_sk_bytes);
+	OQS_MEM_insecure_free(verif_sig_bytes);
+	OQS_MEM_insecure_free(verif_pk_bytes);
+	OQS_MEM_insecure_free(verif_msg_bytes);
+
+	OQS_SIG_free(sig);
+
+	OQS_destroy();
+
+	if (rc != OQS_SUCCESS) {
+		return EXIT_FAILURE;
+	} else {
+		return EXIT_SUCCESS;
+	}
+}