diff --git a/docs/algorithms/kem/ml_kem.md b/docs/algorithms/kem/ml_kem.md index e33519fb2..492a24323 100644 --- a/docs/algorithms/kem/ml_kem.md +++ b/docs/algorithms/kem/ml_kem.md @@ -9,7 +9,7 @@ - **Primary Source**: - **Source**: https://github.com/pq-code-package/mlkem-native/commit/84398e7230fa31ba4241f5eb36bdc3c1dbbd5bcd - **Implementation license (SPDX-Identifier)**: CC0-1.0 or Apache-2.0 -- **Optimized Implementation sources**: https://github.com/pq-crystals/kyber/commit/10b478fc3cc4ff6215eb0b6a11bd758bf0929cbd with copy_from_upstream patches +- **Optimized Implementation sources**: https://github.com/pq-code-package/mlkem-native/commit/84398e7230fa31ba4241f5eb36bdc3c1dbbd5bcd - **cupqc-cuda**: - **Source**: https://github.com/praveksharma/cupqc-mlkem/commit/b026f4e5475cd9c20c2082c7d9bad80e5b0ba89e - **Implementation license (SPDX-Identifier)**: Apache-2.0 @@ -28,7 +28,9 @@ | Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage?‡ | |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:----------------------| | [Primary Source](#primary-source) | ref | All | All | None | True | True | False | -| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False | +| [Primary Source](#primary-source) | x86\_64 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False | +| [Primary Source](#primary-source) | aarch64 | ARM64\_V8 | Linux,Darwin | None | True | False | False | +| [cupqc-cuda](#cupqc-cuda) | cuda | CUDA | Linux,Darwin | None | False | False | False | Are implementations chosen based on runtime CPU feature detection? **Yes**. @@ -39,7 +41,9 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**. | Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? | |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------| | [Primary Source](#primary-source) | ref | All | All | None | True | True | False | -| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False | +| [Primary Source](#primary-source) | x86\_64 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False | +| [Primary Source](#primary-source) | aarch64 | ARM64\_V8 | Linux,Darwin | None | True | False | False | +| [cupqc-cuda](#cupqc-cuda) | cuda | CUDA | Linux,Darwin | None | False | False | False | Are implementations chosen based on runtime CPU feature detection? **Yes**. @@ -48,10 +52,12 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**. | Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? | |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------| | [Primary Source](#primary-source) | ref | All | All | None | True | True | False | -| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False | +| [Primary Source](#primary-source) | x86\_64 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False | +| [Primary Source](#primary-source) | aarch64 | ARM64\_V8 | Linux,Darwin | None | True | False | False | +| [cupqc-cuda](#cupqc-cuda) | cuda | CUDA | Linux,Darwin | None | False | False | False | Are implementations chosen based on runtime CPU feature detection? **Yes**. ## Explanation of Terms -- **Large Stack Usage**: Implementations identified as having such may cause failures when running in threads or in constrained environments. +- **Large Stack Usage**: Implementations identified as having such may cause failures when running in threads or in constrained environments. \ No newline at end of file diff --git a/src/kem/ml_kem/kem_ml_kem_1024.c b/src/kem/ml_kem/kem_ml_kem_1024.c index baf0bf778..52f6de69c 100644 --- a/src/kem/ml_kem/kem_ml_kem_1024.c +++ b/src/kem/ml_kem/kem_ml_kem_1024.c @@ -55,7 +55,10 @@ extern int cupqc_ml_kem_1024_dec(uint8_t *ss, const uint8_t *ct, const uint8_t * #endif /* OQS_USE_CUPQC */ OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_keypair(uint8_t *public_key, uint8_t *secret_key) { -#if defined(OQS_ENABLE_KEM_ml_kem_1024_avx2) +#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_1024_cuda) + return (OQS_STATUS) cupqc_ml_kem_1024_keypair(public_key, secret_key); +#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_1024_cuda */ +#if defined(OQS_ENABLE_KEM_ml_kem_1024_x86_64) #if defined(OQS_DIST_BUILD) if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { #endif /* OQS_DIST_BUILD */ @@ -81,7 +84,10 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_keypair(uint8_t *public_key, uint8_t *sec } OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) { -#if defined(OQS_ENABLE_KEM_ml_kem_1024_avx2) +#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_1024_cuda) + return (OQS_STATUS) cupqc_ml_kem_1024_enc(ciphertext, shared_secret, public_key); +#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_1024_cuda */ +#if defined(OQS_ENABLE_KEM_ml_kem_1024_x86_64) #if defined(OQS_DIST_BUILD) if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { #endif /* OQS_DIST_BUILD */ @@ -107,7 +113,10 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_encaps(uint8_t *ciphertext, uint8_t *shar } OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key) { -#if defined(OQS_ENABLE_KEM_ml_kem_1024_avx2) +#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_1024_cuda) + return (OQS_STATUS) cupqc_ml_kem_1024_dec(shared_secret, ciphertext, secret_key); +#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_1024_cuda */ +#if defined(OQS_ENABLE_KEM_ml_kem_1024_x86_64) #if defined(OQS_DIST_BUILD) if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { #endif /* OQS_DIST_BUILD */ diff --git a/src/kem/ml_kem/kem_ml_kem_512.c b/src/kem/ml_kem/kem_ml_kem_512.c index c0c1403e1..8f451b0fd 100644 --- a/src/kem/ml_kem/kem_ml_kem_512.c +++ b/src/kem/ml_kem/kem_ml_kem_512.c @@ -55,7 +55,10 @@ extern int cupqc_ml_kem_512_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *s #endif /* OQS_USE_CUPQC */ OQS_API OQS_STATUS OQS_KEM_ml_kem_512_keypair(uint8_t *public_key, uint8_t *secret_key) { -#if defined(OQS_ENABLE_KEM_ml_kem_512_avx2) +#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_512_cuda) + return (OQS_STATUS) cupqc_ml_kem_512_keypair(public_key, secret_key); +#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_512_cuda */ +#if defined(OQS_ENABLE_KEM_ml_kem_512_x86_64) #if defined(OQS_DIST_BUILD) if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { #endif /* OQS_DIST_BUILD */ @@ -81,7 +84,10 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_512_keypair(uint8_t *public_key, uint8_t *secr } OQS_API OQS_STATUS OQS_KEM_ml_kem_512_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) { -#if defined(OQS_ENABLE_KEM_ml_kem_512_avx2) +#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_512_cuda) + return (OQS_STATUS) cupqc_ml_kem_512_enc(ciphertext, shared_secret, public_key); +#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_512_cuda */ +#if defined(OQS_ENABLE_KEM_ml_kem_512_x86_64) #if defined(OQS_DIST_BUILD) if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { #endif /* OQS_DIST_BUILD */ @@ -107,7 +113,10 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_512_encaps(uint8_t *ciphertext, uint8_t *share } OQS_API OQS_STATUS OQS_KEM_ml_kem_512_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key) { -#if defined(OQS_ENABLE_KEM_ml_kem_512_avx2) +#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_512_cuda) + return (OQS_STATUS) cupqc_ml_kem_512_dec(shared_secret, ciphertext, secret_key); +#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_512_cuda */ +#if defined(OQS_ENABLE_KEM_ml_kem_512_x86_64) #if defined(OQS_DIST_BUILD) if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { #endif /* OQS_DIST_BUILD */ diff --git a/src/kem/ml_kem/kem_ml_kem_768.c b/src/kem/ml_kem/kem_ml_kem_768.c index b78f94037..ef64c5c40 100644 --- a/src/kem/ml_kem/kem_ml_kem_768.c +++ b/src/kem/ml_kem/kem_ml_kem_768.c @@ -55,7 +55,10 @@ extern int cupqc_ml_kem_768_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *s #endif /* OQS_USE_CUPQC */ OQS_API OQS_STATUS OQS_KEM_ml_kem_768_keypair(uint8_t *public_key, uint8_t *secret_key) { -#if defined(OQS_ENABLE_KEM_ml_kem_768_avx2) +#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_768_cuda) + return (OQS_STATUS) cupqc_ml_kem_768_keypair(public_key, secret_key); +#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_768_cuda */ +#if defined(OQS_ENABLE_KEM_ml_kem_768_x86_64) #if defined(OQS_DIST_BUILD) if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { #endif /* OQS_DIST_BUILD */ @@ -81,7 +84,10 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_768_keypair(uint8_t *public_key, uint8_t *secr } OQS_API OQS_STATUS OQS_KEM_ml_kem_768_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) { -#if defined(OQS_ENABLE_KEM_ml_kem_768_avx2) +#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_768_cuda) + return (OQS_STATUS) cupqc_ml_kem_768_enc(ciphertext, shared_secret, public_key); +#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_768_cuda */ +#if defined(OQS_ENABLE_KEM_ml_kem_768_x86_64) #if defined(OQS_DIST_BUILD) if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { #endif /* OQS_DIST_BUILD */ @@ -107,7 +113,10 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_768_encaps(uint8_t *ciphertext, uint8_t *share } OQS_API OQS_STATUS OQS_KEM_ml_kem_768_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key) { -#if defined(OQS_ENABLE_KEM_ml_kem_768_avx2) +#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_768_cuda) + return (OQS_STATUS) cupqc_ml_kem_768_dec(shared_secret, ciphertext, secret_key); +#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_768_cuda */ +#if defined(OQS_ENABLE_KEM_ml_kem_768_x86_64) #if defined(OQS_DIST_BUILD) if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { #endif /* OQS_DIST_BUILD */ diff --git a/src/oqsconfig.h.cmake b/src/oqsconfig.h.cmake index 399c607cc..d21a67f77 100644 --- a/src/oqsconfig.h.cmake +++ b/src/oqsconfig.h.cmake @@ -134,11 +134,11 @@ #cmakedefine OQS_ENABLE_KEM_ml_kem_512_aarch64 1 #cmakedefine OQS_ENABLE_KEM_ml_kem_512_cuda 1 #cmakedefine OQS_ENABLE_KEM_ml_kem_768 1 -#cmakedefine OQS_ENABLE_KEM_ml_kem_768_avx2 1 +#cmakedefine OQS_ENABLE_KEM_ml_kem_768_x86_64 1 #cmakedefine OQS_ENABLE_KEM_ml_kem_768_aarch64 1 #cmakedefine OQS_ENABLE_KEM_ml_kem_768_cuda 1 #cmakedefine OQS_ENABLE_KEM_ml_kem_1024 1 -#cmakedefine OQS_ENABLE_KEM_ml_kem_1024_avx2 1 +#cmakedefine OQS_ENABLE_KEM_ml_kem_1024_x86_64 1 #cmakedefine OQS_ENABLE_KEM_ml_kem_1024_aarch64 1 #cmakedefine OQS_ENABLE_KEM_ml_kem_1024_cuda 1