diff --git a/docs/algorithms/kem/ml_kem.md b/docs/algorithms/kem/ml_kem.md
index e33519fb2..492a24323 100644
--- a/docs/algorithms/kem/ml_kem.md
+++ b/docs/algorithms/kem/ml_kem.md
@@ -9,7 +9,7 @@
- **Primary Source**:
- **Source**: https://github.com/pq-code-package/mlkem-native/commit/84398e7230fa31ba4241f5eb36bdc3c1dbbd5bcd
- **Implementation license (SPDX-Identifier)**: CC0-1.0 or Apache-2.0
-- **Optimized Implementation sources**: https://github.com/pq-crystals/kyber/commit/10b478fc3cc4ff6215eb0b6a11bd758bf0929cbd with copy_from_upstream patches
+- **Optimized Implementation sources**: https://github.com/pq-code-package/mlkem-native/commit/84398e7230fa31ba4241f5eb36bdc3c1dbbd5bcd
- **cupqc-cuda**:
- **Source**: https://github.com/praveksharma/cupqc-mlkem/commit/b026f4e5475cd9c20c2082c7d9bad80e5b0ba89e
- **Implementation license (SPDX-Identifier)**: Apache-2.0
@@ -28,7 +28,9 @@
| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage?‡ |
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:----------------------|
| [Primary Source](#primary-source) | ref | All | All | None | True | True | False |
-| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False |
+| [Primary Source](#primary-source) | x86\_64 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False |
+| [Primary Source](#primary-source) | aarch64 | ARM64\_V8 | Linux,Darwin | None | True | False | False |
+| [cupqc-cuda](#cupqc-cuda) | cuda | CUDA | Linux,Darwin | None | False | False | False |
Are implementations chosen based on runtime CPU feature detection? **Yes**.
@@ -39,7 +41,9 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**.
| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? |
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
| [Primary Source](#primary-source) | ref | All | All | None | True | True | False |
-| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False |
+| [Primary Source](#primary-source) | x86\_64 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False |
+| [Primary Source](#primary-source) | aarch64 | ARM64\_V8 | Linux,Darwin | None | True | False | False |
+| [cupqc-cuda](#cupqc-cuda) | cuda | CUDA | Linux,Darwin | None | False | False | False |
Are implementations chosen based on runtime CPU feature detection? **Yes**.
@@ -48,10 +52,12 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**.
| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? |
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
| [Primary Source](#primary-source) | ref | All | All | None | True | True | False |
-| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False |
+| [Primary Source](#primary-source) | x86\_64 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False |
+| [Primary Source](#primary-source) | aarch64 | ARM64\_V8 | Linux,Darwin | None | True | False | False |
+| [cupqc-cuda](#cupqc-cuda) | cuda | CUDA | Linux,Darwin | None | False | False | False |
Are implementations chosen based on runtime CPU feature detection? **Yes**.
## Explanation of Terms
-- **Large Stack Usage**: Implementations identified as having such may cause failures when running in threads or in constrained environments.
+- **Large Stack Usage**: Implementations identified as having such may cause failures when running in threads or in constrained environments.
\ No newline at end of file
diff --git a/src/kem/ml_kem/kem_ml_kem_1024.c b/src/kem/ml_kem/kem_ml_kem_1024.c
index baf0bf778..52f6de69c 100644
--- a/src/kem/ml_kem/kem_ml_kem_1024.c
+++ b/src/kem/ml_kem/kem_ml_kem_1024.c
@@ -55,7 +55,10 @@ extern int cupqc_ml_kem_1024_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *
#endif /* OQS_USE_CUPQC */
OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_keypair(uint8_t *public_key, uint8_t *secret_key) {
-#if defined(OQS_ENABLE_KEM_ml_kem_1024_avx2)
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_1024_cuda)
+ return (OQS_STATUS) cupqc_ml_kem_1024_keypair(public_key, secret_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_1024_cuda */
+#if defined(OQS_ENABLE_KEM_ml_kem_1024_x86_64)
#if defined(OQS_DIST_BUILD)
if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
#endif /* OQS_DIST_BUILD */
@@ -81,7 +84,10 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_keypair(uint8_t *public_key, uint8_t *sec
}
OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) {
-#if defined(OQS_ENABLE_KEM_ml_kem_1024_avx2)
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_1024_cuda)
+ return (OQS_STATUS) cupqc_ml_kem_1024_enc(ciphertext, shared_secret, public_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_1024_cuda */
+#if defined(OQS_ENABLE_KEM_ml_kem_1024_x86_64)
#if defined(OQS_DIST_BUILD)
if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
#endif /* OQS_DIST_BUILD */
@@ -107,7 +113,10 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_encaps(uint8_t *ciphertext, uint8_t *shar
}
OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key) {
-#if defined(OQS_ENABLE_KEM_ml_kem_1024_avx2)
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_1024_cuda)
+ return (OQS_STATUS) cupqc_ml_kem_1024_dec(shared_secret, ciphertext, secret_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_1024_cuda */
+#if defined(OQS_ENABLE_KEM_ml_kem_1024_x86_64)
#if defined(OQS_DIST_BUILD)
if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
#endif /* OQS_DIST_BUILD */
diff --git a/src/kem/ml_kem/kem_ml_kem_512.c b/src/kem/ml_kem/kem_ml_kem_512.c
index c0c1403e1..8f451b0fd 100644
--- a/src/kem/ml_kem/kem_ml_kem_512.c
+++ b/src/kem/ml_kem/kem_ml_kem_512.c
@@ -55,7 +55,10 @@ extern int cupqc_ml_kem_512_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *s
#endif /* OQS_USE_CUPQC */
OQS_API OQS_STATUS OQS_KEM_ml_kem_512_keypair(uint8_t *public_key, uint8_t *secret_key) {
-#if defined(OQS_ENABLE_KEM_ml_kem_512_avx2)
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_512_cuda)
+ return (OQS_STATUS) cupqc_ml_kem_512_keypair(public_key, secret_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_512_cuda */
+#if defined(OQS_ENABLE_KEM_ml_kem_512_x86_64)
#if defined(OQS_DIST_BUILD)
if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
#endif /* OQS_DIST_BUILD */
@@ -81,7 +84,10 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_512_keypair(uint8_t *public_key, uint8_t *secr
}
OQS_API OQS_STATUS OQS_KEM_ml_kem_512_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) {
-#if defined(OQS_ENABLE_KEM_ml_kem_512_avx2)
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_512_cuda)
+ return (OQS_STATUS) cupqc_ml_kem_512_enc(ciphertext, shared_secret, public_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_512_cuda */
+#if defined(OQS_ENABLE_KEM_ml_kem_512_x86_64)
#if defined(OQS_DIST_BUILD)
if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
#endif /* OQS_DIST_BUILD */
@@ -107,7 +113,10 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_512_encaps(uint8_t *ciphertext, uint8_t *share
}
OQS_API OQS_STATUS OQS_KEM_ml_kem_512_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key) {
-#if defined(OQS_ENABLE_KEM_ml_kem_512_avx2)
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_512_cuda)
+ return (OQS_STATUS) cupqc_ml_kem_512_dec(shared_secret, ciphertext, secret_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_512_cuda */
+#if defined(OQS_ENABLE_KEM_ml_kem_512_x86_64)
#if defined(OQS_DIST_BUILD)
if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
#endif /* OQS_DIST_BUILD */
diff --git a/src/kem/ml_kem/kem_ml_kem_768.c b/src/kem/ml_kem/kem_ml_kem_768.c
index b78f94037..ef64c5c40 100644
--- a/src/kem/ml_kem/kem_ml_kem_768.c
+++ b/src/kem/ml_kem/kem_ml_kem_768.c
@@ -55,7 +55,10 @@ extern int cupqc_ml_kem_768_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *s
#endif /* OQS_USE_CUPQC */
OQS_API OQS_STATUS OQS_KEM_ml_kem_768_keypair(uint8_t *public_key, uint8_t *secret_key) {
-#if defined(OQS_ENABLE_KEM_ml_kem_768_avx2)
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_768_cuda)
+ return (OQS_STATUS) cupqc_ml_kem_768_keypair(public_key, secret_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_768_cuda */
+#if defined(OQS_ENABLE_KEM_ml_kem_768_x86_64)
#if defined(OQS_DIST_BUILD)
if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
#endif /* OQS_DIST_BUILD */
@@ -81,7 +84,10 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_768_keypair(uint8_t *public_key, uint8_t *secr
}
OQS_API OQS_STATUS OQS_KEM_ml_kem_768_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) {
-#if defined(OQS_ENABLE_KEM_ml_kem_768_avx2)
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_768_cuda)
+ return (OQS_STATUS) cupqc_ml_kem_768_enc(ciphertext, shared_secret, public_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_768_cuda */
+#if defined(OQS_ENABLE_KEM_ml_kem_768_x86_64)
#if defined(OQS_DIST_BUILD)
if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
#endif /* OQS_DIST_BUILD */
@@ -107,7 +113,10 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_768_encaps(uint8_t *ciphertext, uint8_t *share
}
OQS_API OQS_STATUS OQS_KEM_ml_kem_768_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key) {
-#if defined(OQS_ENABLE_KEM_ml_kem_768_avx2)
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_768_cuda)
+ return (OQS_STATUS) cupqc_ml_kem_768_dec(shared_secret, ciphertext, secret_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_768_cuda */
+#if defined(OQS_ENABLE_KEM_ml_kem_768_x86_64)
#if defined(OQS_DIST_BUILD)
if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
#endif /* OQS_DIST_BUILD */
diff --git a/src/oqsconfig.h.cmake b/src/oqsconfig.h.cmake
index 399c607cc..d21a67f77 100644
--- a/src/oqsconfig.h.cmake
+++ b/src/oqsconfig.h.cmake
@@ -134,11 +134,11 @@
#cmakedefine OQS_ENABLE_KEM_ml_kem_512_aarch64 1
#cmakedefine OQS_ENABLE_KEM_ml_kem_512_cuda 1
#cmakedefine OQS_ENABLE_KEM_ml_kem_768 1
-#cmakedefine OQS_ENABLE_KEM_ml_kem_768_avx2 1
+#cmakedefine OQS_ENABLE_KEM_ml_kem_768_x86_64 1
#cmakedefine OQS_ENABLE_KEM_ml_kem_768_aarch64 1
#cmakedefine OQS_ENABLE_KEM_ml_kem_768_cuda 1
#cmakedefine OQS_ENABLE_KEM_ml_kem_1024 1
-#cmakedefine OQS_ENABLE_KEM_ml_kem_1024_avx2 1
+#cmakedefine OQS_ENABLE_KEM_ml_kem_1024_x86_64 1
#cmakedefine OQS_ENABLE_KEM_ml_kem_1024_aarch64 1
#cmakedefine OQS_ENABLE_KEM_ml_kem_1024_cuda 1