From 959c697c8248f8fb129e4285f562280ca17f6daf Mon Sep 17 00:00:00 2001 From: Basil Hess Date: Tue, 21 Jan 2025 13:48:19 +0100 Subject: [PATCH] Pull update Signed-off-by: Basil Hess --- src/kem/ml_kem/CMakeLists.txt | 18 +- .../aarch64/src/arith_native_aarch64.h | 4 +- .../aarch64/src/clean_impl.h | 1 - .../aarch64/src/intt_clean.S | 85 ++-- .../aarch64/src/intt_opt.S | 85 ++-- .../aarch64/src/ntt_clean.S | 63 ++- .../aarch64/src/ntt_opt.S | 63 ++- .../aarch64/src/opt_impl.h | 2 - .../aarch64/src/poly_clean.S | 78 ++-- .../aarch64/src/poly_opt.S | 78 ++-- .../aarch64/src/polyvec_clean.S | 97 +++-- .../aarch64/src/polyvec_opt.S | 97 +++-- .../aarch64/src/rej_uniform_asm_clean.S | 79 +++- .../arith_backend.h | 2 + .../mlkem-native_ml-kem-1024_aarch64/cbd.c | 106 ++--- .../mlkem-native_ml-kem-1024_aarch64/cbd.h | 35 +- .../mlkem-native_ml-kem-1024_aarch64/cbmc.h | 12 +- .../mlkem-native_ml-kem-1024_aarch64/common.h | 17 +- .../mlkem-native_ml-kem-1024_aarch64/config.h | 92 ++-- .../mlkem-native_ml-kem-1024_aarch64/debug.c | 60 +++ .../mlkem-native_ml-kem-1024_aarch64/debug.h | 130 ++++++ .../debug/debug.c | 56 --- .../debug/debug.h | 224 ---------- .../mlkem-native_ml-kem-1024_aarch64/indcpa.c | 170 +------- .../mlkem-native_ml-kem-1024_aarch64/indcpa.h | 8 +- .../mlkem-native_ml-kem-1024_aarch64/kem.c | 4 +- .../mlkem-native_ml-kem-1024_aarch64/kem.h | 9 + .../mlkem_native.h | 14 +- .../mlkem-native_ml-kem-1024_aarch64/ntt.c | 74 ++-- .../mlkem-native_ml-kem-1024_aarch64/ntt.h | 7 +- .../mlkem-native_ml-kem-1024_aarch64/params.h | 23 +- .../mlkem-native_ml-kem-1024_aarch64/poly.c | 404 ++++++++---------- .../mlkem-native_ml-kem-1024_aarch64/poly.h | 301 +++++-------- .../polyvec.c | 194 ++++++++- .../polyvec.h | 293 ++++++++++++- .../mlkem-native_ml-kem-1024_aarch64/reduce.h | 19 +- .../rej_uniform.c | 209 +++++++-- .../rej_uniform.h | 85 ++-- .../symmetric.h | 1 + .../mlkem-native_ml-kem-1024_aarch64/verify.c | 11 +- .../mlkem-native_ml-kem-1024_aarch64/verify.h | 4 
+- .../mlkem-native_ml-kem-1024_aarch64/zetas.c | 9 + .../arith_backend.h | 2 + .../ml_kem/mlkem-native_ml-kem-1024_ref/cbd.c | 106 ++--- .../ml_kem/mlkem-native_ml-kem-1024_ref/cbd.h | 35 +- .../mlkem-native_ml-kem-1024_ref/cbmc.h | 12 +- .../mlkem-native_ml-kem-1024_ref/common.h | 17 +- .../mlkem-native_ml-kem-1024_ref/config.h | 92 ++-- .../mlkem-native_ml-kem-1024_ref/debug.c | 60 +++ .../mlkem-native_ml-kem-1024_ref/debug.h | 130 ++++++ .../debug/debug.c | 56 --- .../debug/debug.h | 224 ---------- .../mlkem-native_ml-kem-1024_ref/indcpa.c | 170 +------- .../mlkem-native_ml-kem-1024_ref/indcpa.h | 8 +- .../ml_kem/mlkem-native_ml-kem-1024_ref/kem.c | 4 +- .../ml_kem/mlkem-native_ml-kem-1024_ref/kem.h | 9 + .../mlkem_native.h | 14 +- .../ml_kem/mlkem-native_ml-kem-1024_ref/ntt.c | 74 ++-- .../ml_kem/mlkem-native_ml-kem-1024_ref/ntt.h | 7 +- .../mlkem-native_ml-kem-1024_ref/params.h | 23 +- .../mlkem-native_ml-kem-1024_ref/poly.c | 404 ++++++++---------- .../mlkem-native_ml-kem-1024_ref/poly.h | 301 +++++-------- .../mlkem-native_ml-kem-1024_ref/polyvec.c | 194 ++++++++- .../mlkem-native_ml-kem-1024_ref/polyvec.h | 293 ++++++++++++- .../mlkem-native_ml-kem-1024_ref/reduce.h | 19 +- .../rej_uniform.c | 209 +++++++-- .../rej_uniform.h | 85 ++-- .../mlkem-native_ml-kem-1024_ref/symmetric.h | 1 + .../mlkem-native_ml-kem-1024_ref/verify.c | 11 +- .../mlkem-native_ml-kem-1024_ref/verify.h | 4 +- .../mlkem-native_ml-kem-1024_ref/zetas.c | 9 + .../arith_backend.h | 2 + .../mlkem-native_ml-kem-1024_x86_64/cbd.c | 106 ++--- .../mlkem-native_ml-kem-1024_x86_64/cbd.h | 35 +- .../mlkem-native_ml-kem-1024_x86_64/cbmc.h | 12 +- .../mlkem-native_ml-kem-1024_x86_64/common.h | 17 +- .../mlkem-native_ml-kem-1024_x86_64/config.h | 92 ++-- .../mlkem-native_ml-kem-1024_x86_64/debug.c | 60 +++ .../mlkem-native_ml-kem-1024_x86_64/debug.h | 130 ++++++ .../debug/debug.c | 56 --- .../debug/debug.h | 224 ---------- .../mlkem-native_ml-kem-1024_x86_64/indcpa.c | 170 +------- 
.../mlkem-native_ml-kem-1024_x86_64/indcpa.h | 8 +- .../mlkem-native_ml-kem-1024_x86_64/kem.c | 4 +- .../mlkem-native_ml-kem-1024_x86_64/kem.h | 9 + .../mlkem_native.h | 14 +- .../mlkem-native_ml-kem-1024_x86_64/ntt.c | 74 ++-- .../mlkem-native_ml-kem-1024_x86_64/ntt.h | 7 +- .../mlkem-native_ml-kem-1024_x86_64/params.h | 23 +- .../mlkem-native_ml-kem-1024_x86_64/poly.c | 404 ++++++++---------- .../mlkem-native_ml-kem-1024_x86_64/poly.h | 301 +++++-------- .../mlkem-native_ml-kem-1024_x86_64/polyvec.c | 194 ++++++++- .../mlkem-native_ml-kem-1024_x86_64/polyvec.h | 293 ++++++++++++- .../mlkem-native_ml-kem-1024_x86_64/reduce.h | 19 +- .../rej_uniform.c | 209 +++++++-- .../rej_uniform.h | 85 ++-- .../symmetric.h | 1 + .../mlkem-native_ml-kem-1024_x86_64/verify.c | 11 +- .../mlkem-native_ml-kem-1024_x86_64/verify.h | 4 +- .../x86_64/src/arith_native_x86_64.h | 2 +- .../x86_64/src/default_impl.h | 3 - .../mlkem-native_ml-kem-1024_x86_64/zetas.c | 9 + .../aarch64/src/arith_native_aarch64.h | 4 +- .../aarch64/src/clean_impl.h | 1 - .../aarch64/src/intt_clean.S | 85 ++-- .../aarch64/src/intt_opt.S | 85 ++-- .../aarch64/src/ntt_clean.S | 63 ++- .../aarch64/src/ntt_opt.S | 63 ++- .../aarch64/src/opt_impl.h | 2 - .../aarch64/src/poly_clean.S | 78 ++-- .../aarch64/src/poly_opt.S | 78 ++-- .../aarch64/src/polyvec_clean.S | 97 +++-- .../aarch64/src/polyvec_opt.S | 97 +++-- .../aarch64/src/rej_uniform_asm_clean.S | 79 +++- .../arith_backend.h | 2 + .../mlkem-native_ml-kem-512_aarch64/cbd.c | 106 ++--- .../mlkem-native_ml-kem-512_aarch64/cbd.h | 35 +- .../mlkem-native_ml-kem-512_aarch64/cbmc.h | 12 +- .../mlkem-native_ml-kem-512_aarch64/common.h | 17 +- .../mlkem-native_ml-kem-512_aarch64/config.h | 92 ++-- .../mlkem-native_ml-kem-512_aarch64/debug.c | 60 +++ .../mlkem-native_ml-kem-512_aarch64/debug.h | 130 ++++++ .../debug/debug.c | 56 --- .../debug/debug.h | 224 ---------- .../mlkem-native_ml-kem-512_aarch64/indcpa.c | 170 +------- .../mlkem-native_ml-kem-512_aarch64/indcpa.h 
| 8 +- .../mlkem-native_ml-kem-512_aarch64/kem.c | 4 +- .../mlkem-native_ml-kem-512_aarch64/kem.h | 9 + .../mlkem_native.h | 14 +- .../mlkem-native_ml-kem-512_aarch64/ntt.c | 74 ++-- .../mlkem-native_ml-kem-512_aarch64/ntt.h | 7 +- .../mlkem-native_ml-kem-512_aarch64/params.h | 23 +- .../mlkem-native_ml-kem-512_aarch64/poly.c | 404 ++++++++---------- .../mlkem-native_ml-kem-512_aarch64/poly.h | 301 +++++-------- .../mlkem-native_ml-kem-512_aarch64/polyvec.c | 194 ++++++++- .../mlkem-native_ml-kem-512_aarch64/polyvec.h | 293 ++++++++++++- .../mlkem-native_ml-kem-512_aarch64/reduce.h | 19 +- .../rej_uniform.c | 209 +++++++-- .../rej_uniform.h | 85 ++-- .../symmetric.h | 1 + .../mlkem-native_ml-kem-512_aarch64/verify.c | 11 +- .../mlkem-native_ml-kem-512_aarch64/verify.h | 4 +- .../mlkem-native_ml-kem-512_aarch64/zetas.c | 9 + .../arith_backend.h | 2 + .../ml_kem/mlkem-native_ml-kem-512_ref/cbd.c | 106 ++--- .../ml_kem/mlkem-native_ml-kem-512_ref/cbd.h | 35 +- .../ml_kem/mlkem-native_ml-kem-512_ref/cbmc.h | 12 +- .../mlkem-native_ml-kem-512_ref/common.h | 17 +- .../mlkem-native_ml-kem-512_ref/config.h | 92 ++-- .../mlkem-native_ml-kem-512_ref/debug.c | 60 +++ .../mlkem-native_ml-kem-512_ref/debug.h | 130 ++++++ .../mlkem-native_ml-kem-512_ref/debug/debug.c | 56 --- .../mlkem-native_ml-kem-512_ref/debug/debug.h | 224 ---------- .../mlkem-native_ml-kem-512_ref/indcpa.c | 170 +------- .../mlkem-native_ml-kem-512_ref/indcpa.h | 8 +- .../ml_kem/mlkem-native_ml-kem-512_ref/kem.c | 4 +- .../ml_kem/mlkem-native_ml-kem-512_ref/kem.h | 9 + .../mlkem_native.h | 14 +- .../ml_kem/mlkem-native_ml-kem-512_ref/ntt.c | 74 ++-- .../ml_kem/mlkem-native_ml-kem-512_ref/ntt.h | 7 +- .../mlkem-native_ml-kem-512_ref/params.h | 23 +- .../ml_kem/mlkem-native_ml-kem-512_ref/poly.c | 404 ++++++++---------- .../ml_kem/mlkem-native_ml-kem-512_ref/poly.h | 301 +++++-------- .../mlkem-native_ml-kem-512_ref/polyvec.c | 194 ++++++++- .../mlkem-native_ml-kem-512_ref/polyvec.h | 293 ++++++++++++- 
.../mlkem-native_ml-kem-512_ref/reduce.h | 19 +- .../mlkem-native_ml-kem-512_ref/rej_uniform.c | 209 +++++++-- .../mlkem-native_ml-kem-512_ref/rej_uniform.h | 85 ++-- .../mlkem-native_ml-kem-512_ref/symmetric.h | 1 + .../mlkem-native_ml-kem-512_ref/verify.c | 11 +- .../mlkem-native_ml-kem-512_ref/verify.h | 4 +- .../mlkem-native_ml-kem-512_ref/zetas.c | 9 + .../arith_backend.h | 2 + .../mlkem-native_ml-kem-512_x86_64/cbd.c | 106 ++--- .../mlkem-native_ml-kem-512_x86_64/cbd.h | 35 +- .../mlkem-native_ml-kem-512_x86_64/cbmc.h | 12 +- .../mlkem-native_ml-kem-512_x86_64/common.h | 17 +- .../mlkem-native_ml-kem-512_x86_64/config.h | 92 ++-- .../mlkem-native_ml-kem-512_x86_64/debug.c | 60 +++ .../mlkem-native_ml-kem-512_x86_64/debug.h | 130 ++++++ .../debug/debug.c | 56 --- .../debug/debug.h | 224 ---------- .../mlkem-native_ml-kem-512_x86_64/indcpa.c | 170 +------- .../mlkem-native_ml-kem-512_x86_64/indcpa.h | 8 +- .../mlkem-native_ml-kem-512_x86_64/kem.c | 4 +- .../mlkem-native_ml-kem-512_x86_64/kem.h | 9 + .../mlkem_native.h | 14 +- .../mlkem-native_ml-kem-512_x86_64/ntt.c | 74 ++-- .../mlkem-native_ml-kem-512_x86_64/ntt.h | 7 +- .../mlkem-native_ml-kem-512_x86_64/params.h | 23 +- .../mlkem-native_ml-kem-512_x86_64/poly.c | 404 ++++++++---------- .../mlkem-native_ml-kem-512_x86_64/poly.h | 301 +++++-------- .../mlkem-native_ml-kem-512_x86_64/polyvec.c | 194 ++++++++- .../mlkem-native_ml-kem-512_x86_64/polyvec.h | 293 ++++++++++++- .../mlkem-native_ml-kem-512_x86_64/reduce.h | 19 +- .../rej_uniform.c | 209 +++++++-- .../rej_uniform.h | 85 ++-- .../symmetric.h | 1 + .../mlkem-native_ml-kem-512_x86_64/verify.c | 11 +- .../mlkem-native_ml-kem-512_x86_64/verify.h | 4 +- .../x86_64/src/arith_native_x86_64.h | 2 +- .../x86_64/src/default_impl.h | 3 - .../mlkem-native_ml-kem-512_x86_64/zetas.c | 9 + .../aarch64/src/arith_native_aarch64.h | 4 +- .../aarch64/src/clean_impl.h | 1 - .../aarch64/src/intt_clean.S | 85 ++-- .../aarch64/src/intt_opt.S | 85 ++-- 
.../aarch64/src/ntt_clean.S | 63 ++- .../aarch64/src/ntt_opt.S | 63 ++- .../aarch64/src/opt_impl.h | 2 - .../aarch64/src/poly_clean.S | 78 ++-- .../aarch64/src/poly_opt.S | 78 ++-- .../aarch64/src/polyvec_clean.S | 97 +++-- .../aarch64/src/polyvec_opt.S | 97 +++-- .../aarch64/src/rej_uniform_asm_clean.S | 79 +++- .../arith_backend.h | 2 + .../mlkem-native_ml-kem-768_aarch64/cbd.c | 106 ++--- .../mlkem-native_ml-kem-768_aarch64/cbd.h | 35 +- .../mlkem-native_ml-kem-768_aarch64/cbmc.h | 12 +- .../mlkem-native_ml-kem-768_aarch64/common.h | 17 +- .../mlkem-native_ml-kem-768_aarch64/config.h | 92 ++-- .../mlkem-native_ml-kem-768_aarch64/debug.c | 60 +++ .../mlkem-native_ml-kem-768_aarch64/debug.h | 130 ++++++ .../debug/debug.c | 56 --- .../debug/debug.h | 224 ---------- .../mlkem-native_ml-kem-768_aarch64/indcpa.c | 170 +------- .../mlkem-native_ml-kem-768_aarch64/indcpa.h | 8 +- .../mlkem-native_ml-kem-768_aarch64/kem.c | 4 +- .../mlkem-native_ml-kem-768_aarch64/kem.h | 9 + .../mlkem_native.h | 14 +- .../mlkem-native_ml-kem-768_aarch64/ntt.c | 74 ++-- .../mlkem-native_ml-kem-768_aarch64/ntt.h | 7 +- .../mlkem-native_ml-kem-768_aarch64/params.h | 23 +- .../mlkem-native_ml-kem-768_aarch64/poly.c | 404 ++++++++---------- .../mlkem-native_ml-kem-768_aarch64/poly.h | 301 +++++-------- .../mlkem-native_ml-kem-768_aarch64/polyvec.c | 194 ++++++++- .../mlkem-native_ml-kem-768_aarch64/polyvec.h | 293 ++++++++++++- .../mlkem-native_ml-kem-768_aarch64/reduce.h | 19 +- .../rej_uniform.c | 209 +++++++-- .../rej_uniform.h | 85 ++-- .../symmetric.h | 1 + .../mlkem-native_ml-kem-768_aarch64/verify.c | 11 +- .../mlkem-native_ml-kem-768_aarch64/verify.h | 4 +- .../mlkem-native_ml-kem-768_aarch64/zetas.c | 9 + .../arith_backend.h | 2 + .../ml_kem/mlkem-native_ml-kem-768_ref/cbd.c | 106 ++--- .../ml_kem/mlkem-native_ml-kem-768_ref/cbd.h | 35 +- .../ml_kem/mlkem-native_ml-kem-768_ref/cbmc.h | 12 +- .../mlkem-native_ml-kem-768_ref/common.h | 17 +- .../mlkem-native_ml-kem-768_ref/config.h | 
92 ++-- .../mlkem-native_ml-kem-768_ref/debug.c | 60 +++ .../mlkem-native_ml-kem-768_ref/debug.h | 130 ++++++ .../mlkem-native_ml-kem-768_ref/debug/debug.c | 56 --- .../mlkem-native_ml-kem-768_ref/debug/debug.h | 224 ---------- .../mlkem-native_ml-kem-768_ref/indcpa.c | 170 +------- .../mlkem-native_ml-kem-768_ref/indcpa.h | 8 +- .../ml_kem/mlkem-native_ml-kem-768_ref/kem.c | 4 +- .../ml_kem/mlkem-native_ml-kem-768_ref/kem.h | 9 + .../mlkem_native.h | 14 +- .../ml_kem/mlkem-native_ml-kem-768_ref/ntt.c | 74 ++-- .../ml_kem/mlkem-native_ml-kem-768_ref/ntt.h | 7 +- .../mlkem-native_ml-kem-768_ref/params.h | 23 +- .../ml_kem/mlkem-native_ml-kem-768_ref/poly.c | 404 ++++++++---------- .../ml_kem/mlkem-native_ml-kem-768_ref/poly.h | 301 +++++-------- .../mlkem-native_ml-kem-768_ref/polyvec.c | 194 ++++++++- .../mlkem-native_ml-kem-768_ref/polyvec.h | 293 ++++++++++++- .../mlkem-native_ml-kem-768_ref/reduce.h | 19 +- .../mlkem-native_ml-kem-768_ref/rej_uniform.c | 209 +++++++-- .../mlkem-native_ml-kem-768_ref/rej_uniform.h | 85 ++-- .../mlkem-native_ml-kem-768_ref/symmetric.h | 1 + .../mlkem-native_ml-kem-768_ref/verify.c | 11 +- .../mlkem-native_ml-kem-768_ref/verify.h | 4 +- .../mlkem-native_ml-kem-768_ref/zetas.c | 9 + .../arith_backend.h | 2 + .../mlkem-native_ml-kem-768_x86_64/cbd.c | 106 ++--- .../mlkem-native_ml-kem-768_x86_64/cbd.h | 35 +- .../mlkem-native_ml-kem-768_x86_64/cbmc.h | 12 +- .../mlkem-native_ml-kem-768_x86_64/common.h | 17 +- .../mlkem-native_ml-kem-768_x86_64/config.h | 92 ++-- .../mlkem-native_ml-kem-768_x86_64/debug.c | 60 +++ .../mlkem-native_ml-kem-768_x86_64/debug.h | 130 ++++++ .../debug/debug.c | 56 --- .../debug/debug.h | 224 ---------- .../mlkem-native_ml-kem-768_x86_64/indcpa.c | 170 +------- .../mlkem-native_ml-kem-768_x86_64/indcpa.h | 8 +- .../mlkem-native_ml-kem-768_x86_64/kem.c | 4 +- .../mlkem-native_ml-kem-768_x86_64/kem.h | 9 + .../mlkem_native.h | 14 +- .../mlkem-native_ml-kem-768_x86_64/ntt.c | 74 ++-- 
.../mlkem-native_ml-kem-768_x86_64/ntt.h | 7 +- .../mlkem-native_ml-kem-768_x86_64/params.h | 23 +- .../mlkem-native_ml-kem-768_x86_64/poly.c | 404 ++++++++---------- .../mlkem-native_ml-kem-768_x86_64/poly.h | 301 +++++-------- .../mlkem-native_ml-kem-768_x86_64/polyvec.c | 194 ++++++++- .../mlkem-native_ml-kem-768_x86_64/polyvec.h | 293 ++++++++++++- .../mlkem-native_ml-kem-768_x86_64/reduce.h | 19 +- .../rej_uniform.c | 209 +++++++-- .../rej_uniform.h | 85 ++-- .../symmetric.h | 1 + .../mlkem-native_ml-kem-768_x86_64/verify.c | 11 +- .../mlkem-native_ml-kem-768_x86_64/verify.h | 4 +- .../x86_64/src/arith_native_x86_64.h | 2 +- .../x86_64/src/default_impl.h | 3 - .../mlkem-native_ml-kem-768_x86_64/zetas.c | 9 + 304 files changed, 14028 insertions(+), 11358 deletions(-) create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/debug.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/debug.h delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/debug/debug.c delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/debug/debug.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug.h delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug/debug.c delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug/debug.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/debug.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/debug.h delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/debug/debug.c delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/debug/debug.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/debug.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/debug.h delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/debug/debug.c delete mode 100644 
src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/debug/debug.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug.h delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug/debug.c delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug/debug.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/debug.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/debug.h delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/debug/debug.c delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/debug/debug.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/debug.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/debug.h delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/debug/debug.c delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/debug/debug.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug.h delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug/debug.c delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug/debug.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/debug.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/debug.h delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/debug/debug.c delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/debug/debug.h diff --git a/src/kem/ml_kem/CMakeLists.txt b/src/kem/ml_kem/CMakeLists.txt index edd305ce8..fc2655ddf 100644 --- a/src/kem/ml_kem/CMakeLists.txt +++ b/src/kem/ml_kem/CMakeLists.txt @@ -6,7 +6,7 @@ set(_ML_KEM_OBJS "") if(OQS_ENABLE_KEM_ml_kem_512) - add_library(ml_kem_512_ref OBJECT kem_ml_kem_512.c mlkem-native_ml-kem-512_ref/cbd.c mlkem-native_ml-kem-512_ref/debug/debug.c mlkem-native_ml-kem-512_ref/indcpa.c 
mlkem-native_ml-kem-512_ref/kem.c mlkem-native_ml-kem-512_ref/ntt.c mlkem-native_ml-kem-512_ref/poly.c mlkem-native_ml-kem-512_ref/polyvec.c mlkem-native_ml-kem-512_ref/rej_uniform.c mlkem-native_ml-kem-512_ref/verify.c mlkem-native_ml-kem-512_ref/zetas.c) + add_library(ml_kem_512_ref OBJECT kem_ml_kem_512.c mlkem-native_ml-kem-512_ref/cbd.c mlkem-native_ml-kem-512_ref/debug.c mlkem-native_ml-kem-512_ref/indcpa.c mlkem-native_ml-kem-512_ref/kem.c mlkem-native_ml-kem-512_ref/ntt.c mlkem-native_ml-kem-512_ref/poly.c mlkem-native_ml-kem-512_ref/polyvec.c mlkem-native_ml-kem-512_ref/rej_uniform.c mlkem-native_ml-kem-512_ref/verify.c mlkem-native_ml-kem-512_ref/zetas.c) target_compile_options(ml_kem_512_ref PUBLIC -DMLKEM_K=2 -DMLKEM_NAMESPACE_PREFIX=PQCP_MLKEM_NATIVE_MLKEM512_C) target_include_directories(ml_kem_512_ref PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-512_ref) target_include_directories(ml_kem_512_ref PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) @@ -15,7 +15,7 @@ if(OQS_ENABLE_KEM_ml_kem_512) endif() if(OQS_ENABLE_KEM_ml_kem_512_x86_64) - add_library(ml_kem_512_x86_64 OBJECT mlkem-native_ml-kem-512_x86_64/cbd.c mlkem-native_ml-kem-512_x86_64/debug/debug.c mlkem-native_ml-kem-512_x86_64/indcpa.c mlkem-native_ml-kem-512_x86_64/kem.c mlkem-native_ml-kem-512_x86_64/ntt.c mlkem-native_ml-kem-512_x86_64/poly.c mlkem-native_ml-kem-512_x86_64/polyvec.c mlkem-native_ml-kem-512_x86_64/rej_uniform.c mlkem-native_ml-kem-512_x86_64/verify.c mlkem-native_ml-kem-512_x86_64/x86_64/src/basemul.c mlkem-native_ml-kem-512_x86_64/x86_64/src/basemul.S mlkem-native_ml-kem-512_x86_64/x86_64/src/consts.c mlkem-native_ml-kem-512_x86_64/x86_64/src/fq.S mlkem-native_ml-kem-512_x86_64/x86_64/src/intt.S mlkem-native_ml-kem-512_x86_64/x86_64/src/ntt.S mlkem-native_ml-kem-512_x86_64/x86_64/src/rej_uniform_avx2.c mlkem-native_ml-kem-512_x86_64/x86_64/src/rej_uniform_table.c mlkem-native_ml-kem-512_x86_64/x86_64/src/shuffle.S mlkem-native_ml-kem-512_x86_64/zetas.c) + 
add_library(ml_kem_512_x86_64 OBJECT mlkem-native_ml-kem-512_x86_64/cbd.c mlkem-native_ml-kem-512_x86_64/debug.c mlkem-native_ml-kem-512_x86_64/indcpa.c mlkem-native_ml-kem-512_x86_64/kem.c mlkem-native_ml-kem-512_x86_64/ntt.c mlkem-native_ml-kem-512_x86_64/poly.c mlkem-native_ml-kem-512_x86_64/polyvec.c mlkem-native_ml-kem-512_x86_64/rej_uniform.c mlkem-native_ml-kem-512_x86_64/verify.c mlkem-native_ml-kem-512_x86_64/x86_64/src/basemul.c mlkem-native_ml-kem-512_x86_64/x86_64/src/basemul.S mlkem-native_ml-kem-512_x86_64/x86_64/src/consts.c mlkem-native_ml-kem-512_x86_64/x86_64/src/fq.S mlkem-native_ml-kem-512_x86_64/x86_64/src/intt.S mlkem-native_ml-kem-512_x86_64/x86_64/src/ntt.S mlkem-native_ml-kem-512_x86_64/x86_64/src/rej_uniform_avx2.c mlkem-native_ml-kem-512_x86_64/x86_64/src/rej_uniform_table.c mlkem-native_ml-kem-512_x86_64/x86_64/src/shuffle.S mlkem-native_ml-kem-512_x86_64/zetas.c) target_include_directories(ml_kem_512_x86_64 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-512_x86_64) target_include_directories(ml_kem_512_x86_64 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) target_compile_options(ml_kem_512_x86_64 PRIVATE -mavx2 -mbmi2 -mpopcnt ) @@ -24,7 +24,7 @@ if(OQS_ENABLE_KEM_ml_kem_512_x86_64) endif() if(OQS_ENABLE_KEM_ml_kem_512_aarch64) - add_library(ml_kem_512_aarch64 OBJECT mlkem-native_ml-kem-512_aarch64/aarch64/src/aarch64_zetas.c mlkem-native_ml-kem-512_aarch64/aarch64/src/intt_clean.S mlkem-native_ml-kem-512_aarch64/aarch64/src/intt_opt.S mlkem-native_ml-kem-512_aarch64/aarch64/src/ntt_clean.S mlkem-native_ml-kem-512_aarch64/aarch64/src/ntt_opt.S mlkem-native_ml-kem-512_aarch64/aarch64/src/poly_clean.S mlkem-native_ml-kem-512_aarch64/aarch64/src/poly_opt.S mlkem-native_ml-kem-512_aarch64/aarch64/src/polyvec_clean.S mlkem-native_ml-kem-512_aarch64/aarch64/src/polyvec_opt.S mlkem-native_ml-kem-512_aarch64/aarch64/src/rej_uniform_asm_clean.S mlkem-native_ml-kem-512_aarch64/aarch64/src/rej_uniform_table.c 
mlkem-native_ml-kem-512_aarch64/cbd.c mlkem-native_ml-kem-512_aarch64/debug/debug.c mlkem-native_ml-kem-512_aarch64/indcpa.c mlkem-native_ml-kem-512_aarch64/kem.c mlkem-native_ml-kem-512_aarch64/ntt.c mlkem-native_ml-kem-512_aarch64/poly.c mlkem-native_ml-kem-512_aarch64/polyvec.c mlkem-native_ml-kem-512_aarch64/rej_uniform.c mlkem-native_ml-kem-512_aarch64/verify.c mlkem-native_ml-kem-512_aarch64/zetas.c) + add_library(ml_kem_512_aarch64 OBJECT mlkem-native_ml-kem-512_aarch64/aarch64/src/aarch64_zetas.c mlkem-native_ml-kem-512_aarch64/aarch64/src/intt_clean.S mlkem-native_ml-kem-512_aarch64/aarch64/src/intt_opt.S mlkem-native_ml-kem-512_aarch64/aarch64/src/ntt_clean.S mlkem-native_ml-kem-512_aarch64/aarch64/src/ntt_opt.S mlkem-native_ml-kem-512_aarch64/aarch64/src/poly_clean.S mlkem-native_ml-kem-512_aarch64/aarch64/src/poly_opt.S mlkem-native_ml-kem-512_aarch64/aarch64/src/polyvec_clean.S mlkem-native_ml-kem-512_aarch64/aarch64/src/polyvec_opt.S mlkem-native_ml-kem-512_aarch64/aarch64/src/rej_uniform_asm_clean.S mlkem-native_ml-kem-512_aarch64/aarch64/src/rej_uniform_table.c mlkem-native_ml-kem-512_aarch64/cbd.c mlkem-native_ml-kem-512_aarch64/debug.c mlkem-native_ml-kem-512_aarch64/indcpa.c mlkem-native_ml-kem-512_aarch64/kem.c mlkem-native_ml-kem-512_aarch64/ntt.c mlkem-native_ml-kem-512_aarch64/poly.c mlkem-native_ml-kem-512_aarch64/polyvec.c mlkem-native_ml-kem-512_aarch64/rej_uniform.c mlkem-native_ml-kem-512_aarch64/verify.c mlkem-native_ml-kem-512_aarch64/zetas.c) target_include_directories(ml_kem_512_aarch64 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-512_aarch64) target_include_directories(ml_kem_512_aarch64 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) target_compile_options(ml_kem_512_aarch64 PUBLIC -DMLKEM_K=2 -DFORCE_AARCH64 -DMLKEM_NATIVE_ARITH_BACKEND_NAME=AARCH64_OPT -DMLKEM_USE_NATIVE -DMLKEM_NAMESPACE_PREFIX=PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT) @@ -32,7 +32,7 @@ if(OQS_ENABLE_KEM_ml_kem_512_aarch64) endif() 
if(OQS_ENABLE_KEM_ml_kem_768) - add_library(ml_kem_768_ref OBJECT kem_ml_kem_768.c mlkem-native_ml-kem-768_ref/cbd.c mlkem-native_ml-kem-768_ref/debug/debug.c mlkem-native_ml-kem-768_ref/indcpa.c mlkem-native_ml-kem-768_ref/kem.c mlkem-native_ml-kem-768_ref/ntt.c mlkem-native_ml-kem-768_ref/poly.c mlkem-native_ml-kem-768_ref/polyvec.c mlkem-native_ml-kem-768_ref/rej_uniform.c mlkem-native_ml-kem-768_ref/verify.c mlkem-native_ml-kem-768_ref/zetas.c) + add_library(ml_kem_768_ref OBJECT kem_ml_kem_768.c mlkem-native_ml-kem-768_ref/cbd.c mlkem-native_ml-kem-768_ref/debug.c mlkem-native_ml-kem-768_ref/indcpa.c mlkem-native_ml-kem-768_ref/kem.c mlkem-native_ml-kem-768_ref/ntt.c mlkem-native_ml-kem-768_ref/poly.c mlkem-native_ml-kem-768_ref/polyvec.c mlkem-native_ml-kem-768_ref/rej_uniform.c mlkem-native_ml-kem-768_ref/verify.c mlkem-native_ml-kem-768_ref/zetas.c) target_compile_options(ml_kem_768_ref PUBLIC -DMLKEM_K=3 -DMLKEM_NAMESPACE_PREFIX=PQCP_MLKEM_NATIVE_MLKEM768_C) target_include_directories(ml_kem_768_ref PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-768_ref) target_include_directories(ml_kem_768_ref PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) @@ -41,7 +41,7 @@ if(OQS_ENABLE_KEM_ml_kem_768) endif() if(OQS_ENABLE_KEM_ml_kem_768_x86_64) - add_library(ml_kem_768_x86_64 OBJECT mlkem-native_ml-kem-768_x86_64/cbd.c mlkem-native_ml-kem-768_x86_64/debug/debug.c mlkem-native_ml-kem-768_x86_64/indcpa.c mlkem-native_ml-kem-768_x86_64/kem.c mlkem-native_ml-kem-768_x86_64/ntt.c mlkem-native_ml-kem-768_x86_64/poly.c mlkem-native_ml-kem-768_x86_64/polyvec.c mlkem-native_ml-kem-768_x86_64/rej_uniform.c mlkem-native_ml-kem-768_x86_64/verify.c mlkem-native_ml-kem-768_x86_64/x86_64/src/basemul.c mlkem-native_ml-kem-768_x86_64/x86_64/src/basemul.S mlkem-native_ml-kem-768_x86_64/x86_64/src/consts.c mlkem-native_ml-kem-768_x86_64/x86_64/src/fq.S mlkem-native_ml-kem-768_x86_64/x86_64/src/intt.S mlkem-native_ml-kem-768_x86_64/x86_64/src/ntt.S 
mlkem-native_ml-kem-768_x86_64/x86_64/src/rej_uniform_avx2.c mlkem-native_ml-kem-768_x86_64/x86_64/src/rej_uniform_table.c mlkem-native_ml-kem-768_x86_64/x86_64/src/shuffle.S mlkem-native_ml-kem-768_x86_64/zetas.c) + add_library(ml_kem_768_x86_64 OBJECT mlkem-native_ml-kem-768_x86_64/cbd.c mlkem-native_ml-kem-768_x86_64/debug.c mlkem-native_ml-kem-768_x86_64/indcpa.c mlkem-native_ml-kem-768_x86_64/kem.c mlkem-native_ml-kem-768_x86_64/ntt.c mlkem-native_ml-kem-768_x86_64/poly.c mlkem-native_ml-kem-768_x86_64/polyvec.c mlkem-native_ml-kem-768_x86_64/rej_uniform.c mlkem-native_ml-kem-768_x86_64/verify.c mlkem-native_ml-kem-768_x86_64/x86_64/src/basemul.c mlkem-native_ml-kem-768_x86_64/x86_64/src/basemul.S mlkem-native_ml-kem-768_x86_64/x86_64/src/consts.c mlkem-native_ml-kem-768_x86_64/x86_64/src/fq.S mlkem-native_ml-kem-768_x86_64/x86_64/src/intt.S mlkem-native_ml-kem-768_x86_64/x86_64/src/ntt.S mlkem-native_ml-kem-768_x86_64/x86_64/src/rej_uniform_avx2.c mlkem-native_ml-kem-768_x86_64/x86_64/src/rej_uniform_table.c mlkem-native_ml-kem-768_x86_64/x86_64/src/shuffle.S mlkem-native_ml-kem-768_x86_64/zetas.c) target_include_directories(ml_kem_768_x86_64 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-768_x86_64) target_include_directories(ml_kem_768_x86_64 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) target_compile_options(ml_kem_768_x86_64 PRIVATE -mavx2 -mbmi2 -mpopcnt ) @@ -50,7 +50,7 @@ if(OQS_ENABLE_KEM_ml_kem_768_x86_64) endif() if(OQS_ENABLE_KEM_ml_kem_768_aarch64) - add_library(ml_kem_768_aarch64 OBJECT mlkem-native_ml-kem-768_aarch64/aarch64/src/aarch64_zetas.c mlkem-native_ml-kem-768_aarch64/aarch64/src/intt_clean.S mlkem-native_ml-kem-768_aarch64/aarch64/src/intt_opt.S mlkem-native_ml-kem-768_aarch64/aarch64/src/ntt_clean.S mlkem-native_ml-kem-768_aarch64/aarch64/src/ntt_opt.S mlkem-native_ml-kem-768_aarch64/aarch64/src/poly_clean.S mlkem-native_ml-kem-768_aarch64/aarch64/src/poly_opt.S 
mlkem-native_ml-kem-768_aarch64/aarch64/src/polyvec_clean.S mlkem-native_ml-kem-768_aarch64/aarch64/src/polyvec_opt.S mlkem-native_ml-kem-768_aarch64/aarch64/src/rej_uniform_asm_clean.S mlkem-native_ml-kem-768_aarch64/aarch64/src/rej_uniform_table.c mlkem-native_ml-kem-768_aarch64/cbd.c mlkem-native_ml-kem-768_aarch64/debug/debug.c mlkem-native_ml-kem-768_aarch64/indcpa.c mlkem-native_ml-kem-768_aarch64/kem.c mlkem-native_ml-kem-768_aarch64/ntt.c mlkem-native_ml-kem-768_aarch64/poly.c mlkem-native_ml-kem-768_aarch64/polyvec.c mlkem-native_ml-kem-768_aarch64/rej_uniform.c mlkem-native_ml-kem-768_aarch64/verify.c mlkem-native_ml-kem-768_aarch64/zetas.c) + add_library(ml_kem_768_aarch64 OBJECT mlkem-native_ml-kem-768_aarch64/aarch64/src/aarch64_zetas.c mlkem-native_ml-kem-768_aarch64/aarch64/src/intt_clean.S mlkem-native_ml-kem-768_aarch64/aarch64/src/intt_opt.S mlkem-native_ml-kem-768_aarch64/aarch64/src/ntt_clean.S mlkem-native_ml-kem-768_aarch64/aarch64/src/ntt_opt.S mlkem-native_ml-kem-768_aarch64/aarch64/src/poly_clean.S mlkem-native_ml-kem-768_aarch64/aarch64/src/poly_opt.S mlkem-native_ml-kem-768_aarch64/aarch64/src/polyvec_clean.S mlkem-native_ml-kem-768_aarch64/aarch64/src/polyvec_opt.S mlkem-native_ml-kem-768_aarch64/aarch64/src/rej_uniform_asm_clean.S mlkem-native_ml-kem-768_aarch64/aarch64/src/rej_uniform_table.c mlkem-native_ml-kem-768_aarch64/cbd.c mlkem-native_ml-kem-768_aarch64/debug.c mlkem-native_ml-kem-768_aarch64/indcpa.c mlkem-native_ml-kem-768_aarch64/kem.c mlkem-native_ml-kem-768_aarch64/ntt.c mlkem-native_ml-kem-768_aarch64/poly.c mlkem-native_ml-kem-768_aarch64/polyvec.c mlkem-native_ml-kem-768_aarch64/rej_uniform.c mlkem-native_ml-kem-768_aarch64/verify.c mlkem-native_ml-kem-768_aarch64/zetas.c) target_include_directories(ml_kem_768_aarch64 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-768_aarch64) target_include_directories(ml_kem_768_aarch64 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) 
target_compile_options(ml_kem_768_aarch64 PUBLIC -DMLKEM_K=3 -DFORCE_AARCH64 -DMLKEM_NATIVE_ARITH_BACKEND_NAME=AARCH64_OPT -DMLKEM_USE_NATIVE -DMLKEM_NAMESPACE_PREFIX=PQCP_MLKEM_NATIVE_MLKEM768_AARCH64_OPT) @@ -58,7 +58,7 @@ if(OQS_ENABLE_KEM_ml_kem_768_aarch64) endif() if(OQS_ENABLE_KEM_ml_kem_1024) - add_library(ml_kem_1024_ref OBJECT kem_ml_kem_1024.c mlkem-native_ml-kem-1024_ref/cbd.c mlkem-native_ml-kem-1024_ref/debug/debug.c mlkem-native_ml-kem-1024_ref/indcpa.c mlkem-native_ml-kem-1024_ref/kem.c mlkem-native_ml-kem-1024_ref/ntt.c mlkem-native_ml-kem-1024_ref/poly.c mlkem-native_ml-kem-1024_ref/polyvec.c mlkem-native_ml-kem-1024_ref/rej_uniform.c mlkem-native_ml-kem-1024_ref/verify.c mlkem-native_ml-kem-1024_ref/zetas.c) + add_library(ml_kem_1024_ref OBJECT kem_ml_kem_1024.c mlkem-native_ml-kem-1024_ref/cbd.c mlkem-native_ml-kem-1024_ref/debug.c mlkem-native_ml-kem-1024_ref/indcpa.c mlkem-native_ml-kem-1024_ref/kem.c mlkem-native_ml-kem-1024_ref/ntt.c mlkem-native_ml-kem-1024_ref/poly.c mlkem-native_ml-kem-1024_ref/polyvec.c mlkem-native_ml-kem-1024_ref/rej_uniform.c mlkem-native_ml-kem-1024_ref/verify.c mlkem-native_ml-kem-1024_ref/zetas.c) target_compile_options(ml_kem_1024_ref PUBLIC -DMLKEM_K=4 -DMLKEM_NAMESPACE_PREFIX=PQCP_MLKEM_NATIVE_MLKEM1024_C) target_include_directories(ml_kem_1024_ref PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-1024_ref) target_include_directories(ml_kem_1024_ref PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) @@ -67,7 +67,7 @@ if(OQS_ENABLE_KEM_ml_kem_1024) endif() if(OQS_ENABLE_KEM_ml_kem_1024_x86_64) - add_library(ml_kem_1024_x86_64 OBJECT mlkem-native_ml-kem-1024_x86_64/cbd.c mlkem-native_ml-kem-1024_x86_64/debug/debug.c mlkem-native_ml-kem-1024_x86_64/indcpa.c mlkem-native_ml-kem-1024_x86_64/kem.c mlkem-native_ml-kem-1024_x86_64/ntt.c mlkem-native_ml-kem-1024_x86_64/poly.c mlkem-native_ml-kem-1024_x86_64/polyvec.c mlkem-native_ml-kem-1024_x86_64/rej_uniform.c mlkem-native_ml-kem-1024_x86_64/verify.c 
mlkem-native_ml-kem-1024_x86_64/x86_64/src/basemul.c mlkem-native_ml-kem-1024_x86_64/x86_64/src/basemul.S mlkem-native_ml-kem-1024_x86_64/x86_64/src/consts.c mlkem-native_ml-kem-1024_x86_64/x86_64/src/fq.S mlkem-native_ml-kem-1024_x86_64/x86_64/src/intt.S mlkem-native_ml-kem-1024_x86_64/x86_64/src/ntt.S mlkem-native_ml-kem-1024_x86_64/x86_64/src/rej_uniform_avx2.c mlkem-native_ml-kem-1024_x86_64/x86_64/src/rej_uniform_table.c mlkem-native_ml-kem-1024_x86_64/x86_64/src/shuffle.S mlkem-native_ml-kem-1024_x86_64/zetas.c) + add_library(ml_kem_1024_x86_64 OBJECT mlkem-native_ml-kem-1024_x86_64/cbd.c mlkem-native_ml-kem-1024_x86_64/debug.c mlkem-native_ml-kem-1024_x86_64/indcpa.c mlkem-native_ml-kem-1024_x86_64/kem.c mlkem-native_ml-kem-1024_x86_64/ntt.c mlkem-native_ml-kem-1024_x86_64/poly.c mlkem-native_ml-kem-1024_x86_64/polyvec.c mlkem-native_ml-kem-1024_x86_64/rej_uniform.c mlkem-native_ml-kem-1024_x86_64/verify.c mlkem-native_ml-kem-1024_x86_64/x86_64/src/basemul.c mlkem-native_ml-kem-1024_x86_64/x86_64/src/basemul.S mlkem-native_ml-kem-1024_x86_64/x86_64/src/consts.c mlkem-native_ml-kem-1024_x86_64/x86_64/src/fq.S mlkem-native_ml-kem-1024_x86_64/x86_64/src/intt.S mlkem-native_ml-kem-1024_x86_64/x86_64/src/ntt.S mlkem-native_ml-kem-1024_x86_64/x86_64/src/rej_uniform_avx2.c mlkem-native_ml-kem-1024_x86_64/x86_64/src/rej_uniform_table.c mlkem-native_ml-kem-1024_x86_64/x86_64/src/shuffle.S mlkem-native_ml-kem-1024_x86_64/zetas.c) target_include_directories(ml_kem_1024_x86_64 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-1024_x86_64) target_include_directories(ml_kem_1024_x86_64 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) target_compile_options(ml_kem_1024_x86_64 PRIVATE -mavx2 -mbmi2 -mpopcnt ) @@ -76,7 +76,7 @@ if(OQS_ENABLE_KEM_ml_kem_1024_x86_64) endif() if(OQS_ENABLE_KEM_ml_kem_1024_aarch64) - add_library(ml_kem_1024_aarch64 OBJECT mlkem-native_ml-kem-1024_aarch64/aarch64/src/aarch64_zetas.c 
mlkem-native_ml-kem-1024_aarch64/aarch64/src/intt_clean.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/intt_opt.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/ntt_clean.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/ntt_opt.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/poly_clean.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/poly_opt.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/polyvec_clean.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/polyvec_opt.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/rej_uniform_asm_clean.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/rej_uniform_table.c mlkem-native_ml-kem-1024_aarch64/cbd.c mlkem-native_ml-kem-1024_aarch64/debug/debug.c mlkem-native_ml-kem-1024_aarch64/indcpa.c mlkem-native_ml-kem-1024_aarch64/kem.c mlkem-native_ml-kem-1024_aarch64/ntt.c mlkem-native_ml-kem-1024_aarch64/poly.c mlkem-native_ml-kem-1024_aarch64/polyvec.c mlkem-native_ml-kem-1024_aarch64/rej_uniform.c mlkem-native_ml-kem-1024_aarch64/verify.c mlkem-native_ml-kem-1024_aarch64/zetas.c) + add_library(ml_kem_1024_aarch64 OBJECT mlkem-native_ml-kem-1024_aarch64/aarch64/src/aarch64_zetas.c mlkem-native_ml-kem-1024_aarch64/aarch64/src/intt_clean.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/intt_opt.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/ntt_clean.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/ntt_opt.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/poly_clean.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/poly_opt.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/polyvec_clean.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/polyvec_opt.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/rej_uniform_asm_clean.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/rej_uniform_table.c mlkem-native_ml-kem-1024_aarch64/cbd.c mlkem-native_ml-kem-1024_aarch64/debug.c mlkem-native_ml-kem-1024_aarch64/indcpa.c mlkem-native_ml-kem-1024_aarch64/kem.c mlkem-native_ml-kem-1024_aarch64/ntt.c mlkem-native_ml-kem-1024_aarch64/poly.c mlkem-native_ml-kem-1024_aarch64/polyvec.c 
mlkem-native_ml-kem-1024_aarch64/rej_uniform.c mlkem-native_ml-kem-1024_aarch64/verify.c mlkem-native_ml-kem-1024_aarch64/zetas.c) target_include_directories(ml_kem_1024_aarch64 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-1024_aarch64) target_include_directories(ml_kem_1024_aarch64 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) target_compile_options(ml_kem_1024_aarch64 PUBLIC -DMLKEM_K=4 -DFORCE_AARCH64 -DMLKEM_NATIVE_ARITH_BACKEND_NAME=AARCH64_OPT -DMLKEM_USE_NATIVE -DMLKEM_NAMESPACE_PREFIX=PQCP_MLKEM_NATIVE_MLKEM1024_AARCH64_OPT) diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/arith_native_aarch64.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/arith_native_aarch64.h index 6a5ee8a7d..fc4e7dd38 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/arith_native_aarch64.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/arith_native_aarch64.h @@ -75,14 +75,14 @@ void poly_tobytes_asm_clean(uint8_t *r, const int16_t *a); void poly_tobytes_asm_opt(uint8_t *r, const int16_t *a); #define polyvec_basemul_acc_montgomery_cached_asm_clean \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean) void polyvec_basemul_acc_montgomery_cached_asm_clean(int16_t *r, const int16_t *a, const int16_t *b, const int16_t *b_cache); #define polyvec_basemul_acc_montgomery_cached_asm_opt \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt) void polyvec_basemul_acc_montgomery_cached_asm_opt(int16_t *r, const int16_t *a, const int16_t *b, const int16_t *b_cache); diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/clean_impl.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/clean_impl.h index b0ff3d597..548b1eebb 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/clean_impl.h +++ 
b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/clean_impl.h @@ -31,7 +31,6 @@ static INLINE void ntt_native(poly *data) aarch64_ntt_zetas_layer56); } -#define INVNTT_BOUND_NATIVE (8 * MLKEM_Q) static INLINE void intt_native(poly *data) { intt_asm_clean(data->coeffs, aarch64_invntt_zetas_layer01234, diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/intt_clean.S b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/intt_clean.S index 623a82ae9..b243a569d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/intt_clean.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/intt_clean.S @@ -149,7 +149,7 @@ inp .req x3 count .req x4 - xtmp .req x5 + wtmp .req w5 data0 .req v8 data1 .req v9 @@ -193,40 +193,20 @@ t3 .req v28 ninv .req v29 - q_ninv .req q29 ninv_tw .req v30 - q_ninv_tw .req q30 - -/* Literal pool */ -.macro dup8h c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c -.endm - -.p2align 4 -c_consts: .short 3329 - .short 20159 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 -c_ninv: dup8h 512 -c_ninv_tw: dup8h 5040 MLKEM_ASM_NAMESPACE(intt_asm_clean): push_stack - ldr q_consts, c_consts - ldr q_ninv, c_ninv - ldr q_ninv_tw, c_ninv_tw + // Setup constants + mov wtmp, #3329 + mov consts.h[0], wtmp + mov wtmp, #20159 + mov consts.h[1], wtmp + mov wtmp, #512 + dup ninv.8h, wtmp + mov wtmp, #5040 + dup ninv_tw.8h, wtmp mov inp, in mov count, #8 @@ -361,4 +341,49 @@ layer012_start: pop_stack ret +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq in + .unreq r01234_ptr + .unreq r56_ptr + .unreq inp + .unreq count + .unreq wtmp + .unreq data0 + .unreq data1 + .unreq data2 + .unreq data3 + .unreq data4 + .unreq data5 + .unreq data6 + .unreq data7 + .unreq q_data0 + .unreq q_data1 + .unreq q_data2 + .unreq q_data3 + .unreq q_data4 + .unreq q_data5 + .unreq q_data6 + .unreq q_data7 + .unreq root0 + .unreq 
root1 + .unreq root2 + .unreq root0_tw + .unreq root1_tw + .unreq root2_tw + .unreq consts + .unreq q_consts + .unreq q_root0 + .unreq q_root1 + .unreq q_root2 + .unreq q_root0_tw + .unreq q_root1_tw + .unreq q_root2_tw + .unreq tmp + .unreq t0 + .unreq t1 + .unreq t2 + .unreq t3 + .unreq ninv + .unreq ninv_tw + #endif /* MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/intt_opt.S b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/intt_opt.S index e332efef8..c94746e17 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/intt_opt.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/intt_opt.S @@ -149,7 +149,7 @@ inp .req x3 count .req x4 - xtmp .req x5 + wtmp .req w5 data0 .req v8 data1 .req v9 @@ -193,40 +193,20 @@ t3 .req v28 ninv .req v29 - q_ninv .req q29 ninv_tw .req v30 - q_ninv_tw .req q30 - -/* Literal pool */ -.macro dup8h c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c -.endm - -.p2align 4 -c_consts: .short 3329 - .short 20159 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 -c_ninv: dup8h 512 -c_ninv_tw: dup8h 5040 MLKEM_ASM_NAMESPACE(intt_asm_opt): push_stack - ldr q_consts, c_consts - ldr q_ninv, c_ninv - ldr q_ninv_tw, c_ninv_tw + // Setup constants + mov wtmp, #3329 + mov consts.h[0], wtmp + mov wtmp, #20159 + mov consts.h[1], wtmp + mov wtmp, #512 + dup ninv.8h, wtmp + mov wtmp, #5040 + dup ninv_tw.8h, wtmp mov inp, in mov count, #8 @@ -1017,4 +997,49 @@ layer012_start: pop_stack ret +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq in + .unreq r01234_ptr + .unreq r56_ptr + .unreq inp + .unreq count + .unreq wtmp + .unreq data0 + .unreq data1 + .unreq data2 + .unreq data3 + .unreq data4 + .unreq data5 + .unreq data6 + .unreq data7 + .unreq q_data0 + .unreq q_data1 + .unreq q_data2 + .unreq q_data3 + .unreq q_data4 + .unreq q_data5 + .unreq q_data6 + 
.unreq q_data7 + .unreq root0 + .unreq root1 + .unreq root2 + .unreq root0_tw + .unreq root1_tw + .unreq root2_tw + .unreq consts + .unreq q_consts + .unreq q_root0 + .unreq q_root1 + .unreq q_root2 + .unreq q_root0_tw + .unreq q_root1_tw + .unreq q_root2_tw + .unreq tmp + .unreq t0 + .unreq t1 + .unreq t2 + .unreq t3 + .unreq ninv + .unreq ninv_tw + #endif /* MLKEM_NATIVE_ARITH_BACKEND_AARCH64_OPT */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/ntt_clean.S b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/ntt_clean.S index 877a5f689..cd63cc4d6 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/ntt_clean.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/ntt_clean.S @@ -121,7 +121,7 @@ inp .req x3 count .req x4 - xtmp .req x5 + wtmp .req w5 data0 .req v8 data1 .req v9 @@ -156,7 +156,6 @@ q_root2_tw .req q6 consts .req v7 - q_consts .req q7 tmp .req v24 t0 .req v25 @@ -167,21 +166,13 @@ .text .global MLKEM_ASM_NAMESPACE(ntt_asm_clean) -/* Literal pool */ -.p2align 4 -c_consts: - .short 3329 - .short 20159 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 - MLKEM_ASM_NAMESPACE(ntt_asm_clean): push_stack - ldr q_consts, c_consts + + mov wtmp, #3329 + mov consts.h[0], wtmp + mov wtmp, #20159 + mov consts.h[1], wtmp mov inp, in mov count, #4 @@ -280,4 +271,46 @@ layer3456_start: pop_stack ret +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq in + .unreq r01234_ptr + .unreq r56_ptr + .unreq inp + .unreq count + .unreq wtmp + .unreq data0 + .unreq data1 + .unreq data2 + .unreq data3 + .unreq data4 + .unreq data5 + .unreq data6 + .unreq data7 + .unreq q_data0 + .unreq q_data1 + .unreq q_data2 + .unreq q_data3 + .unreq q_data4 + .unreq q_data5 + .unreq q_data6 + .unreq q_data7 + .unreq root0 + .unreq root1 + .unreq root2 + .unreq root0_tw + .unreq root1_tw + .unreq root2_tw + .unreq q_root0 + .unreq q_root1 + .unreq q_root2 + .unreq q_root0_tw + .unreq 
q_root1_tw + .unreq q_root2_tw + .unreq consts + .unreq tmp + .unreq t0 + .unreq t1 + .unreq t2 + .unreq t3 + #endif /* MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/ntt_opt.S b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/ntt_opt.S index 15103a595..8705615b7 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/ntt_opt.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/ntt_opt.S @@ -121,7 +121,7 @@ inp .req x3 count .req x4 - xtmp .req x5 + wtmp .req w5 data0 .req v8 data1 .req v9 @@ -167,21 +167,13 @@ .text .global MLKEM_ASM_NAMESPACE(ntt_asm_opt) -/* Literal pool */ -.p2align 4 -c_consts: - .short 3329 - .short 20159 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 - MLKEM_ASM_NAMESPACE(ntt_asm_opt): push_stack - ldr q_consts, c_consts + + mov wtmp, #3329 + mov consts.h[0], wtmp + mov wtmp, #20159 + mov consts.h[1], wtmp mov inp, in mov count, #4 @@ -916,4 +908,47 @@ MLKEM_ASM_NAMESPACE(ntt_asm_opt): pop_stack ret +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq in + .unreq r01234_ptr + .unreq r56_ptr + .unreq inp + .unreq count + .unreq wtmp + .unreq data0 + .unreq data1 + .unreq data2 + .unreq data3 + .unreq data4 + .unreq data5 + .unreq data6 + .unreq data7 + .unreq q_data0 + .unreq q_data1 + .unreq q_data2 + .unreq q_data3 + .unreq q_data4 + .unreq q_data5 + .unreq q_data6 + .unreq q_data7 + .unreq root0 + .unreq root1 + .unreq root2 + .unreq root0_tw + .unreq root1_tw + .unreq root2_tw + .unreq q_root0 + .unreq q_root1 + .unreq q_root2 + .unreq q_root0_tw + .unreq q_root1_tw + .unreq q_root2_tw + .unreq consts + .unreq q_consts + .unreq tmp + .unreq t0 + .unreq t1 + .unreq t2 + .unreq t3 + #endif /* MLKEM_NATIVE_ARITH_BACKEND_AARCH64_OPT */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/opt_impl.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/opt_impl.h 
index b22674026..ec1bf6587 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/opt_impl.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/opt_impl.h @@ -25,14 +25,12 @@ #define MLKEM_USE_NATIVE_POLY_TOBYTES #define MLKEM_USE_NATIVE_REJ_UNIFORM -#define NTT_BOUND_NATIVE (6 * MLKEM_Q) static INLINE void ntt_native(poly *data) { ntt_asm_opt(data->coeffs, aarch64_ntt_zetas_layer01234, aarch64_ntt_zetas_layer56); } -#define INVNTT_BOUND_NATIVE (8 * MLKEM_Q) static INLINE void intt_native(poly *data) { intt_asm_opt(data->coeffs, aarch64_invntt_zetas_layer01234, diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/poly_clean.S b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/poly_clean.S index f70a40221..809f9667e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/poly_clean.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/poly_clean.S @@ -6,33 +6,6 @@ #include "common.h" #if defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN) -/* We use a single literal pool for all functions in this file. - * This is OK even when the file gets expanded through SLOTHY, - * since PC-relative offets are up to 1MB in AArch64. - * - * The use of dup8h to build constant vectors in memory - * is slightly wasteful and could be avoided with a GPR-load - * followed by Neon `dup`, but we're ultimately only talking - * about 64 bytes, so it seems OK. 
- */ - -.macro dup8h c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c -.endm - -.p2align 4 -c_modulus: dup8h 3329 // ML-KEM modulus -c_modulus_twisted: dup8h 20159 // Barrett twist of 1 wrt 2^27 -c_mont_constant: dup8h -1044 // 2^16 % 3329 -c_barrett_twist: dup8h -10276 // Barrett twist of -1044 (wrt 2^16) - /* * Some modular arithmetic macros */ @@ -70,6 +43,7 @@ c_barrett_twist: dup8h -10276 // Barrett twist of -1044 (wrt 2^16) ptr .req x0 count .req x1 + wtmp .req w2 data .req v0 q_data .req q0 @@ -77,14 +51,15 @@ c_barrett_twist: dup8h -10276 // Barrett twist of -1044 (wrt 2^16) tmp .req v1 mask .req v2 modulus .req v3 - q_modulus .req q3 modulus_twisted .req v4 - q_modulus_twisted .req q4 MLKEM_ASM_NAMESPACE(poly_reduce_asm_clean): - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 // ML-KEM modulus + dup modulus.8h, wtmp + + mov wtmp, #20159 // Barrett twist of 1 wrt 2^27 + dup modulus_twisted.8h, wtmp mov count, #8 loop_start: @@ -115,6 +90,7 @@ loop_start: .unreq ptr .unreq count + .unreq wtmp .unreq data .unreq q_data @@ -122,9 +98,7 @@ loop_start: .unreq tmp .unreq mask .unreq modulus - .unreq q_modulus .unreq modulus_twisted - .unreq q_modulus_twisted /******************************************** * poly_mulcache_compute() * @@ -137,6 +111,7 @@ loop_start: zeta_ptr .req x2 zeta_twisted_ptr .req x3 count .req x4 + wtmp .req w5 data_odd .req v0 zeta .req v1 @@ -152,13 +127,14 @@ loop_start: q_dst .req q5 modulus .req v6 - q_modulus .req q6 modulus_twisted .req v7 - q_modulus_twisted .req q7 MLKEM_ASM_NAMESPACE(poly_mulcache_compute_asm_clean): - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #20159 + dup modulus_twisted.8h, wtmp mov count, #16 mulcache_compute_loop_start: @@ -185,6 +161,7 @@ mulcache_compute_loop_start: .unreq zeta_ptr .unreq zeta_twisted_ptr .unreq count + .unreq wtmp .unreq 
data_odd .unreq zeta @@ -200,9 +177,7 @@ mulcache_compute_loop_start: .unreq q_dst .unreq modulus - .unreq q_modulus .unreq modulus_twisted - .unreq q_modulus_twisted /******************************************** * poly_tobytes() * @@ -261,6 +236,7 @@ poly_tobytes_asm_clean_asm_loop_start: src .req x0 count .req x1 + wtmp .req w2 data .req v0 q_data .req q0 @@ -268,22 +244,25 @@ poly_tobytes_asm_clean_asm_loop_start: q_res .req q1 factor .req v2 - q_factor .req q2 factor_t .req v3 - q_factor_t .req q3 modulus .req v4 - q_modulus .req q4 modulus_twisted .req v5 - q_modulus_twisted .req q5 tmp0 .req v6 MLKEM_ASM_NAMESPACE(poly_tomont_asm_clean): - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted - ldr q_factor, c_mont_constant - ldr q_factor_t, c_barrett_twist + mov wtmp, #3329 // ML-KEM modulus + dup modulus.8h, wtmp + + mov wtmp, #20159 // Barrett twist of 1 wrt 2^27 + dup modulus_twisted.8h, wtmp + + mov wtmp, #-1044 // 2^16 % 3329 + dup factor.8h, wtmp + + mov wtmp, #-10276 // Barrett twist of -1044 (wrt 2^16) + dup factor_t.8h, wtmp mov count, #8 poly_tomont_asm_loop: @@ -311,6 +290,7 @@ poly_tomont_asm_loop: .unreq src .unreq count + .unreq wtmp .unreq data .unreq q_data @@ -318,13 +298,9 @@ poly_tomont_asm_loop: .unreq q_res .unreq factor - .unreq q_factor .unreq factor_t - .unreq q_factor_t .unreq modulus - .unreq q_modulus .unreq modulus_twisted - .unreq q_modulus_twisted .unreq tmp0 diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/poly_opt.S b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/poly_opt.S index e58ee77c4..815a9dd1a 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/poly_opt.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/poly_opt.S @@ -6,33 +6,6 @@ #include "common.h" #if defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_OPT) -/* We use a single literal pool for all functions in this file. 
- * This is OK even when the file gets expanded through SLOTHY, - * since PC-relative offets are up to 1MB in AArch64. - * - * The use of dup8h to build constant vectors in memory - * is slightly wasteful and could be avoided with a GPR-load - * followed by Neon `dup`, but we're ultimately only talking - * about 64 bytes, so it seems OK. - */ - -.macro dup8h c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c -.endm - -.p2align 4 -c_modulus: dup8h 3329 // ML-KEM modulus -c_modulus_twisted: dup8h 20159 // Barrett twist of 1 wrt 2^27 -c_mont_constant: dup8h -1044 // 2^16 % 3329 -c_barrett_twist: dup8h -10276 // Barrett twist of -1044 (wrt 2^16) - /* * Some modular arithmetic macros */ @@ -70,6 +43,7 @@ c_barrett_twist: dup8h -10276 // Barrett twist of -1044 (wrt 2^16) ptr .req x0 count .req x1 + wtmp .req w2 data .req v0 q_data .req q0 @@ -77,14 +51,15 @@ c_barrett_twist: dup8h -10276 // Barrett twist of -1044 (wrt 2^16) tmp .req v1 mask .req v2 modulus .req v3 - q_modulus .req q3 modulus_twisted .req v4 - q_modulus_twisted .req q4 MLKEM_ASM_NAMESPACE(poly_reduce_asm_opt): - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 // ML-KEM modulus + dup modulus.8h, wtmp + + mov wtmp, #20159 // Barrett twist of 1 wrt 2^27 + dup modulus_twisted.8h, wtmp mov count, #8 // Instructions: 15 @@ -278,6 +253,7 @@ MLKEM_ASM_NAMESPACE(poly_reduce_asm_opt): .unreq ptr .unreq count + .unreq wtmp .unreq data .unreq q_data @@ -285,9 +261,7 @@ MLKEM_ASM_NAMESPACE(poly_reduce_asm_opt): .unreq tmp .unreq mask .unreq modulus - .unreq q_modulus .unreq modulus_twisted - .unreq q_modulus_twisted /******************************************** * poly_mulcache_compute() * @@ -300,6 +274,7 @@ MLKEM_ASM_NAMESPACE(poly_reduce_asm_opt): zeta_ptr .req x2 zeta_twisted_ptr .req x3 count .req x4 + wtmp .req w5 data_odd .req v0 zeta .req v1 @@ -315,13 +290,14 @@ MLKEM_ASM_NAMESPACE(poly_reduce_asm_opt): q_dst .req q5 modulus 
.req v6 - q_modulus .req q6 modulus_twisted .req v7 - q_modulus_twisted .req q7 MLKEM_ASM_NAMESPACE(poly_mulcache_compute_asm_opt): - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #20159 + dup modulus_twisted.8h, wtmp mov count, #16 // Instructions: 7 @@ -426,6 +402,7 @@ MLKEM_ASM_NAMESPACE(poly_mulcache_compute_asm_opt): .unreq zeta_ptr .unreq zeta_twisted_ptr .unreq count + .unreq wtmp .unreq data_odd .unreq zeta @@ -441,9 +418,7 @@ MLKEM_ASM_NAMESPACE(poly_mulcache_compute_asm_opt): .unreq q_dst .unreq modulus - .unreq q_modulus .unreq modulus_twisted - .unreq q_modulus_twisted /******************************************** * poly_tobytes() * @@ -502,6 +477,7 @@ poly_tobytes_asm_opt_asm_loop_start: src .req x0 count .req x1 + wtmp .req w2 data .req v0 q_data .req q0 @@ -509,22 +485,25 @@ poly_tobytes_asm_opt_asm_loop_start: q_res .req q1 factor .req v2 - q_factor .req q2 factor_t .req v3 - q_factor_t .req q3 modulus .req v4 - q_modulus .req q4 modulus_twisted .req v5 - q_modulus_twisted .req q5 tmp0 .req v6 MLKEM_ASM_NAMESPACE(poly_tomont_asm_opt): - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted - ldr q_factor, c_mont_constant - ldr q_factor_t, c_barrett_twist + mov wtmp, #3329 // ML-KEM modulus + dup modulus.8h, wtmp + + mov wtmp, #20159 // Barrett twist of 1 wrt 2^27 + dup modulus_twisted.8h, wtmp + + mov wtmp, #-1044 // 2^16 % 3329 + dup factor.8h, wtmp + + mov wtmp, #-10276 // Barrett twist of -1044 (wrt 2^16) + dup factor_t.8h, wtmp mov count, #8 // Instructions: 5 @@ -670,6 +649,7 @@ MLKEM_ASM_NAMESPACE(poly_tomont_asm_opt): .unreq src .unreq count + .unreq wtmp .unreq data .unreq q_data @@ -677,13 +657,9 @@ MLKEM_ASM_NAMESPACE(poly_tomont_asm_opt): .unreq q_res .unreq factor - .unreq q_factor .unreq factor_t - .unreq q_factor_t .unreq modulus - .unreq q_modulus .unreq modulus_twisted - .unreq q_modulus_twisted .unreq tmp0 diff --git 
a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/polyvec_clean.S b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/polyvec_clean.S index 99fb05de5..c91675b44 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/polyvec_clean.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/polyvec_clean.S @@ -12,31 +12,6 @@ #include "common.h" #if defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN) -/* We use a single literal pool for all functions in this file. - * This is OK even when the file gets expanded through SLOTHY, - * since PC-relative offets are up to 1MB in AArch64. - * - * The use of dup8h to build constant vectors in memory - * is slightly wasteful and could be avoided with a GPR-load - * followed by Neon `dup`, but we're ultimately only talking - * about 64 bytes, so it seems OK. - */ - -.macro dup8h c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c -.endm - -.p2align 4 -c_modulus: dup8h 3329 // ML-KEM modulus -c_modulus_twisted: dup8h 3327 - // Input: // - Vectors al, ah of 32-bit entries // Output: @@ -136,11 +111,10 @@ c_modulus_twisted: dup8h 3327 b3_ptr .req x11 b3_cache_ptr .req x12 count .req x13 + wtmp .req w14 modulus .req v0 - q_modulus .req q0 modulus_twisted .req v2 - q_modulus_twisted .req q2 aa0 .req v3 aa1 .req v4 @@ -164,12 +138,16 @@ c_modulus_twisted: dup8h 3327 t0 .req v28 #if MLKEM_K == 2 -.global MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean) +.global MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean) -MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean): +MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean): push_stack - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #3327 + dup modulus_twisted.8h, wtmp // Computed bases of vector entries @@ -198,12 +176,15 @@ k2_loop_start: #endif /* 
MLKEM_K == 2 */ #if MLKEM_K == 3 -.global MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean) +.global MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean) -MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean): +MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean): push_stack - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #3327 + dup modulus_twisted.8h, wtmp // Computed bases of vector entries @@ -237,12 +218,15 @@ k3_loop_start: #endif /* MLKEM_K == 3 */ #if MLKEM_K == 4 -.global MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean) +.global MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean) -MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean): +MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean): push_stack - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #3327 + dup modulus_twisted.8h, wtmp // Computed bases of vector entries @@ -285,4 +269,39 @@ k4_loop_start: ret #endif /* MLKEM_K == 4 */ +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq out + .unreq a0_ptr + .unreq b0_ptr + .unreq b0_cache_ptr + .unreq a1_ptr + .unreq b1_ptr + .unreq b1_cache_ptr + .unreq a2_ptr + .unreq b2_ptr + .unreq b2_cache_ptr + .unreq a3_ptr + .unreq b3_ptr + .unreq b3_cache_ptr + .unreq count + .unreq modulus + .unreq modulus_twisted + .unreq aa0 + .unreq aa1 + .unreq bb0 + .unreq bb1 + .unreq bb1t + .unreq res0l + .unreq res1l + .unreq res0h + .unreq wtmp + .unreq res1h + .unreq tmp0 + .unreq tmp1 + .unreq q_tmp0 + .unreq q_tmp1 + .unreq out0 + .unreq out1 + .unreq t0 + #endif /* MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/polyvec_opt.S 
b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/polyvec_opt.S index 16ed77c3f..8300b682c 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/polyvec_opt.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/polyvec_opt.S @@ -12,31 +12,6 @@ #include "common.h" #if defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_OPT) -/* We use a single literal pool for all functions in this file. - * This is OK even when the file gets expanded through SLOTHY, - * since PC-relative offets are up to 1MB in AArch64. - * - * The use of dup8h to build constant vectors in memory - * is slightly wasteful and could be avoided with a GPR-load - * followed by Neon `dup`, but we're ultimately only talking - * about 64 bytes, so it seems OK. - */ - -.macro dup8h c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c -.endm - -.p2align 4 -c_modulus: dup8h 3329 // ML-KEM modulus -c_modulus_twisted: dup8h 3327 - // Input: // - Vectors al, ah of 32-bit entries // Output: @@ -136,11 +111,10 @@ c_modulus_twisted: dup8h 3327 b3_ptr .req x11 b3_cache_ptr .req x12 count .req x13 + wtmp .req w14 modulus .req v0 - q_modulus .req q0 modulus_twisted .req v2 - q_modulus_twisted .req q2 aa0 .req v3 aa1 .req v4 @@ -164,12 +138,16 @@ c_modulus_twisted: dup8h 3327 t0 .req v28 #if MLKEM_K == 2 -.global MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt) +.global MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt) -MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt): +MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt): push_stack - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #3327 + dup modulus_twisted.8h, wtmp // Computed bases of vector entries @@ -530,12 +508,15 @@ MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt): #endif /* MLKEM_K == 2 */ #if MLKEM_K == 3 -.global 
MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt) +.global MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt) -MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt): +MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt): push_stack - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #3327 + dup modulus_twisted.8h, wtmp // Computed bases of vector entries @@ -1001,12 +982,15 @@ MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt): #endif /* MLKEM_K == 3 */ #if MLKEM_K == 4 -.global MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt) +.global MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt) -MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt): +MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt): push_stack - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #3327 + dup modulus_twisted.8h, wtmp // Computed bases of vector entries @@ -1581,4 +1565,39 @@ MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt): ret #endif /* MLKEM_K == 4 */ +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq out + .unreq a0_ptr + .unreq b0_ptr + .unreq b0_cache_ptr + .unreq a1_ptr + .unreq b1_ptr + .unreq b1_cache_ptr + .unreq a2_ptr + .unreq b2_ptr + .unreq b2_cache_ptr + .unreq a3_ptr + .unreq b3_ptr + .unreq b3_cache_ptr + .unreq count + .unreq modulus + .unreq modulus_twisted + .unreq wtmp + .unreq aa0 + .unreq aa1 + .unreq bb0 + .unreq bb1 + .unreq bb1t + .unreq res0l + .unreq res1l + .unreq res0h + .unreq res1h + .unreq tmp0 + .unreq tmp1 + .unreq q_tmp0 + .unreq q_tmp1 + .unreq out0 + .unreq out1 + .unreq t0 + #endif /* MLKEM_NATIVE_ARITH_BACKEND_AARCH64_OPT */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/rej_uniform_asm_clean.S 
b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/rej_uniform_asm_clean.S index 722dc0f49..5151a05d0 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/rej_uniform_asm_clean.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/aarch64/src/rej_uniform_asm_clean.S @@ -45,6 +45,7 @@ len .req w4 /* Temporary output on the stack */ + xtmp .req x7 output_tmp .req x7 output_tmp_base .req x8 @@ -110,20 +111,26 @@ mlkem_q .req v30 bits .req v31 - bits_q .req q31 .text -/* Literal pool */ -.p2align 4 -c_bit_table: - .short 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 - .align 4 .global MLKEM_ASM_NAMESPACE(rej_uniform_asm_clean) MLKEM_ASM_NAMESPACE(rej_uniform_asm_clean): push_stack - ldr bits_q, c_bit_table + // Load 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + movz xtmp, 0x1 + movk xtmp, 0x2, lsl 16 + movk xtmp, 0x4, lsl 32 + movk xtmp, 0x8, lsl 48 + mov bits.d[0], xtmp + + movz xtmp, 0x10 + movk xtmp, 0x20, lsl 16 + movk xtmp, 0x40, lsl 32 + movk xtmp, 0x80, lsl 48 + mov bits.d[1], xtmp + movz tmp, #MLKEM_Q dup mlkem_q.8h, tmp @@ -337,5 +344,63 @@ return: pop_stack ret + +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq output + .unreq buf + .unreq buflen + .unreq table_idx + .unreq len + .unreq output_tmp + .unreq output_tmp_base + .unreq count + .unreq buf_consumed + .unreq tmp + .unreq xtmp + .unreq final_copy_count + .unreq rec_idx_0 + .unreq rec_idx_1 + .unreq rec_idx_2 + .unreq rec_idx_3 + .unreq ctr0 + .unreq ctr1 + .unreq ctr2 + .unreq ctr3 + .unreq ctr01 + .unreq ctr23 + .unreq buf0 + .unreq buf1 + .unreq buf2 + .unreq tmp0 + .unreq tmp1 + .unreq tmp2 + .unreq tmp3 + .unreq sign0 + .unreq sign1 + .unreq sign2 + .unreq sign3 + .unreq val0 + .unreq val0q + .unreq val1 + .unreq val1q + .unreq val2 + .unreq val2q + .unreq val3 + .unreq val3q + .unreq t0 + .unreq t1 + .unreq t2 + .unreq t3 + .unreq table0 + .unreq table0q + .unreq table1 + .unreq table1q + .unreq table2 + .unreq table2q + .unreq table3 + .unreq 
table3q + .unreq mlkem_q + .unreq bits + #endif /* defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN) || defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_OPT) */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/arith_backend.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/arith_backend.h index 09e30f207..0543b1bd1 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/arith_backend.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/arith_backend.h @@ -16,7 +16,9 @@ * * Keep this _after_ the inclusion of the backend; otherwise, * the sanity checks won't have an effect. */ +#if defined(MLKEM_NATIVE_CHECK_APIS) #include "api.h" #endif +#endif #endif /* MLKEM_NATIVE_ARITH_IMPL_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbd.c index 433bdc954..1e6b7c5d1 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbd.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbd.c @@ -2,8 +2,11 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ -#include "cbd.h" +#include "common.h" +#ifndef MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED + #include +#include "cbd.h" /* Static namespacing * This is to facilitate building multiple instances @@ -11,8 +14,6 @@ * within a single compilation unit. */ #define load32_littleendian MLKEM_NAMESPACE(load32_littleendian) #define load24_littleendian MLKEM_NAMESPACE(load24_littleendian) -#define cbd2 MLKEM_NAMESPACE(cbd2) -#define cbd3 MLKEM_NAMESPACE(cbd3) /* End of static namespacing */ /************************************************* @@ -35,44 +36,13 @@ static uint32_t load32_littleendian(const uint8_t x[4]) return r; } -#if MLKEM_ETA1 == 3 -/************************************************* - * Name: load24_littleendian - * - * Description: load 3 bytes into a 32-bit integer - * in little-endian order. 
- * This function is only needed for ML-KEM-512 - * - * Arguments: - const uint8_t *x: pointer to input byte array - * - * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) - **************************************************/ -static uint32_t load24_littleendian(const uint8_t x[3]) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - return r; -} -#endif /* MLKEM_ETA1 == 3 */ - -/************************************************* - * Name: cbd2 - * - * Description: Given an array of uniformly random bytes, compute - * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter eta=2 - * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *buf: pointer to input byte array - **************************************************/ -static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) +MLKEM_NATIVE_INTERNAL_API +void poly_cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) { unsigned i; for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_abs_bound(r->coeffs, 0, 8 * i, 3))) { unsigned j; @@ -82,7 +52,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i <= MLKEM_N / 8 && j <= 8) invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 3))) { const int16_t a = (d >> (4 * j + 0)) & 0x3; @@ -92,24 +62,34 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) } } -#if MLKEM_ETA1 == 3 +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == 3 /************************************************* - * Name: cbd3 + * Name: load24_littleendian * - * Description: Given an array of uniformly random bytes, compute - * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter 
eta=3. + * Description: load 3 bytes into a 32-bit integer + * in little-endian order. * This function is only needed for ML-KEM-512 * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *buf: pointer to input byte array + * Arguments: - const uint8_t *x: pointer to input byte array + * + * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) **************************************************/ -static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) +static uint32_t load24_littleendian(const uint8_t x[3]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + return r; +} + +MLKEM_NATIVE_INTERNAL_API +void poly_cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) { unsigned i; for (i = 0; i < MLKEM_N / 4; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(i <= MLKEM_N / 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i, 4))) { unsigned j; @@ -120,7 +100,7 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) for (j = 0; j < 4; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4) + invariant(i <= MLKEM_N / 4 && j <= 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 4))) { const int16_t a = (d >> (6 * j + 0)) & 0x7; @@ -129,28 +109,12 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) } } } -#endif /* MLKEM_ETA1 == 3 */ +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == \ + 3 */ -MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) -{ -#if MLKEM_ETA1 == 2 - cbd2(r, buf); -#elif MLKEM_ETA1 == 3 - cbd3(r, buf); -#else -#error "This implementation requires eta1 in {2,3}" -#endif -} +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ -#if MLKEM_K == 2 || MLKEM_K == 4 -MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) -{ -#if MLKEM_ETA2 == 2 - cbd2(r, buf); -#else -#error "This 
implementation requires eta2 = 2" -#endif -} -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ +#define empty_cu_cbd MLKEM_NAMESPACE_K(empty_cu_cbd) +int empty_cu_cbd; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbd.h index 15db89570..54c1f5b90 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbd.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbd.h @@ -9,46 +9,35 @@ #include "common.h" #include "poly.h" -#define poly_cbd_eta1 MLKEM_NAMESPACE(poly_cbd_eta1) +#define poly_cbd2 MLKEM_NAMESPACE(poly_cbd2) /************************************************* - * Name: poly_cbd_eta1 + * Name: poly_cbd2 * * Description: Given an array of uniformly random bytes, compute * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter MLKEM_ETA1. + * a centered binomial distribution with parameter eta=2 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *buf: pointer to input byte array **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)) -); +void poly_cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]); -#if MLKEM_K == 2 || MLKEM_K == 4 -#define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2) +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == 3 +#define poly_cbd3 MLKEM_NAMESPACE(poly_cbd3) /************************************************* - * Name: poly_cbd_eta1 + * Name: poly_cbd3 * * Description: Given an array of uniformly random bytes, compute * polynomial with coefficients distributed according to - * a centered binomial 
distribution with parameter MLKEM_ETA2. + * a centered binomial distribution with parameter eta=3. + * This function is only needed for ML-KEM-512 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *buf: pointer to input byte array **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)) -); -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ +void poly_cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]); +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED || MLKEM_ETA1 == 3 */ -#endif +#endif /* CBD_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbmc.h index baa0bfa9f..52b95bc3f 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbmc.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbmc.h @@ -13,7 +13,7 @@ #define __contract__(x) #define __loop__(x) -#define cassert(x, y) +#define cassert(x) #else /* CBMC _is_ defined, therefore we're doing proof */ @@ -30,7 +30,7 @@ #define invariant(...) __CPROVER_loop_invariant(__VA_ARGS__) #define decreases(...) __CPROVER_decreases(__VA_ARGS__) /* cassert to avoid confusion with in-built assert */ -#define cassert(...) __CPROVER_assert(__VA_ARGS__) +#define cassert(x) __CPROVER_assert(x, "cbmc assertion failed") #define assume(...)
__CPROVER_assume(__VA_ARGS__) /*************************************************** @@ -119,13 +119,13 @@ { \ unsigned qvar; \ ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> \ - (((value_lb) <= (array_var[(qvar)])) && \ - ((array_var[(qvar)]) < (value_ub))) \ + (((int)(value_lb) <= ((array_var)[(qvar)])) && \ + (((array_var)[(qvar)]) < (int)(value_ub))) \ } #define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \ array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \ - (qvar_ub), (array_var), (value_lb), (value_ub)) + (qvar_ub), (array_var), (value_lb), (value_ub)) /* clang-format on */ /* Wrapper around array_bound operating on absolute values. @@ -134,6 +134,6 @@ * bound in array_bound is inclusive, we have to raise it by 1. */ #define array_abs_bound(arr, lb, ub, k) \ - array_bound((arr), (lb), (ub), -(k) + 1, (k)) + array_bound((arr), (lb), (ub), -((int)(k)) + 1, (k)) #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/common.h index da886780c..4f326333e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/common.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/common.h @@ -43,23 +43,30 @@ #define MLKEM_NATIVE_MAKE_NAMESPACE_(x1, x2) x1##_##x2 #define MLKEM_NATIVE_MAKE_NAMESPACE(x1, x2) MLKEM_NATIVE_MAKE_NAMESPACE_(x1, x2) -#define FIPS202_NAMESPACE(s) \ - MLKEM_NATIVE_MAKE_NAMESPACE(FIPS202_NAMESPACE_PREFIX, s) - #define MLKEM_NAMESPACE(s) \ MLKEM_NATIVE_MAKE_NAMESPACE(MLKEM_NAMESPACE_PREFIX, s) +#if defined(MLKEM_NAMESPACE_PREFIX_ADD_LEVEL) +#define MLKEM_NATIVE_MAKE_NAMESPACE_K_(x1, x2, x3) x1##x2##_##x3 +#define MLKEM_NATIVE_MAKE_NAMESPACE_K(x1, x2, x3) \ + MLKEM_NATIVE_MAKE_NAMESPACE_K_(x1, x2, x3) +#define MLKEM_NAMESPACE_K(s) \ + MLKEM_NATIVE_MAKE_NAMESPACE_K(MLKEM_NAMESPACE_PREFIX, MLKEM_LVL, s) +#else +#define MLKEM_NAMESPACE_K(s) MLKEM_NAMESPACE(s) +#endif + /* On Apple platforms, we need to emit leading underscore * in front of assembly 
symbols. We thus introducee a separate * namespace wrapper for ASM symbols. */ #if !defined(__APPLE__) #define MLKEM_ASM_NAMESPACE(sym) MLKEM_NAMESPACE(sym) -#define FIPS202_ASM_NAMESPACE(sym) FIPS202_NAMESPACE(sym) +#define MLKEM_ASM_NAMESPACE_K(sym) MLKEM_NAMESPACE_K(sym) #else #define PREFIX_UNDERSCORE_(sym) _##sym #define PREFIX_UNDERSCORE(sym) PREFIX_UNDERSCORE_(sym) #define MLKEM_ASM_NAMESPACE(sym) PREFIX_UNDERSCORE(MLKEM_NAMESPACE(sym)) -#define FIPS202_ASM_NAMESPACE(sym) PREFIX_UNDERSCORE(FIPS202_NAMESPACE(sym)) +#define MLKEM_ASM_NAMESPACE_K(sym) PREFIX_UNDERSCORE(MLKEM_NAMESPACE_K(sym)) #endif #endif /* MLKEM_NATIVE_COMMON_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/config.h index d1441835b..fa89370ce 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/config.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/config.h @@ -40,10 +40,12 @@ /* #define MLKEM_NATIVE_CONFIG_FILE "config.h" */ /****************************************************************************** - * Name: MLKEM_NAMESPACE + * Name: MLKEM_NAMESPACE_PREFIX * - * Description: The prefix to use to namespace global symbols - * from mlkem/. + * Description: The prefix to use to namespace global symbols from mlkem/. + * + * Level-dependent symbols will additionally be prefixed with the + * security level if MLKEM_NAMESPACE_PREFIX_ADD_LEVEL is set. * * This can also be set using CFLAGS. * @@ -53,17 +55,71 @@ #endif /****************************************************************************** - * Name: FIPS202_NAMESPACE + * Name: MLKEM_NAMESPACE_PREFIX_ADD_LEVEL + * + * Description: If set, the level (512, 768, 1024) is added to the namespace + * prefix MLKEM_NAMESPACE_PREFIX for all functions which are + * level-dependent. Level-independent functions will have their + * symbol prefixed by MLKEM_NAMESPACE_PREFIX only.
* - * Description: The prefix to use to namespace global symbols - * from mlkem/fips202/. + * This is intended to be used for multi-level builds where + * level-independent code should be shared across levels. * * This can also be set using CFLAGS. * *****************************************************************************/ -#if !defined(FIPS202_NAMESPACE_PREFIX) -#define FIPS202_NAMESPACE_PREFIX FIPS202_DEFAULT_NAMESPACE_PREFIX -#endif +/* #define MLKEM_NAMESPACE_PREFIX_ADD_LEVEL */ + +/****************************************************************************** + * Name: MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED + * + * Description: This is for multi-level builds of mlkem-native only. If you + * need only a single security level build of mlkem-native, + * keep this unset. + * + * If this is set, all MLKEM_K-independent code will be included + * in the build, including code needed only for other security + * levels. + * + * Example: poly_cbd3 is only needed for MLKEM_K == 2. Yet, if + * this option is set for a build with MLKEM_K==3/4, it would + * be included. + * + * To build mlkem-native with support for all security levels, + * build it three times -- once per level -- and set the option + * MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED for exactly one of + * them, and MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED for the + * others. + * + * See examples/multilevel_build for an example. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +/* #define MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED */ + +/****************************************************************************** + * Name: MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED + * + * Description: This is for multi-level builds of mlkem-native only. If you + * need only a single security level build of mlkem-native, + * keep this unset. + * + * If this is set, no MLKEM_K-independent code will be included + * in the build. 
+ * + * To build mlkem-native with support for all security levels, + * build it three times -- once per level -- and set the option + * MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED for exactly one of + * them, and MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED for the + * others. + * + * See examples/multilevel_build for an example. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +/* #define MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ /****************************************************************************** * Name: MLKEM_USE_NATIVE @@ -112,25 +168,13 @@ /* Default namespace * * Don't change this. If you need a different namespace, re-define - * MLKEM_NAMESPACE above instead, and remove the following. - */ - -/* - * The default FIPS202 namespace is - * - * PQCP_MLKEM_NATIVE_FIPS202__ + * MLKEM_NAMESPACE_PREFIX above instead, and remove the following. * - * e.g., PQCP_MLKEM_NATIVE_FIPS202_C_ - */ - -#define FIPS202_DEFAULT_NAMESPACE_PREFIX PQCP_MLKEM_NATIVE_FIPS202 - -/* * The default MLKEM namespace is * - * PQCP_MLKEM_NATIVE_MLKEM__ + * PQCP_MLKEM_NATIVE_MLKEM_ * - * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_ + * e.g., PQCP_MLKEM_NATIVE_MLKEM512_ */ #if MLKEM_K == 2 diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/debug.c new file mode 100644 index 000000000..4b4857cbc --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/debug.c @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +/* NOTE: You can remove this file unless you compile with MLKEM_DEBUG. 
*/ + +#include "common.h" + +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) && defined(MLKEM_DEBUG) + + +#include +#include +#include "debug.h" + +#define MLKEM_NATIVE_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] " + +void mlkem_debug_assert(const char *file, int line, const int val) +{ + if (val == 0) + { + fprintf(stderr, + MLKEM_NATIVE_DEBUG_ERROR_HEADER "Assertion failed (value %d)\n", + file, line, val); + exit(1); + } +} + +void mlkem_debug_check_bounds(const char *file, int line, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive) +{ + int err = 0; + unsigned i; + for (i = 0; i < len; i++) + { + int16_t val = ptr[i]; + if (!(val > lower_bound_exclusive && val < upper_bound_exclusive)) + { + fprintf( + stderr, + MLKEM_NATIVE_DEBUG_ERROR_HEADER + "Bounds assertion failed: Index %u, value %d out of bounds (%d,%d)\n", + file, line, i, (int)val, lower_bound_exclusive, + upper_bound_exclusive); + err = 1; + } + } + + if (err == 1) + exit(1); +} + +#else /* !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED && MLKEM_DEBUG */ + +#define empty_cu_debug MLKEM_NAMESPACE_K(empty_cu_debug) +int empty_cu_debug; + +#endif /* !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED && MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/debug.h new file mode 100644 index 000000000..1103124db --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/debug.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef MLKEM_DEBUG_H +#define MLKEM_DEBUG_H +#include "common.h" + +#if defined(MLKEM_DEBUG) +#include + +/************************************************* + * Name: mlkem_debug_assert + * + * Description: Check debug assertion + * + * Prints an error message to stderr and calls + * exit(1) if not. 
+ * + * Arguments: - file: filename + * - line: line number + * - val: Value asserted to be non-zero + **************************************************/ +#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert) +void mlkem_debug_assert(const char *file, int line, const int val); + +/************************************************* + * Name: mlkem_debug_check_bounds + * + * Description: Check whether values in an array of int16_t + * are within specified bounds. + * + * Prints an error message to stderr and calls + * exit(1) if not. + * + * Arguments: - file: filename + * - line: line number + * - ptr: Base of array to be checked + * - len: Number of int16_t in ptr + * - lower_bound_exclusive: Exclusive lower bound + * - upper_bound_exclusive: Exclusive upper bound + **************************************************/ +#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds) +void mlkem_debug_check_bounds(const char *file, int line, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive); + +/* Check assertion, calling exit() upon failure + * + * val: Value that's asserted to be non-zero + */ +#define debug_assert(val) mlkem_debug_assert(__FILE__, __LINE__, (val)) + +/* Check bounds in array of int16_t's + * ptr: Base of int16_t array; will be explicitly cast to int16_t*, + * so you may pass a byte-compatible type such as poly or polyvec. 
+ * len: Number of int16_t in array + * value_lb: Inclusive lower value bound + * value_ub: Exclusive upper value bound */ +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + mlkem_debug_check_bounds(__FILE__, __LINE__, (const int16_t *)(ptr), (len), \ + (value_lb)-1, (value_ub)) + +/* Check absolute bounds in array of int16_t's + * ptr: Base of array, expression of type int16_t* + * len: Number of int16_t in array + * value_abs_bd: Exclusive absolute upper bound */ +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + debug_assert_bound((ptr), (len), (-(value_abs_bd) + 1), (value_abs_bd)) + +/* Version of bounds assertions for 2-dimensional arrays */ +#define debug_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \ + debug_assert_bound((ptr), ((len0) * (len1)), (value_lb), (value_ub)) + +#define debug_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \ + debug_assert_abs_bound((ptr), ((len0) * (len1)), (value_abs_bd)) + +/* When running CBMC, convert debug assertions into proof obligations */ +#elif defined(CBMC) + +#include "../cbmc.h" + +#define debug_assert(val) cassert(val) + +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + cassert(array_bound(((int16_t *)(ptr)), 0, (len), (value_lb), (value_ub))) + +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + cassert(array_abs_bound(((int16_t *)(ptr)), 0, (len), (value_abs_bd))) + +/* Because of https://github.com/diffblue/cbmc/issues/8570, we can't + * just use a single flattened array_bound(...) here. 
*/ +#define debug_assert_bound_2d(ptr, M, N, value_lb, value_ub) \ + cassert(forall(kN, 0, (M), \ + array_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \ + (value_lb), (value_ub)))) + +#define debug_assert_abs_bound_2d(ptr, M, N, value_abs_bd) \ + cassert(forall(kN, 0, (M), \ + array_abs_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \ + (value_abs_bd)))) + +#else /* MLKEM_DEBUG */ + +#define debug_assert(val) \ + do \ + { \ + } while (0) +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + do \ + { \ + } while (0) +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + do \ + { \ + } while (0) + +#define debug_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \ + do \ + { \ + } while (0) + +#define debug_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \ + do \ + { \ + } while (0) + + +#endif /* MLKEM_DEBUG */ +#endif /* MLKEM_DEBUG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/debug/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/debug/debug.c deleted file mode 100644 index 64294ebe1..000000000 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/debug/debug.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2024 The mlkem-native project authors - * SPDX-License-Identifier: Apache-2.0 - */ -#include "../common.h" - -#if defined(MLKEM_DEBUG) - -#include -#include "debug.h" - -#define MLKEM_NATIVE_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] " - -void mlkem_debug_assert(const char *file, int line, const char *description, - const int val) -{ - if (val == 0) - { - fprintf(stderr, - MLKEM_NATIVE_DEBUG_ERROR_HEADER "Assertion failed: %s (value %d)\n", - file, line, description, val); - exit(1); - } -} - -void mlkem_debug_check_bounds(const char *file, int line, - const char *description, const int16_t *ptr, - unsigned len, int lower_bound_exclusive, - int upper_bound_exclusive) -{ - int err = 0; - unsigned i; - for (i = 0; i < len; i++) - { - int16_t val = ptr[i]; - if (!(val > lower_bound_exclusive && val < 
upper_bound_exclusive)) - { - fprintf(stderr, - MLKEM_NATIVE_DEBUG_ERROR_HEADER - "%s, index %u, value %d out of bounds (%d,%d)\n", - file, line, description, i, (int)val, lower_bound_exclusive, - upper_bound_exclusive); - err = 1; - } - } - - if (err == 1) - exit(1); -} - -#else /* MLKEM_DEBUG */ - -#define empty_cu_debug MLKEM_NAMESPACE(empty_cu_debug) -int empty_cu_debug; - -#endif /* MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/debug/debug.h deleted file mode 100644 index 5ce320ea2..000000000 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/debug/debug.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2024 The mlkem-native project authors - * SPDX-License-Identifier: Apache-2.0 - */ -#ifndef MLKEM_DEBUG_H -#define MLKEM_DEBUG_H - -#include "../common.h" - -#if defined(MLKEM_DEBUG) -#include -#include -#include - -/************************************************* - * Name: mlkem_debug_assert - * - * Description: Check debug assertion - * - * Prints an error message to stderr and calls - * exit(1) if not. - * - * Arguments: - file: filename - * - line: line number - * - description: Textual description of assertion - * - val: Value asserted to be non-zero - **************************************************/ -#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert) -void mlkem_debug_assert(const char *file, int line, const char *description, - const int val); - -/************************************************* - * Name: mlkem_debug_check_bounds - * - * Description: Check whether values in an array of int16_t - * are within specified bounds. - * - * Prints an error message to stderr and calls - * exit(1) if not. 
- * - * Arguments: - file: filename - * - line: line number - * - description: Textual description of check - * - ptr: Base of array to be checked - * - len: Number of int16_t in ptr - * - lower_bound_exclusive: Exclusive lower bound - * - upper_bound_exclusive: Exclusive upper bound - **************************************************/ -#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds) -void mlkem_debug_check_bounds(const char *file, int line, - const char *description, const int16_t *ptr, - unsigned len, int lower_bound_exclusive, - int upper_bound_exclusive); - -/* Check assertion, calling exit() upon failure - * - * val: Value that's asserted to be non-zero - * msg: Message to print on failure - * - * Currently called CASSERT to avoid clash with CBMC assert. - */ -#define CASSERT(val, msg) \ - do \ - { \ - mlkem_debug_assert(__FILE__, __LINE__, (msg), (val)); \ - } while (0) - -/* Check absolute bounds of scalar - * val: Scalar to be checked - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define SCALAR_BOUND(val, abs_bound, msg) \ - CASSERT((val) > -(abs_bound) && (val) < (abs_bound), msg) - -/* Check that all coefficients in array of int16_t's are non-negative - * and below an exclusive upper bound. 
- * - * ptr: Base of array, expression of type int16_t* - * len: Number of int16_t in array - * high_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define UBOUND(ptr, len, high_bound, msg) \ - do \ - { \ - mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ - (len), -1, ((high_bound))); \ - } while (0) - -/* Check absolute bounds in array of int16_t's - * ptr: Base of array, expression of type int16_t* - * len: Number of int16_t in array - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define BOUND(ptr, len, abs_bound, msg) \ - do \ - { \ - mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ - (len), -(abs_bound), (abs_bound)); \ - } while (0) - -/* Check absolute bounds on coefficients in polynomial or mulcache - * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define POLY_BOUND_MSG(ptr, abs_bound, msg) \ - BOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (abs_bound), \ - msg) - -/* Check unsigned bounds on coefficients in polynomial or mulcache - * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
- * msg: Message to print on failure */ -#define POLY_UBOUND_MSG(ptr, ubound, msg) \ - UBOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (ubound), \ - msg) - -/* Check absolute bounds on coefficients in polynomial - * ptr: poly* of poly_mulcache* pointer to polynomial (cache) to check - * abs_bound: Exclusive upper bound on absolute value to check */ -#define POLY_BOUND(ptr, abs_bound) \ - POLY_BOUND_MSG((ptr), (abs_bound), "poly absolute bound for " #ptr) - -/* Check unsigned bounds on coefficients in polynomial - * ptr: poly* of poly_mulcache* pointer to polynomial (cache) to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. - */ -#define POLY_UBOUND(ptr, ubound) \ - POLY_UBOUND_MSG((ptr), (ubound), "poly unsigned bound for " #ptr) - -/* Check absolute bounds on coefficients in vector of polynomials - * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check - * abs_bound: Exclusive upper bound on absolute value to check */ -#define POLYVEC_BOUND(ptr, abs_bound) \ - do \ - { \ - unsigned _debug_polyvec_bound_idx; \ - for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ - _debug_polyvec_bound_idx++) \ - POLY_BOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (abs_bound), \ - "polyvec absolute bound for " #ptr ".vec[i]"); \ - } while (0) - -/* Check unsigned bounds on coefficients in vector of polynomials - * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
- */ -#define POLYVEC_UBOUND(ptr, ubound) \ - do \ - { \ - unsigned _debug_polyvec_bound_idx; \ - for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ - _debug_polyvec_bound_idx++) \ - POLY_UBOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (ubound), \ - "polyvec unsigned bound for " #ptr ".vec[i]"); \ - } while (0) - -#define MLKEM_CONCAT_(left, right) left##right -#define MLKEM_CONCAT(left, right) MLKEM_CONCAT_(left, right) - -/* Following AWS-LC to define a C99-compliant static assert */ -#define MLKEM_STATIC_ASSERT_DEFINE(cond, msg) \ - typedef struct \ - { \ - unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \ - } MLKEM_CONCAT(MLKEM_NAMESPACE(static_assertion_), msg) \ - __attribute__((unused)); - -#define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \ - MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix)) -#define MLKEM_STATIC_ASSERT_ADD_LINE1(cond, line, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE0(cond, MLKEM_CONCAT(line, suffix)) -#define MLKEM_STATIC_ASSERT_ADD_LINE2(cond, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE1(cond, __LINE__, suffix) -#define MLKEM_STATIC_ASSERT_ADD_ERROR(cond, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE2(cond, MLKEM_CONCAT(_error_is_, suffix)) -#define STATIC_ASSERT(cond, error) MLKEM_STATIC_ASSERT_ADD_ERROR(cond, error) - -#else /* MLKEM_DEBUG */ - -#define CASSERT(val, msg) \ - do \ - { \ - } while (0) -#define SCALAR_BOUND(val, abs_bound, msg) \ - do \ - { \ - } while (0) -#define BOUND(ptr, len, abs_bound, msg) \ - do \ - { \ - } while (0) -#define POLY_BOUND(ptr, abs_bound) \ - do \ - { \ - } while (0) -#define POLYVEC_BOUND(ptr, abs_bound) \ - do \ - { \ - } while (0) -#define POLY_BOUND_MSG(ptr, ubound, abs_bound) \ - do \ - { \ - } while (0) -#define UBOUND(ptr, len, high_bound, msg) \ - do \ - { \ - } while (0) -#define POLY_UBOUND(ptr, ubound) \ - do \ - { \ - } while (0) -#define POLYVEC_UBOUND(ptr, ubound) \ - do \ - { \ - } while (0) -#define POLY_UBOUND_MSG(ptr, ubound, msg) \ 
- do \ - { \ - } while (0) -#define STATIC_ASSERT(cond, error) - -#endif /* MLKEM_DEBUG */ - -#endif /* MLKEM_DEBUG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/indcpa.c index 4d3133e14..0cfcc3e9e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/indcpa.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/indcpa.c @@ -17,7 +17,7 @@ #include "symmetric.h" #include "arith_backend.h" -#include "debug/debug.h" +#include "debug.h" #include "cbmc.h" @@ -25,15 +25,13 @@ * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. */ -#define pack_pk MLKEM_NAMESPACE(pack_pk) -#define unpack_pk MLKEM_NAMESPACE(unpack_pk) -#define pack_sk MLKEM_NAMESPACE(pack_sk) -#define unpack_sk MLKEM_NAMESPACE(unpack_sk) -#define pack_ciphertext MLKEM_NAMESPACE(pack_ciphertext) -#define unpack_ciphertext MLKEM_NAMESPACE(unpack_ciphertext) -#define gen_matrix_entry_x4 MLKEM_NAMESPACE(gen_matrix_entry_x4) -#define gen_matrix_entry MLKEM_NAMESPACE(gen_matrix_entry) -#define matvec_mul MLKEM_NAMESPACE(matvec_mul) +#define pack_pk MLKEM_NAMESPACE_K(pack_pk) +#define unpack_pk MLKEM_NAMESPACE_K(unpack_pk) +#define pack_sk MLKEM_NAMESPACE_K(pack_sk) +#define unpack_sk MLKEM_NAMESPACE_K(unpack_sk) +#define pack_ciphertext MLKEM_NAMESPACE_K(pack_ciphertext) +#define unpack_ciphertext MLKEM_NAMESPACE_K(unpack_ciphertext) +#define matvec_mul MLKEM_NAMESPACE_K(matvec_mul) /* End of static namespacing */ /************************************************* @@ -51,7 +49,7 @@ static void pack_pk(uint8_t r[MLKEM_INDCPA_PUBLICKEYBYTES], polyvec *pk, const uint8_t seed[MLKEM_SYMBYTES]) { - POLYVEC_BOUND(pk, MLKEM_Q); + debug_assert_bound_2d(pk, MLKEM_K, MLKEM_N, 0, MLKEM_Q); polyvec_tobytes(r, pk); memcpy(r + MLKEM_POLYVECBYTES, seed, MLKEM_SYMBYTES); } @@ -77,7 +75,7 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], 
/* NOTE: If a modulus check was conducted on the PK, we know at this * point that the coefficients of `pk` are unsigned canonical. The * specifications and proofs, however, do _not_ assume this, and instead - * work with the easily provable bound by 4096. */ + * work with the easily provable bound by UINT12_LIMIT. */ } /************************************************* @@ -91,7 +89,7 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], **************************************************/ static void pack_sk(uint8_t r[MLKEM_INDCPA_SECRETKEYBYTES], polyvec *sk) { - POLYVEC_BOUND(sk, MLKEM_Q); + debug_assert_bound_2d(sk, MLKEM_K, MLKEM_N, 0, MLKEM_Q); polyvec_tobytes(r, sk); } @@ -145,131 +143,11 @@ static void unpack_ciphertext(polyvec *b, poly *v, poly_decompress_dv(v, c + MLKEM_POLYVECCOMPRESSEDBYTES_DU); } -#ifndef MLKEM_GEN_MATRIX_NBLOCKS -#define MLKEM_GEN_MATRIX_NBLOCKS \ - ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + XOF_RATE) / XOF_RATE) -#endif - -/* - * Generate four A matrix entries from a seed, using rejection - * sampling on the output of a XOF. 
- */ -static void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4]) -__contract__( - requires(memory_no_alias(vec, sizeof(poly) * 4)) - requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) - requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) - assigns(memory_slice(vec, sizeof(poly) * 4)) - ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) -{ - /* Temporary buffers for XOF output before rejection sampling */ - uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - - /* Tracks the number of coefficients we have already sampled */ - unsigned int ctr[KECCAK_WAY]; - xof_x4_ctx statex; - unsigned int buflen; - - shake128x4_inc_init(&statex); - - /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */ - xof_x4_absorb(&statex, seed[0], seed[1], seed[2], seed[3], - MLKEM_SYMBYTES + 2); - - /* - * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. - * This should generate the matrix entries with high probability. - */ - xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, - &statex); - buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; - ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); - ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); - ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); - ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); - - /* - * So long as not all matrix entries have been generated, squeeze - * one more block a time until we're done. 
- */ - buflen = XOF_RATE; - while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || - ctr[3] < MLKEM_N) - __loop__( - assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), - object_whole(buf1), object_whole(buf2), object_whole(buf3)) - invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) - invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) - invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, MLKEM_Q)) - invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, MLKEM_Q)) - invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, MLKEM_Q)) - invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, MLKEM_Q))) - { - xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); - ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); - ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); - ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); - ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); - } - - xof_x4_release(&statex); -} - -/* - * Generate a single A matrix entry from a seed, using rejection - * sampling on the output of a XOF. - */ -static void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) -__contract__( - requires(memory_no_alias(entry, sizeof(poly))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) - assigns(memory_slice(entry, sizeof(poly))) - ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) -{ - xof_ctx state; - uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - unsigned int ctr, buflen; - - shake128_inc_init(&state); - xof_absorb(&state, seed, MLKEM_SYMBYTES + 2); - - /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. - */ - /* This should generate the matrix entry with high probability. 
*/ - xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); - buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; - ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); - - /* Squeeze + sample one more block a time until we're done */ - buflen = XOF_RATE; - while (ctr < MLKEM_N) - __loop__( - assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) - invariant(0 <= ctr && ctr <= MLKEM_N) - invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr, - 0, MLKEM_Q))) - { - xof_squeezeblocks(buf, 1, &state); - ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen); - } - - xof_release(&state); -} - #if !defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER) /* This namespacing is not done at the top to avoid a naming conflict * with native backends, which are currently not yet namespaced. */ #define poly_permute_bitrev_to_custom \ - MLKEM_NAMESPACE(poly_permute_bitrev_to_custom) + MLKEM_NAMESPACE_K(poly_permute_bitrev_to_custom) static INLINE void poly_permute_bitrev_to_custom(poly *data) __contract__( @@ -332,7 +210,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) * This call writes across polyvec boundaries for K=2 and K=3. * This is intentional and safe. */ - gen_matrix_entry_x4(&a[0].vec[0] + i, seedxy); + poly_rej_uniform_x4(&a[0].vec[0] + i, seedxy); } /* For left over polynomial, we use single keccak. */ @@ -353,12 +231,11 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) seed0[MLKEM_SYMBYTES + 1] = x; } - gen_matrix_entry(&a[0].vec[0] + i, seed0); + poly_rej_uniform(&a[0].vec[0] + i, seed0); i++; } - cassert(i == MLKEM_K * MLKEM_K, - "gen_matrix: failed to generate whole matrix"); + debug_assert(i == MLKEM_K * MLKEM_K); /* * The public matrix is generated in NTT domain. 
If the native backend @@ -402,16 +279,12 @@ __contract__( for (i = 0; i < MLKEM_K; i++) __loop__( assigns(i, object_whole(out)) - invariant(i >= 0 && i <= MLKEM_K)) + invariant(i <= MLKEM_K)) { polyvec_basemul_acc_montgomery_cached(&out->vec[i], &a[i], v, vc); } } - - -STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0) - MLKEM_NATIVE_INTERNAL_API void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES], @@ -461,7 +334,6 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], matvec_mul(&pkpv, a, &skpv, &skpv_cache); polyvec_tomont(&pkpv); - /* Arithmetic cannot overflow, see static assertion at the top */ polyvec_add(&pkpv, &e); polyvec_reduce(&pkpv); polyvec_reduce(&skpv); @@ -471,11 +343,6 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], } -/* Check that the arithmetic in indcpa_enc() does not overflow */ -STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0) -STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX, - indcpa_enc_bound_1) - MLKEM_NATIVE_INTERNAL_API void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], const uint8_t m[MLKEM_INDCPA_MSGBYTES], @@ -522,7 +389,6 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], polyvec_invntt_tomont(&b); poly_invntt_tomont(&v); - /* Arithmetic cannot overflow, see static assertion at the top */ polyvec_add(&b, &ep); poly_add(&v, &epp); poly_add(&v, &k); @@ -533,9 +399,6 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], pack_ciphertext(c, &b, &v); } -/* Check that the arithmetic in indcpa_dec() does not overflow */ -STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0) - MLKEM_NATIVE_INTERNAL_API void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], const uint8_t c[MLKEM_INDCPA_BYTES], @@ -551,7 +414,6 @@ void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], polyvec_basemul_acc_montgomery(&sb, &skpv, &b); poly_invntt_tomont(&sb); - /* Arithmetic cannot overflow, see static assertion at the 
top */ poly_sub(&v, &sb); poly_reduce(&v); diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/indcpa.h index 011f1aa4f..2c4fda3c4 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/indcpa.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/indcpa.h @@ -10,7 +10,7 @@ #include "common.h" #include "polyvec.h" -#define gen_matrix MLKEM_NAMESPACE(gen_matrix) +#define gen_matrix MLKEM_NAMESPACE_K(gen_matrix) /************************************************* * Name: gen_matrix * @@ -34,7 +34,7 @@ __contract__( array_bound(a[x].vec[y].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))); ); -#define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand) +#define indcpa_keypair_derand MLKEM_NAMESPACE_K(indcpa_keypair_derand) /************************************************* * Name: indcpa_keypair_derand * @@ -60,7 +60,7 @@ __contract__( assigns(object_whole(sk)) ); -#define indcpa_enc MLKEM_NAMESPACE(indcpa_enc) +#define indcpa_enc MLKEM_NAMESPACE_K(indcpa_enc) /************************************************* * Name: indcpa_enc * @@ -89,7 +89,7 @@ __contract__( assigns(object_whole(c)) ); -#define indcpa_dec MLKEM_NAMESPACE(indcpa_dec) +#define indcpa_dec MLKEM_NAMESPACE_K(indcpa_dec) /************************************************* * Name: indcpa_dec * diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/kem.c index 5779d3273..88c3843be 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/kem.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/kem.c @@ -16,8 +16,8 @@ * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. 
*/ -#define check_pk MLKEM_NAMESPACE(check_pk) -#define check_sk MLKEM_NAMESPACE(check_sk) +#define check_pk MLKEM_NAMESPACE_K(check_pk) +#define check_sk MLKEM_NAMESPACE_K(check_sk) /* End of static namespacing */ #if defined(CBMC) diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/kem.h index 074e4771e..93caa796b 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/kem.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/kem.h @@ -9,6 +9,7 @@ #include "cbmc.h" #include "common.h" +#if defined(MLKEM_NATIVE_CHECK_APIS) /* Include to ensure consistency between internal kem.h * and external mlkem_native.h. */ #include "mlkem_native.h" @@ -25,6 +26,14 @@ #error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h #endif +#else +#define crypto_kem_keypair_derand MLKEM_NAMESPACE_K(keypair_derand) +#define crypto_kem_keypair MLKEM_NAMESPACE_K(keypair) +#define crypto_kem_enc_derand MLKEM_NAMESPACE_K(enc_derand) +#define crypto_kem_enc MLKEM_NAMESPACE_K(enc) +#define crypto_kem_dec MLKEM_NAMESPACE_K(dec) +#endif + /************************************************* * Name: crypto_kem_keypair_derand * diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/mlkem_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/mlkem_native.h index 4aed4efbb..12d1d12e6 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/mlkem_native.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/mlkem_native.h @@ -59,9 +59,17 @@ #error MLKEM_NAMESPACE_PREFIX not set by config file #endif -#define BUILD_INFO_CONCAT_(x, y) x##_##y -#define BUILD_INFO_CONCAT(x, y) BUILD_INFO_CONCAT_(x, y) -#define BUILD_INFO_NAMESPACE(sym) BUILD_INFO_CONCAT(MLKEM_NAMESPACE_PREFIX, sym) +#if defined(MLKEM_NATIVE_NAMESPACE_PREFIX_ADD_LEVEL) +#define BUILD_INFO_CONCAT3_(x, y, z) x##y##_##z +#define BUILD_INFO_CONCAT3(x, y, z) BUILD_INFO_CONCAT3_(x, y, z) +#define BUILD_INFO_NAMESPACE(sym) \ + 
BUILD_INFO_CONCAT3(MLKEM_NAMESPACE_PREFIX, BUILD_INFO_LVL, sym) +#else +#define BUILD_INFO_CONCAT2_(x, y) x##_##y +#define BUILD_INFO_CONCAT2(x, y) BUILD_INFO_CONCAT2_(x, y) +#define BUILD_INFO_NAMESPACE(sym) \ + BUILD_INFO_CONCAT2(MLKEM_NAMESPACE_PREFIX, sym) +#endif #endif /* BUILD_INFO_LVL */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/ntt.c index 02b45215c..3651c8da9 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/ntt.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/ntt.c @@ -2,10 +2,12 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ -#include +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) +#include #include "arith_backend.h" -#include "debug/debug.h" +#include "debug.h" #include "ntt.h" #include "reduce.h" @@ -45,10 +47,10 @@ * 4 -- 6 * 5 -- 7 */ -static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, - int len, int bound) +static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, + unsigned start, unsigned len, int bound) __contract__( - requires(0 <= start && start < MLKEM_N) + requires(start < MLKEM_N) requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N) requires(0 <= bound && bound < INT16_MAX - MLKEM_Q) requires(-HALF_Q < zeta && zeta < HALF_Q) @@ -60,7 +62,7 @@ __contract__( ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound))) { /* `bound` is a ghost variable only needed in the CBMC specification */ - int j; + unsigned j; ((void)bound); for (j = start; j < start + len; j++) __loop__( @@ -93,7 +95,7 @@ __contract__( * official Kyber implementation here, merely adding `layer` as * a ghost variable for the specifications. 
*/ -static void ntt_layer(int16_t r[MLKEM_N], int len, int layer) +static void ntt_layer(int16_t r[MLKEM_N], unsigned len, unsigned layer) __contract__( requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer)) @@ -101,15 +103,15 @@ __contract__( assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q))) { - int start, k; + unsigned start, k; /* `layer` is a ghost variable only needed in the CBMC specification */ ((void)layer); /* Twiddle factors for layer n start at index 2^(layer-1) */ k = MLKEM_N / (2 * len); for (start = 0; start < MLKEM_N; start += 2 * len) __loop__( - invariant(0 <= start && start < MLKEM_N + 2 * len) - invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) + invariant(start < MLKEM_N + 2 * len) + invariant(k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) invariant(array_abs_bound(r, 0, start, layer * MLKEM_Q + MLKEM_Q)) invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q))) { @@ -130,9 +132,9 @@ __contract__( MLKEM_NATIVE_INTERNAL_API void poly_ntt(poly *p) { - int len, layer; + unsigned len, layer; int16_t *r; - POLY_BOUND_MSG(p, MLKEM_Q, "ref ntt input"); + debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q); r = p->coeffs; for (len = 128, layer = 1; len >= 2; len >>= 1, layer++) @@ -144,30 +146,23 @@ void poly_ntt(poly *p) } /* Check the stronger bound */ - POLY_BOUND_MSG(p, NTT_BOUND, "ref ntt output"); + debug_assert_abs_bound(p, MLKEM_N, NTT_BOUND); } #else /* MLKEM_USE_NATIVE_NTT */ -/* Check that bound for native NTT implies contractual bound */ -STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, invntt_bound) - MLKEM_NATIVE_INTERNAL_API void poly_ntt(poly *p) { - POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input"); + debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q); ntt_native(p); - POLY_BOUND_MSG(p, NTT_BOUND_NATIVE, "native ntt output"); + debug_assert_abs_bound(p, MLKEM_N, NTT_BOUND); } #endif /* 
MLKEM_USE_NATIVE_NTT */ #if !defined(MLKEM_USE_NATIVE_INTT) -/* Check that bound for reference invNTT implies contractual bound */ -#define INVNTT_BOUND_REF (3 * MLKEM_Q / 4) -STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound) - /* Compute one layer of inverse NTT */ -static void invntt_layer(int16_t *r, int len, int layer) +static void invntt_layer(int16_t *r, unsigned len, unsigned layer) __contract__( requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7) @@ -176,23 +171,23 @@ __contract__( assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { - int start, k; + unsigned start, k; /* `layer` is a ghost variable used only in the specification */ ((void)layer); k = MLKEM_N / len - 1; for (start = 0; start < MLKEM_N; start += 2 * len) __loop__( invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)) - invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(start <= MLKEM_N && k <= 127) /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */ invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len)) { - int j; + unsigned j; int16_t zeta = zetas[k--]; for (j = start; j < start + len; j++) __loop__( invariant(start <= j && j <= start + len) - invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(start <= MLKEM_N && k <= 127) invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { int16_t t = r[j]; @@ -211,13 +206,13 @@ void poly_invntt_tomont(poly *p) * and NTT twist. This also brings coefficients down to * absolute value < MLKEM_Q. 
*/ - int j, len, layer; + unsigned j, len, layer; const int16_t f = 1441; int16_t *r = p->coeffs; for (j = 0; j < MLKEM_N; j++) __loop__( - invariant(0 <= j && j <= MLKEM_N) + invariant(j <= MLKEM_N) invariant(array_abs_bound(r, 0, j, MLKEM_Q))) { r[j] = fqmul(r[j], f); @@ -226,24 +221,21 @@ void poly_invntt_tomont(poly *p) /* Run the invNTT layers */ for (len = 2, layer = 7; len <= 128; len <<= 1, layer--) __loop__( - invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer))) + invariant(2 <= len && len <= 256 && layer <= 7 && len == (1 << (8 - layer))) invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { invntt_layer(p->coeffs, len, layer); } - POLY_BOUND_MSG(p, INVNTT_BOUND_REF, "ref intt output"); + debug_assert_abs_bound(p, MLKEM_N, INVNTT_BOUND); } #else /* MLKEM_USE_NATIVE_INTT */ -/* Check that bound for native invNTT implies contractual bound */ -STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound) - MLKEM_NATIVE_INTERNAL_API void poly_invntt_tomont(poly *p) { intt_native(p); - POLY_BOUND_MSG(p, INVNTT_BOUND_NATIVE, "native intt output"); + debug_assert_abs_bound(p, MLKEM_N, INVNTT_BOUND); } #endif /* MLKEM_USE_NATIVE_INTT */ @@ -252,8 +244,7 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t b_cached) { int32_t t0, t1; - - BOUND(a, 2, 4096, "basemul input bound"); + debug_assert_bound(a, 2, 0, UINT12_LIMIT); t0 = (int32_t)a[1] * b_cached; t0 += (int32_t)a[0] * b[0]; @@ -264,5 +255,12 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], r[0] = montgomery_reduce(t0); r[1] = montgomery_reduce(t1); - BOUND(r, 2, 2 * MLKEM_Q, "basemul output bound"); + debug_assert_abs_bound(r, 2, 2 * MLKEM_Q); } + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_ntt MLKEM_NAMESPACE_K(empty_cu_ntt) +int empty_cu_ntt; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/ntt.h 
b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/ntt.h index 5592bb9a2..4e80d3ab3 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/ntt.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/ntt.h @@ -4,10 +4,10 @@ */ #ifndef NTT_H #define NTT_H +#include "common.h" #include #include "cbmc.h" -#include "common.h" #include "poly.h" #include "reduce.h" @@ -81,7 +81,7 @@ __contract__( * Upon return, coefficients are bound by * 2*MLKEM_Q in absolute value. * - a: Pointer to first input polynomial - * Must be coefficient-wise < 4096 in absolute value. + * Every coefficient must be in [0..4095] * - b: Pointer to second input polynomial * Can have arbitrary int16_t coefficients * - b_cached: Some precomputed value, typically derived from @@ -99,5 +99,4 @@ __contract__( ensures(array_abs_bound(r, 0, 2, 2 * MLKEM_Q)) ); - -#endif +#endif /* NTT_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/params.h index fa751f977..57ea4c8ba 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/params.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/params.h @@ -25,23 +25,34 @@ #define MLKEM_POLYBYTES 384 #define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES) +#define MLKEM_POLYCOMPRESSEDBYTES_D4 128 +#define MLKEM_POLYCOMPRESSEDBYTES_D5 160 +#define MLKEM_POLYCOMPRESSEDBYTES_D10 320 +#define MLKEM_POLYCOMPRESSEDBYTES_D11 352 + #if MLKEM_K == 2 #define MLKEM_LVL 512 #define MLKEM_ETA1 3 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_DU 10 +#define MLKEM_DV 4 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #elif MLKEM_K == 3 #define MLKEM_LVL 768 #define MLKEM_ETA1 2 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_DU 10 
+#define MLKEM_DV 4 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #elif MLKEM_K == 4 #define MLKEM_LVL 1024 #define MLKEM_ETA1 2 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 160 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 352 +#define MLKEM_DU 11 +#define MLKEM_DV 5 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D5 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D11 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/poly.c index 5807879df..7483ebf6d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/poly.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/poly.c @@ -2,13 +2,15 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) + #include #include - #include "arith_backend.h" #include "cbd.h" #include "cbmc.h" -#include "debug/debug.h" +#include "debug.h" #include "fips202x4.h" #include "ntt.h" #include "poly.h" @@ -16,50 +18,46 @@ #include "symmetric.h" #include "verify.h" +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 || MLKEM_K == 3) MLKEM_NATIVE_INTERNAL_API -void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) +void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a) { - unsigned j; -#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) - for (j = 0; j < MLKEM_N / 8; j++) - __loop__(invariant(j >= 0 && j <= MLKEM_N / 8)) + unsigned i; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (i = 0; i < MLKEM_N / 8; i++) + __loop__(invariant(i <= MLKEM_N / 8)) { - unsigned k; - uint16_t t[8]; - for (k = 0; k < 8; k++) + unsigned j; + 
uint8_t t[8] = {0}; + for (j = 0; j < 8; j++) __loop__( - invariant(k >= 0 && k <= 8) - invariant(forall(r, 0, k, t[r] < (1u << 11)))) + invariant(i <= MLKEM_N / 8 && j <= 8) + invariant(array_bound(t, 0, j, 0, 16))) { - t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); } - /* - * Make all implicit truncation explicit. No data is being - * truncated for the LHS's since each t[i] is 11-bit in size. - */ - r[11 * j + 0] = (t[0] >> 0) & 0xFF; - r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); - r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); - r[11 * j + 3] = (t[2] >> 2) & 0xFF; - r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); - r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); - r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); - r[11 * j + 7] = (t[5] >> 1) & 0xFF; - r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); - r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); - r[11 * j + 10] = (t[7] >> 3); + r[i * 4] = t[0] | (t[1] << 4); + r[i * 4 + 1] = t[2] | (t[3] << 4); + r[i * 4 + 2] = t[4] | (t[5] << 4); + r[i * 4 + 3] = t[6] | (t[7] << 4); } +} -#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a) +{ + unsigned j; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (j = 0; j < MLKEM_N / 4; j++) - __loop__(invariant(j >= 0 && j <= MLKEM_N / 4)) + __loop__(invariant(j <= MLKEM_N / 4)) { unsigned k; uint16_t t[4]; for (k = 0; k < 4; k++) __loop__( - invariant(k >= 0 && k <= 4) + invariant(k <= 4) invariant(forall(r, 0, k, t[r] < (1u << 10)))) { t[k] = scalar_compress_d10(a->coeffs[4 * j + k]); @@ -75,51 +73,35 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) r[5 * j + 3] = (t[2] >> 4) | ((t[3] << 6) & 0xFF); r[5 * j + 4] = (t[3] >> 2); } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" -#endif } - MLKEM_NATIVE_INTERNAL_API -void 
poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]) { - unsigned j; -#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) - for (j = 0; j < MLKEM_N / 8; j++) + unsigned i; + for (i = 0; i < MLKEM_N / 2; i++) __loop__( - invariant(0 <= j && j <= MLKEM_N / 8) - invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q))) + invariant(i <= MLKEM_N / 2) + invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q))) { - int k; - uint16_t t[8]; - uint8_t const *base = &a[11 * j]; - t[0] = 0x7FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8)); - t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); - t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | - ((uint16_t)base[4] << 10)); - t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); - t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); - t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | - ((uint16_t)base[8] << 9)); - t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); - t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); - - for (k = 0; k < 8; k++) - __loop__( - invariant(0 <= k && k <= 8) - invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q))) - { - r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); - } + r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); + r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} + +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d10(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]) +{ + unsigned j; for (j = 0; j < MLKEM_N / 4; j++) __loop__( - invariant(0 <= j && j <= MLKEM_N / 4) + invariant(j <= MLKEM_N / 4) invariant(array_bound(r->coeffs, 0, 4 * j, 0, MLKEM_Q))) { - int k; + unsigned k; uint16_t t[4]; uint8_t const *base = &a[5 * j]; @@ -130,51 +112,33 @@ void poly_decompress_du(poly *r, const uint8_t 
a[MLKEM_POLYCOMPRESSEDBYTES_DU]) for (k = 0; k < 4; k++) __loop__( - invariant(0 <= k && k <= 4) + invariant(k <= 4) invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, MLKEM_Q))) { r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]); } } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" -#endif + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 \ + || MLKEM_K == 3) */ +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 MLKEM_NATIVE_INTERNAL_API -void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) +void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); -#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) - for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) - { - unsigned j; - uint8_t t[8] = {0}; - for (j = 0; j < 8; j++) - __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) - invariant(array_bound(t, 0, j, 0, 16))) - { - t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); - } - - r[i * 4] = t[0] | (t[1] << 4); - r[i * 4 + 1] = t[2] | (t[3] << 4); - r[i * 4 + 2] = t[4] | (t[5] << 4); - r[i * 4 + 3] = t[6] | (t[7] << 4); - } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + __loop__(invariant(i <= MLKEM_N / 8)) { unsigned j; uint8_t t[8] = {0}; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i <= MLKEM_N / 8 && j <= 8) invariant(array_bound(t, 0, j, 0, 32))) { t[j] = scalar_compress_d5(a->coeffs[8 * i + j]); @@ -191,33 +155,57 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) r[i * 5 + 3] = 0xFF & ((t[4] >> 4) | (t[5] << 1) | (t[6] << 6)); r[i * 5 + 4] = 0xFF & ((t[6] >> 2) | (t[7] << 3)); } -#else -#error 
"MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 160}" -#endif } MLKEM_NATIVE_INTERNAL_API -void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a) { - unsigned i; -#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) - for (i = 0; i < MLKEM_N / 2; i++) - __loop__( - invariant(i >= 0 && i <= MLKEM_N / 2) - invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q))) + unsigned j; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (j = 0; j < MLKEM_N / 8; j++) + __loop__(invariant(j <= MLKEM_N / 8)) { - r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); - r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); + unsigned k; + uint16_t t[8]; + for (k = 0; k < 8; k++) + __loop__( + invariant(k <= 8) + invariant(forall(r, 0, k, t[r] < (1u << 11)))) + { + t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + } + + /* + * Make all implicit truncation explicit. No data is being + * truncated for the LHS's since each t[i] is 11-bit in size. 
+ */ + r[11 * j + 0] = (t[0] >> 0) & 0xFF; + r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); + r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); + r[11 * j + 3] = (t[2] >> 2) & 0xFF; + r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); + r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); + r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); + r[11 * j + 7] = (t[5] >> 1) & 0xFF; + r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); + r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); + r[11 * j + 10] = (t[7] >> 3); } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) +} + +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]) +{ + unsigned i; for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q))) { unsigned j; uint8_t t[8]; - const int offset = i * 5; + const unsigned offset = i * 5; /* * Explicitly truncate to avoid warning about * implicit truncation in CBMC and unwind loop for ease @@ -240,29 +228,62 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) /* and copy to the correct slice in r[] */ for (j = 0; j < 8; j++) __loop__( - invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8) + invariant(j <= 8 && i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q))) { r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]); } } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 160}" -#endif - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d11(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]) +{ + unsigned j; + for (j = 0; j < MLKEM_N / 8; j++) + __loop__( + invariant(j <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q))) + { + unsigned k; + uint16_t t[8]; + uint8_t const *base = &a[11 * j]; + t[0] = 0x7FF & ((base[0] >> 0) | 
((uint16_t)base[1] << 8)); + t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); + t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | + ((uint16_t)base[4] << 10)); + t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); + t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); + t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | + ((uint16_t)base[8] << 9)); + t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); + t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); + + for (k = 0; k < 8; k++) + __loop__( + invariant(k <= 8) + invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q))) + { + r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); + } + } + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD) || MLKEM_K == 4 */ + #if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES) MLKEM_NATIVE_INTERNAL_API void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); - + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_N / 2; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 2)) + __loop__(invariant(i <= MLKEM_N / 2)) { const uint16_t t0 = a->coeffs[2 * i]; const uint16_t t1 = a->coeffs[2 * i + 1]; @@ -290,7 +311,7 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) MLKEM_NATIVE_INTERNAL_API void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) { - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); poly_tobytes_native(r, a); } #endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */ @@ -302,7 +323,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) unsigned i; for (i = 0; i < MLKEM_N / 2; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 2) + invariant(i <= MLKEM_N / 2) invariant(array_bound(r->coeffs, 0, 2 * i, 0, UINT12_LIMIT))) { const uint8_t t0 = a[3 * i + 0]; @@ -313,7 +334,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) } /* Note that the coefficients are not 
canonical */ - POLY_UBOUND(r, 4096); + debug_assert_bound(r, MLKEM_N, 0, UINT12_LIMIT); } #else /* MLKEM_USE_NATIVE_POLY_FROMBYTES */ MLKEM_NATIVE_INTERNAL_API @@ -333,13 +354,13 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q))) { unsigned j; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i < MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i < MLKEM_N / 8 && j <= 8) invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q))) { /* Prevent the compiler from recognizing this as a bit selection */ @@ -347,23 +368,23 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) r->coeffs[8 * i + j] = ct_sel_int16(HALF_Q, 0, msg[i] & mask); } } - POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output"); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + __loop__(invariant(i <= MLKEM_N / 8)) { unsigned j; msg[i] = 0; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)) + invariant(i <= MLKEM_N / 8 && j <= 8)) { uint32_t t = scalar_compress_d1(a->coeffs[8 * i + j]); msg[i] |= t << j; @@ -371,104 +392,17 @@ void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) } } -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, - uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) -{ - ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN 
uint8_t extkey0[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1]; - memcpy(extkey0, seed, MLKEM_SYMBYTES); - memcpy(extkey1, seed, MLKEM_SYMBYTES); - memcpy(extkey2, seed, MLKEM_SYMBYTES); - memcpy(extkey3, seed, MLKEM_SYMBYTES); - extkey0[MLKEM_SYMBYTES] = nonce0; - extkey1[MLKEM_SYMBYTES] = nonce1; - extkey2[MLKEM_SYMBYTES] = nonce2; - extkey3[MLKEM_SYMBYTES] = nonce3; - prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3); - poly_cbd_eta1(r0, buf0); - poly_cbd_eta1(r1, buf1); - poly_cbd_eta1(r2, buf2); - poly_cbd_eta1(r3, buf3); - - POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0"); - POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 1"); - POLY_BOUND_MSG(r2, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 2"); - POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3"); -} - -#if MLKEM_K == 2 || MLKEM_K == 4 -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce) -{ - ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; - ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1]; - - memcpy(extkey, seed, MLKEM_SYMBYTES); - extkey[MLKEM_SYMBYTES] = nonce; - prf_eta2(buf, extkey); - - poly_cbd_eta2(r, buf); - - POLY_BOUND_MSG(r, MLKEM_ETA1 + 1, "poly_getnoise_eta2 output"); -} -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ - -#if MLKEM_K == 2 -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, - uint8_t nonce3) -{ - ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; - ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; - memcpy(extkey[0], seed, MLKEM_SYMBYTES); - memcpy(extkey[1], seed, MLKEM_SYMBYTES); - memcpy(extkey[2], seed, MLKEM_SYMBYTES); - 
memcpy(extkey[3], seed, MLKEM_SYMBYTES); - extkey[0][MLKEM_SYMBYTES] = nonce0; - extkey[1][MLKEM_SYMBYTES] = nonce1; - extkey[2][MLKEM_SYMBYTES] = nonce2; - extkey[3][MLKEM_SYMBYTES] = nonce3; - - prf_eta1(buf1[0], extkey[0]); - prf_eta1(buf1[1], extkey[1]); - prf_eta2(buf2[0], extkey[2]); - prf_eta2(buf2[1], extkey[3]); - - poly_cbd_eta1(r0, buf1[0]); - poly_cbd_eta1(r1, buf1[1]); - poly_cbd_eta2(r2, buf2[0]); - poly_cbd_eta2(r3, buf2[1]); - - POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 0"); - POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 1"); - POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2"); - POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 3"); -} -#endif /* MLKEM_K == 2 */ - MLKEM_NATIVE_INTERNAL_API void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, const poly_mulcache *b_cache) { unsigned i; - POLY_BOUND(b_cache, 4096); + debug_assert_bound(a, MLKEM_N, 0, UINT12_LIMIT); for (i = 0; i < MLKEM_N / 4; i++) __loop__( assigns(i, object_whole(r)) - invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(i <= MLKEM_N / 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i, 2 * MLKEM_Q))) { basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i], @@ -476,6 +410,8 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, basemul_cached(&r->coeffs[4 * i + 2], &a->coeffs[4 * i + 2], &b->coeffs[4 * i + 2], b_cache->coeffs[2 * i + 1]); } + + debug_assert_abs_bound(r, MLKEM_N, 2 * MLKEM_Q); } #if !defined(MLKEM_USE_NATIVE_POLY_TOMONT) @@ -486,20 +422,20 @@ void poly_tomont(poly *r) const int16_t f = (1ULL << 32) % MLKEM_Q; /* 1353 */ for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) - invariant(array_abs_bound(r->coeffs ,0, i, MLKEM_Q))) + invariant(i <= MLKEM_N) + invariant(array_abs_bound(r->coeffs, 0, i, MLKEM_Q))) { r->coeffs[i] = fqmul(r->coeffs[i], f); } - POLY_BOUND(r, MLKEM_Q); + 
debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_TOMONT */ MLKEM_NATIVE_INTERNAL_API void poly_tomont(poly *r) { poly_tomont_native(r); - POLY_BOUND(r, MLKEM_Q); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } #endif /* MLKEM_USE_NATIVE_POLY_TOMONT */ @@ -510,7 +446,7 @@ void poly_reduce(poly *r) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(array_bound(r->coeffs, 0, i, 0, MLKEM_Q))) { /* Barrett reduction, giving signed canonical representative */ @@ -519,14 +455,14 @@ void poly_reduce(poly *r) r->coeffs[i] = scalar_signed_to_unsigned_q(t); } - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_REDUCE */ MLKEM_NATIVE_INTERNAL_API void poly_reduce(poly *r) { poly_reduce_native(r); - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } #endif /* MLKEM_USE_NATIVE_POLY_REDUCE */ @@ -536,7 +472,7 @@ void poly_add(poly *r, const poly *b) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1]))) { @@ -550,7 +486,7 @@ void poly_sub(poly *r, const poly *b) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1]))) { @@ -564,20 +500,36 @@ void poly_mulcache_compute(poly_mulcache *x, const poly *a) { unsigned i; for (i = 0; i < MLKEM_N / 4; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 4)) + __loop__( + invariant(i <= MLKEM_N / 4) + invariant(array_abs_bound(x->coeffs, 0, 2 * i, MLKEM_Q))) { x->coeffs[2 * i + 0] = fqmul(a->coeffs[4 * i + 1], zetas[64 + 
i]); x->coeffs[2 * i + 1] = fqmul(a->coeffs[4 * i + 3], -zetas[64 + i]); } - POLY_BOUND(x, MLKEM_Q); + + /* + * This bound is true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus omitted + * from the spec to not unnecessarily constrain native + * implementations, but checked here nonetheless. + */ + debug_assert_abs_bound(x, MLKEM_N / 2, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ MLKEM_NATIVE_INTERNAL_API void poly_mulcache_compute(poly_mulcache *x, const poly *a) { poly_mulcache_compute_native(x, a); - /* Omitting POLY_BOUND(x, MLKEM_Q) since native implementations may + /* Omitting bounds assertion since native implementations may * decide not to use a mulcache. Note that the C backend implementation * of poly_basemul_montgomery_cached() does still include the check. */ } #endif /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_poly MLKEM_NAMESPACE_K(empty_cu_poly) +int empty_cu_poly; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/poly.h index 1e8c109c6..6a14c785d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/poly.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/poly.h @@ -307,112 +307,164 @@ __contract__( ************************************************************/ static INLINE uint16_t scalar_signed_to_unsigned_q(int16_t c) __contract__( - requires(c >= -(MLKEM_Q - 1) && c <= (MLKEM_Q - 1)) - ensures(return_value >= 0 && return_value <= (MLKEM_Q - 1)) + requires(c > -MLKEM_Q && c < MLKEM_Q) + ensures(return_value >= 0 && return_value < MLKEM_Q) ensures(return_value == (int32_t)c + (((int32_t)c < 0) * MLKEM_Q))) { + debug_assert_abs_bound(&c, 1, MLKEM_Q); + /* Add Q if c is negative, but in constant time */ c = ct_sel_int16(c + MLKEM_Q, c, ct_cmask_neg_i16(c)); - cassert(c >=
0, "scalar_signed_to_unsigned_q result lower bound"); - cassert(c < MLKEM_Q, "scalar_signed_to_unsigned_q result upper bound"); - /* and therefore cast to uint16_t is safe. */ + debug_assert_bound(&c, 1, 0, MLKEM_Q); return (uint16_t)c; } -#define poly_compress_du MLKEM_NAMESPACE(poly_compress_du) +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || \ + (MLKEM_K == 2 || MLKEM_K == 3) +#define poly_compress_d4 MLKEM_NAMESPACE(poly_compress_d4) /************************************************* - * Name: poly_compress_du + * Name: poly_compress_d4 * - * Description: Compression (du bits) and subsequent serialization of a - *polynomial + * Description: Compression (4 bits) and subsequent serialization of a + * polynomial * * Arguments: - uint8_t *r: pointer to output byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes) * - const poly *a: pointer to input polynomial * Coefficients must be unsigned canonical, * i.e. in [0,1,..,MLKEM_Q-1]. **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) -__contract__( - requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) - requires(memory_no_alias(a, sizeof(poly))) - requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) -); +void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a); + +#define poly_compress_d10 MLKEM_NAMESPACE(poly_compress_d10) +/************************************************* + * Name: poly_compress_d10 + * + * Description: Compression (10 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a); -#define poly_decompress_du MLKEM_NAMESPACE(poly_decompress_du) +#define poly_decompress_d4 MLKEM_NAMESPACE(poly_decompress_d4) /************************************************* - * Name: poly_decompress_du + * Name: poly_decompress_d4 * - * Description: De-serialization and subsequent decompression (du bits) of a - *polynomial; approximate inverse of poly_compress_du + * Description: De-serialization and subsequent decompression (4 bits) of a + * polynomial; approximate inverse of poly_compress_d4 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *a: pointer to input byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES bytes) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes) * * Upon return, the coefficients of the output polynomial are unsigned-canonical * (non-negative and smaller than MLKEM_Q). * **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) -__contract__( - requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) - requires(memory_no_alias(r, sizeof(poly))) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) -); +void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]); -#define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv) +#define poly_decompress_d10 MLKEM_NAMESPACE(poly_decompress_d10) /************************************************* - * Name: poly_compress_dv + * Name: poly_decompress_d10 + * + * Description: De-serialization and subsequent decompression (10 bits) of a + * polynomial; approximate inverse of poly_compress_d10 * - * Description: Compression (dv bits) and subsequent serialization of a - *polynomial + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t
*a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d10(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]); +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 \ + || MLKEM_K == 3) */ + +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 +#define poly_compress_d5 MLKEM_NAMESPACE(poly_compress_d5) +/************************************************* + * Name: poly_compress_d5 + * + * Description: Compression (5 bits) and subsequent serialization of a + * polynomial * * Arguments: - uint8_t *r: pointer to output byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES_DV) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes) * - const poly *a: pointer to input polynomial * Coefficients must be unsigned canonical, * i.e. in [0,1,..,MLKEM_Q-1]. 
**************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) -__contract__( - requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) - requires(memory_no_alias(a, sizeof(poly))) - requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - assigns(object_whole(r)) -); +void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a); -#define poly_decompress_dv MLKEM_NAMESPACE(poly_decompress_dv) +#define poly_compress_d11 MLKEM_NAMESPACE(poly_compress_d11) /************************************************* - * Name: poly_decompress_dv + * Name: poly_compress_d11 + * + * Description: Compression (11 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a); + +#define poly_decompress_d5 MLKEM_NAMESPACE(poly_decompress_d5) +/************************************************* + * Name: poly_decompress_d5 * * Description: De-serialization and subsequent decompression (dv bits) of a - *polynomial; approximate inverse of poly_compress + * polynomial; approximate inverse of poly_compress * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *a: pointer to input byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES_DV - *bytes) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes) * * Upon return, the coefficients of the output polynomial are unsigned-canonical * (non-negative and smaller than MLKEM_Q). 
* **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) -__contract__( - requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) - requires(memory_no_alias(r, sizeof(poly))) - assigns(object_whole(r)) - ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) -); +void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]); + +#define poly_decompress_d11 MLKEM_NAMESPACE(poly_decompress_d11) +/************************************************* + * Name: poly_decompress_d11 + * + * Description: De-serialization and subsequent decompression (11 bits) of a + * polynomial; approximate inverse of poly_compress_d11 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d11(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]); +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 \ + */ #define poly_tobytes MLKEM_NAMESPACE(poly_tobytes) /************************************************* @@ -500,144 +552,6 @@ __contract__( assigns(object_whole(msg)) ); -#define poly_getnoise_eta1_4x MLKEM_NAMESPACE(poly_getnoise_eta1_4x) -/************************************************* - * Name: poly_getnoise_eta1_4x - * - * Description: Batch sample four polynomials deterministically from a seed - * and nonces, with output polynomials close to centered binomial distribution - * with parameter MLKEM_ETA1. 
- * - * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce{0,1,2,3}: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, - uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) -/* Depending on MLKEM_K, the pointers passed to this function belong - to the same objects, so we cannot use memory_no_alias for r0-r3. - - NOTE: Somehow it is important to use memory_no_alias() first in the - conjunctions defining each case. -*/ -#if MLKEM_K == 2 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ - (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && - r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#elif MLKEM_K == 4 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case B: r0, r1, r2, r3 consecutive */ - (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 
1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#elif MLKEM_K == 3 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case C: r0, r1, r2 consecutive */ - (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && - r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#endif /* MLKEM_K */ - -#if MLKEM_ETA1 == MLKEM_ETA2 -/* - * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 - * where MLKEM_ETA2 = MLKEM_ETA1 = 2. - * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. - */ -#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x -#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ - -#if MLKEM_K == 2 || MLKEM_K == 4 -#define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2) -/************************************************* - * Name: poly_getnoise_eta2 - * - * Description: Sample a polynomial deterministically from a seed and a nonce, - * with output polynomial close to centered binomial distribution - * with parameter MLKEM_ETA2 - * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - assigns(object_whole(r)) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, 
MLKEM_ETA2 + 1)) -); -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ - -#if MLKEM_K == 2 -#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x) -/************************************************* - * Name: poly_getnoise_eta1122_4x - * - * Description: Batch sample four polynomials deterministically from a seed - * and a nonces, with output polynomials close to centered binomial - * distribution with parameter MLKEM_ETA1 and MLKEM_ETA2 - * - * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce{0,1,2,3}: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, - uint8_t nonce3) -__contract__( - requires( /* r0, r1 consecutive, r2, r3 consecutive */ - (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && - r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) - ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)); -); -#endif /* MLKEM_K == 2 */ - #define poly_basemul_montgomery_cached \ MLKEM_NAMESPACE(poly_basemul_montgomery_cached) /************************************************* @@ -649,8 +563,7 @@ __contract__( * Bounds: * - a is assumed to be coefficient-wise < q in absolute value. * - * The result is coefficient-wise bound by 3/2 q in absolute - * value. + * The result is coefficient-wise bound by 2*q in absolute value. 
* * Arguments: - poly *r: pointer to output polynomial * - const poly *a: pointer to first input polynomial @@ -802,4 +715,4 @@ __contract__( assigns(object_whole(r)) ); -#endif +#endif /* POLY_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/polyvec.c index 7d2016773..50ea1c34a 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/polyvec.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/polyvec.c @@ -4,18 +4,29 @@ */ #include "polyvec.h" #include +#include #include "arith_backend.h" +#include "cbd.h" #include "ntt.h" #include "poly.h" +#include "symmetric.h" -#include "debug/debug.h" +#include "debug.h" + +/* Static namespacing + * This is to facilitate building multiple instances + * of mlkem-native (e.g. with varying security levels) + * within a single compilation unit. */ +#define poly_cbd_eta1 MLKEM_NAMESPACE_K(poly_cbd_eta1) +#define poly_cbd_eta2 MLKEM_NAMESPACE_K(poly_cbd_eta2) +/* End of static namespacing */ MLKEM_NATIVE_INTERNAL_API void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU], const polyvec *a) { unsigned i; - POLYVEC_UBOUND(a, MLKEM_Q); + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_K; i++) { @@ -33,13 +44,15 @@ void polyvec_decompress_du(polyvec *r, poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU); } - POLYVEC_UBOUND(r, MLKEM_Q); + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a) { unsigned i; + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q); + for (i = 0; i < MLKEM_K; i++) { poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]); @@ -54,6 +67,8 @@ void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES]) { poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES); } + + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); } MLKEM_NATIVE_INTERNAL_API @@ -64,6 +79,8 
@@ void polyvec_ntt(polyvec *r) { poly_ntt(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, NTT_BOUND); } MLKEM_NATIVE_INTERNAL_API @@ -74,6 +91,8 @@ void polyvec_invntt_tomont(polyvec *r) { poly_invntt_tomont(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, INVNTT_BOUND); } #if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) @@ -84,10 +103,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, { unsigned i; poly t; - - POLYVEC_BOUND(a, 4096); - POLYVEC_BOUND(b, NTT_BOUND); - POLYVEC_BOUND(b_cache, MLKEM_Q); + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); poly_basemul_montgomery_cached(r, &a->vec[0], &b->vec[0], &b_cache->vec[0]); for (i = 1; i < MLKEM_K; i++) @@ -95,18 +111,15 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, poly_basemul_montgomery_cached(&t, &a->vec[i], &b->vec[i], &b_cache->vec[i]); poly_add(r, &t); - /* abs bounds: < (i+1) * 3/2 * q */ } /* - * Those bounds are true for the C implementation, but not needed - * in the higher level bounds reasoning. It is thus best to omit - * them from the spec to not unnecessarily constraint native implementations. + * This bound is true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus omitted + * from the spec to not unnecessarily constrain native + * implementations, but checked here nonetheless. 
*/ - cassert(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_K * 2 * MLKEM_Q), - "polyvec_basemul_acc_montgomery_cached output bounds"); - /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */ - POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_K * 2 * MLKEM_Q); } #else /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */ MLKEM_NATIVE_INTERNAL_API @@ -114,9 +127,8 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, const polyvec *b, const polyvec_mulcache *b_cache) { - POLYVEC_BOUND(a, 4096); - POLYVEC_BOUND(b, NTT_BOUND); - /* Omitting POLYVEC_BOUND(b_cache, MLKEM_Q) since native implementations may + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); + /* Omitting bounds assertion for cache since native implementations may * decide not to use a mulcache. Note that the C backend implementation * of poly_basemul_montgomery_cached() does still include the check. */ polyvec_basemul_acc_montgomery_cached_native(r, a, b, b_cache); @@ -149,6 +161,8 @@ void polyvec_reduce(polyvec *r) { poly_reduce(&r->vec[i]); } + + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API @@ -169,4 +183,148 @@ void polyvec_tomont(polyvec *r) { poly_tomont(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, MLKEM_Q); +} + + +/************************************************* + * Name: poly_cbd_eta1 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA1. 
+ * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static INLINE void poly_cbd_eta1(poly *r, + const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)) +) +{ +#if MLKEM_ETA1 == 2 + poly_cbd2(r, buf); +#elif MLKEM_ETA1 == 3 + poly_cbd3(r, buf); +#else +#error "Invalid value of MLKEM_ETA1" +#endif +} + +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +{ + ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t extkey0[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1]; + memcpy(extkey0, seed, MLKEM_SYMBYTES); + memcpy(extkey1, seed, MLKEM_SYMBYTES); + memcpy(extkey2, seed, MLKEM_SYMBYTES); + memcpy(extkey3, seed, MLKEM_SYMBYTES); + extkey0[MLKEM_SYMBYTES] = nonce0; + extkey1[MLKEM_SYMBYTES] = nonce1; + extkey2[MLKEM_SYMBYTES] = nonce2; + extkey3[MLKEM_SYMBYTES] = nonce3; + prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3); + poly_cbd_eta1(r0, buf0); + poly_cbd_eta1(r1, buf1); + poly_cbd_eta1(r2, buf2); + poly_cbd_eta1(r3, buf3); + + debug_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA1 + 1); +} + +#if MLKEM_K == 2 || MLKEM_K == 4 
+/************************************************* + * Name: poly_cbd_eta2 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA2. + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static INLINE void poly_cbd_eta2(poly *r, + const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1))) +{ +#if MLKEM_ETA2 == 2 + poly_cbd2(r, buf); +#else +#error "Invalid value of MLKEM_ETA2" +#endif +} + +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +{ + ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1]; + + memcpy(extkey, seed, MLKEM_SYMBYTES); + extkey[MLKEM_SYMBYTES] = nonce; + prf_eta2(buf, extkey); + + poly_cbd_eta2(r, buf); + + debug_assert_abs_bound(r, MLKEM_N, MLKEM_ETA2 + 1); +} +#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ + + +#if MLKEM_K == 2 +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +{ + ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; + memcpy(extkey[0], seed, MLKEM_SYMBYTES); + memcpy(extkey[1], seed, MLKEM_SYMBYTES); + memcpy(extkey[2], seed, MLKEM_SYMBYTES); + memcpy(extkey[3], seed, MLKEM_SYMBYTES); + extkey[0][MLKEM_SYMBYTES] = nonce0; + extkey[1][MLKEM_SYMBYTES] = nonce1; + extkey[2][MLKEM_SYMBYTES] = nonce2; + 
extkey[3][MLKEM_SYMBYTES] = nonce3; + + prf_eta1(buf1[0], extkey[0]); + prf_eta1(buf1[1], extkey[1]); + prf_eta2(buf2[0], extkey[2]); + prf_eta2(buf2[1], extkey[3]); + + poly_cbd_eta1(r0, buf1[0]); + poly_cbd_eta1(r1, buf1[1]); + poly_cbd_eta2(r2, buf2[0]); + poly_cbd_eta2(r3, buf2[1]); + + debug_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA2 + 1); + debug_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA2 + 1); } +#endif /* MLKEM_K == 2 */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/polyvec.h index 138724150..8be8579e0 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/polyvec.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/polyvec.h @@ -9,19 +9,144 @@ #include "common.h" #include "poly.h" -#define polyvec MLKEM_NAMESPACE(polyvec) +#define polyvec MLKEM_NAMESPACE_K(polyvec) typedef struct { poly vec[MLKEM_K]; } ALIGN polyvec; -#define polyvec_mulcache MLKEM_NAMESPACE(polyvec_mulcache) +#define polyvec_mulcache MLKEM_NAMESPACE_K(polyvec_mulcache) typedef struct { poly_mulcache vec[MLKEM_K]; } polyvec_mulcache; -#define polyvec_compress_du MLKEM_NAMESPACE(polyvec_compress_du) +#define poly_compress_du MLKEM_NAMESPACE_K(poly_compress_du) +/************************************************* + * Name: poly_compress_du + * + * Description: Compression (du bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +static INLINE void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], + const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU))) +{ +#if MLKEM_DU == 10 + poly_compress_d10(r, a); +#elif MLKEM_DU == 11 + poly_compress_d11(r, a); +#else +#error "Invalid value of MLKEM_DU" +#endif +} + +#define poly_decompress_du MLKEM_NAMESPACE_K(poly_decompress_du) +/************************************************* + * Name: poly_decompress_du + * + * Description: De-serialization and subsequent decompression (du bits) of a + * polynomial; approximate inverse of poly_compress_du + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). 
+ * + **************************************************/ +static INLINE void poly_decompress_du( + poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +{ +#if MLKEM_DU == 10 + poly_decompress_d10(r, a); +#elif MLKEM_DU == 11 + poly_decompress_d11(r, a); +#else +#error "Invalid value of MLKEM_DU" +#endif +} + +#define poly_compress_dv MLKEM_NAMESPACE_K(poly_compress_dv) +/************************************************* + * Name: poly_compress_dv + * + * Description: Compression (dv bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +static INLINE void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], + const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(object_whole(r))) +{ +#if MLKEM_DV == 4 + poly_compress_d4(r, a); +#elif MLKEM_DV == 5 + poly_compress_d5(r, a); +#else +#error "Invalid value of MLKEM_DV" +#endif +} + + +#define poly_decompress_dv MLKEM_NAMESPACE_K(poly_decompress_dv) +/************************************************* + * Name: poly_decompress_dv + * + * Description: De-serialization and subsequent decompression (dv bits) of a + * polynomial; approximate inverse of poly_compress_dv + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). 
+ * + **************************************************/ +static INLINE void poly_decompress_dv( + poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(object_whole(r)) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +{ +#if MLKEM_DV == 4 + poly_decompress_d4(r, a); +#elif MLKEM_DV == 5 + poly_decompress_d5(r, a); +#else +#error "Invalid value of MLKEM_DV" +#endif +} + +#define polyvec_compress_du MLKEM_NAMESPACE_K(polyvec_compress_du) /************************************************* * Name: polyvec_compress_du * @@ -44,7 +169,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_decompress_du MLKEM_NAMESPACE(polyvec_decompress_du) +#define polyvec_decompress_du MLKEM_NAMESPACE_K(polyvec_decompress_du) /************************************************* * Name: polyvec_decompress_du * @@ -67,7 +192,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) ); -#define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes) +#define polyvec_tobytes MLKEM_NAMESPACE_K(polyvec_tobytes) /************************************************* * Name: polyvec_tobytes * @@ -88,7 +213,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_frombytes MLKEM_NAMESPACE(polyvec_frombytes) +#define polyvec_frombytes MLKEM_NAMESPACE_K(polyvec_frombytes) /************************************************* * Name: polyvec_frombytes * @@ -110,7 +235,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, UINT12_LIMIT))) ); -#define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt) +#define polyvec_ntt MLKEM_NAMESPACE_K(polyvec_ntt) /************************************************* * Name: polyvec_ntt * @@ -136,7 +261,7 @@ __contract__( array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, NTT_BOUND))) ); -#define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont) +#define polyvec_invntt_tomont 
MLKEM_NAMESPACE_K(polyvec_invntt_tomont) /************************************************* * Name: polyvec_invntt_tomont * @@ -162,7 +287,7 @@ __contract__( ); #define polyvec_basemul_acc_montgomery \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery) /************************************************* * Name: polyvec_basemul_acc_montgomery * @@ -186,7 +311,7 @@ __contract__( #define polyvec_basemul_acc_montgomery_cached \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached) /************************************************* * Name: polyvec_basemul_acc_montgomery_cached * @@ -194,7 +319,7 @@ __contract__( * using mulcache for second operand. * * Bounds: - * - a is assumed to be coefficient-wise < 4096 in absolute value. + * - Every coefficient of a is assumed to be in [0..4095] * - No bounds guarantees for the coefficients in the result. * * Arguments: - poly *r: pointer to output polynomial @@ -218,7 +343,7 @@ __contract__( assigns(memory_slice(r, sizeof(poly))) ); -#define polyvec_mulcache_compute MLKEM_NAMESPACE(polyvec_mulcache_compute) +#define polyvec_mulcache_compute MLKEM_NAMESPACE_K(polyvec_mulcache_compute) /************************************************************ * Name: polyvec_mulcache_compute * @@ -252,7 +377,7 @@ __contract__( assigns(object_whole(x)) ); -#define polyvec_reduce MLKEM_NAMESPACE(polyvec_reduce) +#define polyvec_reduce MLKEM_NAMESPACE_K(polyvec_reduce) /************************************************* * Name: polyvec_reduce * @@ -278,7 +403,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) ); -#define polyvec_add MLKEM_NAMESPACE(polyvec_add) +#define polyvec_add MLKEM_NAMESPACE_K(polyvec_add) /************************************************* * Name: polyvec_add * @@ -309,7 +434,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_tomont MLKEM_NAMESPACE(polyvec_tomont) +#define 
polyvec_tomont MLKEM_NAMESPACE_K(polyvec_tomont) /************************************************* * Name: polyvec_tomont * @@ -329,4 +454,142 @@ __contract__( array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, MLKEM_Q))) ); +#define poly_getnoise_eta1_4x MLKEM_NAMESPACE_K(poly_getnoise_eta1_4x) +/************************************************* + * Name: poly_getnoise_eta1_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial distribution + * with parameter MLKEM_ETA1. + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +/* Depending on MLKEM_K, the pointers passed to this function belong + to the same objects, so we cannot use memory_no_alias for r0-r3. + + NOTE: Somehow it is important to use memory_no_alias() first in the + conjunctions defining each case. 
+*/ +#if MLKEM_K == 2 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#elif MLKEM_K == 4 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case B: r0, r1, r2, r3 consecutive */ + (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#elif MLKEM_K == 3 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case C: r0, r1, r2 consecutive */ + (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && + r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) 
+ && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#endif /* MLKEM_K */ + +#if MLKEM_ETA1 == MLKEM_ETA2 +/* + * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 + * where MLKEM_ETA2 = MLKEM_ETA1 = 2. + * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. + */ +#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x +#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ + +#if MLKEM_K == 2 || MLKEM_K == 4 +#define poly_getnoise_eta2 MLKEM_NAMESPACE_K(poly_getnoise_eta2) +/************************************************* + * Name: poly_getnoise_eta2 + * + * Description: Sample a polynomial deterministically from a seed and a nonce, + * with output polynomial close to centered binomial distribution + * with parameter MLKEM_ETA2 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce: one-byte input nonce + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r)) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)) +); +#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ + +#if MLKEM_K == 2 +#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE_K(poly_getnoise_eta1122_4x) +/************************************************* + * Name: poly_getnoise_eta1122_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial + * distribution with parameter MLKEM_ETA1 and MLKEM_ETA2 + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + 
**************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +__contract__( + requires( /* r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) + ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)); +); +#endif /* MLKEM_K == 2 */ + #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/reduce.h index 1f502167e..b432a4201 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/reduce.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/reduce.h @@ -8,7 +8,7 @@ #include #include "cbmc.h" #include "common.h" -#include "debug/debug.h" +#include "debug.h" /* Static namespacing * This is to facilitate building multiple instances @@ -109,13 +109,13 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a) **************************************************/ static INLINE int16_t montgomery_reduce(int32_t a) __contract__( - requires(a > -(2 * 4096 * 32768)) - requires(a < (2 * 4096 * 32768)) + requires(a > -(2 * UINT12_LIMIT * 32768)) + requires(a < (2 * UINT12_LIMIT * 32768)) ensures(return_value > -2 * MLKEM_Q && return_value < 2 * MLKEM_Q) ) { int16_t res; - SCALAR_BOUND(a, 2 * UINT12_LIMIT * 32768, "montgomery_reduce input"); + debug_assert_abs_bound(&a, 1, 2 * UINT12_LIMIT * 32768); res = montgomery_reduce_generic(a); /* Bounds: @@ -124,7 +124,7 @@ 
__contract__( * <= UINT12_LIMIT + (MLKEM_Q + 1) / 2 * < 2 * MLKEM_Q */ - SCALAR_BOUND(res, 2 * MLKEM_Q, "montgomery_reduce output"); + debug_assert_abs_bound(&res, 1, 2 * MLKEM_Q); return res; } @@ -150,7 +150,7 @@ __contract__( ) { int16_t res; - SCALAR_BOUND(b, HALF_Q, "fqmul input"); + debug_assert_abs_bound(&b, 1, HALF_Q); res = montgomery_reduce((int32_t)a * (int32_t)b); /* Bounds: @@ -160,7 +160,7 @@ __contract__( * < MLKEM_Q */ - SCALAR_BOUND(res, MLKEM_Q, "fqmul output"); + debug_assert_abs_bound(&res, 1, MLKEM_Q); return res; } @@ -200,7 +200,10 @@ __contract__( * t is in -10 .. +10, so we need 32-bit math to * evaluate t * MLKEM_Q and the subsequent subtraction */ - return (int16_t)(a - t * MLKEM_Q); + int16_t res = (int16_t)(a - t * MLKEM_Q); + + debug_assert_abs_bound(&res, 1, HALF_Q); + return res; } #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/rej_uniform.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/rej_uniform.c index 918986e9b..cbbe4407f 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/rej_uniform.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/rej_uniform.c @@ -2,46 +2,24 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) -#include "rej_uniform.h" #include "arith_backend.h" +#include "debug.h" +#include "fips202.h" +#include "fips202x4.h" +#include "rej_uniform.h" +#include "symmetric.h" /* Static namespacing * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. 
*/ +#define rej_uniform MLKEM_NAMESPACE(rej_uniform) #define rej_uniform_scalar MLKEM_NAMESPACE(rej_uniform_scalar) /* End of static namespacing */ -/************************************************* - * Name: rej_uniform_scalar - * - * Description: Run rejection sampling on uniform random bytes to generate - * uniform random integers mod q - * - * Arguments: - int16_t *r: pointer to output buffer - * - unsigned int target: requested number of 16-bit integers - * (uniform mod q). - * Must be <= 4096. - * - unsigned int offset: number of 16-bit integers that have - * already been sampled. - * Must be <= target. - * - const uint8_t *buf: pointer to input buffer - * (assumed to be uniform random bytes) - * - unsigned int buflen: length of input buffer in bytes - * Must be <= 4096. - * Must be a multiple of 3. - * - * Note: Strictly speaking, only a few values of buflen near UINT_MAX need - * excluding. The limit of 4096 is somewhat arbitary but sufficient for all - * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. - * - * Returns the new offset of sampled 16-bit integers, at most target, - * and at least the initial offset. - * If the new offset is strictly less than len, all of the input buffers - * is guaranteed to have been consumed. If it is equal to len, no information - * is provided on how many bytes of the input buffer have been consumed. 
- **************************************************/ static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target, unsigned int offset, const uint8_t *buf, unsigned int buflen) @@ -58,6 +36,8 @@ __contract__( unsigned int ctr, pos; uint16_t val0, val1; + debug_assert_bound(r, offset, 0, MLKEM_Q); + ctr = offset; pos = 0; /* pos + 3 cannot overflow due to the assumption buflen <= 4096 */ @@ -79,28 +59,183 @@ __contract__( r[ctr++] = val1; } } + + debug_assert_bound(r, ctr, 0, MLKEM_Q); return ctr; } #if !defined(MLKEM_USE_NATIVE_REJ_UNIFORM) -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +/************************************************* + * Name: rej_uniform + * + * Description: Run rejection sampling on uniform random bytes to generate + * uniform random integers mod q + * + * Arguments: - int16_t *r: pointer to output buffer + * - unsigned int target: requested number of 16-bit integers + * (uniform mod q). + * Must be <= 4096. + * - unsigned int offset: number of 16-bit integers that have + * already been sampled. + * Must be <= target. + * - const uint8_t *buf: pointer to input buffer + * (assumed to be uniform random bytes) + * - unsigned int buflen: length of input buffer in bytes + * Must be <= 4096. + * Must be a multiple of 3. + * + * Note: Strictly speaking, only a few values of buflen near UINT_MAX need + * excluding. The limit of 4096 is somewhat arbitrary but sufficient for all + * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. + * + * Returns the new offset of sampled 16-bit integers, at most target, + * and at least the initial offset. + * If the new offset is strictly less than target, all of the input buffer + * is guaranteed to have been consumed. If it is equal to target, no information + * is provided on how many bytes of the input buffer have been consumed. 
+ **************************************************/ + +/* + * NOTE: The signature differs from the Kyber reference implementation + * in that it adds the offset and always expects the base of the target + * buffer. This avoids shifting the buffer base in the caller, which appears + * tricky to reason about. + */ +static unsigned int rej_uniform(int16_t *r, unsigned int target, + unsigned int offset, const uint8_t *buf, + unsigned int buflen) +__contract__( + requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) + requires(memory_no_alias(r, sizeof(int16_t) * target)) + requires(memory_no_alias(buf, buflen)) + requires(offset > 0 ==> array_bound(r, 0, offset, 0, MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * target)) + ensures(offset <= return_value && return_value <= target) + ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, MLKEM_Q)) +) { return rej_uniform_scalar(r, target, offset, buf, buflen); } #else /* MLKEM_USE_NATIVE_REJ_UNIFORM */ - -MLKEM_NATIVE_INTERNAL_API -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +static unsigned int rej_uniform(int16_t *r, unsigned int target, + unsigned int offset, const uint8_t *buf, + unsigned int buflen) { int ret; /* Sample from large buffer with full lane as much as possible. 
*/ ret = rej_uniform_native(r + offset, target - offset, buf, buflen); if (ret != -1) - return offset + (unsigned)ret; + { + unsigned res = offset + (unsigned)ret; + debug_assert_bound(r, res, 0, MLKEM_Q); + return res; + } return rej_uniform_scalar(r, target, offset, buf, buflen); } #endif /* MLKEM_USE_NATIVE_REJ_UNIFORM */ + +#ifndef MLKEM_GEN_MATRIX_NBLOCKS +#define MLKEM_GEN_MATRIX_NBLOCKS \ + ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + XOF_RATE) / XOF_RATE) +#endif + +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform_x4(poly *vec, uint8_t *seed[4]) +{ + /* Temporary buffers for XOF output before rejection sampling */ + uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + + /* Tracks the number of coefficients we have already sampled */ + unsigned int ctr[KECCAK_WAY]; + xof_x4_ctx statex; + unsigned int buflen; + + shake128x4_inc_init(&statex); + + /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */ + xof_x4_absorb(&statex, seed[0], seed[1], seed[2], seed[3], + MLKEM_SYMBYTES + 2); + + /* + * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + * This should generate the matrix entries with high probability. + */ + xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, + &statex); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); + + /* + * So long as not all matrix entries have been generated, squeeze + * one more block a time until we're done. 
+ */ + buflen = XOF_RATE; + while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || + ctr[3] < MLKEM_N) + __loop__( + assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), + object_whole(buf1), object_whole(buf2), object_whole(buf3)) + invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) + invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) + invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, MLKEM_Q)) + invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, MLKEM_Q)) + invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, MLKEM_Q)) + invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, MLKEM_Q))) + { + xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); + } + + xof_x4_release(&statex); +} + +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) +{ + xof_ctx state; + uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + unsigned int ctr, buflen; + + shake128_inc_init(&state); + + xof_absorb(&state, seed, MLKEM_SYMBYTES + 2); + + /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + */ + /* This should generate the matrix entry with high probability. 
*/ + xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; + ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); + + /* Squeeze + sample one more block a time until we're done */ + buflen = XOF_RATE; + while (ctr < MLKEM_N) + __loop__( + assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) + invariant(ctr <= MLKEM_N) + invariant(array_bound(entry->coeffs, 0, ctr, 0, MLKEM_Q))) + { + xof_squeezeblocks(buf, 1, &state); + ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen); + } + + xof_release(&state); +} + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_rej_uniform MLKEM_NAMESPACE_K(empty_cu_rej_uniform) +int empty_cu_rej_uniform; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/rej_uniform.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/rej_uniform.h index 13db836bc..801287259 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/rej_uniform.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/rej_uniform.h @@ -9,54 +9,55 @@ #include #include "cbmc.h" #include "common.h" +#include "poly.h" -#define rej_uniform MLKEM_NAMESPACE(rej_uniform) +#define poly_rej_uniform_x4 MLKEM_NAMESPACE(poly_rej_uniform_x4) /************************************************* - * Name: rej_uniform + * Name: poly_rej_uniform_x4 * - * Description: Run rejection sampling on uniform random bytes to generate - * uniform random integers mod q + * Description: Generate four polynomials using rejection sampling + * on (pseudo-)uniformly random bytes sampled from a seed. * - * Arguments: - int16_t *r: pointer to output buffer - * - unsigned int target: requested number of 16-bit integers - * (uniform mod q). - * Must be <= 4096. - * - unsigned int offset: number of 16-bit integers that have - * already been sampled. - * Must be <= target. 
- * - const uint8_t *buf: pointer to input buffer - * (assumed to be uniform random bytes) - * - unsigned int buflen: length of input buffer in bytes - * Must be <= 4096. - * Must be a multiple of 3. + * Arguments: - poly *vec: Pointer to an array of 4 polynomials + * to be sampled. + * - uint8_t *seed[4]: Pointer to array of four pointers + * pointing to the seed buffers of size + * MLKEM_SYMBYTES + 2 each. * - * Note: Strictly speaking, only a few values of buflen near UINT_MAX need - * excluding. The limit of 4096 is somewhat arbitary but sufficient for all - * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. - * - * Returns the new offset of sampled 16-bit integers, at most target, - * and at least the initial offset. - * If the new offset is strictly less than len, all of the input buffers - * is guaranteed to have been consumed. If it is equal to len, no information - * is provided on how many bytes of the input buffer have been consumed. **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform_x4(poly *vec, uint8_t *seed[4]) +__contract__( + requires(memory_no_alias(vec, sizeof(poly) * 4)) + requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) + requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) + assigns(memory_slice(vec, sizeof(poly) * 4)) + ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, MLKEM_Q))); -/* - * NOTE: The signature differs from the Kyber reference implementation - * in that it adds the offset and always expects the base of the target - * buffer. 
This avoids shifting the buffer base in the caller, which appears - * tricky to reason about. - */ +#define poly_rej_uniform MLKEM_NAMESPACE(poly_rej_uniform) +/************************************************* + * Name: poly_rej_uniform + * + * Description: Generate polynomial using rejection sampling + * on (pseudo-)uniformly random bytes sampled from a seed. + * + * Arguments: - poly *vec: Pointer to polynomial to be sampled. + * - uint8_t *seed: Pointer to seed buffer of size + * MLKEM_SYMBYTES + 2 each. + * + **************************************************/ MLKEM_NATIVE_INTERNAL_API -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +void poly_rej_uniform(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) __contract__( - requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) - requires(memory_no_alias(r, sizeof(int16_t) * target)) - requires(memory_no_alias(buf, buflen)) - requires(offset > 0 ==> array_bound(r, 0, offset, 0, MLKEM_Q)) - assigns(memory_slice(r, sizeof(int16_t) * target)) - ensures(offset <= return_value && return_value <= target) - ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, MLKEM_Q)) -); -#endif + requires(memory_no_alias(entry, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) + assigns(memory_slice(entry, sizeof(poly))) + ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q))); + +#endif /* REJ_UNIFORM_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/symmetric.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/symmetric.h index 55ebbbd53..3563e5505 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/symmetric.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/symmetric.h @@ -10,6 +10,7 @@ #include "cbmc.h" #include "common.h" #include "fips202.h" +#include "fips202x4.h" /* Macros denoting FIPS-203 specific Hash functions */ diff --git 
a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/verify.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/verify.c index b7078fcc1..9f39dcd22 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/verify.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/verify.c @@ -4,7 +4,8 @@ */ #include "verify.h" -#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) +#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) && \ + !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) /* * Masking value used in constant-time functions from * verify.h to block the compiler's range analysis and @@ -12,9 +13,11 @@ */ volatile uint64_t ct_opt_blocker_u64 = 0; -#else /* MLKEM_USE_ASM_VALUE_BARRIER */ +#else /* MLKEM_USE_ASM_VALUE_BARRIER && \ + !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ -#define empty_cu_verify MLKEM_NAMESPACE(empty_cu_verify) +#define empty_cu_verify MLKEM_NAMESPACE_K(empty_cu_verify) int empty_cu_verify; -#endif /* MLKEM_USE_ASM_VALUE_BARRIER */ +#endif /* MLKEM_USE_ASM_VALUE_BARRIER && \ + !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/verify.h index 8c47155dc..f6ecf5eba 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/verify.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/verify.h @@ -268,7 +268,7 @@ __contract__( for (i = 0; i < len; i++) __loop__( - invariant(i >= 0 && i <= len) + invariant(i <= len) invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k]))))) { r |= a[i] ^ b[i]; @@ -314,4 +314,4 @@ __contract__( } } -#endif +#endif /* VERIFY_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/zetas.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/zetas.c index 1a26e0dd5..4ef887c62 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/zetas.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/zetas.c @@ -8,6 +8,8 @@ * Do not modify it directly. 
*/ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) #include "ntt.h" /* @@ -28,3 +30,10 @@ ALIGN const int16_t zetas[128] = { -1187, -1659, -1185, -1530, -1278, 794, -1510, -854, -870, 478, -108, -308, 996, 991, 958, -1460, 1522, 1628, }; + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_zetas MLKEM_NAMESPACE_K(empty_cu_zetas) +int empty_cu_zetas; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/arith_backend.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/arith_backend.h index 09e30f207..0543b1bd1 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/arith_backend.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/arith_backend.h @@ -16,7 +16,9 @@ * * Keep this _after_ the inclusion of the backend; otherwise, * the sanity checks won't have an effect. */ +#if defined(MLKEM_NATIVE_CHECK_APIS) #include "api.h" #endif +#endif #endif /* MLKEM_NATIVE_ARITH_IMPL_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.c index 433bdc954..1e6b7c5d1 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.c @@ -2,8 +2,11 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ -#include "cbd.h" +#include "common.h" +#ifndef MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED + #include +#include "cbd.h" /* Static namespacing * This is to facilitate building multiple instances @@ -11,8 +14,6 @@ * within a single compilation unit. 
*/ #define load32_littleendian MLKEM_NAMESPACE(load32_littleendian) #define load24_littleendian MLKEM_NAMESPACE(load24_littleendian) -#define cbd2 MLKEM_NAMESPACE(cbd2) -#define cbd3 MLKEM_NAMESPACE(cbd3) /* End of static namespacing */ /************************************************* @@ -35,44 +36,13 @@ static uint32_t load32_littleendian(const uint8_t x[4]) return r; } -#if MLKEM_ETA1 == 3 -/************************************************* - * Name: load24_littleendian - * - * Description: load 3 bytes into a 32-bit integer - * in little-endian order. - * This function is only needed for ML-KEM-512 - * - * Arguments: - const uint8_t *x: pointer to input byte array - * - * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) - **************************************************/ -static uint32_t load24_littleendian(const uint8_t x[3]) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - return r; -} -#endif /* MLKEM_ETA1 == 3 */ - -/************************************************* - * Name: cbd2 - * - * Description: Given an array of uniformly random bytes, compute - * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter eta=2 - * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *buf: pointer to input byte array - **************************************************/ -static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) +MLKEM_NATIVE_INTERNAL_API +void poly_cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) { unsigned i; for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_abs_bound(r->coeffs, 0, 8 * i, 3))) { unsigned j; @@ -82,7 +52,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i <= MLKEM_N / 8 && j <= 8) 
invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 3))) { const int16_t a = (d >> (4 * j + 0)) & 0x3; @@ -92,24 +62,34 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) } } -#if MLKEM_ETA1 == 3 +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == 3 /************************************************* - * Name: cbd3 + * Name: load24_littleendian * - * Description: Given an array of uniformly random bytes, compute - * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter eta=3. + * Description: load 3 bytes into a 32-bit integer + * in little-endian order. * This function is only needed for ML-KEM-512 * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *buf: pointer to input byte array + * Arguments: - const uint8_t *x: pointer to input byte array + * + * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) **************************************************/ -static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) +static uint32_t load24_littleendian(const uint8_t x[3]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + return r; +} + +MLKEM_NATIVE_INTERNAL_API +void poly_cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) { unsigned i; for (i = 0; i < MLKEM_N / 4; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(i <= MLKEM_N / 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i, 4))) { unsigned j; @@ -120,7 +100,7 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) for (j = 0; j < 4; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4) + invariant(i <= MLKEM_N / 4 && j <= 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 4))) { const int16_t a = (d >> (6 * j + 0)) & 0x7; @@ -129,28 +109,12 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) } } } -#endif /* MLKEM_ETA1 == 3 */ +#endif /* 
defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == \ + 3 */ -MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) -{ -#if MLKEM_ETA1 == 2 - cbd2(r, buf); -#elif MLKEM_ETA1 == 3 - cbd3(r, buf); -#else -#error "This implementation requires eta1 in {2,3}" -#endif -} +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ -#if MLKEM_K == 2 || MLKEM_K == 4 -MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) -{ -#if MLKEM_ETA2 == 2 - cbd2(r, buf); -#else -#error "This implementation requires eta2 = 2" -#endif -} -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ +#define empty_cu_cbd MLKEM_NAMESPACE_K(empty_cu_cbd) +int empty_cu_cbd; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.h index 15db89570..54c1f5b90 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.h @@ -9,46 +9,35 @@ #include "common.h" #include "poly.h" -#define poly_cbd_eta1 MLKEM_NAMESPACE(poly_cbd_eta1) +#define poly_cbd2 MLKEM_NAMESPACE(poly_cbd2) /************************************************* - * Name: poly_cbd_eta1 + * Name: poly_cbd2 * * Description: Given an array of uniformly random bytes, compute * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter MLKEM_ETA1. 
+ * a centered binomial distribution with parameter eta=2 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *buf: pointer to input byte array **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)) -); +void poly_cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]); -#if MLKEM_K == 2 || MLKEM_K == 4 -#define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2) +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == 3 +#define poly_cbd3 MLKEM_NAMESPACE(poly_cbd3) /************************************************* - * Name: poly_cbd_eta1 + * Name: poly_cbd3 * * Description: Given an array of uniformly random bytes, compute * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter MLKEM_ETA2. + * a centered binomial distribution with parameter eta=3. 
+ * This function is only needed for ML-KEM-512 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *buf: pointer to input byte array **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)) -); -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ +void poly_cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]); +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD || MLKEM_ETA1 == 3 */ -#endif +#endif /* CBD_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbmc.h index baa0bfa9f..52b95bc3f 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbmc.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbmc.h @@ -13,7 +13,7 @@ #define __contract__(x) #define __loop__(x) -#define cassert(x, y) +#define cassert(x) #else /* CBMC _is_ defined, therefore we're doing proof */ @@ -30,7 +30,7 @@ #define invariant(...) __CPROVER_loop_invariant(__VA_ARGS__) #define decreases(...) __CPROVER_decreases(__VA_ARGS__) /* cassert to avoid confusion with in-built assert */ -#define cassert(...) __CPROVER_assert(__VA_ARGS__) +#define cassert(x) __CPROVER_assert(x, "cbmc assertion failed") #define assume(...) 
__CPROVER_assume(__VA_ARGS__) /*************************************************** @@ -119,13 +119,13 @@ { \ unsigned qvar; \ ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> \ - (((value_lb) <= (array_var[(qvar)])) && \ - ((array_var[(qvar)]) < (value_ub))) \ + (((int)(value_lb) <= ((array_var)[(qvar)])) && \ + (((array_var)[(qvar)]) < (int)(value_ub))) \ } #define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \ array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \ - (qvar_ub), (array_var), (value_lb), (value_ub)) + (qvar_ub), (array_var), (value_lb), (value_ub)) /* clang-format on */ /* Wrapper around array_bound operating on absolute values. @@ -134,6 +134,6 @@ * bound in array_bound is inclusive, we have to raise it by 1. */ #define array_abs_bound(arr, lb, ub, k) \ - array_bound((arr), (lb), (ub), -(k) + 1, (k)) + array_bound((arr), (lb), (ub), -((int)(k)) + 1, (k)) #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/common.h index da886780c..4f326333e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/common.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/common.h @@ -43,23 +43,30 @@ #define MLKEM_NATIVE_MAKE_NAMESPACE_(x1, x2) x1##_##x2 #define MLKEM_NATIVE_MAKE_NAMESPACE(x1, x2) MLKEM_NATIVE_MAKE_NAMESPACE_(x1, x2) -#define FIPS202_NAMESPACE(s) \ - MLKEM_NATIVE_MAKE_NAMESPACE(FIPS202_NAMESPACE_PREFIX, s) - #define MLKEM_NAMESPACE(s) \ MLKEM_NATIVE_MAKE_NAMESPACE(MLKEM_NAMESPACE_PREFIX, s) +#if defined(MLKEM_NAMESPACE_PREFIX_ADD_LEVEL) +#define MLKEM_NATIVE_MAKE_NAMESPACE_K_(x1, x2, x3) x1##x2##_##x3 +#define MLKEM_NATIVE_MAKE_NAMESPACE_K(x1, x2, x3) \ + MLKEM_NATIVE_MAKE_NAMESPACE_K_(x1, x2, x3) +#define MLKEM_NAMESPACE_K(s) \ + MLKEM_NATIVE_MAKE_NAMESPACE_K(MLKEM_NAMESPACE_PREFIX, MLKEM_LVL, s) +#else +#define MLKEM_NAMESPACE_K(s) MLKEM_NAMESPACE(s) +#endif + /* On Apple platforms, we need to emit leading underscore * in front of assembly symbols. 
We thus introducee a separate * namespace wrapper for ASM symbols. */ #if !defined(__APPLE__) #define MLKEM_ASM_NAMESPACE(sym) MLKEM_NAMESPACE(sym) -#define FIPS202_ASM_NAMESPACE(sym) FIPS202_NAMESPACE(sym) +#define MLKEM_ASM_NAMESPACE_K(sym) MLKEM_NAMESPACE_K(sym) #else #define PREFIX_UNDERSCORE_(sym) _##sym #define PREFIX_UNDERSCORE(sym) PREFIX_UNDERSCORE_(sym) #define MLKEM_ASM_NAMESPACE(sym) PREFIX_UNDERSCORE(MLKEM_NAMESPACE(sym)) -#define FIPS202_ASM_NAMESPACE(sym) PREFIX_UNDERSCORE(FIPS202_NAMESPACE(sym)) +#define MLKEM_ASM_NAMESPACE_K(sym) PREFIX_UNDERSCORE(MLKEM_NAMESPACE_K(sym)) #endif #endif /* MLKEM_NATIVE_COMMON_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/config.h index d1441835b..fa89370ce 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/config.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/config.h @@ -40,10 +40,12 @@ /* #define MLKEM_NATIVE_CONFIG_FILE "config.h" */ /****************************************************************************** - * Name: MLKEM_NAMESPACE + * Name: MLKEM_NAMESPACE_PREFIX * - * Description: The prefix to use to namespace global symbols - * from mlkem/. + * Description: The prefix to use to namespace global symbols from mlkem/. + * + * Level-dependent symbols will additionally be prefixed with the + * security level if MLKEM_NAMESPACE_PREFIX_ADD_LEVEL is set. * * This can also be set using CFLAGS. * @@ -53,17 +55,71 @@ #endif /****************************************************************************** - * Name: FIPS202_NAMESPACE + * Name: MLKEM_NAMESPACE_PREFIX_ADD_LEVEL + * + * Description: If set, the level (512, 768, 1024) is added to the namespace + * prefix MLKEM_NAMESPACE_PREFIX for all functions which are + * level-dependent. Level-independent functions will have there + * symbol prefixed by MLKEM_NAMESPACE_PREFIX only. * - * Description: The prefix to use to namespace global symbols - * from mlkem/fips202/. 
+ * This is intended to be used for multi-level builds where + * level-independent code should be shared across levels. * * This can also be set using CFLAGS. * *****************************************************************************/ -#if !defined(FIPS202_NAMESPACE_PREFIX) -#define FIPS202_NAMESPACE_PREFIX FIPS202_DEFAULT_NAMESPACE_PREFIX -#endif +/* #define MLKEM_NAMESPACE_PREFIX_ADD_LEVEL */ + +/****************************************************************************** + * Name: MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED + * + * Description: This is for multi-level builds of mlkem-native only. If you + * need only a single security level build of mlkem-native, + * keep this unset. + * + * If this is set, all MLKEM_K-independent code will be included + * in the build, including code needed only for other security + * levels. + * + * Example: poly_cbd3 is only needed for MLKEM_K == 2. Yet, if + * this option is set for a build with MLKEM_K==3/4, it would + * be included. + * + * To build mlkem-native with support for all security levels, + * build it three times -- once per level -- and set the option + * MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED for exactly one of + * them, and MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED for the + * others. + * + * See examples/multilevel_build for an example. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +/* #define MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED */ + +/****************************************************************************** + * Name: MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED + * + * Description: This is for multi-level builds of mlkem-native only. If you + * need only a single security level build of mlkem-native, + * keep this unset. + * + * If this is set, no MLKEM_K-independent code will be included + * in the build. 
+ * + * To build mlkem-native with support for all security levels, + * build it three times -- once per level -- and set the option + * MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED for exactly one of + * them, and MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED for the + * others. + * + * See examples/multilevel_build for an example. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +/* #define MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ /****************************************************************************** * Name: MLKEM_USE_NATIVE @@ -112,25 +168,13 @@ /* Default namespace * * Don't change this. If you need a different namespace, re-define - * MLKEM_NAMESPACE above instead, and remove the following. - */ - -/* - * The default FIPS202 namespace is - * - * PQCP_MLKEM_NATIVE_FIPS202__ + * MLKEM_NAMESPACE_PREFIX above instead, and remove the following. * - * e.g., PQCP_MLKEM_NATIVE_FIPS202_C_ - */ - -#define FIPS202_DEFAULT_NAMESPACE_PREFIX PQCP_MLKEM_NATIVE_FIPS202 - -/* * The default MLKEM namespace is * - * PQCP_MLKEM_NATIVE_MLKEM__ + * PQCP_MLKEM_NATIVE_MLKEM_ * - * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_ + * e.g., PQCP_MLKEM_NATIVE_MLKEM512_ */ #if MLKEM_K == 2 diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug.c new file mode 100644 index 000000000..4b4857cbc --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug.c @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +/* NOTE: You can remove this file unless you compile with MLKEM_DEBUG. 
*/ + +#include "common.h" + +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) && defined(MLKEM_DEBUG) + + +#include +#include +#include "debug.h" + +#define MLKEM_NATIVE_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] " + +void mlkem_debug_assert(const char *file, int line, const int val) +{ + if (val == 0) + { + fprintf(stderr, + MLKEM_NATIVE_DEBUG_ERROR_HEADER "Assertion failed (value %d)\n", + file, line, val); + exit(1); + } +} + +void mlkem_debug_check_bounds(const char *file, int line, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive) +{ + int err = 0; + unsigned i; + for (i = 0; i < len; i++) + { + int16_t val = ptr[i]; + if (!(val > lower_bound_exclusive && val < upper_bound_exclusive)) + { + fprintf( + stderr, + MLKEM_NATIVE_DEBUG_ERROR_HEADER + "Bounds assertion failed: Index %u, value %d out of bounds (%d,%d)\n", + file, line, i, (int)val, lower_bound_exclusive, + upper_bound_exclusive); + err = 1; + } + } + + if (err == 1) + exit(1); +} + +#else /* !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED && MLKEM_DEBUG */ + +#define empty_cu_debug MLKEM_NAMESPACE_K(empty_cu_debug) +int empty_cu_debug; + +#endif /* !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED && MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug.h new file mode 100644 index 000000000..1103124db --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef MLKEM_DEBUG_H +#define MLKEM_DEBUG_H +#include "common.h" + +#if defined(MLKEM_DEBUG) +#include + +/************************************************* + * Name: mlkem_debug_assert + * + * Description: Check debug assertion + * + * Prints an error message to stderr and calls + * exit(1) if not. 
+ * + * Arguments: - file: filename + * - line: line number + * - val: Value asserted to be non-zero + **************************************************/ +#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert) +void mlkem_debug_assert(const char *file, int line, const int val); + +/************************************************* + * Name: mlkem_debug_check_bounds + * + * Description: Check whether values in an array of int16_t + * are within specified bounds. + * + * Prints an error message to stderr and calls + * exit(1) if not. + * + * Arguments: - file: filename + * - line: line number + * - ptr: Base of array to be checked + * - len: Number of int16_t in ptr + * - lower_bound_exclusive: Exclusive lower bound + * - upper_bound_exclusive: Exclusive upper bound + **************************************************/ +#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds) +void mlkem_debug_check_bounds(const char *file, int line, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive); + +/* Check assertion, calling exit() upon failure + * + * val: Value that's asserted to be non-zero + */ +#define debug_assert(val) mlkem_debug_assert(__FILE__, __LINE__, (val)) + +/* Check bounds in array of int16_t's + * ptr: Base of int16_t array; will be explicitly cast to int16_t*, + * so you may pass a byte-compatible type such as poly or polyvec. 
+ * len: Number of int16_t in array + * value_lb: Inclusive lower value bound + * value_ub: Exclusive upper value bound */ +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + mlkem_debug_check_bounds(__FILE__, __LINE__, (const int16_t *)(ptr), (len), \ + (value_lb)-1, (value_ub)) + +/* Check absolute bounds in array of int16_t's + * ptr: Base of array, expression of type int16_t* + * len: Number of int16_t in array + * value_abs_bd: Exclusive absolute upper bound */ +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + debug_assert_bound((ptr), (len), (-(value_abs_bd) + 1), (value_abs_bd)) + +/* Version of bounds assertions for 2-dimensional arrays */ +#define debug_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \ + debug_assert_bound((ptr), ((len0) * (len1)), (value_lb), (value_ub)) + +#define debug_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \ + debug_assert_abs_bound((ptr), ((len0) * (len1)), (value_abs_bd)) + +/* When running CBMC, convert debug assertions into proof obligations */ +#elif defined(CBMC) + +#include "../cbmc.h" + +#define debug_assert(val) cassert(val) + +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + cassert(array_bound(((int16_t *)(ptr)), 0, (len), (value_lb), (value_ub))) + +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + cassert(array_abs_bound(((int16_t *)(ptr)), 0, (len), (value_abs_bd))) + +/* Because of https://github.com/diffblue/cbmc/issues/8570, we can't + * just use a single flattened array_bound(...) here. 
*/ +#define debug_assert_bound_2d(ptr, M, N, value_lb, value_ub) \ + cassert(forall(kN, 0, (M), \ + array_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \ + (value_lb), (value_ub)))) + +#define debug_assert_abs_bound_2d(ptr, M, N, value_abs_bd) \ + cassert(forall(kN, 0, (M), \ + array_abs_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \ + (value_abs_bd)))) + +#else /* MLKEM_DEBUG */ + +#define debug_assert(val) \ + do \ + { \ + } while (0) +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + do \ + { \ + } while (0) +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + do \ + { \ + } while (0) + +#define debug_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \ + do \ + { \ + } while (0) + +#define debug_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \ + do \ + { \ + } while (0) + + +#endif /* MLKEM_DEBUG */ +#endif /* MLKEM_DEBUG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug/debug.c deleted file mode 100644 index 64294ebe1..000000000 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug/debug.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2024 The mlkem-native project authors - * SPDX-License-Identifier: Apache-2.0 - */ -#include "../common.h" - -#if defined(MLKEM_DEBUG) - -#include -#include "debug.h" - -#define MLKEM_NATIVE_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] " - -void mlkem_debug_assert(const char *file, int line, const char *description, - const int val) -{ - if (val == 0) - { - fprintf(stderr, - MLKEM_NATIVE_DEBUG_ERROR_HEADER "Assertion failed: %s (value %d)\n", - file, line, description, val); - exit(1); - } -} - -void mlkem_debug_check_bounds(const char *file, int line, - const char *description, const int16_t *ptr, - unsigned len, int lower_bound_exclusive, - int upper_bound_exclusive) -{ - int err = 0; - unsigned i; - for (i = 0; i < len; i++) - { - int16_t val = ptr[i]; - if (!(val > lower_bound_exclusive && val < upper_bound_exclusive)) - 
{ - fprintf(stderr, - MLKEM_NATIVE_DEBUG_ERROR_HEADER - "%s, index %u, value %d out of bounds (%d,%d)\n", - file, line, description, i, (int)val, lower_bound_exclusive, - upper_bound_exclusive); - err = 1; - } - } - - if (err == 1) - exit(1); -} - -#else /* MLKEM_DEBUG */ - -#define empty_cu_debug MLKEM_NAMESPACE(empty_cu_debug) -int empty_cu_debug; - -#endif /* MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug/debug.h deleted file mode 100644 index 5ce320ea2..000000000 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug/debug.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2024 The mlkem-native project authors - * SPDX-License-Identifier: Apache-2.0 - */ -#ifndef MLKEM_DEBUG_H -#define MLKEM_DEBUG_H - -#include "../common.h" - -#if defined(MLKEM_DEBUG) -#include -#include -#include - -/************************************************* - * Name: mlkem_debug_assert - * - * Description: Check debug assertion - * - * Prints an error message to stderr and calls - * exit(1) if not. - * - * Arguments: - file: filename - * - line: line number - * - description: Textual description of assertion - * - val: Value asserted to be non-zero - **************************************************/ -#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert) -void mlkem_debug_assert(const char *file, int line, const char *description, - const int val); - -/************************************************* - * Name: mlkem_debug_check_bounds - * - * Description: Check whether values in an array of int16_t - * are within specified bounds. - * - * Prints an error message to stderr and calls - * exit(1) if not. 
- * - * Arguments: - file: filename - * - line: line number - * - description: Textual description of check - * - ptr: Base of array to be checked - * - len: Number of int16_t in ptr - * - lower_bound_exclusive: Exclusive lower bound - * - upper_bound_exclusive: Exclusive upper bound - **************************************************/ -#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds) -void mlkem_debug_check_bounds(const char *file, int line, - const char *description, const int16_t *ptr, - unsigned len, int lower_bound_exclusive, - int upper_bound_exclusive); - -/* Check assertion, calling exit() upon failure - * - * val: Value that's asserted to be non-zero - * msg: Message to print on failure - * - * Currently called CASSERT to avoid clash with CBMC assert. - */ -#define CASSERT(val, msg) \ - do \ - { \ - mlkem_debug_assert(__FILE__, __LINE__, (msg), (val)); \ - } while (0) - -/* Check absolute bounds of scalar - * val: Scalar to be checked - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define SCALAR_BOUND(val, abs_bound, msg) \ - CASSERT((val) > -(abs_bound) && (val) < (abs_bound), msg) - -/* Check that all coefficients in array of int16_t's are non-negative - * and below an exclusive upper bound. 
- * - * ptr: Base of array, expression of type int16_t* - * len: Number of int16_t in array - * high_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define UBOUND(ptr, len, high_bound, msg) \ - do \ - { \ - mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ - (len), -1, ((high_bound))); \ - } while (0) - -/* Check absolute bounds in array of int16_t's - * ptr: Base of array, expression of type int16_t* - * len: Number of int16_t in array - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define BOUND(ptr, len, abs_bound, msg) \ - do \ - { \ - mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ - (len), -(abs_bound), (abs_bound)); \ - } while (0) - -/* Check absolute bounds on coefficients in polynomial or mulcache - * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define POLY_BOUND_MSG(ptr, abs_bound, msg) \ - BOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (abs_bound), \ - msg) - -/* Check unsigned bounds on coefficients in polynomial or mulcache - * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
- * msg: Message to print on failure */ -#define POLY_UBOUND_MSG(ptr, ubound, msg) \ - UBOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (ubound), \ - msg) - -/* Check absolute bounds on coefficients in polynomial - * ptr: poly* of poly_mulcache* pointer to polynomial (cache) to check - * abs_bound: Exclusive upper bound on absolute value to check */ -#define POLY_BOUND(ptr, abs_bound) \ - POLY_BOUND_MSG((ptr), (abs_bound), "poly absolute bound for " #ptr) - -/* Check unsigned bounds on coefficients in polynomial - * ptr: poly* of poly_mulcache* pointer to polynomial (cache) to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. - */ -#define POLY_UBOUND(ptr, ubound) \ - POLY_UBOUND_MSG((ptr), (ubound), "poly unsigned bound for " #ptr) - -/* Check absolute bounds on coefficients in vector of polynomials - * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check - * abs_bound: Exclusive upper bound on absolute value to check */ -#define POLYVEC_BOUND(ptr, abs_bound) \ - do \ - { \ - unsigned _debug_polyvec_bound_idx; \ - for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ - _debug_polyvec_bound_idx++) \ - POLY_BOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (abs_bound), \ - "polyvec absolute bound for " #ptr ".vec[i]"); \ - } while (0) - -/* Check unsigned bounds on coefficients in vector of polynomials - * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
- */ -#define POLYVEC_UBOUND(ptr, ubound) \ - do \ - { \ - unsigned _debug_polyvec_bound_idx; \ - for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ - _debug_polyvec_bound_idx++) \ - POLY_UBOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (ubound), \ - "polyvec unsigned bound for " #ptr ".vec[i]"); \ - } while (0) - -#define MLKEM_CONCAT_(left, right) left##right -#define MLKEM_CONCAT(left, right) MLKEM_CONCAT_(left, right) - -/* Following AWS-LC to define a C99-compliant static assert */ -#define MLKEM_STATIC_ASSERT_DEFINE(cond, msg) \ - typedef struct \ - { \ - unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \ - } MLKEM_CONCAT(MLKEM_NAMESPACE(static_assertion_), msg) \ - __attribute__((unused)); - -#define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \ - MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix)) -#define MLKEM_STATIC_ASSERT_ADD_LINE1(cond, line, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE0(cond, MLKEM_CONCAT(line, suffix)) -#define MLKEM_STATIC_ASSERT_ADD_LINE2(cond, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE1(cond, __LINE__, suffix) -#define MLKEM_STATIC_ASSERT_ADD_ERROR(cond, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE2(cond, MLKEM_CONCAT(_error_is_, suffix)) -#define STATIC_ASSERT(cond, error) MLKEM_STATIC_ASSERT_ADD_ERROR(cond, error) - -#else /* MLKEM_DEBUG */ - -#define CASSERT(val, msg) \ - do \ - { \ - } while (0) -#define SCALAR_BOUND(val, abs_bound, msg) \ - do \ - { \ - } while (0) -#define BOUND(ptr, len, abs_bound, msg) \ - do \ - { \ - } while (0) -#define POLY_BOUND(ptr, abs_bound) \ - do \ - { \ - } while (0) -#define POLYVEC_BOUND(ptr, abs_bound) \ - do \ - { \ - } while (0) -#define POLY_BOUND_MSG(ptr, ubound, abs_bound) \ - do \ - { \ - } while (0) -#define UBOUND(ptr, len, high_bound, msg) \ - do \ - { \ - } while (0) -#define POLY_UBOUND(ptr, ubound) \ - do \ - { \ - } while (0) -#define POLYVEC_UBOUND(ptr, ubound) \ - do \ - { \ - } while (0) -#define POLY_UBOUND_MSG(ptr, ubound, msg) \ 
- do \ - { \ - } while (0) -#define STATIC_ASSERT(cond, error) - -#endif /* MLKEM_DEBUG */ - -#endif /* MLKEM_DEBUG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.c index 4d3133e14..0cfcc3e9e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.c @@ -17,7 +17,7 @@ #include "symmetric.h" #include "arith_backend.h" -#include "debug/debug.h" +#include "debug.h" #include "cbmc.h" @@ -25,15 +25,13 @@ * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. */ -#define pack_pk MLKEM_NAMESPACE(pack_pk) -#define unpack_pk MLKEM_NAMESPACE(unpack_pk) -#define pack_sk MLKEM_NAMESPACE(pack_sk) -#define unpack_sk MLKEM_NAMESPACE(unpack_sk) -#define pack_ciphertext MLKEM_NAMESPACE(pack_ciphertext) -#define unpack_ciphertext MLKEM_NAMESPACE(unpack_ciphertext) -#define gen_matrix_entry_x4 MLKEM_NAMESPACE(gen_matrix_entry_x4) -#define gen_matrix_entry MLKEM_NAMESPACE(gen_matrix_entry) -#define matvec_mul MLKEM_NAMESPACE(matvec_mul) +#define pack_pk MLKEM_NAMESPACE_K(pack_pk) +#define unpack_pk MLKEM_NAMESPACE_K(unpack_pk) +#define pack_sk MLKEM_NAMESPACE_K(pack_sk) +#define unpack_sk MLKEM_NAMESPACE_K(unpack_sk) +#define pack_ciphertext MLKEM_NAMESPACE_K(pack_ciphertext) +#define unpack_ciphertext MLKEM_NAMESPACE_K(unpack_ciphertext) +#define matvec_mul MLKEM_NAMESPACE_K(matvec_mul) /* End of static namespacing */ /************************************************* @@ -51,7 +49,7 @@ static void pack_pk(uint8_t r[MLKEM_INDCPA_PUBLICKEYBYTES], polyvec *pk, const uint8_t seed[MLKEM_SYMBYTES]) { - POLYVEC_BOUND(pk, MLKEM_Q); + debug_assert_bound_2d(pk, MLKEM_K, MLKEM_N, 0, MLKEM_Q); polyvec_tobytes(r, pk); memcpy(r + MLKEM_POLYVECBYTES, seed, MLKEM_SYMBYTES); } @@ -77,7 +75,7 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], /* NOTE: If a 
modulus check was conducted on the PK, we know at this * point that the coefficients of `pk` are unsigned canonical. The * specifications and proofs, however, do _not_ assume this, and instead - * work with the easily provable bound by 4096. */ + * work with the easily provable bound by UINT12_LIMIT. */ } /************************************************* @@ -91,7 +89,7 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], **************************************************/ static void pack_sk(uint8_t r[MLKEM_INDCPA_SECRETKEYBYTES], polyvec *sk) { - POLYVEC_BOUND(sk, MLKEM_Q); + debug_assert_bound_2d(sk, MLKEM_K, MLKEM_N, 0, MLKEM_Q); polyvec_tobytes(r, sk); } @@ -145,131 +143,11 @@ static void unpack_ciphertext(polyvec *b, poly *v, poly_decompress_dv(v, c + MLKEM_POLYVECCOMPRESSEDBYTES_DU); } -#ifndef MLKEM_GEN_MATRIX_NBLOCKS -#define MLKEM_GEN_MATRIX_NBLOCKS \ - ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + XOF_RATE) / XOF_RATE) -#endif - -/* - * Generate four A matrix entries from a seed, using rejection - * sampling on the output of a XOF. 
- */ -static void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4]) -__contract__( - requires(memory_no_alias(vec, sizeof(poly) * 4)) - requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) - requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) - assigns(memory_slice(vec, sizeof(poly) * 4)) - ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) -{ - /* Temporary buffers for XOF output before rejection sampling */ - uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - - /* Tracks the number of coefficients we have already sampled */ - unsigned int ctr[KECCAK_WAY]; - xof_x4_ctx statex; - unsigned int buflen; - - shake128x4_inc_init(&statex); - - /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */ - xof_x4_absorb(&statex, seed[0], seed[1], seed[2], seed[3], - MLKEM_SYMBYTES + 2); - - /* - * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. - * This should generate the matrix entries with high probability. - */ - xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, - &statex); - buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; - ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); - ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); - ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); - ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); - - /* - * So long as not all matrix entries have been generated, squeeze - * one more block a time until we're done. 
- */ - buflen = XOF_RATE; - while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || - ctr[3] < MLKEM_N) - __loop__( - assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), - object_whole(buf1), object_whole(buf2), object_whole(buf3)) - invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) - invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) - invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, MLKEM_Q)) - invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, MLKEM_Q)) - invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, MLKEM_Q)) - invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, MLKEM_Q))) - { - xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); - ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); - ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); - ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); - ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); - } - - xof_x4_release(&statex); -} - -/* - * Generate a single A matrix entry from a seed, using rejection - * sampling on the output of a XOF. - */ -static void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) -__contract__( - requires(memory_no_alias(entry, sizeof(poly))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) - assigns(memory_slice(entry, sizeof(poly))) - ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) -{ - xof_ctx state; - uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - unsigned int ctr, buflen; - - shake128_inc_init(&state); - xof_absorb(&state, seed, MLKEM_SYMBYTES + 2); - - /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. - */ - /* This should generate the matrix entry with high probability. 
*/ - xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); - buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; - ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); - - /* Squeeze + sample one more block a time until we're done */ - buflen = XOF_RATE; - while (ctr < MLKEM_N) - __loop__( - assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) - invariant(0 <= ctr && ctr <= MLKEM_N) - invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr, - 0, MLKEM_Q))) - { - xof_squeezeblocks(buf, 1, &state); - ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen); - } - - xof_release(&state); -} - #if !defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER) /* This namespacing is not done at the top to avoid a naming conflict * with native backends, which are currently not yet namespaced. */ #define poly_permute_bitrev_to_custom \ - MLKEM_NAMESPACE(poly_permute_bitrev_to_custom) + MLKEM_NAMESPACE_K(poly_permute_bitrev_to_custom) static INLINE void poly_permute_bitrev_to_custom(poly *data) __contract__( @@ -332,7 +210,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) * This call writes across polyvec boundaries for K=2 and K=3. * This is intentional and safe. */ - gen_matrix_entry_x4(&a[0].vec[0] + i, seedxy); + poly_rej_uniform_x4(&a[0].vec[0] + i, seedxy); } /* For left over polynomial, we use single keccak. */ @@ -353,12 +231,11 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) seed0[MLKEM_SYMBYTES + 1] = x; } - gen_matrix_entry(&a[0].vec[0] + i, seed0); + poly_rej_uniform(&a[0].vec[0] + i, seed0); i++; } - cassert(i == MLKEM_K * MLKEM_K, - "gen_matrix: failed to generate whole matrix"); + debug_assert(i == MLKEM_K * MLKEM_K); /* * The public matrix is generated in NTT domain. 
If the native backend @@ -402,16 +279,12 @@ __contract__( for (i = 0; i < MLKEM_K; i++) __loop__( assigns(i, object_whole(out)) - invariant(i >= 0 && i <= MLKEM_K)) + invariant(i <= MLKEM_K)) { polyvec_basemul_acc_montgomery_cached(&out->vec[i], &a[i], v, vc); } } - - -STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0) - MLKEM_NATIVE_INTERNAL_API void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES], @@ -461,7 +334,6 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], matvec_mul(&pkpv, a, &skpv, &skpv_cache); polyvec_tomont(&pkpv); - /* Arithmetic cannot overflow, see static assertion at the top */ polyvec_add(&pkpv, &e); polyvec_reduce(&pkpv); polyvec_reduce(&skpv); @@ -471,11 +343,6 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], } -/* Check that the arithmetic in indcpa_enc() does not overflow */ -STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0) -STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX, - indcpa_enc_bound_1) - MLKEM_NATIVE_INTERNAL_API void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], const uint8_t m[MLKEM_INDCPA_MSGBYTES], @@ -522,7 +389,6 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], polyvec_invntt_tomont(&b); poly_invntt_tomont(&v); - /* Arithmetic cannot overflow, see static assertion at the top */ polyvec_add(&b, &ep); poly_add(&v, &epp); poly_add(&v, &k); @@ -533,9 +399,6 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], pack_ciphertext(c, &b, &v); } -/* Check that the arithmetic in indcpa_dec() does not overflow */ -STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0) - MLKEM_NATIVE_INTERNAL_API void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], const uint8_t c[MLKEM_INDCPA_BYTES], @@ -551,7 +414,6 @@ void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], polyvec_basemul_acc_montgomery(&sb, &skpv, &b); poly_invntt_tomont(&sb); - /* Arithmetic cannot overflow, see static assertion at the 
top */ poly_sub(&v, &sb); poly_reduce(&v); diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.h index 011f1aa4f..2c4fda3c4 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.h @@ -10,7 +10,7 @@ #include "common.h" #include "polyvec.h" -#define gen_matrix MLKEM_NAMESPACE(gen_matrix) +#define gen_matrix MLKEM_NAMESPACE_K(gen_matrix) /************************************************* * Name: gen_matrix * @@ -34,7 +34,7 @@ __contract__( array_bound(a[x].vec[y].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))); ); -#define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand) +#define indcpa_keypair_derand MLKEM_NAMESPACE_K(indcpa_keypair_derand) /************************************************* * Name: indcpa_keypair_derand * @@ -60,7 +60,7 @@ __contract__( assigns(object_whole(sk)) ); -#define indcpa_enc MLKEM_NAMESPACE(indcpa_enc) +#define indcpa_enc MLKEM_NAMESPACE_K(indcpa_enc) /************************************************* * Name: indcpa_enc * @@ -89,7 +89,7 @@ __contract__( assigns(object_whole(c)) ); -#define indcpa_dec MLKEM_NAMESPACE(indcpa_dec) +#define indcpa_dec MLKEM_NAMESPACE_K(indcpa_dec) /************************************************* * Name: indcpa_dec * diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.c index 5779d3273..88c3843be 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.c @@ -16,8 +16,8 @@ * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. 
*/ -#define check_pk MLKEM_NAMESPACE(check_pk) -#define check_sk MLKEM_NAMESPACE(check_sk) +#define check_pk MLKEM_NAMESPACE_K(check_pk) +#define check_sk MLKEM_NAMESPACE_K(check_sk) /* End of static namespacing */ #if defined(CBMC) diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.h index 074e4771e..93caa796b 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.h @@ -9,6 +9,7 @@ #include "cbmc.h" #include "common.h" +#if defined(MLKEM_NATIVE_CHECK_APIS) /* Include to ensure consistency between internal kem.h * and external mlkem_native.h. */ #include "mlkem_native.h" @@ -25,6 +26,14 @@ #error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h #endif +#else +#define crypto_kem_keypair_derand MLKEM_NAMESPACE_K(keypair_derand) +#define crypto_kem_keypair MLKEM_NAMESPACE_K(keypair) +#define crypto_kem_enc_derand MLKEM_NAMESPACE_K(enc_derand) +#define crypto_kem_enc MLKEM_NAMESPACE_K(enc) +#define crypto_kem_dec MLKEM_NAMESPACE_K(dec) +#endif + /************************************************* * Name: crypto_kem_keypair_derand * diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/mlkem_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/mlkem_native.h index 4aed4efbb..12d1d12e6 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/mlkem_native.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/mlkem_native.h @@ -59,9 +59,17 @@ #error MLKEM_NAMESPACE_PREFIX not set by config file #endif -#define BUILD_INFO_CONCAT_(x, y) x##_##y -#define BUILD_INFO_CONCAT(x, y) BUILD_INFO_CONCAT_(x, y) -#define BUILD_INFO_NAMESPACE(sym) BUILD_INFO_CONCAT(MLKEM_NAMESPACE_PREFIX, sym) +#if defined(MLKEM_NATIVE_NAMESPACE_PREFIX_ADD_LEVEL) +#define BUILD_INFO_CONCAT3_(x, y, z) x##y##_##z +#define BUILD_INFO_CONCAT3(x, y, z) BUILD_INFO_CONCAT3_(x, y, z) +#define BUILD_INFO_NAMESPACE(sym) \ + BUILD_INFO_CONCAT3(MLKEM_NAMESPACE_PREFIX, 
BUILD_INFO_LVL, sym) +#else +#define BUILD_INFO_CONCAT2_(x, y) x##_##y +#define BUILD_INFO_CONCAT2(x, y) BUILD_INFO_CONCAT2_(x, y) +#define BUILD_INFO_NAMESPACE(sym) \ + BUILD_INFO_CONCAT2(MLKEM_NAMESPACE_PREFIX, sym) +#endif #endif /* BUILD_INFO_LVL */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.c index 02b45215c..3651c8da9 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.c @@ -2,10 +2,12 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ -#include +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) +#include #include "arith_backend.h" -#include "debug/debug.h" +#include "debug.h" #include "ntt.h" #include "reduce.h" @@ -45,10 +47,10 @@ * 4 -- 6 * 5 -- 7 */ -static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, - int len, int bound) +static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, + unsigned start, unsigned len, int bound) __contract__( - requires(0 <= start && start < MLKEM_N) + requires(start < MLKEM_N) requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N) requires(0 <= bound && bound < INT16_MAX - MLKEM_Q) requires(-HALF_Q < zeta && zeta < HALF_Q) @@ -60,7 +62,7 @@ __contract__( ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound))) { /* `bound` is a ghost variable only needed in the CBMC specification */ - int j; + unsigned j; ((void)bound); for (j = start; j < start + len; j++) __loop__( @@ -93,7 +95,7 @@ __contract__( * official Kyber implementation here, merely adding `layer` as * a ghost variable for the specifications. 
*/ -static void ntt_layer(int16_t r[MLKEM_N], int len, int layer) +static void ntt_layer(int16_t r[MLKEM_N], unsigned len, unsigned layer) __contract__( requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer)) @@ -101,15 +103,15 @@ __contract__( assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q))) { - int start, k; + unsigned start, k; /* `layer` is a ghost variable only needed in the CBMC specification */ ((void)layer); /* Twiddle factors for layer n start at index 2^(layer-1) */ k = MLKEM_N / (2 * len); for (start = 0; start < MLKEM_N; start += 2 * len) __loop__( - invariant(0 <= start && start < MLKEM_N + 2 * len) - invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) + invariant(start < MLKEM_N + 2 * len) + invariant(k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) invariant(array_abs_bound(r, 0, start, layer * MLKEM_Q + MLKEM_Q)) invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q))) { @@ -130,9 +132,9 @@ __contract__( MLKEM_NATIVE_INTERNAL_API void poly_ntt(poly *p) { - int len, layer; + unsigned len, layer; int16_t *r; - POLY_BOUND_MSG(p, MLKEM_Q, "ref ntt input"); + debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q); r = p->coeffs; for (len = 128, layer = 1; len >= 2; len >>= 1, layer++) @@ -144,30 +146,23 @@ void poly_ntt(poly *p) } /* Check the stronger bound */ - POLY_BOUND_MSG(p, NTT_BOUND, "ref ntt output"); + debug_assert_abs_bound(p, MLKEM_N, NTT_BOUND); } #else /* MLKEM_USE_NATIVE_NTT */ -/* Check that bound for native NTT implies contractual bound */ -STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, invntt_bound) - MLKEM_NATIVE_INTERNAL_API void poly_ntt(poly *p) { - POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input"); + debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q); ntt_native(p); - POLY_BOUND_MSG(p, NTT_BOUND_NATIVE, "native ntt output"); + debug_assert_abs_bound(p, MLKEM_N, NTT_BOUND); } #endif /* 
MLKEM_USE_NATIVE_NTT */ #if !defined(MLKEM_USE_NATIVE_INTT) -/* Check that bound for reference invNTT implies contractual bound */ -#define INVNTT_BOUND_REF (3 * MLKEM_Q / 4) -STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound) - /* Compute one layer of inverse NTT */ -static void invntt_layer(int16_t *r, int len, int layer) +static void invntt_layer(int16_t *r, unsigned len, unsigned layer) __contract__( requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7) @@ -176,23 +171,23 @@ __contract__( assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { - int start, k; + unsigned start, k; /* `layer` is a ghost variable used only in the specification */ ((void)layer); k = MLKEM_N / len - 1; for (start = 0; start < MLKEM_N; start += 2 * len) __loop__( invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)) - invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(start <= MLKEM_N && k <= 127) /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */ invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len)) { - int j; + unsigned j; int16_t zeta = zetas[k--]; for (j = start; j < start + len; j++) __loop__( invariant(start <= j && j <= start + len) - invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(start <= MLKEM_N && k <= 127) invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { int16_t t = r[j]; @@ -211,13 +206,13 @@ void poly_invntt_tomont(poly *p) * and NTT twist. This also brings coefficients down to * absolute value < MLKEM_Q. 
*/ - int j, len, layer; + unsigned j, len, layer; const int16_t f = 1441; int16_t *r = p->coeffs; for (j = 0; j < MLKEM_N; j++) __loop__( - invariant(0 <= j && j <= MLKEM_N) + invariant(j <= MLKEM_N) invariant(array_abs_bound(r, 0, j, MLKEM_Q))) { r[j] = fqmul(r[j], f); @@ -226,24 +221,21 @@ void poly_invntt_tomont(poly *p) /* Run the invNTT layers */ for (len = 2, layer = 7; len <= 128; len <<= 1, layer--) __loop__( - invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer))) + invariant(2 <= len && len <= 256 && layer <= 7 && len == (1 << (8 - layer))) invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { invntt_layer(p->coeffs, len, layer); } - POLY_BOUND_MSG(p, INVNTT_BOUND_REF, "ref intt output"); + debug_assert_abs_bound(p, MLKEM_N, INVNTT_BOUND); } #else /* MLKEM_USE_NATIVE_INTT */ -/* Check that bound for native invNTT implies contractual bound */ -STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound) - MLKEM_NATIVE_INTERNAL_API void poly_invntt_tomont(poly *p) { intt_native(p); - POLY_BOUND_MSG(p, INVNTT_BOUND_NATIVE, "native intt output"); + debug_assert_abs_bound(p, MLKEM_N, INVNTT_BOUND); } #endif /* MLKEM_USE_NATIVE_INTT */ @@ -252,8 +244,7 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t b_cached) { int32_t t0, t1; - - BOUND(a, 2, 4096, "basemul input bound"); + debug_assert_bound(a, 2, 0, UINT12_LIMIT); t0 = (int32_t)a[1] * b_cached; t0 += (int32_t)a[0] * b[0]; @@ -264,5 +255,12 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], r[0] = montgomery_reduce(t0); r[1] = montgomery_reduce(t1); - BOUND(r, 2, 2 * MLKEM_Q, "basemul output bound"); + debug_assert_abs_bound(r, 2, 2 * MLKEM_Q); } + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_ntt MLKEM_NAMESPACE_K(empty_cu_ntt) +int empty_cu_ntt; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.h 
b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.h index 5592bb9a2..4e80d3ab3 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.h @@ -4,10 +4,10 @@ */ #ifndef NTT_H #define NTT_H +#include "common.h" #include #include "cbmc.h" -#include "common.h" #include "poly.h" #include "reduce.h" @@ -81,7 +81,7 @@ __contract__( * Upon return, coefficients are bound by * 2*MLKEM_Q in absolute value. * - a: Pointer to first input polynomial - * Must be coefficient-wise < 4096 in absolute value. + * Every coefficient must be in [0..4095] * - b: Pointer to second input polynomial * Can have arbitrary int16_t coefficients * - b_cached: Some precomputed value, typically derived from @@ -99,5 +99,4 @@ __contract__( ensures(array_abs_bound(r, 0, 2, 2 * MLKEM_Q)) ); - -#endif +#endif /* NTT_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/params.h index fa751f977..57ea4c8ba 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/params.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/params.h @@ -25,23 +25,34 @@ #define MLKEM_POLYBYTES 384 #define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES) +#define MLKEM_POLYCOMPRESSEDBYTES_D4 128 +#define MLKEM_POLYCOMPRESSEDBYTES_D5 160 +#define MLKEM_POLYCOMPRESSEDBYTES_D10 320 +#define MLKEM_POLYCOMPRESSEDBYTES_D11 352 + #if MLKEM_K == 2 #define MLKEM_LVL 512 #define MLKEM_ETA1 3 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_DU 10 +#define MLKEM_DV 4 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #elif MLKEM_K == 3 #define MLKEM_LVL 768 #define MLKEM_ETA1 2 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_DU 10 +#define MLKEM_DV 4 +#define 
MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #elif MLKEM_K == 4 #define MLKEM_LVL 1024 #define MLKEM_ETA1 2 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 160 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 352 +#define MLKEM_DU 11 +#define MLKEM_DV 5 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D5 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D11 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.c index 5807879df..7483ebf6d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.c @@ -2,13 +2,15 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) + #include #include - #include "arith_backend.h" #include "cbd.h" #include "cbmc.h" -#include "debug/debug.h" +#include "debug.h" #include "fips202x4.h" #include "ntt.h" #include "poly.h" @@ -16,50 +18,46 @@ #include "symmetric.h" #include "verify.h" +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 || MLKEM_K == 3) MLKEM_NATIVE_INTERNAL_API -void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) +void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a) { - unsigned j; -#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) - for (j = 0; j < MLKEM_N / 8; j++) - __loop__(invariant(j >= 0 && j <= MLKEM_N / 8)) + unsigned i; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (i = 0; i < MLKEM_N / 8; i++) + __loop__(invariant(i <= MLKEM_N / 8)) { - unsigned k; - uint16_t t[8]; - for (k = 0; k < 8; k++) + unsigned j; + uint8_t t[8] = {0}; + for (j = 0; j < 8; 
j++) __loop__( - invariant(k >= 0 && k <= 8) - invariant(forall(r, 0, k, t[r] < (1u << 11)))) + invariant(i <= MLKEM_N / 8 && j <= 8) + invariant(array_bound(t, 0, j, 0, 16))) { - t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); } - /* - * Make all implicit truncation explicit. No data is being - * truncated for the LHS's since each t[i] is 11-bit in size. - */ - r[11 * j + 0] = (t[0] >> 0) & 0xFF; - r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); - r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); - r[11 * j + 3] = (t[2] >> 2) & 0xFF; - r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); - r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); - r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); - r[11 * j + 7] = (t[5] >> 1) & 0xFF; - r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); - r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); - r[11 * j + 10] = (t[7] >> 3); + r[i * 4] = t[0] | (t[1] << 4); + r[i * 4 + 1] = t[2] | (t[3] << 4); + r[i * 4 + 2] = t[4] | (t[5] << 4); + r[i * 4 + 3] = t[6] | (t[7] << 4); } +} -#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a) +{ + unsigned j; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (j = 0; j < MLKEM_N / 4; j++) - __loop__(invariant(j >= 0 && j <= MLKEM_N / 4)) + __loop__(invariant(j <= MLKEM_N / 4)) { unsigned k; uint16_t t[4]; for (k = 0; k < 4; k++) __loop__( - invariant(k >= 0 && k <= 4) + invariant(k <= 4) invariant(forall(r, 0, k, t[r] < (1u << 10)))) { t[k] = scalar_compress_d10(a->coeffs[4 * j + k]); @@ -75,51 +73,35 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) r[5 * j + 3] = (t[2] >> 4) | ((t[3] << 6) & 0xFF); r[5 * j + 4] = (t[3] >> 2); } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" -#endif } - MLKEM_NATIVE_INTERNAL_API -void poly_decompress_du(poly *r, const uint8_t 
a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]) { - unsigned j; -#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) - for (j = 0; j < MLKEM_N / 8; j++) + unsigned i; + for (i = 0; i < MLKEM_N / 2; i++) __loop__( - invariant(0 <= j && j <= MLKEM_N / 8) - invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q))) + invariant(i <= MLKEM_N / 2) + invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q))) { - int k; - uint16_t t[8]; - uint8_t const *base = &a[11 * j]; - t[0] = 0x7FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8)); - t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); - t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | - ((uint16_t)base[4] << 10)); - t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); - t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); - t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | - ((uint16_t)base[8] << 9)); - t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); - t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); - - for (k = 0; k < 8; k++) - __loop__( - invariant(0 <= k && k <= 8) - invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q))) - { - r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); - } + r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); + r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} + +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d10(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]) +{ + unsigned j; for (j = 0; j < MLKEM_N / 4; j++) __loop__( - invariant(0 <= j && j <= MLKEM_N / 4) + invariant(j <= MLKEM_N / 4) invariant(array_bound(r->coeffs, 0, 4 * j, 0, MLKEM_Q))) { - int k; + unsigned k; uint16_t t[4]; uint8_t const *base = &a[5 * j]; @@ -130,51 +112,33 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) for (k = 0; k < 4; 
k++) __loop__( - invariant(0 <= k && k <= 4) + invariant(k <= 4) invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, MLKEM_Q))) { r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]); } } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" -#endif + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 \ + || MLKEM_K == 3) */ +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 MLKEM_NATIVE_INTERNAL_API -void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) +void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); -#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) - for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) - { - unsigned j; - uint8_t t[8] = {0}; - for (j = 0; j < 8; j++) - __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) - invariant(array_bound(t, 0, j, 0, 16))) - { - t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); - } - - r[i * 4] = t[0] | (t[1] << 4); - r[i * 4 + 1] = t[2] | (t[3] << 4); - r[i * 4 + 2] = t[4] | (t[5] << 4); - r[i * 4 + 3] = t[6] | (t[7] << 4); - } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + __loop__(invariant(i <= MLKEM_N / 8)) { unsigned j; uint8_t t[8] = {0}; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i <= MLKEM_N / 8 && j <= 8) invariant(array_bound(t, 0, j, 0, 32))) { t[j] = scalar_compress_d5(a->coeffs[8 * i + j]); @@ -191,33 +155,57 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) r[i * 5 + 3] = 0xFF & ((t[4] >> 4) | (t[5] << 1) | (t[6] << 6)); r[i * 5 + 4] = 0xFF & ((t[6] >> 2) | (t[7] << 3)); } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 160}" 
-#endif } MLKEM_NATIVE_INTERNAL_API -void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a) { - unsigned i; -#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) - for (i = 0; i < MLKEM_N / 2; i++) - __loop__( - invariant(i >= 0 && i <= MLKEM_N / 2) - invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q))) + unsigned j; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (j = 0; j < MLKEM_N / 8; j++) + __loop__(invariant(j <= MLKEM_N / 8)) { - r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); - r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); + unsigned k; + uint16_t t[8]; + for (k = 0; k < 8; k++) + __loop__( + invariant(k <= 8) + invariant(forall(r, 0, k, t[r] < (1u << 11)))) + { + t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + } + + /* + * Make all implicit truncation explicit. No data is being + * truncated for the LHS's since each t[i] is 11-bit in size. 
+ */ + r[11 * j + 0] = (t[0] >> 0) & 0xFF; + r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); + r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); + r[11 * j + 3] = (t[2] >> 2) & 0xFF; + r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); + r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); + r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); + r[11 * j + 7] = (t[5] >> 1) & 0xFF; + r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); + r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); + r[11 * j + 10] = (t[7] >> 3); } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) +} + +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]) +{ + unsigned i; for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q))) { unsigned j; uint8_t t[8]; - const int offset = i * 5; + const unsigned offset = i * 5; /* * Explicitly truncate to avoid warning about * implicit truncation in CBMC and unwind loop for ease @@ -240,29 +228,62 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) /* and copy to the correct slice in r[] */ for (j = 0; j < 8; j++) __loop__( - invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8) + invariant(j <= 8 && i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q))) { r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]); } } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 160}" -#endif - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d11(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]) +{ + unsigned j; + for (j = 0; j < MLKEM_N / 8; j++) + __loop__( + invariant(j <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q))) + { + unsigned k; + uint16_t t[8]; + uint8_t const *base = &a[11 * j]; + t[0] = 0x7FF & ((base[0] >> 0) | 
((uint16_t)base[1] << 8)); + t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); + t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | + ((uint16_t)base[4] << 10)); + t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); + t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); + t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | + ((uint16_t)base[8] << 9)); + t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); + t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); + + for (k = 0; k < 8; k++) + __loop__( + invariant(k <= 8) + invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q))) + { + r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); + } + } + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD) || MLKEM_K == 4 */ + #if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES) MLKEM_NATIVE_INTERNAL_API void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); - + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_N / 2; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 2)) + __loop__(invariant(i <= MLKEM_N / 2)) { const uint16_t t0 = a->coeffs[2 * i]; const uint16_t t1 = a->coeffs[2 * i + 1]; @@ -290,7 +311,7 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) MLKEM_NATIVE_INTERNAL_API void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) { - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); poly_tobytes_native(r, a); } #endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */ @@ -302,7 +323,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) unsigned i; for (i = 0; i < MLKEM_N / 2; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 2) + invariant(i <= MLKEM_N / 2) invariant(array_bound(r->coeffs, 0, 2 * i, 0, UINT12_LIMIT))) { const uint8_t t0 = a[3 * i + 0]; @@ -313,7 +334,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) } /* Note that the coefficients are not 
canonical */ - POLY_UBOUND(r, 4096); + debug_assert_bound(r, MLKEM_N, 0, UINT12_LIMIT); } #else /* MLKEM_USE_NATIVE_POLY_FROMBYTES */ MLKEM_NATIVE_INTERNAL_API @@ -333,13 +354,13 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q))) { unsigned j; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i < MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i < MLKEM_N / 8 && j <= 8) invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q))) { /* Prevent the compiler from recognizing this as a bit selection */ @@ -347,23 +368,23 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) r->coeffs[8 * i + j] = ct_sel_int16(HALF_Q, 0, msg[i] & mask); } } - POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output"); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + __loop__(invariant(i <= MLKEM_N / 8)) { unsigned j; msg[i] = 0; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)) + invariant(i <= MLKEM_N / 8 && j <= 8)) { uint32_t t = scalar_compress_d1(a->coeffs[8 * i + j]); msg[i] |= t << j; @@ -371,104 +392,17 @@ void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) } } -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, - uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) -{ - ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN 
uint8_t extkey0[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1]; - memcpy(extkey0, seed, MLKEM_SYMBYTES); - memcpy(extkey1, seed, MLKEM_SYMBYTES); - memcpy(extkey2, seed, MLKEM_SYMBYTES); - memcpy(extkey3, seed, MLKEM_SYMBYTES); - extkey0[MLKEM_SYMBYTES] = nonce0; - extkey1[MLKEM_SYMBYTES] = nonce1; - extkey2[MLKEM_SYMBYTES] = nonce2; - extkey3[MLKEM_SYMBYTES] = nonce3; - prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3); - poly_cbd_eta1(r0, buf0); - poly_cbd_eta1(r1, buf1); - poly_cbd_eta1(r2, buf2); - poly_cbd_eta1(r3, buf3); - - POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0"); - POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 1"); - POLY_BOUND_MSG(r2, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 2"); - POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3"); -} - -#if MLKEM_K == 2 || MLKEM_K == 4 -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce) -{ - ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; - ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1]; - - memcpy(extkey, seed, MLKEM_SYMBYTES); - extkey[MLKEM_SYMBYTES] = nonce; - prf_eta2(buf, extkey); - - poly_cbd_eta2(r, buf); - - POLY_BOUND_MSG(r, MLKEM_ETA1 + 1, "poly_getnoise_eta2 output"); -} -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ - -#if MLKEM_K == 2 -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, - uint8_t nonce3) -{ - ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; - ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; - memcpy(extkey[0], seed, MLKEM_SYMBYTES); - memcpy(extkey[1], seed, MLKEM_SYMBYTES); - memcpy(extkey[2], seed, MLKEM_SYMBYTES); - 
memcpy(extkey[3], seed, MLKEM_SYMBYTES); - extkey[0][MLKEM_SYMBYTES] = nonce0; - extkey[1][MLKEM_SYMBYTES] = nonce1; - extkey[2][MLKEM_SYMBYTES] = nonce2; - extkey[3][MLKEM_SYMBYTES] = nonce3; - - prf_eta1(buf1[0], extkey[0]); - prf_eta1(buf1[1], extkey[1]); - prf_eta2(buf2[0], extkey[2]); - prf_eta2(buf2[1], extkey[3]); - - poly_cbd_eta1(r0, buf1[0]); - poly_cbd_eta1(r1, buf1[1]); - poly_cbd_eta2(r2, buf2[0]); - poly_cbd_eta2(r3, buf2[1]); - - POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 0"); - POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 1"); - POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2"); - POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 3"); -} -#endif /* MLKEM_K == 2 */ - MLKEM_NATIVE_INTERNAL_API void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, const poly_mulcache *b_cache) { unsigned i; - POLY_BOUND(b_cache, 4096); + debug_assert_bound(a, MLKEM_N, 0, UINT12_LIMIT); for (i = 0; i < MLKEM_N / 4; i++) __loop__( assigns(i, object_whole(r)) - invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(i <= MLKEM_N / 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i, 2 * MLKEM_Q))) { basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i], @@ -476,6 +410,8 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, basemul_cached(&r->coeffs[4 * i + 2], &a->coeffs[4 * i + 2], &b->coeffs[4 * i + 2], b_cache->coeffs[2 * i + 1]); } + + debug_assert_abs_bound(r, MLKEM_N, 2 * MLKEM_Q); } #if !defined(MLKEM_USE_NATIVE_POLY_TOMONT) @@ -486,20 +422,20 @@ void poly_tomont(poly *r) const int16_t f = (1ULL << 32) % MLKEM_Q; /* 1353 */ for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) - invariant(array_abs_bound(r->coeffs ,0, i, MLKEM_Q))) + invariant(i <= MLKEM_N) + invariant(array_abs_bound(r->coeffs, 0, i, MLKEM_Q))) { r->coeffs[i] = fqmul(r->coeffs[i], f); } - POLY_BOUND(r, MLKEM_Q); + 
debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_TOMONT */ MLKEM_NATIVE_INTERNAL_API void poly_tomont(poly *r) { poly_tomont_native(r); - POLY_BOUND(r, MLKEM_Q); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } #endif /* MLKEM_USE_NATIVE_POLY_TOMONT */ @@ -510,7 +446,7 @@ void poly_reduce(poly *r) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(array_bound(r->coeffs, 0, i, 0, MLKEM_Q))) { /* Barrett reduction, giving signed canonical representative */ @@ -519,14 +455,14 @@ void poly_reduce(poly *r) r->coeffs[i] = scalar_signed_to_unsigned_q(t); } - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_REDUCE */ MLKEM_NATIVE_INTERNAL_API void poly_reduce(poly *r) { poly_reduce_native(r); - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } #endif /* MLKEM_USE_NATIVE_POLY_REDUCE */ @@ -536,7 +472,7 @@ void poly_add(poly *r, const poly *b) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1]))) { @@ -550,7 +486,7 @@ void poly_sub(poly *r, const poly *b) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1]))) { @@ -564,20 +500,36 @@ void poly_mulcache_compute(poly_mulcache *x, const poly *a) { unsigned i; for (i = 0; i < MLKEM_N / 4; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 4)) + __loop__( + invariant(i <= MLKEM_N / 4) + invariant(array_abs_bound(x->coeffs, 0, 2 * i, MLKEM_Q))) { x->coeffs[2 * i + 0] = fqmul(a->coeffs[4 * i + 1], zetas[64 + 
i]); x->coeffs[2 * i + 1] = fqmul(a->coeffs[4 * i + 3], -zetas[64 + i]); } - POLY_BOUND(x, MLKEM_Q); + + /* + * This bound is true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus omitted + * them from the spec to not unnecessarily constrain native + * implementations, but checked here nonetheless. + */ + debug_assert_abs_bound(x, MLKEM_N / 2, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ MLKEM_NATIVE_INTERNAL_API void poly_mulcache_compute(poly_mulcache *x, const poly *a) { poly_mulcache_compute_native(x, a); - /* Omitting POLY_BOUND(x, MLKEM_Q) since native implementations may + /* Omitting bounds assertion since native implementations may * decide not to use a mulcache. Note that the C backend implementation * of poly_basemul_montgomery_cached() does still include the check. */ } #endif /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_poly MLKEM_NAMESPACE_K(empty_cu_poly) +int empty_cu_poly; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.h index 1e8c109c6..6a14c785d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.h @@ -307,112 +307,164 @@ __contract__( ************************************************************/ static INLINE uint16_t scalar_signed_to_unsigned_q(int16_t c) __contract__( - requires(c >= -(MLKEM_Q - 1) && c <= (MLKEM_Q - 1)) - ensures(return_value >= 0 && return_value <= (MLKEM_Q - 1)) + requires(c > -MLKEM_Q && c < MLKEM_Q) + ensures(return_value >= 0 && return_value < MLKEM_Q) ensures(return_value == (int32_t)c + (((int32_t)c < 0) * MLKEM_Q))) { + debug_assert_abs_bound(&c, 1, MLKEM_Q); + /* Add Q if c is negative, but in constant time */ c = ct_sel_int16(c + MLKEM_Q, c, ct_cmask_neg_i16(c)); - cassert(c >= 0, 
"scalar_signed_to_unsigned_q result lower bound"); - cassert(c < MLKEM_Q, "scalar_signed_to_unsigned_q result upper bound"); - /* and therefore cast to uint16_t is safe. */ + debug_assert_bound(&c, 1, 0, MLKEM_Q); return (uint16_t)c; } -#define poly_compress_du MLKEM_NAMESPACE(poly_compress_du) +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || \ + (MLKEM_K == 2 || MLKEM_K == 3) +#define poly_compress_d4 MLKEM_NAMESPACE(poly_compress_d4) /************************************************* - * Name: poly_compress_du + * Name: poly_compress_d4 * - * Description: Compression (du bits) and subsequent serialization of a - *polynomial + * Description: Compression (4 bits) and subsequent serialization of a + * polynomial * * Arguments: - uint8_t *r: pointer to output byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes) * - const poly *a: pointer to input polynomial * Coefficients must be unsigned canonical, * i.e. in [0,1,..,MLKEM_Q-1]. **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) -__contract__( - requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) - requires(memory_no_alias(a, sizeof(poly))) - requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) -); +void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a); + +#define poly_compress_d10 MLKEM_NAMESPACE(poly_compress_d10) +/************************************************* + * Name: poly_compress_d10 + * + * Description: Compression (10 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a); -#define poly_decompress_du MLKEM_NAMESPACE(poly_decompress_du) +#define poly_decompress_d4 MLKEM_NAMESPACE(poly_decompress_d4) /************************************************* - * Name: poly_decompress_du + * Name: poly_decompress_d4 * - * Description: De-serialization and subsequent decompression (du bits) of a - *polynomial; approximate inverse of poly_compress_du + * Description: De-serialization and subsequent decompression (dv bits) of a + * polynomial; approximate inverse of poly_compress * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *a: pointer to input byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES bytes) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes) * * Upon return, the coefficients of the output polynomial are unsigned-canonical * (non-negative and smaller than MLKEM_Q). * **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) -__contract__( - requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) - requires(memory_no_alias(r, sizeof(poly))) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) -); +void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]); -#define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv) +#define poly_decompress_d10 MLKEM_NAMESPACE(poly_decompress_d10) /************************************************* - * Name: poly_compress_dv + * Name: poly_decompress_d10 + * + * Description: De-serialization and subsequent decompression (10 bits) of a + * polynomial; approximate inverse of poly_compress_d10 * - * Description: Compression (dv bits) and subsequent serialization of a - *polynomial + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t 
*a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d10(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]); +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 \ + || MLKEM_K == 3) */ + +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 +#define poly_compress_d5 MLKEM_NAMESPACE(poly_compress_d5) +/************************************************* + * Name: poly_compress_d5 + * + * Description: Compression (5 bits) and subsequent serialization of a + * polynomial * * Arguments: - uint8_t *r: pointer to output byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES_DV) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes) * - const poly *a: pointer to input polynomial * Coefficients must be unsigned canonical, * i.e. in [0,1,..,MLKEM_Q-1]. 
**************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) -__contract__( - requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) - requires(memory_no_alias(a, sizeof(poly))) - requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - assigns(object_whole(r)) -); +void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a); -#define poly_decompress_dv MLKEM_NAMESPACE(poly_decompress_dv) +#define poly_compress_d11 MLKEM_NAMESPACE(poly_compress_d11) /************************************************* - * Name: poly_decompress_dv + * Name: poly_compress_d11 + * + * Description: Compression (11 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a); + +#define poly_decompress_d5 MLKEM_NAMESPACE(poly_decompress_d5) +/************************************************* + * Name: poly_decompress_d5 * * Description: De-serialization and subsequent decompression (dv bits) of a - *polynomial; approximate inverse of poly_compress + * polynomial; approximate inverse of poly_compress * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *a: pointer to input byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES_DV - *bytes) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes) * * Upon return, the coefficients of the output polynomial are unsigned-canonical * (non-negative and smaller than MLKEM_Q). 
* **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) -__contract__( - requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) - requires(memory_no_alias(r, sizeof(poly))) - assigns(object_whole(r)) - ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) -); +void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]); + +#define poly_decompress_d11 MLKEM_NAMESPACE(poly_decompress_d11) +/************************************************* + * Name: poly_decompress_d11 + * + * Description: De-serialization and subsequent decompression (11 bits) of a + * polynomial; approximate inverse of poly_compress_d11 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d11(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]); +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 \ + */ #define poly_tobytes MLKEM_NAMESPACE(poly_tobytes) /************************************************* @@ -500,144 +552,6 @@ __contract__( assigns(object_whole(msg)) ); -#define poly_getnoise_eta1_4x MLKEM_NAMESPACE(poly_getnoise_eta1_4x) -/************************************************* - * Name: poly_getnoise_eta1_4x - * - * Description: Batch sample four polynomials deterministically from a seed - * and nonces, with output polynomials close to centered binomial distribution - * with parameter MLKEM_ETA1. 
- * - * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce{0,1,2,3}: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, - uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) -/* Depending on MLKEM_K, the pointers passed to this function belong - to the same objects, so we cannot use memory_no_alias for r0-r3. - - NOTE: Somehow it is important to use memory_no_alias() first in the - conjunctions defining each case. -*/ -#if MLKEM_K == 2 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ - (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && - r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#elif MLKEM_K == 4 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case B: r0, r1, r2, r3 consecutive */ - (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 
1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#elif MLKEM_K == 3 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case C: r0, r1, r2 consecutive */ - (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && - r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#endif /* MLKEM_K */ - -#if MLKEM_ETA1 == MLKEM_ETA2 -/* - * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 - * where MLKEM_ETA2 = MLKEM_ETA1 = 2. - * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. - */ -#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x -#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ - -#if MLKEM_K == 2 || MLKEM_K == 4 -#define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2) -/************************************************* - * Name: poly_getnoise_eta2 - * - * Description: Sample a polynomial deterministically from a seed and a nonce, - * with output polynomial close to centered binomial distribution - * with parameter MLKEM_ETA2 - * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - assigns(object_whole(r)) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, 
MLKEM_ETA2 + 1)) -); -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ - -#if MLKEM_K == 2 -#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x) -/************************************************* - * Name: poly_getnoise_eta1122_4x - * - * Description: Batch sample four polynomials deterministically from a seed - * and a nonces, with output polynomials close to centered binomial - * distribution with parameter MLKEM_ETA1 and MLKEM_ETA2 - * - * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce{0,1,2,3}: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, - uint8_t nonce3) -__contract__( - requires( /* r0, r1 consecutive, r2, r3 consecutive */ - (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && - r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) - ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)); -); -#endif /* MLKEM_K == 2 */ - #define poly_basemul_montgomery_cached \ MLKEM_NAMESPACE(poly_basemul_montgomery_cached) /************************************************* @@ -649,8 +563,7 @@ __contract__( * Bounds: * - a is assumed to be coefficient-wise < q in absolute value. * - * The result is coefficient-wise bound by 3/2 q in absolute - * value. + * The result is coefficient-wise bound by 2*q in absolute value. 
* * Arguments: - poly *r: pointer to output polynomial * - const poly *a: pointer to first input polynomial @@ -802,4 +715,4 @@ __contract__( assigns(object_whole(r)) ); -#endif +#endif /* POLY_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.c index 7d2016773..50ea1c34a 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.c @@ -4,18 +4,29 @@ */ #include "polyvec.h" #include +#include #include "arith_backend.h" +#include "cbd.h" #include "ntt.h" #include "poly.h" +#include "symmetric.h" -#include "debug/debug.h" +#include "debug.h" + +/* Static namespacing + * This is to facilitate building multiple instances + * of mlkem-native (e.g. with varying security levels) + * within a single compilation unit. */ +#define poly_cbd_eta1 MLKEM_NAMESPACE_K(poly_cbd_eta1) +#define poly_cbd_eta2 MLKEM_NAMESPACE_K(poly_cbd_eta2) +/* End of static namespacing */ MLKEM_NATIVE_INTERNAL_API void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU], const polyvec *a) { unsigned i; - POLYVEC_UBOUND(a, MLKEM_Q); + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_K; i++) { @@ -33,13 +44,15 @@ void polyvec_decompress_du(polyvec *r, poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU); } - POLYVEC_UBOUND(r, MLKEM_Q); + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a) { unsigned i; + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q); + for (i = 0; i < MLKEM_K; i++) { poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]); @@ -54,6 +67,8 @@ void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES]) { poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES); } + + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); } MLKEM_NATIVE_INTERNAL_API @@ -64,6 +79,8 @@ void 
polyvec_ntt(polyvec *r) { poly_ntt(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, NTT_BOUND); } MLKEM_NATIVE_INTERNAL_API @@ -74,6 +91,8 @@ void polyvec_invntt_tomont(polyvec *r) { poly_invntt_tomont(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, INVNTT_BOUND); } #if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) @@ -84,10 +103,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, { unsigned i; poly t; - - POLYVEC_BOUND(a, 4096); - POLYVEC_BOUND(b, NTT_BOUND); - POLYVEC_BOUND(b_cache, MLKEM_Q); + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); poly_basemul_montgomery_cached(r, &a->vec[0], &b->vec[0], &b_cache->vec[0]); for (i = 1; i < MLKEM_K; i++) @@ -95,18 +111,15 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, poly_basemul_montgomery_cached(&t, &a->vec[i], &b->vec[i], &b_cache->vec[i]); poly_add(r, &t); - /* abs bounds: < (i+1) * 3/2 * q */ } /* - * Those bounds are true for the C implementation, but not needed - * in the higher level bounds reasoning. It is thus best to omit - * them from the spec to not unnecessarily constraint native implementations. + * This bound is true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus omitted + * from the spec to not unnecessarily constrain native + * implementations, but checked here nonetheless.
*/ - cassert(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_K * 2 * MLKEM_Q), - "polyvec_basemul_acc_montgomery_cached output bounds"); - /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */ - POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q); + debug_assert_abs_bound(r, MLKEM_K, MLKEM_N * 2 * MLKEM_Q); } #else /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */ MLKEM_NATIVE_INTERNAL_API @@ -114,9 +127,8 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, const polyvec *b, const polyvec_mulcache *b_cache) { - POLYVEC_BOUND(a, 4096); - POLYVEC_BOUND(b, NTT_BOUND); - /* Omitting POLYVEC_BOUND(b_cache, MLKEM_Q) since native implementations may + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); + /* Omitting bounds assertion for cache since native implementations may * decide not to use a mulcache. Note that the C backend implementation * of poly_basemul_montgomery_cached() does still include the check. */ polyvec_basemul_acc_montgomery_cached_native(r, a, b, b_cache); @@ -149,6 +161,8 @@ void polyvec_reduce(polyvec *r) { poly_reduce(&r->vec[i]); } + + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API @@ -169,4 +183,148 @@ void polyvec_tomont(polyvec *r) { poly_tomont(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, MLKEM_Q); +} + + +/************************************************* + * Name: poly_cbd_eta1 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA1. 
+ * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static INLINE void poly_cbd_eta1(poly *r, + const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)) +) +{ +#if MLKEM_ETA1 == 2 + poly_cbd2(r, buf); +#elif MLKEM_ETA1 == 3 + poly_cbd3(r, buf); +#else +#error "Invalid value of MLKEM_ETA1" +#endif +} + +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +{ + ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t extkey0[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1]; + memcpy(extkey0, seed, MLKEM_SYMBYTES); + memcpy(extkey1, seed, MLKEM_SYMBYTES); + memcpy(extkey2, seed, MLKEM_SYMBYTES); + memcpy(extkey3, seed, MLKEM_SYMBYTES); + extkey0[MLKEM_SYMBYTES] = nonce0; + extkey1[MLKEM_SYMBYTES] = nonce1; + extkey2[MLKEM_SYMBYTES] = nonce2; + extkey3[MLKEM_SYMBYTES] = nonce3; + prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3); + poly_cbd_eta1(r0, buf0); + poly_cbd_eta1(r1, buf1); + poly_cbd_eta1(r2, buf2); + poly_cbd_eta1(r3, buf3); + + debug_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA1 + 1); +} + +#if MLKEM_K == 2 || MLKEM_K == 4 
+/************************************************* + * Name: poly_cbd_eta2 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA2. + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static INLINE void poly_cbd_eta2(poly *r, + const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1))) +{ +#if MLKEM_ETA2 == 2 + poly_cbd2(r, buf); +#else +#error "Invalid value of MLKEM_ETA2" +#endif +} + +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +{ + ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1]; + + memcpy(extkey, seed, MLKEM_SYMBYTES); + extkey[MLKEM_SYMBYTES] = nonce; + prf_eta2(buf, extkey); + + poly_cbd_eta2(r, buf); + + debug_assert_abs_bound(r, MLKEM_N, MLKEM_ETA1 + 1); +} +#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ + + +#if MLKEM_K == 2 +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +{ + ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; + memcpy(extkey[0], seed, MLKEM_SYMBYTES); + memcpy(extkey[1], seed, MLKEM_SYMBYTES); + memcpy(extkey[2], seed, MLKEM_SYMBYTES); + memcpy(extkey[3], seed, MLKEM_SYMBYTES); + extkey[0][MLKEM_SYMBYTES] = nonce0; + extkey[1][MLKEM_SYMBYTES] = nonce1; + extkey[2][MLKEM_SYMBYTES] = nonce2; + 
extkey[3][MLKEM_SYMBYTES] = nonce3; + + prf_eta1(buf1[0], extkey[0]); + prf_eta1(buf1[1], extkey[1]); + prf_eta2(buf2[0], extkey[2]); + prf_eta2(buf2[1], extkey[3]); + + poly_cbd_eta1(r0, buf1[0]); + poly_cbd_eta1(r1, buf1[1]); + poly_cbd_eta2(r2, buf2[0]); + poly_cbd_eta2(r3, buf2[1]); + + debug_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA2 + 1); + debug_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA2 + 1); } +#endif /* MLKEM_K == 2 */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.h index 138724150..8be8579e0 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.h @@ -9,19 +9,144 @@ #include "common.h" #include "poly.h" -#define polyvec MLKEM_NAMESPACE(polyvec) +#define polyvec MLKEM_NAMESPACE_K(polyvec) typedef struct { poly vec[MLKEM_K]; } ALIGN polyvec; -#define polyvec_mulcache MLKEM_NAMESPACE(polyvec_mulcache) +#define polyvec_mulcache MLKEM_NAMESPACE_K(polyvec_mulcache) typedef struct { poly_mulcache vec[MLKEM_K]; } polyvec_mulcache; -#define polyvec_compress_du MLKEM_NAMESPACE(polyvec_compress_du) +#define poly_compress_du MLKEM_NAMESPACE_K(poly_compress_du) +/************************************************* + * Name: poly_compress_du + * + * Description: Compression (du bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +static INLINE void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], + const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU))) +{ +#if MLKEM_DU == 10 + poly_compress_d10(r, a); +#elif MLKEM_DU == 11 + poly_compress_d11(r, a); +#else +#error "Invalid value of MLKEM_DU" +#endif +} + +#define poly_decompress_du MLKEM_NAMESPACE_K(poly_decompress_du) +/************************************************* + * Name: poly_decompress_du + * + * Description: De-serialization and subsequent decompression (du bits) of a + * polynomial; approximate inverse of poly_compress_du + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). 
+ * + **************************************************/ +static INLINE void poly_decompress_du( + poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +{ +#if MLKEM_DU == 10 + poly_decompress_d10(r, a); +#elif MLKEM_DU == 11 + poly_decompress_d11(r, a); +#else +#error "Invalid value of MLKEM_DU" +#endif +} + +#define poly_compress_dv MLKEM_NAMESPACE_K(poly_compress_dv) +/************************************************* + * Name: poly_compress_dv + * + * Description: Compression (dv bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +static INLINE void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], + const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(object_whole(r))) +{ +#if MLKEM_DV == 4 + poly_compress_d4(r, a); +#elif MLKEM_DV == 5 + poly_compress_d5(r, a); +#else +#error "Invalid value of MLKEM_DV" +#endif +} + + +#define poly_decompress_dv MLKEM_NAMESPACE_K(poly_decompress_dv) +/************************************************* + * Name: poly_decompress_dv + * + * Description: De-serialization and subsequent decompression (dv bits) of a + * polynomial; approximate inverse of poly_compress_dv + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q).
+ * + **************************************************/ +static INLINE void poly_decompress_dv( + poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(object_whole(r)) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +{ +#if MLKEM_DV == 4 + poly_decompress_d4(r, a); +#elif MLKEM_DV == 5 + poly_decompress_d5(r, a); +#else +#error "Invalid value of MLKEM_DV" +#endif +} + +#define polyvec_compress_du MLKEM_NAMESPACE_K(polyvec_compress_du) /************************************************* * Name: polyvec_compress_du * @@ -44,7 +169,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_decompress_du MLKEM_NAMESPACE(polyvec_decompress_du) +#define polyvec_decompress_du MLKEM_NAMESPACE_K(polyvec_decompress_du) /************************************************* * Name: polyvec_decompress_du * @@ -67,7 +192,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) ); -#define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes) +#define polyvec_tobytes MLKEM_NAMESPACE_K(polyvec_tobytes) /************************************************* * Name: polyvec_tobytes * @@ -88,7 +213,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_frombytes MLKEM_NAMESPACE(polyvec_frombytes) +#define polyvec_frombytes MLKEM_NAMESPACE_K(polyvec_frombytes) /************************************************* * Name: polyvec_frombytes * @@ -110,7 +235,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, UINT12_LIMIT))) ); -#define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt) +#define polyvec_ntt MLKEM_NAMESPACE_K(polyvec_ntt) /************************************************* * Name: polyvec_ntt * @@ -136,7 +261,7 @@ __contract__( array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, NTT_BOUND))) ); -#define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont) +#define polyvec_invntt_tomont 
MLKEM_NAMESPACE_K(polyvec_invntt_tomont) /************************************************* * Name: polyvec_invntt_tomont * @@ -162,7 +287,7 @@ __contract__( ); #define polyvec_basemul_acc_montgomery \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery) /************************************************* * Name: polyvec_basemul_acc_montgomery * @@ -186,7 +311,7 @@ __contract__( #define polyvec_basemul_acc_montgomery_cached \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached) /************************************************* * Name: polyvec_basemul_acc_montgomery_cached * @@ -194,7 +319,7 @@ __contract__( * using mulcache for second operand. * * Bounds: - * - a is assumed to be coefficient-wise < 4096 in absolute value. + * - Every coefficient of a is assumed to be in [0..4095] * - No bounds guarantees for the coefficients in the result. * * Arguments: - poly *r: pointer to output polynomial @@ -218,7 +343,7 @@ __contract__( assigns(memory_slice(r, sizeof(poly))) ); -#define polyvec_mulcache_compute MLKEM_NAMESPACE(polyvec_mulcache_compute) +#define polyvec_mulcache_compute MLKEM_NAMESPACE_K(polyvec_mulcache_compute) /************************************************************ * Name: polyvec_mulcache_compute * @@ -252,7 +377,7 @@ __contract__( assigns(object_whole(x)) ); -#define polyvec_reduce MLKEM_NAMESPACE(polyvec_reduce) +#define polyvec_reduce MLKEM_NAMESPACE_K(polyvec_reduce) /************************************************* * Name: polyvec_reduce * @@ -278,7 +403,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) ); -#define polyvec_add MLKEM_NAMESPACE(polyvec_add) +#define polyvec_add MLKEM_NAMESPACE_K(polyvec_add) /************************************************* * Name: polyvec_add * @@ -309,7 +434,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_tomont MLKEM_NAMESPACE(polyvec_tomont) +#define 
polyvec_tomont MLKEM_NAMESPACE_K(polyvec_tomont) /************************************************* * Name: polyvec_tomont * @@ -329,4 +454,142 @@ __contract__( array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, MLKEM_Q))) ); +#define poly_getnoise_eta1_4x MLKEM_NAMESPACE_K(poly_getnoise_eta1_4x) +/************************************************* + * Name: poly_getnoise_eta1_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial distribution + * with parameter MLKEM_ETA1. + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +/* Depending on MLKEM_K, the pointers passed to this function belong + to the same objects, so we cannot use memory_no_alias for r0-r3. + + NOTE: Somehow it is important to use memory_no_alias() first in the + conjunctions defining each case. 
+*/ +#if MLKEM_K == 2 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#elif MLKEM_K == 4 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case B: r0, r1, r2, r3 consecutive */ + (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#elif MLKEM_K == 3 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case C: r0, r1, r2 consecutive */ + (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && + r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) 
+ && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#endif /* MLKEM_K */ + +#if MLKEM_ETA1 == MLKEM_ETA2 +/* + * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 + * where MLKEM_ETA2 = MLKEM_ETA1 = 2. + * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. + */ +#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x +#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ + +#if MLKEM_K == 2 || MLKEM_K == 4 +#define poly_getnoise_eta2 MLKEM_NAMESPACE_K(poly_getnoise_eta2) +/************************************************* + * Name: poly_getnoise_eta2 + * + * Description: Sample a polynomial deterministically from a seed and a nonce, + * with output polynomial close to centered binomial distribution + * with parameter MLKEM_ETA2 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce: one-byte input nonce + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r)) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)) +); +#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ + +#if MLKEM_K == 2 +#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE_K(poly_getnoise_eta1122_4x) +/************************************************* + * Name: poly_getnoise_eta1122_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial + * distribution with parameters MLKEM_ETA1 and MLKEM_ETA2 + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce +
**************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +__contract__( + requires( /* r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) + ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)); +); +#endif /* MLKEM_K == 2 */ + #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/reduce.h index 1f502167e..b432a4201 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/reduce.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/reduce.h @@ -8,7 +8,7 @@ #include #include "cbmc.h" #include "common.h" -#include "debug/debug.h" +#include "debug.h" /* Static namespacing * This is to facilitate building multiple instances @@ -109,13 +109,13 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a) **************************************************/ static INLINE int16_t montgomery_reduce(int32_t a) __contract__( - requires(a > -(2 * 4096 * 32768)) - requires(a < (2 * 4096 * 32768)) + requires(a > -(2 * UINT12_LIMIT * 32768)) + requires(a < (2 * UINT12_LIMIT * 32768)) ensures(return_value > -2 * MLKEM_Q && return_value < 2 * MLKEM_Q) ) { int16_t res; - SCALAR_BOUND(a, 2 * UINT12_LIMIT * 32768, "montgomery_reduce input"); + debug_assert_abs_bound(&a, 1, 2 * UINT12_LIMIT * 32768); res = montgomery_reduce_generic(a); /* Bounds: @@ -124,7 +124,7 @@ __contract__( * 
<= UINT12_LIMIT + (MLKEM_Q + 1) / 2 * < 2 * MLKEM_Q */ - SCALAR_BOUND(res, 2 * MLKEM_Q, "montgomery_reduce output"); + debug_assert_abs_bound(&res, 1, 2 * MLKEM_Q); return res; } @@ -150,7 +150,7 @@ __contract__( ) { int16_t res; - SCALAR_BOUND(b, HALF_Q, "fqmul input"); + debug_assert_abs_bound(&b, 1, HALF_Q); res = montgomery_reduce((int32_t)a * (int32_t)b); /* Bounds: @@ -160,7 +160,7 @@ __contract__( * < MLKEM_Q */ - SCALAR_BOUND(res, MLKEM_Q, "fqmul output"); + debug_assert_abs_bound(&res, 1, MLKEM_Q); return res; } @@ -200,7 +200,10 @@ __contract__( * t is in -10 .. +10, so we need 32-bit math to * evaluate t * MLKEM_Q and the subsequent subtraction */ - return (int16_t)(a - t * MLKEM_Q); + int16_t res = (int16_t)(a - t * MLKEM_Q); + + debug_assert_abs_bound(&res, 1, HALF_Q); + return res; } #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/rej_uniform.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/rej_uniform.c index 918986e9b..cbbe4407f 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/rej_uniform.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/rej_uniform.c @@ -2,46 +2,24 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) -#include "rej_uniform.h" #include "arith_backend.h" +#include "debug.h" +#include "fips202.h" +#include "fips202x4.h" +#include "rej_uniform.h" +#include "symmetric.h" /* Static namespacing * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. 
*/ +#define rej_uniform MLKEM_NAMESPACE(rej_uniform) #define rej_uniform_scalar MLKEM_NAMESPACE(rej_uniform_scalar) /* End of static namespacing */ -/************************************************* - * Name: rej_uniform_scalar - * - * Description: Run rejection sampling on uniform random bytes to generate - * uniform random integers mod q - * - * Arguments: - int16_t *r: pointer to output buffer - * - unsigned int target: requested number of 16-bit integers - * (uniform mod q). - * Must be <= 4096. - * - unsigned int offset: number of 16-bit integers that have - * already been sampled. - * Must be <= target. - * - const uint8_t *buf: pointer to input buffer - * (assumed to be uniform random bytes) - * - unsigned int buflen: length of input buffer in bytes - * Must be <= 4096. - * Must be a multiple of 3. - * - * Note: Strictly speaking, only a few values of buflen near UINT_MAX need - * excluding. The limit of 4096 is somewhat arbitary but sufficient for all - * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. - * - * Returns the new offset of sampled 16-bit integers, at most target, - * and at least the initial offset. - * If the new offset is strictly less than len, all of the input buffers - * is guaranteed to have been consumed. If it is equal to len, no information - * is provided on how many bytes of the input buffer have been consumed. 
- **************************************************/ static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target, unsigned int offset, const uint8_t *buf, unsigned int buflen) @@ -58,6 +36,8 @@ __contract__( unsigned int ctr, pos; uint16_t val0, val1; + debug_assert_bound(r, offset, 0, MLKEM_Q); + ctr = offset; pos = 0; /* pos + 3 cannot overflow due to the assumption buflen <= 4096 */ @@ -79,28 +59,183 @@ __contract__( r[ctr++] = val1; } } + + debug_assert_bound(r, ctr, 0, MLKEM_Q); return ctr; } #if !defined(MLKEM_USE_NATIVE_REJ_UNIFORM) -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +/************************************************* + * Name: rej_uniform + * + * Description: Run rejection sampling on uniform random bytes to generate + * uniform random integers mod q + * + * Arguments: - int16_t *r: pointer to output buffer + * - unsigned int target: requested number of 16-bit integers + * (uniform mod q). + * Must be <= 4096. + * - unsigned int offset: number of 16-bit integers that have + * already been sampled. + * Must be <= target. + * - const uint8_t *buf: pointer to input buffer + * (assumed to be uniform random bytes) + * - unsigned int buflen: length of input buffer in bytes + * Must be <= 4096. + * Must be a multiple of 3. + * + * Note: Strictly speaking, only a few values of buflen near UINT_MAX need + * excluding. The limit of 4096 is somewhat arbitrary but sufficient for all + * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. + * + * Returns the new offset of sampled 16-bit integers, at most target, + * and at least the initial offset. + * If the new offset is strictly less than target, all of the input buffer + * is guaranteed to have been consumed. If it is equal to target, no information + * is provided on how many bytes of the input buffer have been consumed.
+ **************************************************/ + +/* + * NOTE: The signature differs from the Kyber reference implementation + * in that it adds the offset and always expects the base of the target + * buffer. This avoids shifting the buffer base in the caller, which appears + * tricky to reason about. + */ +static unsigned int rej_uniform(int16_t *r, unsigned int target, + unsigned int offset, const uint8_t *buf, + unsigned int buflen) +__contract__( + requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) + requires(memory_no_alias(r, sizeof(int16_t) * target)) + requires(memory_no_alias(buf, buflen)) + requires(offset > 0 ==> array_bound(r, 0, offset, 0, MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * target)) + ensures(offset <= return_value && return_value <= target) + ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, MLKEM_Q)) +) { return rej_uniform_scalar(r, target, offset, buf, buflen); } #else /* MLKEM_USE_NATIVE_REJ_UNIFORM */ - -MLKEM_NATIVE_INTERNAL_API -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +static unsigned int rej_uniform(int16_t *r, unsigned int target, + unsigned int offset, const uint8_t *buf, + unsigned int buflen) { int ret; /* Sample from large buffer with full lane as much as possible. 
*/ ret = rej_uniform_native(r + offset, target - offset, buf, buflen); if (ret != -1) - return offset + (unsigned)ret; + { + unsigned res = offset + (unsigned)ret; + debug_assert_bound(r, res, 0, MLKEM_Q); + return res; + } return rej_uniform_scalar(r, target, offset, buf, buflen); } #endif /* MLKEM_USE_NATIVE_REJ_UNIFORM */ + +#ifndef MLKEM_GEN_MATRIX_NBLOCKS +#define MLKEM_GEN_MATRIX_NBLOCKS \ + ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + XOF_RATE) / XOF_RATE) +#endif + +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform_x4(poly *vec, uint8_t *seed[4]) +{ + /* Temporary buffers for XOF output before rejection sampling */ + uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + + /* Tracks the number of coefficients we have already sampled */ + unsigned int ctr[KECCAK_WAY]; + xof_x4_ctx statex; + unsigned int buflen; + + shake128x4_inc_init(&statex); + + /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */ + xof_x4_absorb(&statex, seed[0], seed[1], seed[2], seed[3], + MLKEM_SYMBYTES + 2); + + /* + * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + * This should generate the matrix entries with high probability. + */ + xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, + &statex); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); + + /* + * So long as not all matrix entries have been generated, squeeze + * one more block at a time until we're done. 
+ */ + buflen = XOF_RATE; + while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || + ctr[3] < MLKEM_N) + __loop__( + assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), + object_whole(buf1), object_whole(buf2), object_whole(buf3)) + invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) + invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) + invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, MLKEM_Q)) + invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, MLKEM_Q)) + invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, MLKEM_Q)) + invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, MLKEM_Q))) + { + xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); + } + + xof_x4_release(&statex); +} + +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) +{ + xof_ctx state; + uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + unsigned int ctr, buflen; + + shake128_inc_init(&state); + + xof_absorb(&state, seed, MLKEM_SYMBYTES + 2); + + /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + */ + /* This should generate the matrix entry with high probability. 
*/ + xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; + ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); + + /* Squeeze + sample one more block a time until we're done */ + buflen = XOF_RATE; + while (ctr < MLKEM_N) + __loop__( + assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) + invariant(ctr <= MLKEM_N) + invariant(array_bound(entry->coeffs, 0, ctr, 0, MLKEM_Q))) + { + xof_squeezeblocks(buf, 1, &state); + ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen); + } + + xof_release(&state); +} + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_rej_uniform MLKEM_NAMESPACE_K(empty_cu_rej_uniform) +int empty_cu_rej_uniform; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/rej_uniform.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/rej_uniform.h index 13db836bc..801287259 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/rej_uniform.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/rej_uniform.h @@ -9,54 +9,55 @@ #include #include "cbmc.h" #include "common.h" +#include "poly.h" -#define rej_uniform MLKEM_NAMESPACE(rej_uniform) +#define poly_rej_uniform_x4 MLKEM_NAMESPACE(poly_rej_uniform_x4) /************************************************* - * Name: rej_uniform + * Name: poly_rej_uniform_x4 * - * Description: Run rejection sampling on uniform random bytes to generate - * uniform random integers mod q + * Description: Generate four polynomials using rejection sampling + * on (pseudo-)uniformly random bytes sampled from a seed. * - * Arguments: - int16_t *r: pointer to output buffer - * - unsigned int target: requested number of 16-bit integers - * (uniform mod q). - * Must be <= 4096. - * - unsigned int offset: number of 16-bit integers that have - * already been sampled. - * Must be <= target. 
- * - const uint8_t *buf: pointer to input buffer - * (assumed to be uniform random bytes) - * - unsigned int buflen: length of input buffer in bytes - * Must be <= 4096. - * Must be a multiple of 3. + * Arguments: - poly *vec: Pointer to an array of 4 polynomials + * to be sampled. + * - uint8_t *seed[4]: Pointer to array of four pointers + * pointing to the seed buffers of size + * MLKEM_SYMBYTES + 2 each. * - * Note: Strictly speaking, only a few values of buflen near UINT_MAX need - * excluding. The limit of 4096 is somewhat arbitary but sufficient for all - * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. - * - * Returns the new offset of sampled 16-bit integers, at most target, - * and at least the initial offset. - * If the new offset is strictly less than len, all of the input buffers - * is guaranteed to have been consumed. If it is equal to len, no information - * is provided on how many bytes of the input buffer have been consumed. **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform_x4(poly *vec, uint8_t *seed[4]) +__contract__( + requires(memory_no_alias(vec, sizeof(poly) * 4)) + requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) + requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) + assigns(memory_slice(vec, sizeof(poly) * 4)) + ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, MLKEM_Q))); -/* - * NOTE: The signature differs from the Kyber reference implementation - * in that it adds the offset and always expects the base of the target - * buffer. 
This avoids shifting the buffer base in the caller, which appears - * tricky to reason about. - */ +#define poly_rej_uniform MLKEM_NAMESPACE(poly_rej_uniform) +/************************************************* + * Name: poly_rej_uniform + * + * Description: Generate polynomial using rejection sampling + * on (pseudo-)uniformly random bytes sampled from a seed. + * + * Arguments: - poly *vec: Pointer to polynomial to be sampled. + * - uint8_t *seed: Pointer to seed buffer of size + * MLKEM_SYMBYTES + 2 each. + * + **************************************************/ MLKEM_NATIVE_INTERNAL_API -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +void poly_rej_uniform(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) __contract__( - requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) - requires(memory_no_alias(r, sizeof(int16_t) * target)) - requires(memory_no_alias(buf, buflen)) - requires(offset > 0 ==> array_bound(r, 0, offset, 0, MLKEM_Q)) - assigns(memory_slice(r, sizeof(int16_t) * target)) - ensures(offset <= return_value && return_value <= target) - ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, MLKEM_Q)) -); -#endif + requires(memory_no_alias(entry, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) + assigns(memory_slice(entry, sizeof(poly))) + ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q))); + +#endif /* REJ_UNIFORM_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/symmetric.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/symmetric.h index 55ebbbd53..3563e5505 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/symmetric.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/symmetric.h @@ -10,6 +10,7 @@ #include "cbmc.h" #include "common.h" #include "fips202.h" +#include "fips202x4.h" /* Macros denoting FIPS-203 specific Hash functions */ diff --git 
a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/verify.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/verify.c index b7078fcc1..9f39dcd22 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/verify.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/verify.c @@ -4,7 +4,8 @@ */ #include "verify.h" -#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) +#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) && \ + !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) /* * Masking value used in constant-time functions from * verify.h to block the compiler's range analysis and @@ -12,9 +13,11 @@ */ volatile uint64_t ct_opt_blocker_u64 = 0; -#else /* MLKEM_USE_ASM_VALUE_BARRIER */ +#else /* MLKEM_USE_ASM_VALUE_BARRIER && \ + !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ -#define empty_cu_verify MLKEM_NAMESPACE(empty_cu_verify) +#define empty_cu_verify MLKEM_NAMESPACE_K(empty_cu_verify) int empty_cu_verify; -#endif /* MLKEM_USE_ASM_VALUE_BARRIER */ +#endif /* MLKEM_USE_ASM_VALUE_BARRIER && \ + !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/verify.h index 8c47155dc..f6ecf5eba 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/verify.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/verify.h @@ -268,7 +268,7 @@ __contract__( for (i = 0; i < len; i++) __loop__( - invariant(i >= 0 && i <= len) + invariant(i <= len) invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k]))))) { r |= a[i] ^ b[i]; @@ -314,4 +314,4 @@ __contract__( } } -#endif +#endif /* VERIFY_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/zetas.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/zetas.c index 1a26e0dd5..4ef887c62 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/zetas.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/zetas.c @@ -8,6 +8,8 @@ * Do not modify it directly. 
*/ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) #include "ntt.h" /* @@ -28,3 +30,10 @@ ALIGN const int16_t zetas[128] = { -1187, -1659, -1185, -1530, -1278, 794, -1510, -854, -870, 478, -108, -308, 996, 991, 958, -1460, 1522, 1628, }; + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_zetas MLKEM_NAMESPACE_K(empty_cu_zetas) +int empty_cu_zetas; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/arith_backend.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/arith_backend.h index 09e30f207..0543b1bd1 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/arith_backend.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/arith_backend.h @@ -16,7 +16,9 @@ * * Keep this _after_ the inclusion of the backend; otherwise, * the sanity checks won't have an effect. */ +#if defined(MLKEM_NATIVE_CHECK_APIS) #include "api.h" #endif +#endif #endif /* MLKEM_NATIVE_ARITH_IMPL_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbd.c index 433bdc954..1e6b7c5d1 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbd.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbd.c @@ -2,8 +2,11 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ -#include "cbd.h" +#include "common.h" +#ifndef MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED + #include +#include "cbd.h" /* Static namespacing * This is to facilitate building multiple instances @@ -11,8 +14,6 @@ * within a single compilation unit. 
*/ #define load32_littleendian MLKEM_NAMESPACE(load32_littleendian) #define load24_littleendian MLKEM_NAMESPACE(load24_littleendian) -#define cbd2 MLKEM_NAMESPACE(cbd2) -#define cbd3 MLKEM_NAMESPACE(cbd3) /* End of static namespacing */ /************************************************* @@ -35,44 +36,13 @@ static uint32_t load32_littleendian(const uint8_t x[4]) return r; } -#if MLKEM_ETA1 == 3 -/************************************************* - * Name: load24_littleendian - * - * Description: load 3 bytes into a 32-bit integer - * in little-endian order. - * This function is only needed for ML-KEM-512 - * - * Arguments: - const uint8_t *x: pointer to input byte array - * - * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) - **************************************************/ -static uint32_t load24_littleendian(const uint8_t x[3]) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - return r; -} -#endif /* MLKEM_ETA1 == 3 */ - -/************************************************* - * Name: cbd2 - * - * Description: Given an array of uniformly random bytes, compute - * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter eta=2 - * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *buf: pointer to input byte array - **************************************************/ -static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) +MLKEM_NATIVE_INTERNAL_API +void poly_cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) { unsigned i; for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_abs_bound(r->coeffs, 0, 8 * i, 3))) { unsigned j; @@ -82,7 +52,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i <= MLKEM_N / 8 && j <= 8) 
invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 3))) { const int16_t a = (d >> (4 * j + 0)) & 0x3; @@ -92,24 +62,34 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) } } -#if MLKEM_ETA1 == 3 +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == 3 /************************************************* - * Name: cbd3 + * Name: load24_littleendian * - * Description: Given an array of uniformly random bytes, compute - * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter eta=3. + * Description: load 3 bytes into a 32-bit integer + * in little-endian order. * This function is only needed for ML-KEM-512 * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *buf: pointer to input byte array + * Arguments: - const uint8_t *x: pointer to input byte array + * + * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) **************************************************/ -static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) +static uint32_t load24_littleendian(const uint8_t x[3]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + return r; +} + +MLKEM_NATIVE_INTERNAL_API +void poly_cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) { unsigned i; for (i = 0; i < MLKEM_N / 4; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(i <= MLKEM_N / 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i, 4))) { unsigned j; @@ -120,7 +100,7 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) for (j = 0; j < 4; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4) + invariant(i <= MLKEM_N / 4 && j <= 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 4))) { const int16_t a = (d >> (6 * j + 0)) & 0x7; @@ -129,28 +109,12 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) } } } -#endif /* MLKEM_ETA1 == 3 */ +#endif /* 
defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == \ + 3 */ -MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) -{ -#if MLKEM_ETA1 == 2 - cbd2(r, buf); -#elif MLKEM_ETA1 == 3 - cbd3(r, buf); -#else -#error "This implementation requires eta1 in {2,3}" -#endif -} +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ -#if MLKEM_K == 2 || MLKEM_K == 4 -MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) -{ -#if MLKEM_ETA2 == 2 - cbd2(r, buf); -#else -#error "This implementation requires eta2 = 2" -#endif -} -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ +#define empty_cu_cbd MLKEM_NAMESPACE_K(empty_cu_cbd) +int empty_cu_cbd; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbd.h index 15db89570..54c1f5b90 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbd.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbd.h @@ -9,46 +9,35 @@ #include "common.h" #include "poly.h" -#define poly_cbd_eta1 MLKEM_NAMESPACE(poly_cbd_eta1) +#define poly_cbd2 MLKEM_NAMESPACE(poly_cbd2) /************************************************* - * Name: poly_cbd_eta1 + * Name: poly_cbd2 * * Description: Given an array of uniformly random bytes, compute * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter MLKEM_ETA1. 
+ * a centered binomial distribution with parameter eta=2 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *buf: pointer to input byte array **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)) -); +void poly_cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]); -#if MLKEM_K == 2 || MLKEM_K == 4 -#define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2) +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == 3 +#define poly_cbd3 MLKEM_NAMESPACE(poly_cbd3) /************************************************* - * Name: poly_cbd_eta1 + * Name: poly_cbd3 * * Description: Given an array of uniformly random bytes, compute * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter MLKEM_ETA2. + * a centered binomial distribution with parameter eta=3. 
+ * This function is only needed for ML-KEM-512 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *buf: pointer to input byte array **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)) -); -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ +void poly_cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]); +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD || MLKEM_ETA1 == 3 */ -#endif +#endif /* CBD_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbmc.h index baa0bfa9f..52b95bc3f 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbmc.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbmc.h @@ -13,7 +13,7 @@ #define __contract__(x) #define __loop__(x) -#define cassert(x, y) +#define cassert(x) #else /* CBMC _is_ defined, therefore we're doing proof */ @@ -30,7 +30,7 @@ #define invariant(...) __CPROVER_loop_invariant(__VA_ARGS__) #define decreases(...) __CPROVER_decreases(__VA_ARGS__) /* cassert to avoid confusion with in-built assert */ -#define cassert(...) __CPROVER_assert(__VA_ARGS__) +#define cassert(x) __CPROVER_assert(x, "cbmc assertion failed") #define assume(...) 
__CPROVER_assume(__VA_ARGS__) /*************************************************** @@ -119,13 +119,13 @@ { \ unsigned qvar; \ ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> \ - (((value_lb) <= (array_var[(qvar)])) && \ - ((array_var[(qvar)]) < (value_ub))) \ + (((int)(value_lb) <= ((array_var)[(qvar)])) && \ + (((array_var)[(qvar)]) < (int)(value_ub))) \ } #define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \ array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \ - (qvar_ub), (array_var), (value_lb), (value_ub)) + (qvar_ub), (array_var), (value_lb), (value_ub)) /* clang-format on */ /* Wrapper around array_bound operating on absolute values. @@ -134,6 +134,6 @@ * bound in array_bound is inclusive, we have to raise it by 1. */ #define array_abs_bound(arr, lb, ub, k) \ - array_bound((arr), (lb), (ub), -(k) + 1, (k)) + array_bound((arr), (lb), (ub), -((int)(k)) + 1, (k)) #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/common.h index da886780c..4f326333e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/common.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/common.h @@ -43,23 +43,30 @@ #define MLKEM_NATIVE_MAKE_NAMESPACE_(x1, x2) x1##_##x2 #define MLKEM_NATIVE_MAKE_NAMESPACE(x1, x2) MLKEM_NATIVE_MAKE_NAMESPACE_(x1, x2) -#define FIPS202_NAMESPACE(s) \ - MLKEM_NATIVE_MAKE_NAMESPACE(FIPS202_NAMESPACE_PREFIX, s) - #define MLKEM_NAMESPACE(s) \ MLKEM_NATIVE_MAKE_NAMESPACE(MLKEM_NAMESPACE_PREFIX, s) +#if defined(MLKEM_NAMESPACE_PREFIX_ADD_LEVEL) +#define MLKEM_NATIVE_MAKE_NAMESPACE_K_(x1, x2, x3) x1##x2##_##x3 +#define MLKEM_NATIVE_MAKE_NAMESPACE_K(x1, x2, x3) \ + MLKEM_NATIVE_MAKE_NAMESPACE_K_(x1, x2, x3) +#define MLKEM_NAMESPACE_K(s) \ + MLKEM_NATIVE_MAKE_NAMESPACE_K(MLKEM_NAMESPACE_PREFIX, MLKEM_LVL, s) +#else +#define MLKEM_NAMESPACE_K(s) MLKEM_NAMESPACE(s) +#endif + /* On Apple platforms, we need to emit leading underscore * in front of assembly 
symbols. We thus introducee a separate * namespace wrapper for ASM symbols. */ #if !defined(__APPLE__) #define MLKEM_ASM_NAMESPACE(sym) MLKEM_NAMESPACE(sym) -#define FIPS202_ASM_NAMESPACE(sym) FIPS202_NAMESPACE(sym) +#define MLKEM_ASM_NAMESPACE_K(sym) MLKEM_NAMESPACE_K(sym) #else #define PREFIX_UNDERSCORE_(sym) _##sym #define PREFIX_UNDERSCORE(sym) PREFIX_UNDERSCORE_(sym) #define MLKEM_ASM_NAMESPACE(sym) PREFIX_UNDERSCORE(MLKEM_NAMESPACE(sym)) -#define FIPS202_ASM_NAMESPACE(sym) PREFIX_UNDERSCORE(FIPS202_NAMESPACE(sym)) +#define MLKEM_ASM_NAMESPACE_K(sym) PREFIX_UNDERSCORE(MLKEM_NAMESPACE_K(sym)) #endif #endif /* MLKEM_NATIVE_COMMON_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/config.h index d1441835b..fa89370ce 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/config.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/config.h @@ -40,10 +40,12 @@ /* #define MLKEM_NATIVE_CONFIG_FILE "config.h" */ /****************************************************************************** - * Name: MLKEM_NAMESPACE + * Name: MLKEM_NAMESPACE_PREFIX * - * Description: The prefix to use to namespace global symbols - * from mlkem/. + * Description: The prefix to use to namespace global symbols from mlkem/. + * + * Level-dependent symbols will additionally be prefixed with the + * security level if MLKEM_NAMESPACE_PREFIX_ADD_LEVEL is set. * * This can also be set using CFLAGS. * @@ -53,17 +55,71 @@ #endif /****************************************************************************** - * Name: FIPS202_NAMESPACE + * Name: MLKEM_NAMESPACE_PREFIX_ADD_LEVEL + * + * Description: If set, the level (512, 768, 1024) is added to the namespace + * prefix MLKEM_NAMESPACE_PREFIX for all functions which are + * level-dependent. Level-independent functions will have their + * symbols prefixed by MLKEM_NAMESPACE_PREFIX only. 
* - * Description: The prefix to use to namespace global symbols - * from mlkem/fips202/. + * This is intended to be used for multi-level builds where + * level-independent code should be shared across levels. * * This can also be set using CFLAGS. * *****************************************************************************/ -#if !defined(FIPS202_NAMESPACE_PREFIX) -#define FIPS202_NAMESPACE_PREFIX FIPS202_DEFAULT_NAMESPACE_PREFIX -#endif +/* #define MLKEM_NAMESPACE_PREFIX_ADD_LEVEL */ + +/****************************************************************************** + * Name: MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED + * + * Description: This is for multi-level builds of mlkem-native only. If you + * need only a single security level build of mlkem-native, + * keep this unset. + * + * If this is set, all MLKEM_K-independent code will be included + * in the build, including code needed only for other security + * levels. + * + * Example: poly_cbd3 is only needed for MLKEM_K == 2. Yet, if + * this option is set for a build with MLKEM_K==3/4, it would + * be included. + * + * To build mlkem-native with support for all security levels, + * build it three times -- once per level -- and set the option + * MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED for exactly one of + * them, and MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED for the + * others. + * + * See examples/multilevel_build for an example. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +/* #define MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED */ + +/****************************************************************************** + * Name: MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED + * + * Description: This is for multi-level builds of mlkem-native only. If you + * need only a single security level build of mlkem-native, + * keep this unset. + * + * If this is set, no MLKEM_K-independent code will be included + * in the build. 
+ * + * To build mlkem-native with support for all security levels, + * build it three times -- once per level -- and set the option + * MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED for exactly one of + * them, and MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED for the + * others. + * + * See examples/multilevel_build for an example. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +/* #define MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ /****************************************************************************** * Name: MLKEM_USE_NATIVE @@ -112,25 +168,13 @@ /* Default namespace * * Don't change this. If you need a different namespace, re-define - * MLKEM_NAMESPACE above instead, and remove the following. - */ - -/* - * The default FIPS202 namespace is - * - * PQCP_MLKEM_NATIVE_FIPS202__ + * MLKEM_NAMESPACE_PREFIX above instead, and remove the following. * - * e.g., PQCP_MLKEM_NATIVE_FIPS202_C_ - */ - -#define FIPS202_DEFAULT_NAMESPACE_PREFIX PQCP_MLKEM_NATIVE_FIPS202 - -/* * The default MLKEM namespace is * - * PQCP_MLKEM_NATIVE_MLKEM__ + * PQCP_MLKEM_NATIVE_MLKEM_ * - * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_ + * e.g., PQCP_MLKEM_NATIVE_MLKEM512_ */ #if MLKEM_K == 2 diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/debug.c new file mode 100644 index 000000000..4b4857cbc --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/debug.c @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +/* NOTE: You can remove this file unless you compile with MLKEM_DEBUG. 
*/ + +#include "common.h" + +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) && defined(MLKEM_DEBUG) + + +#include +#include +#include "debug.h" + +#define MLKEM_NATIVE_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] " + +void mlkem_debug_assert(const char *file, int line, const int val) +{ + if (val == 0) + { + fprintf(stderr, + MLKEM_NATIVE_DEBUG_ERROR_HEADER "Assertion failed (value %d)\n", + file, line, val); + exit(1); + } +} + +void mlkem_debug_check_bounds(const char *file, int line, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive) +{ + int err = 0; + unsigned i; + for (i = 0; i < len; i++) + { + int16_t val = ptr[i]; + if (!(val > lower_bound_exclusive && val < upper_bound_exclusive)) + { + fprintf( + stderr, + MLKEM_NATIVE_DEBUG_ERROR_HEADER + "Bounds assertion failed: Index %u, value %d out of bounds (%d,%d)\n", + file, line, i, (int)val, lower_bound_exclusive, + upper_bound_exclusive); + err = 1; + } + } + + if (err == 1) + exit(1); +} + +#else /* !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED && MLKEM_DEBUG */ + +#define empty_cu_debug MLKEM_NAMESPACE_K(empty_cu_debug) +int empty_cu_debug; + +#endif /* !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED && MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/debug.h new file mode 100644 index 000000000..1103124db --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/debug.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef MLKEM_DEBUG_H +#define MLKEM_DEBUG_H +#include "common.h" + +#if defined(MLKEM_DEBUG) +#include + +/************************************************* + * Name: mlkem_debug_assert + * + * Description: Check debug assertion + * + * Prints an error message to stderr and calls + * exit(1) if not. 
+ * + * Arguments: - file: filename + * - line: line number + * - val: Value asserted to be non-zero + **************************************************/ +#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert) +void mlkem_debug_assert(const char *file, int line, const int val); + +/************************************************* + * Name: mlkem_debug_check_bounds + * + * Description: Check whether values in an array of int16_t + * are within specified bounds. + * + * Prints an error message to stderr and calls + * exit(1) if not. + * + * Arguments: - file: filename + * - line: line number + * - ptr: Base of array to be checked + * - len: Number of int16_t in ptr + * - lower_bound_exclusive: Exclusive lower bound + * - upper_bound_exclusive: Exclusive upper bound + **************************************************/ +#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds) +void mlkem_debug_check_bounds(const char *file, int line, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive); + +/* Check assertion, calling exit() upon failure + * + * val: Value that's asserted to be non-zero + */ +#define debug_assert(val) mlkem_debug_assert(__FILE__, __LINE__, (val)) + +/* Check bounds in array of int16_t's + * ptr: Base of int16_t array; will be explicitly cast to int16_t*, + * so you may pass a byte-compatible type such as poly or polyvec. 
+ * len: Number of int16_t in array + * value_lb: Inclusive lower value bound + * value_ub: Exclusive upper value bound */ +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + mlkem_debug_check_bounds(__FILE__, __LINE__, (const int16_t *)(ptr), (len), \ + (value_lb)-1, (value_ub)) + +/* Check absolute bounds in array of int16_t's + * ptr: Base of array, expression of type int16_t* + * len: Number of int16_t in array + * value_abs_bd: Exclusive absolute upper bound */ +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + debug_assert_bound((ptr), (len), (-(value_abs_bd) + 1), (value_abs_bd)) + +/* Version of bounds assertions for 2-dimensional arrays */ +#define debug_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \ + debug_assert_bound((ptr), ((len0) * (len1)), (value_lb), (value_ub)) + +#define debug_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \ + debug_assert_abs_bound((ptr), ((len0) * (len1)), (value_abs_bd)) + +/* When running CBMC, convert debug assertions into proof obligations */ +#elif defined(CBMC) + +#include "cbmc.h" + +#define debug_assert(val) cassert(val) + +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + cassert(array_bound(((int16_t *)(ptr)), 0, (len), (value_lb), (value_ub))) + +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + cassert(array_abs_bound(((int16_t *)(ptr)), 0, (len), (value_abs_bd))) + +/* Because of https://github.com/diffblue/cbmc/issues/8570, we can't + * just use a single flattened array_bound(...) here. 
*/ +#define debug_assert_bound_2d(ptr, M, N, value_lb, value_ub) \ + cassert(forall(kN, 0, (M), \ + array_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \ + (value_lb), (value_ub)))) + +#define debug_assert_abs_bound_2d(ptr, M, N, value_abs_bd) \ + cassert(forall(kN, 0, (M), \ + array_abs_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \ + (value_abs_bd)))) + +#else /* MLKEM_DEBUG */ + +#define debug_assert(val) \ + do \ + { \ + } while (0) +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + do \ + { \ + } while (0) +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + do \ + { \ + } while (0) + +#define debug_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \ + do \ + { \ + } while (0) + +#define debug_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \ + do \ + { \ + } while (0) + + +#endif /* MLKEM_DEBUG */ +#endif /* MLKEM_DEBUG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/debug/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/debug/debug.c deleted file mode 100644 index 64294ebe1..000000000 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/debug/debug.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2024 The mlkem-native project authors - * SPDX-License-Identifier: Apache-2.0 - */ -#include "../common.h" - -#if defined(MLKEM_DEBUG) - -#include -#include "debug.h" - -#define MLKEM_NATIVE_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] " - -void mlkem_debug_assert(const char *file, int line, const char *description, - const int val) -{ - if (val == 0) - { - fprintf(stderr, - MLKEM_NATIVE_DEBUG_ERROR_HEADER "Assertion failed: %s (value %d)\n", - file, line, description, val); - exit(1); - } -} - -void mlkem_debug_check_bounds(const char *file, int line, - const char *description, const int16_t *ptr, - unsigned len, int lower_bound_exclusive, - int upper_bound_exclusive) -{ - int err = 0; - unsigned i; - for (i = 0; i < len; i++) - { - int16_t val = ptr[i]; - if (!(val > lower_bound_exclusive && val < 
upper_bound_exclusive)) - { - fprintf(stderr, - MLKEM_NATIVE_DEBUG_ERROR_HEADER - "%s, index %u, value %d out of bounds (%d,%d)\n", - file, line, description, i, (int)val, lower_bound_exclusive, - upper_bound_exclusive); - err = 1; - } - } - - if (err == 1) - exit(1); -} - -#else /* MLKEM_DEBUG */ - -#define empty_cu_debug MLKEM_NAMESPACE(empty_cu_debug) -int empty_cu_debug; - -#endif /* MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/debug/debug.h deleted file mode 100644 index 5ce320ea2..000000000 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/debug/debug.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2024 The mlkem-native project authors - * SPDX-License-Identifier: Apache-2.0 - */ -#ifndef MLKEM_DEBUG_H -#define MLKEM_DEBUG_H - -#include "../common.h" - -#if defined(MLKEM_DEBUG) -#include -#include -#include - -/************************************************* - * Name: mlkem_debug_assert - * - * Description: Check debug assertion - * - * Prints an error message to stderr and calls - * exit(1) if not. - * - * Arguments: - file: filename - * - line: line number - * - description: Textual description of assertion - * - val: Value asserted to be non-zero - **************************************************/ -#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert) -void mlkem_debug_assert(const char *file, int line, const char *description, - const int val); - -/************************************************* - * Name: mlkem_debug_check_bounds - * - * Description: Check whether values in an array of int16_t - * are within specified bounds. - * - * Prints an error message to stderr and calls - * exit(1) if not. 
- * - * Arguments: - file: filename - * - line: line number - * - description: Textual description of check - * - ptr: Base of array to be checked - * - len: Number of int16_t in ptr - * - lower_bound_exclusive: Exclusive lower bound - * - upper_bound_exclusive: Exclusive upper bound - **************************************************/ -#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds) -void mlkem_debug_check_bounds(const char *file, int line, - const char *description, const int16_t *ptr, - unsigned len, int lower_bound_exclusive, - int upper_bound_exclusive); - -/* Check assertion, calling exit() upon failure - * - * val: Value that's asserted to be non-zero - * msg: Message to print on failure - * - * Currently called CASSERT to avoid clash with CBMC assert. - */ -#define CASSERT(val, msg) \ - do \ - { \ - mlkem_debug_assert(__FILE__, __LINE__, (msg), (val)); \ - } while (0) - -/* Check absolute bounds of scalar - * val: Scalar to be checked - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define SCALAR_BOUND(val, abs_bound, msg) \ - CASSERT((val) > -(abs_bound) && (val) < (abs_bound), msg) - -/* Check that all coefficients in array of int16_t's are non-negative - * and below an exclusive upper bound. 
- * - * ptr: Base of array, expression of type int16_t* - * len: Number of int16_t in array - * high_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define UBOUND(ptr, len, high_bound, msg) \ - do \ - { \ - mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ - (len), -1, ((high_bound))); \ - } while (0) - -/* Check absolute bounds in array of int16_t's - * ptr: Base of array, expression of type int16_t* - * len: Number of int16_t in array - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define BOUND(ptr, len, abs_bound, msg) \ - do \ - { \ - mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ - (len), -(abs_bound), (abs_bound)); \ - } while (0) - -/* Check absolute bounds on coefficients in polynomial or mulcache - * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define POLY_BOUND_MSG(ptr, abs_bound, msg) \ - BOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (abs_bound), \ - msg) - -/* Check unsigned bounds on coefficients in polynomial or mulcache - * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
- * msg: Message to print on failure */ -#define POLY_UBOUND_MSG(ptr, ubound, msg) \ - UBOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (ubound), \ - msg) - -/* Check absolute bounds on coefficients in polynomial - * ptr: poly* of poly_mulcache* pointer to polynomial (cache) to check - * abs_bound: Exclusive upper bound on absolute value to check */ -#define POLY_BOUND(ptr, abs_bound) \ - POLY_BOUND_MSG((ptr), (abs_bound), "poly absolute bound for " #ptr) - -/* Check unsigned bounds on coefficients in polynomial - * ptr: poly* of poly_mulcache* pointer to polynomial (cache) to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. - */ -#define POLY_UBOUND(ptr, ubound) \ - POLY_UBOUND_MSG((ptr), (ubound), "poly unsigned bound for " #ptr) - -/* Check absolute bounds on coefficients in vector of polynomials - * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check - * abs_bound: Exclusive upper bound on absolute value to check */ -#define POLYVEC_BOUND(ptr, abs_bound) \ - do \ - { \ - unsigned _debug_polyvec_bound_idx; \ - for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ - _debug_polyvec_bound_idx++) \ - POLY_BOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (abs_bound), \ - "polyvec absolute bound for " #ptr ".vec[i]"); \ - } while (0) - -/* Check unsigned bounds on coefficients in vector of polynomials - * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
- */ -#define POLYVEC_UBOUND(ptr, ubound) \ - do \ - { \ - unsigned _debug_polyvec_bound_idx; \ - for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ - _debug_polyvec_bound_idx++) \ - POLY_UBOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (ubound), \ - "polyvec unsigned bound for " #ptr ".vec[i]"); \ - } while (0) - -#define MLKEM_CONCAT_(left, right) left##right -#define MLKEM_CONCAT(left, right) MLKEM_CONCAT_(left, right) - -/* Following AWS-LC to define a C99-compliant static assert */ -#define MLKEM_STATIC_ASSERT_DEFINE(cond, msg) \ - typedef struct \ - { \ - unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \ - } MLKEM_CONCAT(MLKEM_NAMESPACE(static_assertion_), msg) \ - __attribute__((unused)); - -#define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \ - MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix)) -#define MLKEM_STATIC_ASSERT_ADD_LINE1(cond, line, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE0(cond, MLKEM_CONCAT(line, suffix)) -#define MLKEM_STATIC_ASSERT_ADD_LINE2(cond, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE1(cond, __LINE__, suffix) -#define MLKEM_STATIC_ASSERT_ADD_ERROR(cond, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE2(cond, MLKEM_CONCAT(_error_is_, suffix)) -#define STATIC_ASSERT(cond, error) MLKEM_STATIC_ASSERT_ADD_ERROR(cond, error) - -#else /* MLKEM_DEBUG */ - -#define CASSERT(val, msg) \ - do \ - { \ - } while (0) -#define SCALAR_BOUND(val, abs_bound, msg) \ - do \ - { \ - } while (0) -#define BOUND(ptr, len, abs_bound, msg) \ - do \ - { \ - } while (0) -#define POLY_BOUND(ptr, abs_bound) \ - do \ - { \ - } while (0) -#define POLYVEC_BOUND(ptr, abs_bound) \ - do \ - { \ - } while (0) -#define POLY_BOUND_MSG(ptr, ubound, abs_bound) \ - do \ - { \ - } while (0) -#define UBOUND(ptr, len, high_bound, msg) \ - do \ - { \ - } while (0) -#define POLY_UBOUND(ptr, ubound) \ - do \ - { \ - } while (0) -#define POLYVEC_UBOUND(ptr, ubound) \ - do \ - { \ - } while (0) -#define POLY_UBOUND_MSG(ptr, ubound, msg) \ 
- do \ - { \ - } while (0) -#define STATIC_ASSERT(cond, error) - -#endif /* MLKEM_DEBUG */ - -#endif /* MLKEM_DEBUG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/indcpa.c index 4d3133e14..0cfcc3e9e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/indcpa.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/indcpa.c @@ -17,7 +17,7 @@ #include "symmetric.h" #include "arith_backend.h" -#include "debug/debug.h" +#include "debug.h" #include "cbmc.h" @@ -25,15 +25,13 @@ * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. */ -#define pack_pk MLKEM_NAMESPACE(pack_pk) -#define unpack_pk MLKEM_NAMESPACE(unpack_pk) -#define pack_sk MLKEM_NAMESPACE(pack_sk) -#define unpack_sk MLKEM_NAMESPACE(unpack_sk) -#define pack_ciphertext MLKEM_NAMESPACE(pack_ciphertext) -#define unpack_ciphertext MLKEM_NAMESPACE(unpack_ciphertext) -#define gen_matrix_entry_x4 MLKEM_NAMESPACE(gen_matrix_entry_x4) -#define gen_matrix_entry MLKEM_NAMESPACE(gen_matrix_entry) -#define matvec_mul MLKEM_NAMESPACE(matvec_mul) +#define pack_pk MLKEM_NAMESPACE_K(pack_pk) +#define unpack_pk MLKEM_NAMESPACE_K(unpack_pk) +#define pack_sk MLKEM_NAMESPACE_K(pack_sk) +#define unpack_sk MLKEM_NAMESPACE_K(unpack_sk) +#define pack_ciphertext MLKEM_NAMESPACE_K(pack_ciphertext) +#define unpack_ciphertext MLKEM_NAMESPACE_K(unpack_ciphertext) +#define matvec_mul MLKEM_NAMESPACE_K(matvec_mul) /* End of static namespacing */ /************************************************* @@ -51,7 +49,7 @@ static void pack_pk(uint8_t r[MLKEM_INDCPA_PUBLICKEYBYTES], polyvec *pk, const uint8_t seed[MLKEM_SYMBYTES]) { - POLYVEC_BOUND(pk, MLKEM_Q); + debug_assert_bound_2d(pk, MLKEM_K, MLKEM_N, 0, MLKEM_Q); polyvec_tobytes(r, pk); memcpy(r + MLKEM_POLYVECBYTES, seed, MLKEM_SYMBYTES); } @@ -77,7 +75,7 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], /* 
NOTE: If a modulus check was conducted on the PK, we know at this * point that the coefficients of `pk` are unsigned canonical. The * specifications and proofs, however, do _not_ assume this, and instead - * work with the easily provable bound by 4096. */ + * work with the easily provable bound by UINT12_LIMIT. */ } /************************************************* @@ -91,7 +89,7 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], **************************************************/ static void pack_sk(uint8_t r[MLKEM_INDCPA_SECRETKEYBYTES], polyvec *sk) { - POLYVEC_BOUND(sk, MLKEM_Q); + debug_assert_bound_2d(sk, MLKEM_K, MLKEM_N, 0, MLKEM_Q); polyvec_tobytes(r, sk); } @@ -145,131 +143,11 @@ static void unpack_ciphertext(polyvec *b, poly *v, poly_decompress_dv(v, c + MLKEM_POLYVECCOMPRESSEDBYTES_DU); } -#ifndef MLKEM_GEN_MATRIX_NBLOCKS -#define MLKEM_GEN_MATRIX_NBLOCKS \ - ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + XOF_RATE) / XOF_RATE) -#endif - -/* - * Generate four A matrix entries from a seed, using rejection - * sampling on the output of a XOF. 
- */ -static void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4]) -__contract__( - requires(memory_no_alias(vec, sizeof(poly) * 4)) - requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) - requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) - assigns(memory_slice(vec, sizeof(poly) * 4)) - ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) -{ - /* Temporary buffers for XOF output before rejection sampling */ - uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - - /* Tracks the number of coefficients we have already sampled */ - unsigned int ctr[KECCAK_WAY]; - xof_x4_ctx statex; - unsigned int buflen; - - shake128x4_inc_init(&statex); - - /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */ - xof_x4_absorb(&statex, seed[0], seed[1], seed[2], seed[3], - MLKEM_SYMBYTES + 2); - - /* - * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. - * This should generate the matrix entries with high probability. - */ - xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, - &statex); - buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; - ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); - ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); - ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); - ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); - - /* - * So long as not all matrix entries have been generated, squeeze - * one more block a time until we're done. 
- */ - buflen = XOF_RATE; - while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || - ctr[3] < MLKEM_N) - __loop__( - assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), - object_whole(buf1), object_whole(buf2), object_whole(buf3)) - invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) - invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) - invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, MLKEM_Q)) - invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, MLKEM_Q)) - invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, MLKEM_Q)) - invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, MLKEM_Q))) - { - xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); - ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); - ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); - ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); - ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); - } - - xof_x4_release(&statex); -} - -/* - * Generate a single A matrix entry from a seed, using rejection - * sampling on the output of a XOF. - */ -static void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) -__contract__( - requires(memory_no_alias(entry, sizeof(poly))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) - assigns(memory_slice(entry, sizeof(poly))) - ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) -{ - xof_ctx state; - uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - unsigned int ctr, buflen; - - shake128_inc_init(&state); - xof_absorb(&state, seed, MLKEM_SYMBYTES + 2); - - /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. - */ - /* This should generate the matrix entry with high probability. 
*/ - xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); - buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; - ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); - - /* Squeeze + sample one more block a time until we're done */ - buflen = XOF_RATE; - while (ctr < MLKEM_N) - __loop__( - assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) - invariant(0 <= ctr && ctr <= MLKEM_N) - invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr, - 0, MLKEM_Q))) - { - xof_squeezeblocks(buf, 1, &state); - ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen); - } - - xof_release(&state); -} - #if !defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER) /* This namespacing is not done at the top to avoid a naming conflict * with native backends, which are currently not yet namespaced. */ #define poly_permute_bitrev_to_custom \ - MLKEM_NAMESPACE(poly_permute_bitrev_to_custom) + MLKEM_NAMESPACE_K(poly_permute_bitrev_to_custom) static INLINE void poly_permute_bitrev_to_custom(poly *data) __contract__( @@ -332,7 +210,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) * This call writes across polyvec boundaries for K=2 and K=3. * This is intentional and safe. */ - gen_matrix_entry_x4(&a[0].vec[0] + i, seedxy); + poly_rej_uniform_x4(&a[0].vec[0] + i, seedxy); } /* For left over polynomial, we use single keccak. */ @@ -353,12 +231,11 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) seed0[MLKEM_SYMBYTES + 1] = x; } - gen_matrix_entry(&a[0].vec[0] + i, seed0); + poly_rej_uniform(&a[0].vec[0] + i, seed0); i++; } - cassert(i == MLKEM_K * MLKEM_K, - "gen_matrix: failed to generate whole matrix"); + debug_assert(i == MLKEM_K * MLKEM_K); /* * The public matrix is generated in NTT domain. 
If the native backend @@ -402,16 +279,12 @@ __contract__( for (i = 0; i < MLKEM_K; i++) __loop__( assigns(i, object_whole(out)) - invariant(i >= 0 && i <= MLKEM_K)) + invariant(i <= MLKEM_K)) { polyvec_basemul_acc_montgomery_cached(&out->vec[i], &a[i], v, vc); } } - - -STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0) - MLKEM_NATIVE_INTERNAL_API void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES], @@ -461,7 +334,6 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], matvec_mul(&pkpv, a, &skpv, &skpv_cache); polyvec_tomont(&pkpv); - /* Arithmetic cannot overflow, see static assertion at the top */ polyvec_add(&pkpv, &e); polyvec_reduce(&pkpv); polyvec_reduce(&skpv); @@ -471,11 +343,6 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], } -/* Check that the arithmetic in indcpa_enc() does not overflow */ -STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0) -STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX, - indcpa_enc_bound_1) - MLKEM_NATIVE_INTERNAL_API void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], const uint8_t m[MLKEM_INDCPA_MSGBYTES], @@ -522,7 +389,6 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], polyvec_invntt_tomont(&b); poly_invntt_tomont(&v); - /* Arithmetic cannot overflow, see static assertion at the top */ polyvec_add(&b, &ep); poly_add(&v, &epp); poly_add(&v, &k); @@ -533,9 +399,6 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], pack_ciphertext(c, &b, &v); } -/* Check that the arithmetic in indcpa_dec() does not overflow */ -STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0) - MLKEM_NATIVE_INTERNAL_API void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], const uint8_t c[MLKEM_INDCPA_BYTES], @@ -551,7 +414,6 @@ void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], polyvec_basemul_acc_montgomery(&sb, &skpv, &b); poly_invntt_tomont(&sb); - /* Arithmetic cannot overflow, see static assertion at the 
top */ poly_sub(&v, &sb); poly_reduce(&v); diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/indcpa.h index 011f1aa4f..2c4fda3c4 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/indcpa.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/indcpa.h @@ -10,7 +10,7 @@ #include "common.h" #include "polyvec.h" -#define gen_matrix MLKEM_NAMESPACE(gen_matrix) +#define gen_matrix MLKEM_NAMESPACE_K(gen_matrix) /************************************************* * Name: gen_matrix * @@ -34,7 +34,7 @@ __contract__( array_bound(a[x].vec[y].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))); ); -#define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand) +#define indcpa_keypair_derand MLKEM_NAMESPACE_K(indcpa_keypair_derand) /************************************************* * Name: indcpa_keypair_derand * @@ -60,7 +60,7 @@ __contract__( assigns(object_whole(sk)) ); -#define indcpa_enc MLKEM_NAMESPACE(indcpa_enc) +#define indcpa_enc MLKEM_NAMESPACE_K(indcpa_enc) /************************************************* * Name: indcpa_enc * @@ -89,7 +89,7 @@ __contract__( assigns(object_whole(c)) ); -#define indcpa_dec MLKEM_NAMESPACE(indcpa_dec) +#define indcpa_dec MLKEM_NAMESPACE_K(indcpa_dec) /************************************************* * Name: indcpa_dec * diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/kem.c index 5779d3273..88c3843be 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/kem.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/kem.c @@ -16,8 +16,8 @@ * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. 
*/ -#define check_pk MLKEM_NAMESPACE(check_pk) -#define check_sk MLKEM_NAMESPACE(check_sk) +#define check_pk MLKEM_NAMESPACE_K(check_pk) +#define check_sk MLKEM_NAMESPACE_K(check_sk) /* End of static namespacing */ #if defined(CBMC) diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/kem.h index 074e4771e..93caa796b 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/kem.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/kem.h @@ -9,6 +9,7 @@ #include "cbmc.h" #include "common.h" +#if defined(MLKEM_NATIVE_CHECK_APIS) /* Include to ensure consistency between internal kem.h * and external mlkem_native.h. */ #include "mlkem_native.h" @@ -25,6 +26,14 @@ #error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h #endif +#else +#define crypto_kem_keypair_derand MLKEM_NAMESPACE_K(keypair_derand) +#define crypto_kem_keypair MLKEM_NAMESPACE_K(keypair) +#define crypto_kem_enc_derand MLKEM_NAMESPACE_K(enc_derand) +#define crypto_kem_enc MLKEM_NAMESPACE_K(enc) +#define crypto_kem_dec MLKEM_NAMESPACE_K(dec) +#endif + /************************************************* * Name: crypto_kem_keypair_derand * diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/mlkem_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/mlkem_native.h index 4aed4efbb..12d1d12e6 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/mlkem_native.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/mlkem_native.h @@ -59,9 +59,17 @@ #error MLKEM_NAMESPACE_PREFIX not set by config file #endif -#define BUILD_INFO_CONCAT_(x, y) x##_##y -#define BUILD_INFO_CONCAT(x, y) BUILD_INFO_CONCAT_(x, y) -#define BUILD_INFO_NAMESPACE(sym) BUILD_INFO_CONCAT(MLKEM_NAMESPACE_PREFIX, sym) +#if defined(MLKEM_NATIVE_NAMESPACE_PREFIX_ADD_LEVEL) +#define BUILD_INFO_CONCAT3_(x, y, z) x##y##_##z +#define BUILD_INFO_CONCAT3(x, y, z) BUILD_INFO_CONCAT3_(x, y, z) +#define BUILD_INFO_NAMESPACE(sym) \ + 
BUILD_INFO_CONCAT3(MLKEM_NAMESPACE_PREFIX, BUILD_INFO_LVL, sym) +#else +#define BUILD_INFO_CONCAT2_(x, y) x##_##y +#define BUILD_INFO_CONCAT2(x, y) BUILD_INFO_CONCAT2_(x, y) +#define BUILD_INFO_NAMESPACE(sym) \ + BUILD_INFO_CONCAT2(MLKEM_NAMESPACE_PREFIX, sym) +#endif #endif /* BUILD_INFO_LVL */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/ntt.c index 02b45215c..3651c8da9 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/ntt.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/ntt.c @@ -2,10 +2,12 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ -#include +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) +#include #include "arith_backend.h" -#include "debug/debug.h" +#include "debug.h" #include "ntt.h" #include "reduce.h" @@ -45,10 +47,10 @@ * 4 -- 6 * 5 -- 7 */ -static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, - int len, int bound) +static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, + unsigned start, unsigned len, int bound) __contract__( - requires(0 <= start && start < MLKEM_N) + requires(start < MLKEM_N) requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N) requires(0 <= bound && bound < INT16_MAX - MLKEM_Q) requires(-HALF_Q < zeta && zeta < HALF_Q) @@ -60,7 +62,7 @@ __contract__( ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound))) { /* `bound` is a ghost variable only needed in the CBMC specification */ - int j; + unsigned j; ((void)bound); for (j = start; j < start + len; j++) __loop__( @@ -93,7 +95,7 @@ __contract__( * official Kyber implementation here, merely adding `layer` as * a ghost variable for the specifications. 
*/ -static void ntt_layer(int16_t r[MLKEM_N], int len, int layer) +static void ntt_layer(int16_t r[MLKEM_N], unsigned len, unsigned layer) __contract__( requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer)) @@ -101,15 +103,15 @@ __contract__( assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q))) { - int start, k; + unsigned start, k; /* `layer` is a ghost variable only needed in the CBMC specification */ ((void)layer); /* Twiddle factors for layer n start at index 2^(layer-1) */ k = MLKEM_N / (2 * len); for (start = 0; start < MLKEM_N; start += 2 * len) __loop__( - invariant(0 <= start && start < MLKEM_N + 2 * len) - invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) + invariant(start < MLKEM_N + 2 * len) + invariant(k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) invariant(array_abs_bound(r, 0, start, layer * MLKEM_Q + MLKEM_Q)) invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q))) { @@ -130,9 +132,9 @@ __contract__( MLKEM_NATIVE_INTERNAL_API void poly_ntt(poly *p) { - int len, layer; + unsigned len, layer; int16_t *r; - POLY_BOUND_MSG(p, MLKEM_Q, "ref ntt input"); + debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q); r = p->coeffs; for (len = 128, layer = 1; len >= 2; len >>= 1, layer++) @@ -144,30 +146,23 @@ void poly_ntt(poly *p) } /* Check the stronger bound */ - POLY_BOUND_MSG(p, NTT_BOUND, "ref ntt output"); + debug_assert_abs_bound(p, MLKEM_N, NTT_BOUND); } #else /* MLKEM_USE_NATIVE_NTT */ -/* Check that bound for native NTT implies contractual bound */ -STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, invntt_bound) - MLKEM_NATIVE_INTERNAL_API void poly_ntt(poly *p) { - POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input"); + debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q); ntt_native(p); - POLY_BOUND_MSG(p, NTT_BOUND_NATIVE, "native ntt output"); + debug_assert_abs_bound(p, MLKEM_N, NTT_BOUND); } #endif /* 
MLKEM_USE_NATIVE_NTT */ #if !defined(MLKEM_USE_NATIVE_INTT) -/* Check that bound for reference invNTT implies contractual bound */ -#define INVNTT_BOUND_REF (3 * MLKEM_Q / 4) -STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound) - /* Compute one layer of inverse NTT */ -static void invntt_layer(int16_t *r, int len, int layer) +static void invntt_layer(int16_t *r, unsigned len, unsigned layer) __contract__( requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7) @@ -176,23 +171,23 @@ __contract__( assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { - int start, k; + unsigned start, k; /* `layer` is a ghost variable used only in the specification */ ((void)layer); k = MLKEM_N / len - 1; for (start = 0; start < MLKEM_N; start += 2 * len) __loop__( invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)) - invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(start <= MLKEM_N && k <= 127) /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */ invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len)) { - int j; + unsigned j; int16_t zeta = zetas[k--]; for (j = start; j < start + len; j++) __loop__( invariant(start <= j && j <= start + len) - invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(start <= MLKEM_N && k <= 127) invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { int16_t t = r[j]; @@ -211,13 +206,13 @@ void poly_invntt_tomont(poly *p) * and NTT twist. This also brings coefficients down to * absolute value < MLKEM_Q. 
*/ - int j, len, layer; + unsigned j, len, layer; const int16_t f = 1441; int16_t *r = p->coeffs; for (j = 0; j < MLKEM_N; j++) __loop__( - invariant(0 <= j && j <= MLKEM_N) + invariant(j <= MLKEM_N) invariant(array_abs_bound(r, 0, j, MLKEM_Q))) { r[j] = fqmul(r[j], f); @@ -226,24 +221,21 @@ void poly_invntt_tomont(poly *p) /* Run the invNTT layers */ for (len = 2, layer = 7; len <= 128; len <<= 1, layer--) __loop__( - invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer))) + invariant(2 <= len && len <= 256 && layer <= 7 && len == (1 << (8 - layer))) invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { invntt_layer(p->coeffs, len, layer); } - POLY_BOUND_MSG(p, INVNTT_BOUND_REF, "ref intt output"); + debug_assert_abs_bound(p, MLKEM_N, INVNTT_BOUND); } #else /* MLKEM_USE_NATIVE_INTT */ -/* Check that bound for native invNTT implies contractual bound */ -STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound) - MLKEM_NATIVE_INTERNAL_API void poly_invntt_tomont(poly *p) { intt_native(p); - POLY_BOUND_MSG(p, INVNTT_BOUND_NATIVE, "native intt output"); + debug_assert_abs_bound(p, MLKEM_N, INVNTT_BOUND); } #endif /* MLKEM_USE_NATIVE_INTT */ @@ -252,8 +244,7 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t b_cached) { int32_t t0, t1; - - BOUND(a, 2, 4096, "basemul input bound"); + debug_assert_bound(a, 2, 0, UINT12_LIMIT); t0 = (int32_t)a[1] * b_cached; t0 += (int32_t)a[0] * b[0]; @@ -264,5 +255,12 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], r[0] = montgomery_reduce(t0); r[1] = montgomery_reduce(t1); - BOUND(r, 2, 2 * MLKEM_Q, "basemul output bound"); + debug_assert_abs_bound(r, 2, 2 * MLKEM_Q); } + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_ntt MLKEM_NAMESPACE_K(empty_cu_ntt) +int empty_cu_ntt; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/ntt.h 
b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/ntt.h index 5592bb9a2..4e80d3ab3 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/ntt.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/ntt.h @@ -4,10 +4,10 @@ */ #ifndef NTT_H #define NTT_H +#include "common.h" #include #include "cbmc.h" -#include "common.h" #include "poly.h" #include "reduce.h" @@ -81,7 +81,7 @@ __contract__( * Upon return, coefficients are bound by * 2*MLKEM_Q in absolute value. * - a: Pointer to first input polynomial - * Must be coefficient-wise < 4096 in absolute value. + * Every coefficient must be in [0..4095] * - b: Pointer to second input polynomial * Can have arbitrary int16_t coefficients * - b_cached: Some precomputed value, typically derived from @@ -99,5 +99,4 @@ __contract__( ensures(array_abs_bound(r, 0, 2, 2 * MLKEM_Q)) ); - -#endif +#endif /* NTT_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/params.h index fa751f977..57ea4c8ba 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/params.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/params.h @@ -25,23 +25,34 @@ #define MLKEM_POLYBYTES 384 #define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES) +#define MLKEM_POLYCOMPRESSEDBYTES_D4 128 +#define MLKEM_POLYCOMPRESSEDBYTES_D5 160 +#define MLKEM_POLYCOMPRESSEDBYTES_D10 320 +#define MLKEM_POLYCOMPRESSEDBYTES_D11 352 + #if MLKEM_K == 2 #define MLKEM_LVL 512 #define MLKEM_ETA1 3 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_DU 10 +#define MLKEM_DV 4 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #elif MLKEM_K == 3 #define MLKEM_LVL 768 #define MLKEM_ETA1 2 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_DU 10 +#define 
MLKEM_DV 4 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #elif MLKEM_K == 4 #define MLKEM_LVL 1024 #define MLKEM_ETA1 2 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 160 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 352 +#define MLKEM_DU 11 +#define MLKEM_DV 5 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D5 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D11 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/poly.c index 5807879df..7483ebf6d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/poly.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/poly.c @@ -2,13 +2,15 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) + #include #include - #include "arith_backend.h" #include "cbd.h" #include "cbmc.h" -#include "debug/debug.h" +#include "debug.h" #include "fips202x4.h" #include "ntt.h" #include "poly.h" @@ -16,50 +18,46 @@ #include "symmetric.h" #include "verify.h" +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 || MLKEM_K == 3) MLKEM_NATIVE_INTERNAL_API -void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) +void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a) { - unsigned j; -#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) - for (j = 0; j < MLKEM_N / 8; j++) - __loop__(invariant(j >= 0 && j <= MLKEM_N / 8)) + unsigned i; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (i = 0; i < MLKEM_N / 8; i++) + __loop__(invariant(i <= MLKEM_N / 8)) { - unsigned k; - uint16_t t[8]; - for (k = 0; k < 8; k++) + unsigned j; + uint8_t t[8] 
= {0}; + for (j = 0; j < 8; j++) __loop__( - invariant(k >= 0 && k <= 8) - invariant(forall(r, 0, k, t[r] < (1u << 11)))) + invariant(i <= MLKEM_N / 8 && j <= 8) + invariant(array_bound(t, 0, j, 0, 16))) { - t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); } - /* - * Make all implicit truncation explicit. No data is being - * truncated for the LHS's since each t[i] is 11-bit in size. - */ - r[11 * j + 0] = (t[0] >> 0) & 0xFF; - r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); - r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); - r[11 * j + 3] = (t[2] >> 2) & 0xFF; - r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); - r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); - r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); - r[11 * j + 7] = (t[5] >> 1) & 0xFF; - r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); - r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); - r[11 * j + 10] = (t[7] >> 3); + r[i * 4] = t[0] | (t[1] << 4); + r[i * 4 + 1] = t[2] | (t[3] << 4); + r[i * 4 + 2] = t[4] | (t[5] << 4); + r[i * 4 + 3] = t[6] | (t[7] << 4); } +} -#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a) +{ + unsigned j; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (j = 0; j < MLKEM_N / 4; j++) - __loop__(invariant(j >= 0 && j <= MLKEM_N / 4)) + __loop__(invariant(j <= MLKEM_N / 4)) { unsigned k; uint16_t t[4]; for (k = 0; k < 4; k++) __loop__( - invariant(k >= 0 && k <= 4) + invariant(k <= 4) invariant(forall(r, 0, k, t[r] < (1u << 10)))) { t[k] = scalar_compress_d10(a->coeffs[4 * j + k]); @@ -75,51 +73,35 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) r[5 * j + 3] = (t[2] >> 4) | ((t[3] << 6) & 0xFF); r[5 * j + 4] = (t[3] >> 2); } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" -#endif } - MLKEM_NATIVE_INTERNAL_API -void poly_decompress_du(poly *r, const 
uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]) { - unsigned j; -#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) - for (j = 0; j < MLKEM_N / 8; j++) + unsigned i; + for (i = 0; i < MLKEM_N / 2; i++) __loop__( - invariant(0 <= j && j <= MLKEM_N / 8) - invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q))) + invariant(i <= MLKEM_N / 2) + invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q))) { - int k; - uint16_t t[8]; - uint8_t const *base = &a[11 * j]; - t[0] = 0x7FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8)); - t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); - t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | - ((uint16_t)base[4] << 10)); - t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); - t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); - t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | - ((uint16_t)base[8] << 9)); - t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); - t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); - - for (k = 0; k < 8; k++) - __loop__( - invariant(0 <= k && k <= 8) - invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q))) - { - r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); - } + r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); + r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} + +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d10(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]) +{ + unsigned j; for (j = 0; j < MLKEM_N / 4; j++) __loop__( - invariant(0 <= j && j <= MLKEM_N / 4) + invariant(j <= MLKEM_N / 4) invariant(array_bound(r->coeffs, 0, 4 * j, 0, MLKEM_Q))) { - int k; + unsigned k; uint16_t t[4]; uint8_t const *base = &a[5 * j]; @@ -130,51 +112,33 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) for (k = 0; 
k < 4; k++) __loop__( - invariant(0 <= k && k <= 4) + invariant(k <= 4) invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, MLKEM_Q))) { r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]); } } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" -#endif + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 \ + || MLKEM_K == 3) */ +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 MLKEM_NATIVE_INTERNAL_API -void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) +void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); -#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) - for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) - { - unsigned j; - uint8_t t[8] = {0}; - for (j = 0; j < 8; j++) - __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) - invariant(array_bound(t, 0, j, 0, 16))) - { - t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); - } - - r[i * 4] = t[0] | (t[1] << 4); - r[i * 4 + 1] = t[2] | (t[3] << 4); - r[i * 4 + 2] = t[4] | (t[5] << 4); - r[i * 4 + 3] = t[6] | (t[7] << 4); - } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + __loop__(invariant(i <= MLKEM_N / 8)) { unsigned j; uint8_t t[8] = {0}; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i <= MLKEM_N / 8 && j <= 8) invariant(array_bound(t, 0, j, 0, 32))) { t[j] = scalar_compress_d5(a->coeffs[8 * i + j]); @@ -191,33 +155,57 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) r[i * 5 + 3] = 0xFF & ((t[4] >> 4) | (t[5] << 1) | (t[6] << 6)); r[i * 5 + 4] = 0xFF & ((t[6] >> 2) | (t[7] << 3)); } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 
160}" -#endif } MLKEM_NATIVE_INTERNAL_API -void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a) { - unsigned i; -#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) - for (i = 0; i < MLKEM_N / 2; i++) - __loop__( - invariant(i >= 0 && i <= MLKEM_N / 2) - invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q))) + unsigned j; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (j = 0; j < MLKEM_N / 8; j++) + __loop__(invariant(j <= MLKEM_N / 8)) { - r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); - r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); + unsigned k; + uint16_t t[8]; + for (k = 0; k < 8; k++) + __loop__( + invariant(k <= 8) + invariant(forall(r, 0, k, t[r] < (1u << 11)))) + { + t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + } + + /* + * Make all implicit truncation explicit. No data is being + * truncated for the LHS's since each t[i] is 11-bit in size. 
+ */ + r[11 * j + 0] = (t[0] >> 0) & 0xFF; + r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); + r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); + r[11 * j + 3] = (t[2] >> 2) & 0xFF; + r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); + r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); + r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); + r[11 * j + 7] = (t[5] >> 1) & 0xFF; + r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); + r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); + r[11 * j + 10] = (t[7] >> 3); } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) +} + +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]) +{ + unsigned i; for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q))) { unsigned j; uint8_t t[8]; - const int offset = i * 5; + const unsigned offset = i * 5; /* * Explicitly truncate to avoid warning about * implicit truncation in CBMC and unwind loop for ease @@ -240,29 +228,62 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) /* and copy to the correct slice in r[] */ for (j = 0; j < 8; j++) __loop__( - invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8) + invariant(j <= 8 && i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q))) { r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]); } } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 160}" -#endif - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d11(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]) +{ + unsigned j; + for (j = 0; j < MLKEM_N / 8; j++) + __loop__( + invariant(j <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q))) + { + unsigned k; + uint16_t t[8]; + uint8_t const *base = &a[11 * j]; + t[0] = 0x7FF & ((base[0] >> 0) | 
((uint16_t)base[1] << 8)); + t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); + t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | + ((uint16_t)base[4] << 10)); + t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); + t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); + t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | + ((uint16_t)base[8] << 9)); + t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); + t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); + + for (k = 0; k < 8; k++) + __loop__( + invariant(k <= 8) + invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q))) + { + r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); + } + } + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 */ + #if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES) MLKEM_NATIVE_INTERNAL_API void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); - + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_N / 2; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 2)) + __loop__(invariant(i <= MLKEM_N / 2)) { const uint16_t t0 = a->coeffs[2 * i]; const uint16_t t1 = a->coeffs[2 * i + 1]; @@ -290,7 +311,7 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) MLKEM_NATIVE_INTERNAL_API void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) { - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); poly_tobytes_native(r, a); } #endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */ @@ -302,7 +323,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) unsigned i; for (i = 0; i < MLKEM_N / 2; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 2) + invariant(i <= MLKEM_N / 2) invariant(array_bound(r->coeffs, 0, 2 * i, 0, UINT12_LIMIT))) { const uint8_t t0 = a[3 * i + 0]; @@ -313,7 +334,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) } /* Note that the coefficients are not
canonical */ - POLY_UBOUND(r, 4096); + debug_assert_bound(r, MLKEM_N, 0, UINT12_LIMIT); } #else /* MLKEM_USE_NATIVE_POLY_FROMBYTES */ MLKEM_NATIVE_INTERNAL_API @@ -333,13 +354,13 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q))) { unsigned j; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i < MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i < MLKEM_N / 8 && j <= 8) invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q))) { /* Prevent the compiler from recognizing this as a bit selection */ @@ -347,23 +368,23 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) r->coeffs[8 * i + j] = ct_sel_int16(HALF_Q, 0, msg[i] & mask); } } - POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output"); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + __loop__(invariant(i <= MLKEM_N / 8)) { unsigned j; msg[i] = 0; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)) + invariant(i <= MLKEM_N / 8 && j <= 8)) { uint32_t t = scalar_compress_d1(a->coeffs[8 * i + j]); msg[i] |= t << j; @@ -371,104 +392,17 @@ void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) } } -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, - uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) -{ - ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN 
uint8_t extkey0[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1]; - memcpy(extkey0, seed, MLKEM_SYMBYTES); - memcpy(extkey1, seed, MLKEM_SYMBYTES); - memcpy(extkey2, seed, MLKEM_SYMBYTES); - memcpy(extkey3, seed, MLKEM_SYMBYTES); - extkey0[MLKEM_SYMBYTES] = nonce0; - extkey1[MLKEM_SYMBYTES] = nonce1; - extkey2[MLKEM_SYMBYTES] = nonce2; - extkey3[MLKEM_SYMBYTES] = nonce3; - prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3); - poly_cbd_eta1(r0, buf0); - poly_cbd_eta1(r1, buf1); - poly_cbd_eta1(r2, buf2); - poly_cbd_eta1(r3, buf3); - - POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0"); - POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 1"); - POLY_BOUND_MSG(r2, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 2"); - POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3"); -} - -#if MLKEM_K == 2 || MLKEM_K == 4 -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce) -{ - ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; - ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1]; - - memcpy(extkey, seed, MLKEM_SYMBYTES); - extkey[MLKEM_SYMBYTES] = nonce; - prf_eta2(buf, extkey); - - poly_cbd_eta2(r, buf); - - POLY_BOUND_MSG(r, MLKEM_ETA1 + 1, "poly_getnoise_eta2 output"); -} -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ - -#if MLKEM_K == 2 -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, - uint8_t nonce3) -{ - ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; - ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; - memcpy(extkey[0], seed, MLKEM_SYMBYTES); - memcpy(extkey[1], seed, MLKEM_SYMBYTES); - memcpy(extkey[2], seed, MLKEM_SYMBYTES); - 
memcpy(extkey[3], seed, MLKEM_SYMBYTES); - extkey[0][MLKEM_SYMBYTES] = nonce0; - extkey[1][MLKEM_SYMBYTES] = nonce1; - extkey[2][MLKEM_SYMBYTES] = nonce2; - extkey[3][MLKEM_SYMBYTES] = nonce3; - - prf_eta1(buf1[0], extkey[0]); - prf_eta1(buf1[1], extkey[1]); - prf_eta2(buf2[0], extkey[2]); - prf_eta2(buf2[1], extkey[3]); - - poly_cbd_eta1(r0, buf1[0]); - poly_cbd_eta1(r1, buf1[1]); - poly_cbd_eta2(r2, buf2[0]); - poly_cbd_eta2(r3, buf2[1]); - - POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 0"); - POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 1"); - POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2"); - POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 3"); -} -#endif /* MLKEM_K == 2 */ - MLKEM_NATIVE_INTERNAL_API void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, const poly_mulcache *b_cache) { unsigned i; - POLY_BOUND(b_cache, 4096); + debug_assert_bound(a, MLKEM_N, 0, UINT12_LIMIT); for (i = 0; i < MLKEM_N / 4; i++) __loop__( assigns(i, object_whole(r)) - invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(i <= MLKEM_N / 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i, 2 * MLKEM_Q))) { basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i], @@ -476,6 +410,8 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, basemul_cached(&r->coeffs[4 * i + 2], &a->coeffs[4 * i + 2], &b->coeffs[4 * i + 2], b_cache->coeffs[2 * i + 1]); } + + debug_assert_abs_bound(r, MLKEM_N, 2 * MLKEM_Q); } #if !defined(MLKEM_USE_NATIVE_POLY_TOMONT) @@ -486,20 +422,20 @@ void poly_tomont(poly *r) const int16_t f = (1ULL << 32) % MLKEM_Q; /* 1353 */ for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) - invariant(array_abs_bound(r->coeffs ,0, i, MLKEM_Q))) + invariant(i <= MLKEM_N) + invariant(array_abs_bound(r->coeffs, 0, i, MLKEM_Q))) { r->coeffs[i] = fqmul(r->coeffs[i], f); } - POLY_BOUND(r, MLKEM_Q); + 
debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_TOMONT */ MLKEM_NATIVE_INTERNAL_API void poly_tomont(poly *r) { poly_tomont_native(r); - POLY_BOUND(r, MLKEM_Q); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } #endif /* MLKEM_USE_NATIVE_POLY_TOMONT */ @@ -510,7 +446,7 @@ void poly_reduce(poly *r) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(array_bound(r->coeffs, 0, i, 0, MLKEM_Q))) { /* Barrett reduction, giving signed canonical representative */ @@ -519,14 +455,14 @@ void poly_reduce(poly *r) r->coeffs[i] = scalar_signed_to_unsigned_q(t); } - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_REDUCE */ MLKEM_NATIVE_INTERNAL_API void poly_reduce(poly *r) { poly_reduce_native(r); - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } #endif /* MLKEM_USE_NATIVE_POLY_REDUCE */ @@ -536,7 +472,7 @@ void poly_add(poly *r, const poly *b) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1]))) { @@ -550,7 +486,7 @@ void poly_sub(poly *r, const poly *b) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1]))) { @@ -564,20 +500,36 @@ void poly_mulcache_compute(poly_mulcache *x, const poly *a) { unsigned i; for (i = 0; i < MLKEM_N / 4; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 4)) + __loop__( + invariant(i <= MLKEM_N / 4) + invariant(array_abs_bound(x->coeffs, 0, 2 * i, MLKEM_Q))) { x->coeffs[2 * i + 0] = fqmul(a->coeffs[4 * i + 1], zetas[64 + 
i]); x->coeffs[2 * i + 1] = fqmul(a->coeffs[4 * i + 3], -zetas[64 + i]); } - POLY_BOUND(x, MLKEM_Q); + + /* + * This bound is true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus omitted + * from the spec so as not to unnecessarily constrain native + * implementations, but checked here nonetheless. + */ + debug_assert_abs_bound(x, MLKEM_N / 2, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ MLKEM_NATIVE_INTERNAL_API void poly_mulcache_compute(poly_mulcache *x, const poly *a) { poly_mulcache_compute_native(x, a); - /* Omitting POLY_BOUND(x, MLKEM_Q) since native implementations may + /* Omitting bounds assertion since native implementations may * decide not to use a mulcache. Note that the C backend implementation * of poly_basemul_montgomery_cached() does still include the check. */ } #endif /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_poly MLKEM_NAMESPACE_K(empty_cu_poly) +int empty_cu_poly; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/poly.h index 1e8c109c6..6a14c785d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/poly.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/poly.h @@ -307,112 +307,164 @@ __contract__( ************************************************************/ static INLINE uint16_t scalar_signed_to_unsigned_q(int16_t c) __contract__( - requires(c >= -(MLKEM_Q - 1) && c <= (MLKEM_Q - 1)) - ensures(return_value >= 0 && return_value <= (MLKEM_Q - 1)) + requires(c > -MLKEM_Q && c < MLKEM_Q) + ensures(return_value >= 0 && return_value < MLKEM_Q) ensures(return_value == (int32_t)c + (((int32_t)c < 0) * MLKEM_Q))) { + debug_assert_abs_bound(&c, 1, MLKEM_Q); + /* Add Q if c is negative, but in constant time */ c = ct_sel_int16(c + MLKEM_Q, c, ct_cmask_neg_i16(c)); - cassert(c >= 0,
"scalar_signed_to_unsigned_q result lower bound"); - cassert(c < MLKEM_Q, "scalar_signed_to_unsigned_q result upper bound"); - /* and therefore cast to uint16_t is safe. */ + debug_assert_bound(&c, 1, 0, MLKEM_Q); return (uint16_t)c; } -#define poly_compress_du MLKEM_NAMESPACE(poly_compress_du) +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || \ + (MLKEM_K == 2 || MLKEM_K == 3) +#define poly_compress_d4 MLKEM_NAMESPACE(poly_compress_d4) /************************************************* - * Name: poly_compress_du + * Name: poly_compress_d4 * - * Description: Compression (du bits) and subsequent serialization of a - *polynomial + * Description: Compression (4 bits) and subsequent serialization of a + * polynomial * * Arguments: - uint8_t *r: pointer to output byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes) * - const poly *a: pointer to input polynomial * Coefficients must be unsigned canonical, * i.e. in [0,1,..,MLKEM_Q-1]. **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) -__contract__( - requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) - requires(memory_no_alias(a, sizeof(poly))) - requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) -); +void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a); + +#define poly_compress_d10 MLKEM_NAMESPACE(poly_compress_d10) +/************************************************* + * Name: poly_compress_d10 + * + * Description: Compression (10 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a); -#define poly_decompress_du MLKEM_NAMESPACE(poly_decompress_du) +#define poly_decompress_d4 MLKEM_NAMESPACE(poly_decompress_d4) /************************************************* - * Name: poly_decompress_du + * Name: poly_decompress_d4 * - * Description: De-serialization and subsequent decompression (du bits) of a - *polynomial; approximate inverse of poly_compress_du + * Description: De-serialization and subsequent decompression (4 bits) of a + * polynomial; approximate inverse of poly_compress_d4 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *a: pointer to input byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES bytes) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes) * * Upon return, the coefficients of the output polynomial are unsigned-canonical * (non-negative and smaller than MLKEM_Q). * **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) -__contract__( - requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) - requires(memory_no_alias(r, sizeof(poly))) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) -); +void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]); -#define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv) +#define poly_decompress_d10 MLKEM_NAMESPACE(poly_decompress_d10) /************************************************* - * Name: poly_compress_dv + * Name: poly_decompress_d10 + * + * Description: De-serialization and subsequent decompression (10 bits) of a + * polynomial; approximate inverse of poly_compress_d10 * - * Description: Compression (dv bits) and subsequent serialization of a - *polynomial + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t
*a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d10(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]); +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 \ + || MLKEM_K == 3) */ + +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 +#define poly_compress_d5 MLKEM_NAMESPACE(poly_compress_d5) +/************************************************* + * Name: poly_compress_d5 + * + * Description: Compression (5 bits) and subsequent serialization of a + * polynomial * * Arguments: - uint8_t *r: pointer to output byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES_DV) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes) * - const poly *a: pointer to input polynomial * Coefficients must be unsigned canonical, * i.e. in [0,1,..,MLKEM_Q-1]. 
**************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) -__contract__( - requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) - requires(memory_no_alias(a, sizeof(poly))) - requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - assigns(object_whole(r)) -); +void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a); -#define poly_decompress_dv MLKEM_NAMESPACE(poly_decompress_dv) +#define poly_compress_d11 MLKEM_NAMESPACE(poly_compress_d11) /************************************************* - * Name: poly_decompress_dv + * Name: poly_compress_d11 + * + * Description: Compression (11 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a); + +#define poly_decompress_d5 MLKEM_NAMESPACE(poly_decompress_d5) +/************************************************* + * Name: poly_decompress_d5 * * Description: De-serialization and subsequent decompression (dv bits) of a - *polynomial; approximate inverse of poly_compress + * polynomial; approximate inverse of poly_compress_d5 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *a: pointer to input byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES_DV - *bytes) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes) * * Upon return, the coefficients of the output polynomial are unsigned-canonical * (non-negative and smaller than MLKEM_Q).
* **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) -__contract__( - requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) - requires(memory_no_alias(r, sizeof(poly))) - assigns(object_whole(r)) - ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) -); +void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]); + +#define poly_decompress_d11 MLKEM_NAMESPACE(poly_decompress_d11) +/************************************************* + * Name: poly_decompress_d11 + * + * Description: De-serialization and subsequent decompression (11 bits) of a + * polynomial; approximate inverse of poly_compress_d11 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d11(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]); +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 \ + */ #define poly_tobytes MLKEM_NAMESPACE(poly_tobytes) /************************************************* @@ -500,144 +552,6 @@ __contract__( assigns(object_whole(msg)) ); -#define poly_getnoise_eta1_4x MLKEM_NAMESPACE(poly_getnoise_eta1_4x) -/************************************************* - * Name: poly_getnoise_eta1_4x - * - * Description: Batch sample four polynomials deterministically from a seed - * and nonces, with output polynomials close to centered binomial distribution - * with parameter MLKEM_ETA1. 
- * - * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce{0,1,2,3}: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, - uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) -/* Depending on MLKEM_K, the pointers passed to this function belong - to the same objects, so we cannot use memory_no_alias for r0-r3. - - NOTE: Somehow it is important to use memory_no_alias() first in the - conjunctions defining each case. -*/ -#if MLKEM_K == 2 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ - (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && - r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#elif MLKEM_K == 4 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case B: r0, r1, r2, r3 consecutive */ - (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 
1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#elif MLKEM_K == 3 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case C: r0, r1, r2 consecutive */ - (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && - r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#endif /* MLKEM_K */ - -#if MLKEM_ETA1 == MLKEM_ETA2 -/* - * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 - * where MLKEM_ETA2 = MLKEM_ETA1 = 2. - * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. - */ -#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x -#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ - -#if MLKEM_K == 2 || MLKEM_K == 4 -#define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2) -/************************************************* - * Name: poly_getnoise_eta2 - * - * Description: Sample a polynomial deterministically from a seed and a nonce, - * with output polynomial close to centered binomial distribution - * with parameter MLKEM_ETA2 - * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - assigns(object_whole(r)) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, 
MLKEM_ETA2 + 1)) -); -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ - -#if MLKEM_K == 2 -#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x) -/************************************************* - * Name: poly_getnoise_eta1122_4x - * - * Description: Batch sample four polynomials deterministically from a seed - * and a nonces, with output polynomials close to centered binomial - * distribution with parameter MLKEM_ETA1 and MLKEM_ETA2 - * - * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce{0,1,2,3}: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, - uint8_t nonce3) -__contract__( - requires( /* r0, r1 consecutive, r2, r3 consecutive */ - (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && - r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) - ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)); -); -#endif /* MLKEM_K == 2 */ - #define poly_basemul_montgomery_cached \ MLKEM_NAMESPACE(poly_basemul_montgomery_cached) /************************************************* @@ -649,8 +563,7 @@ __contract__( * Bounds: * - a is assumed to be coefficient-wise < q in absolute value. * - * The result is coefficient-wise bound by 3/2 q in absolute - * value. + * The result is coefficient-wise bound by 2*q in absolute value. 
* * Arguments: - poly *r: pointer to output polynomial * - const poly *a: pointer to first input polynomial @@ -802,4 +715,4 @@ __contract__( assigns(object_whole(r)) ); -#endif +#endif /* POLY_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/polyvec.c index 7d2016773..50ea1c34a 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/polyvec.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/polyvec.c @@ -4,18 +4,29 @@ */ #include "polyvec.h" #include +#include #include "arith_backend.h" +#include "cbd.h" #include "ntt.h" #include "poly.h" +#include "symmetric.h" -#include "debug/debug.h" +#include "debug.h" + +/* Static namespacing + * This is to facilitate building multiple instances + * of mlkem-native (e.g. with varying security levels) + * within a single compilation unit. */ +#define poly_cbd_eta1 MLKEM_NAMESPACE_K(poly_cbd_eta1) +#define poly_cbd_eta2 MLKEM_NAMESPACE_K(poly_cbd_eta2) +/* End of static namespacing */ MLKEM_NATIVE_INTERNAL_API void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU], const polyvec *a) { unsigned i; - POLYVEC_UBOUND(a, MLKEM_Q); + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_K; i++) { @@ -33,13 +44,15 @@ void polyvec_decompress_du(polyvec *r, poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU); } - POLYVEC_UBOUND(r, MLKEM_Q); + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a) { unsigned i; + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q); + for (i = 0; i < MLKEM_K; i++) { poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]); @@ -54,6 +67,8 @@ void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES]) { poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES); } + + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); } MLKEM_NATIVE_INTERNAL_API @@ -64,6 +79,8 @@ 
void polyvec_ntt(polyvec *r) { poly_ntt(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, NTT_BOUND); } MLKEM_NATIVE_INTERNAL_API @@ -74,6 +91,8 @@ void polyvec_invntt_tomont(polyvec *r) { poly_invntt_tomont(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, INVNTT_BOUND); } #if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) @@ -84,10 +103,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, { unsigned i; poly t; - - POLYVEC_BOUND(a, 4096); - POLYVEC_BOUND(b, NTT_BOUND); - POLYVEC_BOUND(b_cache, MLKEM_Q); + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); poly_basemul_montgomery_cached(r, &a->vec[0], &b->vec[0], &b_cache->vec[0]); for (i = 1; i < MLKEM_K; i++) @@ -95,18 +111,15 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, poly_basemul_montgomery_cached(&t, &a->vec[i], &b->vec[i], &b_cache->vec[i]); poly_add(r, &t); - /* abs bounds: < (i+1) * 3/2 * q */ } /* - * Those bounds are true for the C implementation, but not needed - * in the higher level bounds reasoning. It is thus best to omit - * them from the spec to not unnecessarily constraint native implementations. + * This bound is true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus omitted + * them from the spec to not unnecessarily constrain native + * implementations, but checked here nonetheless. 
*/ - cassert(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_K * 2 * MLKEM_Q), - "polyvec_basemul_acc_montgomery_cached output bounds"); - /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */ - POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_K * 2 * MLKEM_Q); } #else /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */ MLKEM_NATIVE_INTERNAL_API @@ -114,9 +127,8 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, const polyvec *b, const polyvec_mulcache *b_cache) { - POLYVEC_BOUND(a, 4096); - POLYVEC_BOUND(b, NTT_BOUND); - /* Omitting POLYVEC_BOUND(b_cache, MLKEM_Q) since native implementations may + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); + /* Omitting bounds assertion for cache since native implementations may * decide not to use a mulcache. Note that the C backend implementation * of poly_basemul_montgomery_cached() does still include the check. */ polyvec_basemul_acc_montgomery_cached_native(r, a, b, b_cache); @@ -149,6 +161,8 @@ void polyvec_reduce(polyvec *r) { poly_reduce(&r->vec[i]); } + + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API @@ -169,4 +183,148 @@ void polyvec_tomont(polyvec *r) { poly_tomont(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, MLKEM_Q); +} + + +/************************************************* + * Name: poly_cbd_eta1 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA1. 
+ * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static INLINE void poly_cbd_eta1(poly *r, + const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)) +) +{ +#if MLKEM_ETA1 == 2 + poly_cbd2(r, buf); +#elif MLKEM_ETA1 == 3 + poly_cbd3(r, buf); +#else +#error "Invalid value of MLKEM_ETA1" +#endif +} + +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +{ + ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t extkey0[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1]; + memcpy(extkey0, seed, MLKEM_SYMBYTES); + memcpy(extkey1, seed, MLKEM_SYMBYTES); + memcpy(extkey2, seed, MLKEM_SYMBYTES); + memcpy(extkey3, seed, MLKEM_SYMBYTES); + extkey0[MLKEM_SYMBYTES] = nonce0; + extkey1[MLKEM_SYMBYTES] = nonce1; + extkey2[MLKEM_SYMBYTES] = nonce2; + extkey3[MLKEM_SYMBYTES] = nonce3; + prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3); + poly_cbd_eta1(r0, buf0); + poly_cbd_eta1(r1, buf1); + poly_cbd_eta1(r2, buf2); + poly_cbd_eta1(r3, buf3); + + debug_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA1 + 1); +} + +#if MLKEM_K == 2 || MLKEM_K == 4 
+/************************************************* + * Name: poly_cbd_eta2 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA2. + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static INLINE void poly_cbd_eta2(poly *r, + const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1))) +{ +#if MLKEM_ETA2 == 2 + poly_cbd2(r, buf); +#else +#error "Invalid value of MLKEM_ETA2" +#endif +} + +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +{ + ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1]; + + memcpy(extkey, seed, MLKEM_SYMBYTES); + extkey[MLKEM_SYMBYTES] = nonce; + prf_eta2(buf, extkey); + + poly_cbd_eta2(r, buf); + + debug_assert_abs_bound(r, MLKEM_N, MLKEM_ETA2 + 1); +} +#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ + + +#if MLKEM_K == 2 +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +{ + ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; + memcpy(extkey[0], seed, MLKEM_SYMBYTES); + memcpy(extkey[1], seed, MLKEM_SYMBYTES); + memcpy(extkey[2], seed, MLKEM_SYMBYTES); + memcpy(extkey[3], seed, MLKEM_SYMBYTES); + extkey[0][MLKEM_SYMBYTES] = nonce0; + extkey[1][MLKEM_SYMBYTES] = nonce1; + extkey[2][MLKEM_SYMBYTES] = nonce2; + 
extkey[3][MLKEM_SYMBYTES] = nonce3; + + prf_eta1(buf1[0], extkey[0]); + prf_eta1(buf1[1], extkey[1]); + prf_eta2(buf2[0], extkey[2]); + prf_eta2(buf2[1], extkey[3]); + + poly_cbd_eta1(r0, buf1[0]); + poly_cbd_eta1(r1, buf1[1]); + poly_cbd_eta2(r2, buf2[0]); + poly_cbd_eta2(r3, buf2[1]); + + debug_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA2 + 1); + debug_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA2 + 1); } +#endif /* MLKEM_K == 2 */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/polyvec.h index 138724150..8be8579e0 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/polyvec.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/polyvec.h @@ -9,19 +9,144 @@ #include "common.h" #include "poly.h" -#define polyvec MLKEM_NAMESPACE(polyvec) +#define polyvec MLKEM_NAMESPACE_K(polyvec) typedef struct { poly vec[MLKEM_K]; } ALIGN polyvec; -#define polyvec_mulcache MLKEM_NAMESPACE(polyvec_mulcache) +#define polyvec_mulcache MLKEM_NAMESPACE_K(polyvec_mulcache) typedef struct { poly_mulcache vec[MLKEM_K]; } polyvec_mulcache; -#define polyvec_compress_du MLKEM_NAMESPACE(polyvec_compress_du) +#define poly_compress_du MLKEM_NAMESPACE_K(poly_compress_du) +/************************************************* + * Name: poly_compress_du + * + * Description: Compression (du bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +static INLINE void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], + const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU))) +{ +#if MLKEM_DU == 10 + poly_compress_d10(r, a); +#elif MLKEM_DU == 11 + poly_compress_d11(r, a); +#else +#error "Invalid value of MLKEM_DU" +#endif +} + +#define poly_decompress_du MLKEM_NAMESPACE_K(poly_decompress_du) +/************************************************* + * Name: poly_decompress_du + * + * Description: De-serialization and subsequent decompression (du bits) of a + * polynomial; approximate inverse of poly_compress_du + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). 
+ * + **************************************************/ +static INLINE void poly_decompress_du( + poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +{ +#if MLKEM_DU == 10 + poly_decompress_d10(r, a); +#elif MLKEM_DU == 11 + poly_decompress_d11(r, a); +#else +#error "Invalid value of MLKEM_DU" +#endif +} + +#define poly_compress_dv MLKEM_NAMESPACE_K(poly_compress_dv) +/************************************************* + * Name: poly_compress_dv + * + * Description: Compression (dv bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +static INLINE void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], + const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(object_whole(r))) +{ +#if MLKEM_DV == 4 + poly_compress_d4(r, a); +#elif MLKEM_DV == 5 + poly_compress_d5(r, a); +#else +#error "Invalid value of MLKEM_DV" +#endif +} + + +#define poly_decompress_dv MLKEM_NAMESPACE_K(poly_decompress_dv) +/************************************************* + * Name: poly_decompress_dv + * + * Description: De-serialization and subsequent decompression (dv bits) of a + * polynomial; approximate inverse of poly_compress + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). 
+ * + **************************************************/ +static INLINE void poly_decompress_dv( + poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(object_whole(r)) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +{ +#if MLKEM_DV == 4 + poly_decompress_d4(r, a); +#elif MLKEM_DV == 5 + poly_decompress_d5(r, a); +#else +#error "Invalid value of MLKEM_DV" +#endif +} + +#define polyvec_compress_du MLKEM_NAMESPACE_K(polyvec_compress_du) /************************************************* * Name: polyvec_compress_du * @@ -44,7 +169,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_decompress_du MLKEM_NAMESPACE(polyvec_decompress_du) +#define polyvec_decompress_du MLKEM_NAMESPACE_K(polyvec_decompress_du) /************************************************* * Name: polyvec_decompress_du * @@ -67,7 +192,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) ); -#define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes) +#define polyvec_tobytes MLKEM_NAMESPACE_K(polyvec_tobytes) /************************************************* * Name: polyvec_tobytes * @@ -88,7 +213,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_frombytes MLKEM_NAMESPACE(polyvec_frombytes) +#define polyvec_frombytes MLKEM_NAMESPACE_K(polyvec_frombytes) /************************************************* * Name: polyvec_frombytes * @@ -110,7 +235,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, UINT12_LIMIT))) ); -#define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt) +#define polyvec_ntt MLKEM_NAMESPACE_K(polyvec_ntt) /************************************************* * Name: polyvec_ntt * @@ -136,7 +261,7 @@ __contract__( array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, NTT_BOUND))) ); -#define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont) +#define polyvec_invntt_tomont 
MLKEM_NAMESPACE_K(polyvec_invntt_tomont) /************************************************* * Name: polyvec_invntt_tomont * @@ -162,7 +287,7 @@ __contract__( ); #define polyvec_basemul_acc_montgomery \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery) /************************************************* * Name: polyvec_basemul_acc_montgomery * @@ -186,7 +311,7 @@ __contract__( #define polyvec_basemul_acc_montgomery_cached \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached) /************************************************* * Name: polyvec_basemul_acc_montgomery_cached * @@ -194,7 +319,7 @@ __contract__( * using mulcache for second operand. * * Bounds: - * - a is assumed to be coefficient-wise < 4096 in absolute value. + * - Every coefficient of a is assumed to be in [0..4095] * - No bounds guarantees for the coefficients in the result. * * Arguments: - poly *r: pointer to output polynomial @@ -218,7 +343,7 @@ __contract__( assigns(memory_slice(r, sizeof(poly))) ); -#define polyvec_mulcache_compute MLKEM_NAMESPACE(polyvec_mulcache_compute) +#define polyvec_mulcache_compute MLKEM_NAMESPACE_K(polyvec_mulcache_compute) /************************************************************ * Name: polyvec_mulcache_compute * @@ -252,7 +377,7 @@ __contract__( assigns(object_whole(x)) ); -#define polyvec_reduce MLKEM_NAMESPACE(polyvec_reduce) +#define polyvec_reduce MLKEM_NAMESPACE_K(polyvec_reduce) /************************************************* * Name: polyvec_reduce * @@ -278,7 +403,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) ); -#define polyvec_add MLKEM_NAMESPACE(polyvec_add) +#define polyvec_add MLKEM_NAMESPACE_K(polyvec_add) /************************************************* * Name: polyvec_add * @@ -309,7 +434,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_tomont MLKEM_NAMESPACE(polyvec_tomont) +#define 
polyvec_tomont MLKEM_NAMESPACE_K(polyvec_tomont) /************************************************* * Name: polyvec_tomont * @@ -329,4 +454,142 @@ __contract__( array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, MLKEM_Q))) ); +#define poly_getnoise_eta1_4x MLKEM_NAMESPACE_K(poly_getnoise_eta1_4x) +/************************************************* + * Name: poly_getnoise_eta1_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial distribution + * with parameter MLKEM_ETA1. + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +/* Depending on MLKEM_K, the pointers passed to this function belong + to the same objects, so we cannot use memory_no_alias for r0-r3. + + NOTE: Somehow it is important to use memory_no_alias() first in the + conjunctions defining each case. 
+*/ +#if MLKEM_K == 2 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#elif MLKEM_K == 4 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case B: r0, r1, r2, r3 consecutive */ + (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#elif MLKEM_K == 3 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case C: r0, r1, r2 consecutive */ + (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && + r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) 
+ && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#endif /* MLKEM_K */ + +#if MLKEM_ETA1 == MLKEM_ETA2 +/* + * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 + * where MLKEM_ETA2 = MLKEM_ETA1 = 2. + * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. + */ +#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x +#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ + +#if MLKEM_K == 2 || MLKEM_K == 4 +#define poly_getnoise_eta2 MLKEM_NAMESPACE_K(poly_getnoise_eta2) +/************************************************* + * Name: poly_getnoise_eta2 + * + * Description: Sample a polynomial deterministically from a seed and a nonce, + * with output polynomial close to centered binomial distribution + * with parameter MLKEM_ETA2 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce: one-byte input nonce + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r)) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)) +); +#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ + +#if MLKEM_K == 2 +#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE_K(poly_getnoise_eta1122_4x) +/************************************************* + * Name: poly_getnoise_eta1122_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial + * distribution with parameter MLKEM_ETA1 and MLKEM_ETA2 + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + 
**************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +__contract__( + requires( /* r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) + ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)); +); +#endif /* MLKEM_K == 2 */ + #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/reduce.h index 1f502167e..b432a4201 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/reduce.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/reduce.h @@ -8,7 +8,7 @@ #include #include "cbmc.h" #include "common.h" -#include "debug/debug.h" +#include "debug.h" /* Static namespacing * This is to facilitate building multiple instances @@ -109,13 +109,13 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a) **************************************************/ static INLINE int16_t montgomery_reduce(int32_t a) __contract__( - requires(a > -(2 * 4096 * 32768)) - requires(a < (2 * 4096 * 32768)) + requires(a > -(2 * UINT12_LIMIT * 32768)) + requires(a < (2 * UINT12_LIMIT * 32768)) ensures(return_value > -2 * MLKEM_Q && return_value < 2 * MLKEM_Q) ) { int16_t res; - SCALAR_BOUND(a, 2 * UINT12_LIMIT * 32768, "montgomery_reduce input"); + debug_assert_abs_bound(&a, 1, 2 * UINT12_LIMIT * 32768); res = montgomery_reduce_generic(a); /* Bounds: @@ -124,7 +124,7 @@ 
__contract__( * <= UINT12_LIMIT + (MLKEM_Q + 1) / 2 * < 2 * MLKEM_Q */ - SCALAR_BOUND(res, 2 * MLKEM_Q, "montgomery_reduce output"); + debug_assert_abs_bound(&res, 1, 2 * MLKEM_Q); return res; } @@ -150,7 +150,7 @@ __contract__( ) { int16_t res; - SCALAR_BOUND(b, HALF_Q, "fqmul input"); + debug_assert_abs_bound(&b, 1, HALF_Q); res = montgomery_reduce((int32_t)a * (int32_t)b); /* Bounds: @@ -160,7 +160,7 @@ __contract__( * < MLKEM_Q */ - SCALAR_BOUND(res, MLKEM_Q, "fqmul output"); + debug_assert_abs_bound(&res, 1, MLKEM_Q); return res; } @@ -200,7 +200,10 @@ __contract__( * t is in -10 .. +10, so we need 32-bit math to * evaluate t * MLKEM_Q and the subsequent subtraction */ - return (int16_t)(a - t * MLKEM_Q); + int16_t res = (int16_t)(a - t * MLKEM_Q); + + debug_assert_abs_bound(&res, 1, HALF_Q); + return res; } #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/rej_uniform.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/rej_uniform.c index 918986e9b..cbbe4407f 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/rej_uniform.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/rej_uniform.c @@ -2,46 +2,24 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) -#include "rej_uniform.h" #include "arith_backend.h" +#include "debug.h" +#include "fips202.h" +#include "fips202x4.h" +#include "rej_uniform.h" +#include "symmetric.h" /* Static namespacing * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. 
*/ +#define rej_uniform MLKEM_NAMESPACE(rej_uniform) #define rej_uniform_scalar MLKEM_NAMESPACE(rej_uniform_scalar) /* End of static namespacing */ -/************************************************* - * Name: rej_uniform_scalar - * - * Description: Run rejection sampling on uniform random bytes to generate - * uniform random integers mod q - * - * Arguments: - int16_t *r: pointer to output buffer - * - unsigned int target: requested number of 16-bit integers - * (uniform mod q). - * Must be <= 4096. - * - unsigned int offset: number of 16-bit integers that have - * already been sampled. - * Must be <= target. - * - const uint8_t *buf: pointer to input buffer - * (assumed to be uniform random bytes) - * - unsigned int buflen: length of input buffer in bytes - * Must be <= 4096. - * Must be a multiple of 3. - * - * Note: Strictly speaking, only a few values of buflen near UINT_MAX need - * excluding. The limit of 4096 is somewhat arbitary but sufficient for all - * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. - * - * Returns the new offset of sampled 16-bit integers, at most target, - * and at least the initial offset. - * If the new offset is strictly less than len, all of the input buffers - * is guaranteed to have been consumed. If it is equal to len, no information - * is provided on how many bytes of the input buffer have been consumed. 
- **************************************************/ static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target, unsigned int offset, const uint8_t *buf, unsigned int buflen) @@ -58,6 +36,8 @@ __contract__( unsigned int ctr, pos; uint16_t val0, val1; + debug_assert_bound(r, offset, 0, MLKEM_Q); + ctr = offset; pos = 0; /* pos + 3 cannot overflow due to the assumption buflen <= 4096 */ @@ -79,28 +59,183 @@ __contract__( r[ctr++] = val1; } } + + debug_assert_bound(r, ctr, 0, MLKEM_Q); return ctr; } #if !defined(MLKEM_USE_NATIVE_REJ_UNIFORM) -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +/************************************************* + * Name: rej_uniform + * + * Description: Run rejection sampling on uniform random bytes to generate + * uniform random integers mod q + * + * Arguments: - int16_t *r: pointer to output buffer + * - unsigned int target: requested number of 16-bit integers + * (uniform mod q). + * Must be <= 4096. + * - unsigned int offset: number of 16-bit integers that have + * already been sampled. + * Must be <= target. + * - const uint8_t *buf: pointer to input buffer + * (assumed to be uniform random bytes) + * - unsigned int buflen: length of input buffer in bytes + * Must be <= 4096. + * Must be a multiple of 3. + * + * Note: Strictly speaking, only a few values of buflen near UINT_MAX need + * excluding. The limit of 4096 is somewhat arbitary but sufficient for all + * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. + * + * Returns the new offset of sampled 16-bit integers, at most target, + * and at least the initial offset. + * If the new offset is strictly less than len, all of the input buffers + * is guaranteed to have been consumed. If it is equal to len, no information + * is provided on how many bytes of the input buffer have been consumed. 
+ **************************************************/ + +/* + * NOTE: The signature differs from the Kyber reference implementation + * in that it adds the offset and always expects the base of the target + * buffer. This avoids shifting the buffer base in the caller, which appears + * tricky to reason about. + */ +static unsigned int rej_uniform(int16_t *r, unsigned int target, + unsigned int offset, const uint8_t *buf, + unsigned int buflen) +__contract__( + requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) + requires(memory_no_alias(r, sizeof(int16_t) * target)) + requires(memory_no_alias(buf, buflen)) + requires(offset > 0 ==> array_bound(r, 0, offset, 0, MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * target)) + ensures(offset <= return_value && return_value <= target) + ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, MLKEM_Q)) +) { return rej_uniform_scalar(r, target, offset, buf, buflen); } #else /* MLKEM_USE_NATIVE_REJ_UNIFORM */ - -MLKEM_NATIVE_INTERNAL_API -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +static unsigned int rej_uniform(int16_t *r, unsigned int target, + unsigned int offset, const uint8_t *buf, + unsigned int buflen) { int ret; /* Sample from large buffer with full lane as much as possible. 
*/ ret = rej_uniform_native(r + offset, target - offset, buf, buflen); if (ret != -1) - return offset + (unsigned)ret; + { + unsigned res = offset + (unsigned)ret; + debug_assert_bound(r, res, 0, MLKEM_Q); + return res; + } return rej_uniform_scalar(r, target, offset, buf, buflen); } #endif /* MLKEM_USE_NATIVE_REJ_UNIFORM */ + +#ifndef MLKEM_GEN_MATRIX_NBLOCKS +#define MLKEM_GEN_MATRIX_NBLOCKS \ + ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + XOF_RATE) / XOF_RATE) +#endif + +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform_x4(poly *vec, uint8_t *seed[4]) +{ + /* Temporary buffers for XOF output before rejection sampling */ + uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + + /* Tracks the number of coefficients we have already sampled */ + unsigned int ctr[KECCAK_WAY]; + xof_x4_ctx statex; + unsigned int buflen; + + shake128x4_inc_init(&statex); + + /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */ + xof_x4_absorb(&statex, seed[0], seed[1], seed[2], seed[3], + MLKEM_SYMBYTES + 2); + + /* + * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + * This should generate the matrix entries with high probability. + */ + xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, + &statex); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); + + /* + * So long as not all matrix entries have been generated, squeeze + * one more block a time until we're done. 
+ */ + buflen = XOF_RATE; + while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || + ctr[3] < MLKEM_N) + __loop__( + assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), + object_whole(buf1), object_whole(buf2), object_whole(buf3)) + invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) + invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) + invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, MLKEM_Q)) + invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, MLKEM_Q)) + invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, MLKEM_Q)) + invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, MLKEM_Q))) + { + xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); + } + + xof_x4_release(&statex); +} + +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) +{ + xof_ctx state; + uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + unsigned int ctr, buflen; + + shake128_inc_init(&state); + + xof_absorb(&state, seed, MLKEM_SYMBYTES + 2); + + /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + */ + /* This should generate the matrix entry with high probability. 
*/ + xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; + ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); + + /* Squeeze + sample one more block a time until we're done */ + buflen = XOF_RATE; + while (ctr < MLKEM_N) + __loop__( + assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) + invariant(ctr <= MLKEM_N) + invariant(array_bound(entry->coeffs, 0, ctr, 0, MLKEM_Q))) + { + xof_squeezeblocks(buf, 1, &state); + ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen); + } + + xof_release(&state); +} + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_rej_uniform MLKEM_NAMESPACE_K(empty_cu_rej_uniform) +int empty_cu_rej_uniform; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/rej_uniform.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/rej_uniform.h index 13db836bc..801287259 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/rej_uniform.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/rej_uniform.h @@ -9,54 +9,55 @@ #include #include "cbmc.h" #include "common.h" +#include "poly.h" -#define rej_uniform MLKEM_NAMESPACE(rej_uniform) +#define poly_rej_uniform_x4 MLKEM_NAMESPACE(poly_rej_uniform_x4) /************************************************* - * Name: rej_uniform + * Name: poly_rej_uniform_x4 * - * Description: Run rejection sampling on uniform random bytes to generate - * uniform random integers mod q + * Description: Generate four polynomials using rejection sampling + * on (pseudo-)uniformly random bytes sampled from a seed. * - * Arguments: - int16_t *r: pointer to output buffer - * - unsigned int target: requested number of 16-bit integers - * (uniform mod q). - * Must be <= 4096. - * - unsigned int offset: number of 16-bit integers that have - * already been sampled. - * Must be <= target. 
- * - const uint8_t *buf: pointer to input buffer - * (assumed to be uniform random bytes) - * - unsigned int buflen: length of input buffer in bytes - * Must be <= 4096. - * Must be a multiple of 3. + * Arguments: - poly *vec: Pointer to an array of 4 polynomials + * to be sampled. + * - uint8_t *seed[4]: Pointer to array of four pointers + * pointing to the seed buffers of size + * MLKEM_SYMBYTES + 2 each. * - * Note: Strictly speaking, only a few values of buflen near UINT_MAX need - * excluding. The limit of 4096 is somewhat arbitary but sufficient for all - * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. - * - * Returns the new offset of sampled 16-bit integers, at most target, - * and at least the initial offset. - * If the new offset is strictly less than len, all of the input buffers - * is guaranteed to have been consumed. If it is equal to len, no information - * is provided on how many bytes of the input buffer have been consumed. **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform_x4(poly *vec, uint8_t *seed[4]) +__contract__( + requires(memory_no_alias(vec, sizeof(poly) * 4)) + requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) + requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) + assigns(memory_slice(vec, sizeof(poly) * 4)) + ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, MLKEM_Q))); -/* - * NOTE: The signature differs from the Kyber reference implementation - * in that it adds the offset and always expects the base of the target - * buffer. 
This avoids shifting the buffer base in the caller, which appears - * tricky to reason about. - */ +#define poly_rej_uniform MLKEM_NAMESPACE(poly_rej_uniform) +/************************************************* + * Name: poly_rej_uniform + * + * Description: Generate polynomial using rejection sampling + * on (pseudo-)uniformly random bytes sampled from a seed. + * + * Arguments: - poly *vec: Pointer to polynomial to be sampled. + * - uint8_t *seed: Pointer to seed buffer of size + * MLKEM_SYMBYTES + 2 each. + * + **************************************************/ MLKEM_NATIVE_INTERNAL_API -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +void poly_rej_uniform(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) __contract__( - requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) - requires(memory_no_alias(r, sizeof(int16_t) * target)) - requires(memory_no_alias(buf, buflen)) - requires(offset > 0 ==> array_bound(r, 0, offset, 0, MLKEM_Q)) - assigns(memory_slice(r, sizeof(int16_t) * target)) - ensures(offset <= return_value && return_value <= target) - ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, MLKEM_Q)) -); -#endif + requires(memory_no_alias(entry, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) + assigns(memory_slice(entry, sizeof(poly))) + ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q))); + +#endif /* REJ_UNIFORM_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/symmetric.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/symmetric.h index 55ebbbd53..3563e5505 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/symmetric.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/symmetric.h @@ -10,6 +10,7 @@ #include "cbmc.h" #include "common.h" #include "fips202.h" +#include "fips202x4.h" /* Macros denoting FIPS-203 specific Hash functions */ diff --git 
a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/verify.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/verify.c index b7078fcc1..9f39dcd22 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/verify.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/verify.c @@ -4,7 +4,8 @@ */ #include "verify.h" -#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) +#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) && \ + !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) /* * Masking value used in constant-time functions from * verify.h to block the compiler's range analysis and @@ -12,9 +13,11 @@ */ volatile uint64_t ct_opt_blocker_u64 = 0; -#else /* MLKEM_USE_ASM_VALUE_BARRIER */ +#else /* MLKEM_USE_ASM_VALUE_BARRIER && \ + !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ -#define empty_cu_verify MLKEM_NAMESPACE(empty_cu_verify) +#define empty_cu_verify MLKEM_NAMESPACE_K(empty_cu_verify) int empty_cu_verify; -#endif /* MLKEM_USE_ASM_VALUE_BARRIER */ +#endif /* MLKEM_USE_ASM_VALUE_BARRIER && \ + !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/verify.h index 8c47155dc..f6ecf5eba 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/verify.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/verify.h @@ -268,7 +268,7 @@ __contract__( for (i = 0; i < len; i++) __loop__( - invariant(i >= 0 && i <= len) + invariant(i <= len) invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k]))))) { r |= a[i] ^ b[i]; @@ -314,4 +314,4 @@ __contract__( } } -#endif +#endif /* VERIFY_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/arith_native_x86_64.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/arith_native_x86_64.h index ce13e7911..25e00a930 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/arith_native_x86_64.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/arith_native_x86_64.h @@ -42,7 +42,7 @@ void 
basemul_avx2(__m256i *r, const __m256i *a, const __m256i *b, const __m256i *qdata); #define polyvec_basemul_acc_montgomery_cached_avx2 \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_avx2) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_avx2) void polyvec_basemul_acc_montgomery_cached_avx2( poly *r, const polyvec *a, const polyvec *b, const polyvec_mulcache *b_cache); diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/default_impl.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/default_impl.h index 66de8c85f..029111c17 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/default_impl.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/default_impl.h @@ -28,9 +28,6 @@ #define MLKEM_USE_NATIVE_POLY_TOBYTES #define MLKEM_USE_NATIVE_POLY_FROMBYTES -#define INVNTT_BOUND_NATIVE (8 * MLKEM_Q) -#define NTT_BOUND_NATIVE (8 * MLKEM_Q) - static INLINE void poly_permute_bitrev_to_custom(poly *data) { nttunpack_avx2((__m256i *)(data->coeffs), qdata.vec); diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/zetas.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/zetas.c index 1a26e0dd5..4ef887c62 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/zetas.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/zetas.c @@ -8,6 +8,8 @@ * Do not modify it directly. 
*/ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) #include "ntt.h" /* @@ -28,3 +30,10 @@ ALIGN const int16_t zetas[128] = { -1187, -1659, -1185, -1530, -1278, 794, -1510, -854, -870, 478, -108, -308, 996, 991, 958, -1460, 1522, 1628, }; + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_zetas MLKEM_NAMESPACE_K(empty_cu_zetas) +int empty_cu_zetas; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/arith_native_aarch64.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/arith_native_aarch64.h index 6a5ee8a7d..fc4e7dd38 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/arith_native_aarch64.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/arith_native_aarch64.h @@ -75,14 +75,14 @@ void poly_tobytes_asm_clean(uint8_t *r, const int16_t *a); void poly_tobytes_asm_opt(uint8_t *r, const int16_t *a); #define polyvec_basemul_acc_montgomery_cached_asm_clean \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean) void polyvec_basemul_acc_montgomery_cached_asm_clean(int16_t *r, const int16_t *a, const int16_t *b, const int16_t *b_cache); #define polyvec_basemul_acc_montgomery_cached_asm_opt \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt) void polyvec_basemul_acc_montgomery_cached_asm_opt(int16_t *r, const int16_t *a, const int16_t *b, const int16_t *b_cache); diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/clean_impl.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/clean_impl.h index b0ff3d597..548b1eebb 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/clean_impl.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/clean_impl.h @@ -31,7 +31,6 @@ static INLINE 
void ntt_native(poly *data) aarch64_ntt_zetas_layer56); } -#define INVNTT_BOUND_NATIVE (8 * MLKEM_Q) static INLINE void intt_native(poly *data) { intt_asm_clean(data->coeffs, aarch64_invntt_zetas_layer01234, diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/intt_clean.S b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/intt_clean.S index 623a82ae9..b243a569d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/intt_clean.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/intt_clean.S @@ -149,7 +149,7 @@ inp .req x3 count .req x4 - xtmp .req x5 + wtmp .req w5 data0 .req v8 data1 .req v9 @@ -193,40 +193,20 @@ t3 .req v28 ninv .req v29 - q_ninv .req q29 ninv_tw .req v30 - q_ninv_tw .req q30 - -/* Literal pool */ -.macro dup8h c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c -.endm - -.p2align 4 -c_consts: .short 3329 - .short 20159 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 -c_ninv: dup8h 512 -c_ninv_tw: dup8h 5040 MLKEM_ASM_NAMESPACE(intt_asm_clean): push_stack - ldr q_consts, c_consts - ldr q_ninv, c_ninv - ldr q_ninv_tw, c_ninv_tw + // Setup constants + mov wtmp, #3329 + mov consts.h[0], wtmp + mov wtmp, #20159 + mov consts.h[1], wtmp + mov wtmp, #512 + dup ninv.8h, wtmp + mov wtmp, #5040 + dup ninv_tw.8h, wtmp mov inp, in mov count, #8 @@ -361,4 +341,49 @@ layer012_start: pop_stack ret +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq in + .unreq r01234_ptr + .unreq r56_ptr + .unreq inp + .unreq count + .unreq wtmp + .unreq data0 + .unreq data1 + .unreq data2 + .unreq data3 + .unreq data4 + .unreq data5 + .unreq data6 + .unreq data7 + .unreq q_data0 + .unreq q_data1 + .unreq q_data2 + .unreq q_data3 + .unreq q_data4 + .unreq q_data5 + .unreq q_data6 + .unreq q_data7 + .unreq root0 + .unreq root1 + .unreq root2 + .unreq root0_tw + .unreq root1_tw + .unreq root2_tw + .unreq consts + .unreq q_consts + 
.unreq q_root0 + .unreq q_root1 + .unreq q_root2 + .unreq q_root0_tw + .unreq q_root1_tw + .unreq q_root2_tw + .unreq tmp + .unreq t0 + .unreq t1 + .unreq t2 + .unreq t3 + .unreq ninv + .unreq ninv_tw + #endif /* MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/intt_opt.S b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/intt_opt.S index e332efef8..c94746e17 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/intt_opt.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/intt_opt.S @@ -149,7 +149,7 @@ inp .req x3 count .req x4 - xtmp .req x5 + wtmp .req w5 data0 .req v8 data1 .req v9 @@ -193,40 +193,20 @@ t3 .req v28 ninv .req v29 - q_ninv .req q29 ninv_tw .req v30 - q_ninv_tw .req q30 - -/* Literal pool */ -.macro dup8h c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c -.endm - -.p2align 4 -c_consts: .short 3329 - .short 20159 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 -c_ninv: dup8h 512 -c_ninv_tw: dup8h 5040 MLKEM_ASM_NAMESPACE(intt_asm_opt): push_stack - ldr q_consts, c_consts - ldr q_ninv, c_ninv - ldr q_ninv_tw, c_ninv_tw + // Setup constants + mov wtmp, #3329 + mov consts.h[0], wtmp + mov wtmp, #20159 + mov consts.h[1], wtmp + mov wtmp, #512 + dup ninv.8h, wtmp + mov wtmp, #5040 + dup ninv_tw.8h, wtmp mov inp, in mov count, #8 @@ -1017,4 +997,49 @@ layer012_start: pop_stack ret +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq in + .unreq r01234_ptr + .unreq r56_ptr + .unreq inp + .unreq count + .unreq wtmp + .unreq data0 + .unreq data1 + .unreq data2 + .unreq data3 + .unreq data4 + .unreq data5 + .unreq data6 + .unreq data7 + .unreq q_data0 + .unreq q_data1 + .unreq q_data2 + .unreq q_data3 + .unreq q_data4 + .unreq q_data5 + .unreq q_data6 + .unreq q_data7 + .unreq root0 + .unreq root1 + .unreq root2 + .unreq root0_tw + .unreq root1_tw + .unreq root2_tw 
+ .unreq consts + .unreq q_consts + .unreq q_root0 + .unreq q_root1 + .unreq q_root2 + .unreq q_root0_tw + .unreq q_root1_tw + .unreq q_root2_tw + .unreq tmp + .unreq t0 + .unreq t1 + .unreq t2 + .unreq t3 + .unreq ninv + .unreq ninv_tw + #endif /* MLKEM_NATIVE_ARITH_BACKEND_AARCH64_OPT */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/ntt_clean.S b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/ntt_clean.S index 877a5f689..cd63cc4d6 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/ntt_clean.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/ntt_clean.S @@ -121,7 +121,7 @@ inp .req x3 count .req x4 - xtmp .req x5 + wtmp .req w5 data0 .req v8 data1 .req v9 @@ -156,7 +156,6 @@ q_root2_tw .req q6 consts .req v7 - q_consts .req q7 tmp .req v24 t0 .req v25 @@ -167,21 +166,13 @@ .text .global MLKEM_ASM_NAMESPACE(ntt_asm_clean) -/* Literal pool */ -.p2align 4 -c_consts: - .short 3329 - .short 20159 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 - MLKEM_ASM_NAMESPACE(ntt_asm_clean): push_stack - ldr q_consts, c_consts + + mov wtmp, #3329 + mov consts.h[0], wtmp + mov wtmp, #20159 + mov consts.h[1], wtmp mov inp, in mov count, #4 @@ -280,4 +271,46 @@ layer3456_start: pop_stack ret +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq in + .unreq r01234_ptr + .unreq r56_ptr + .unreq inp + .unreq count + .unreq wtmp + .unreq data0 + .unreq data1 + .unreq data2 + .unreq data3 + .unreq data4 + .unreq data5 + .unreq data6 + .unreq data7 + .unreq q_data0 + .unreq q_data1 + .unreq q_data2 + .unreq q_data3 + .unreq q_data4 + .unreq q_data5 + .unreq q_data6 + .unreq q_data7 + .unreq root0 + .unreq root1 + .unreq root2 + .unreq root0_tw + .unreq root1_tw + .unreq root2_tw + .unreq q_root0 + .unreq q_root1 + .unreq q_root2 + .unreq q_root0_tw + .unreq q_root1_tw + .unreq q_root2_tw + .unreq consts + .unreq tmp + .unreq t0 + .unreq t1 + .unreq t2 + .unreq t3 + #endif /* 
MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/ntt_opt.S b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/ntt_opt.S index 15103a595..8705615b7 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/ntt_opt.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/ntt_opt.S @@ -121,7 +121,7 @@ inp .req x3 count .req x4 - xtmp .req x5 + wtmp .req w5 data0 .req v8 data1 .req v9 @@ -167,21 +167,13 @@ .text .global MLKEM_ASM_NAMESPACE(ntt_asm_opt) -/* Literal pool */ -.p2align 4 -c_consts: - .short 3329 - .short 20159 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 - MLKEM_ASM_NAMESPACE(ntt_asm_opt): push_stack - ldr q_consts, c_consts + + mov wtmp, #3329 + mov consts.h[0], wtmp + mov wtmp, #20159 + mov consts.h[1], wtmp mov inp, in mov count, #4 @@ -916,4 +908,47 @@ MLKEM_ASM_NAMESPACE(ntt_asm_opt): pop_stack ret +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq in + .unreq r01234_ptr + .unreq r56_ptr + .unreq inp + .unreq count + .unreq wtmp + .unreq data0 + .unreq data1 + .unreq data2 + .unreq data3 + .unreq data4 + .unreq data5 + .unreq data6 + .unreq data7 + .unreq q_data0 + .unreq q_data1 + .unreq q_data2 + .unreq q_data3 + .unreq q_data4 + .unreq q_data5 + .unreq q_data6 + .unreq q_data7 + .unreq root0 + .unreq root1 + .unreq root2 + .unreq root0_tw + .unreq root1_tw + .unreq root2_tw + .unreq q_root0 + .unreq q_root1 + .unreq q_root2 + .unreq q_root0_tw + .unreq q_root1_tw + .unreq q_root2_tw + .unreq consts + .unreq q_consts + .unreq tmp + .unreq t0 + .unreq t1 + .unreq t2 + .unreq t3 + #endif /* MLKEM_NATIVE_ARITH_BACKEND_AARCH64_OPT */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/opt_impl.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/opt_impl.h index b22674026..ec1bf6587 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/opt_impl.h +++ 
b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/opt_impl.h @@ -25,14 +25,12 @@ #define MLKEM_USE_NATIVE_POLY_TOBYTES #define MLKEM_USE_NATIVE_REJ_UNIFORM -#define NTT_BOUND_NATIVE (6 * MLKEM_Q) static INLINE void ntt_native(poly *data) { ntt_asm_opt(data->coeffs, aarch64_ntt_zetas_layer01234, aarch64_ntt_zetas_layer56); } -#define INVNTT_BOUND_NATIVE (8 * MLKEM_Q) static INLINE void intt_native(poly *data) { intt_asm_opt(data->coeffs, aarch64_invntt_zetas_layer01234, diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/poly_clean.S b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/poly_clean.S index f70a40221..809f9667e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/poly_clean.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/poly_clean.S @@ -6,33 +6,6 @@ #include "common.h" #if defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN) -/* We use a single literal pool for all functions in this file. - * This is OK even when the file gets expanded through SLOTHY, - * since PC-relative offets are up to 1MB in AArch64. - * - * The use of dup8h to build constant vectors in memory - * is slightly wasteful and could be avoided with a GPR-load - * followed by Neon `dup`, but we're ultimately only talking - * about 64 bytes, so it seems OK. 
- */ - -.macro dup8h c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c -.endm - -.p2align 4 -c_modulus: dup8h 3329 // ML-KEM modulus -c_modulus_twisted: dup8h 20159 // Barrett twist of 1 wrt 2^27 -c_mont_constant: dup8h -1044 // 2^16 % 3329 -c_barrett_twist: dup8h -10276 // Barrett twist of -1044 (wrt 2^16) - /* * Some modular arithmetic macros */ @@ -70,6 +43,7 @@ c_barrett_twist: dup8h -10276 // Barrett twist of -1044 (wrt 2^16) ptr .req x0 count .req x1 + wtmp .req w2 data .req v0 q_data .req q0 @@ -77,14 +51,15 @@ c_barrett_twist: dup8h -10276 // Barrett twist of -1044 (wrt 2^16) tmp .req v1 mask .req v2 modulus .req v3 - q_modulus .req q3 modulus_twisted .req v4 - q_modulus_twisted .req q4 MLKEM_ASM_NAMESPACE(poly_reduce_asm_clean): - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 // ML-KEM modulus + dup modulus.8h, wtmp + + mov wtmp, #20159 // Barrett twist of 1 wrt 2^27 + dup modulus_twisted.8h, wtmp mov count, #8 loop_start: @@ -115,6 +90,7 @@ loop_start: .unreq ptr .unreq count + .unreq wtmp .unreq data .unreq q_data @@ -122,9 +98,7 @@ loop_start: .unreq tmp .unreq mask .unreq modulus - .unreq q_modulus .unreq modulus_twisted - .unreq q_modulus_twisted /******************************************** * poly_mulcache_compute() * @@ -137,6 +111,7 @@ loop_start: zeta_ptr .req x2 zeta_twisted_ptr .req x3 count .req x4 + wtmp .req w5 data_odd .req v0 zeta .req v1 @@ -152,13 +127,14 @@ loop_start: q_dst .req q5 modulus .req v6 - q_modulus .req q6 modulus_twisted .req v7 - q_modulus_twisted .req q7 MLKEM_ASM_NAMESPACE(poly_mulcache_compute_asm_clean): - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #20159 + dup modulus_twisted.8h, wtmp mov count, #16 mulcache_compute_loop_start: @@ -185,6 +161,7 @@ mulcache_compute_loop_start: .unreq zeta_ptr .unreq zeta_twisted_ptr .unreq count + .unreq wtmp .unreq 
data_odd .unreq zeta @@ -200,9 +177,7 @@ mulcache_compute_loop_start: .unreq q_dst .unreq modulus - .unreq q_modulus .unreq modulus_twisted - .unreq q_modulus_twisted /******************************************** * poly_tobytes() * @@ -261,6 +236,7 @@ poly_tobytes_asm_clean_asm_loop_start: src .req x0 count .req x1 + wtmp .req w2 data .req v0 q_data .req q0 @@ -268,22 +244,25 @@ poly_tobytes_asm_clean_asm_loop_start: q_res .req q1 factor .req v2 - q_factor .req q2 factor_t .req v3 - q_factor_t .req q3 modulus .req v4 - q_modulus .req q4 modulus_twisted .req v5 - q_modulus_twisted .req q5 tmp0 .req v6 MLKEM_ASM_NAMESPACE(poly_tomont_asm_clean): - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted - ldr q_factor, c_mont_constant - ldr q_factor_t, c_barrett_twist + mov wtmp, #3329 // ML-KEM modulus + dup modulus.8h, wtmp + + mov wtmp, #20159 // Barrett twist of 1 wrt 2^27 + dup modulus_twisted.8h, wtmp + + mov wtmp, #-1044 // 2^16 % 3329 + dup factor.8h, wtmp + + mov wtmp, #-10276 // Barrett twist of -1044 (wrt 2^16) + dup factor_t.8h, wtmp mov count, #8 poly_tomont_asm_loop: @@ -311,6 +290,7 @@ poly_tomont_asm_loop: .unreq src .unreq count + .unreq wtmp .unreq data .unreq q_data @@ -318,13 +298,9 @@ poly_tomont_asm_loop: .unreq q_res .unreq factor - .unreq q_factor .unreq factor_t - .unreq q_factor_t .unreq modulus - .unreq q_modulus .unreq modulus_twisted - .unreq q_modulus_twisted .unreq tmp0 diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/poly_opt.S b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/poly_opt.S index e58ee77c4..815a9dd1a 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/poly_opt.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/poly_opt.S @@ -6,33 +6,6 @@ #include "common.h" #if defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_OPT) -/* We use a single literal pool for all functions in this file. 
- * This is OK even when the file gets expanded through SLOTHY, - * since PC-relative offets are up to 1MB in AArch64. - * - * The use of dup8h to build constant vectors in memory - * is slightly wasteful and could be avoided with a GPR-load - * followed by Neon `dup`, but we're ultimately only talking - * about 64 bytes, so it seems OK. - */ - -.macro dup8h c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c -.endm - -.p2align 4 -c_modulus: dup8h 3329 // ML-KEM modulus -c_modulus_twisted: dup8h 20159 // Barrett twist of 1 wrt 2^27 -c_mont_constant: dup8h -1044 // 2^16 % 3329 -c_barrett_twist: dup8h -10276 // Barrett twist of -1044 (wrt 2^16) - /* * Some modular arithmetic macros */ @@ -70,6 +43,7 @@ c_barrett_twist: dup8h -10276 // Barrett twist of -1044 (wrt 2^16) ptr .req x0 count .req x1 + wtmp .req w2 data .req v0 q_data .req q0 @@ -77,14 +51,15 @@ c_barrett_twist: dup8h -10276 // Barrett twist of -1044 (wrt 2^16) tmp .req v1 mask .req v2 modulus .req v3 - q_modulus .req q3 modulus_twisted .req v4 - q_modulus_twisted .req q4 MLKEM_ASM_NAMESPACE(poly_reduce_asm_opt): - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 // ML-KEM modulus + dup modulus.8h, wtmp + + mov wtmp, #20159 // Barrett twist of 1 wrt 2^27 + dup modulus_twisted.8h, wtmp mov count, #8 // Instructions: 15 @@ -278,6 +253,7 @@ MLKEM_ASM_NAMESPACE(poly_reduce_asm_opt): .unreq ptr .unreq count + .unreq wtmp .unreq data .unreq q_data @@ -285,9 +261,7 @@ MLKEM_ASM_NAMESPACE(poly_reduce_asm_opt): .unreq tmp .unreq mask .unreq modulus - .unreq q_modulus .unreq modulus_twisted - .unreq q_modulus_twisted /******************************************** * poly_mulcache_compute() * @@ -300,6 +274,7 @@ MLKEM_ASM_NAMESPACE(poly_reduce_asm_opt): zeta_ptr .req x2 zeta_twisted_ptr .req x3 count .req x4 + wtmp .req w5 data_odd .req v0 zeta .req v1 @@ -315,13 +290,14 @@ MLKEM_ASM_NAMESPACE(poly_reduce_asm_opt): q_dst .req q5 modulus 
.req v6 - q_modulus .req q6 modulus_twisted .req v7 - q_modulus_twisted .req q7 MLKEM_ASM_NAMESPACE(poly_mulcache_compute_asm_opt): - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #20159 + dup modulus_twisted.8h, wtmp mov count, #16 // Instructions: 7 @@ -426,6 +402,7 @@ MLKEM_ASM_NAMESPACE(poly_mulcache_compute_asm_opt): .unreq zeta_ptr .unreq zeta_twisted_ptr .unreq count + .unreq wtmp .unreq data_odd .unreq zeta @@ -441,9 +418,7 @@ MLKEM_ASM_NAMESPACE(poly_mulcache_compute_asm_opt): .unreq q_dst .unreq modulus - .unreq q_modulus .unreq modulus_twisted - .unreq q_modulus_twisted /******************************************** * poly_tobytes() * @@ -502,6 +477,7 @@ poly_tobytes_asm_opt_asm_loop_start: src .req x0 count .req x1 + wtmp .req w2 data .req v0 q_data .req q0 @@ -509,22 +485,25 @@ poly_tobytes_asm_opt_asm_loop_start: q_res .req q1 factor .req v2 - q_factor .req q2 factor_t .req v3 - q_factor_t .req q3 modulus .req v4 - q_modulus .req q4 modulus_twisted .req v5 - q_modulus_twisted .req q5 tmp0 .req v6 MLKEM_ASM_NAMESPACE(poly_tomont_asm_opt): - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted - ldr q_factor, c_mont_constant - ldr q_factor_t, c_barrett_twist + mov wtmp, #3329 // ML-KEM modulus + dup modulus.8h, wtmp + + mov wtmp, #20159 // Barrett twist of 1 wrt 2^27 + dup modulus_twisted.8h, wtmp + + mov wtmp, #-1044 // 2^16 % 3329 + dup factor.8h, wtmp + + mov wtmp, #-10276 // Barrett twist of -1044 (wrt 2^16) + dup factor_t.8h, wtmp mov count, #8 // Instructions: 5 @@ -670,6 +649,7 @@ MLKEM_ASM_NAMESPACE(poly_tomont_asm_opt): .unreq src .unreq count + .unreq wtmp .unreq data .unreq q_data @@ -677,13 +657,9 @@ MLKEM_ASM_NAMESPACE(poly_tomont_asm_opt): .unreq q_res .unreq factor - .unreq q_factor .unreq factor_t - .unreq q_factor_t .unreq modulus - .unreq q_modulus .unreq modulus_twisted - .unreq q_modulus_twisted .unreq tmp0 diff --git 
a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/polyvec_clean.S b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/polyvec_clean.S index 99fb05de5..c91675b44 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/polyvec_clean.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/polyvec_clean.S @@ -12,31 +12,6 @@ #include "common.h" #if defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN) -/* We use a single literal pool for all functions in this file. - * This is OK even when the file gets expanded through SLOTHY, - * since PC-relative offets are up to 1MB in AArch64. - * - * The use of dup8h to build constant vectors in memory - * is slightly wasteful and could be avoided with a GPR-load - * followed by Neon `dup`, but we're ultimately only talking - * about 64 bytes, so it seems OK. - */ - -.macro dup8h c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c -.endm - -.p2align 4 -c_modulus: dup8h 3329 // ML-KEM modulus -c_modulus_twisted: dup8h 3327 - // Input: // - Vectors al, ah of 32-bit entries // Output: @@ -136,11 +111,10 @@ c_modulus_twisted: dup8h 3327 b3_ptr .req x11 b3_cache_ptr .req x12 count .req x13 + wtmp .req w14 modulus .req v0 - q_modulus .req q0 modulus_twisted .req v2 - q_modulus_twisted .req q2 aa0 .req v3 aa1 .req v4 @@ -164,12 +138,16 @@ c_modulus_twisted: dup8h 3327 t0 .req v28 #if MLKEM_K == 2 -.global MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean) +.global MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean) -MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean): +MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean): push_stack - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #3327 + dup modulus_twisted.8h, wtmp // Computed bases of vector entries @@ -198,12 +176,15 @@ k2_loop_start: #endif /* 
MLKEM_K == 2 */ #if MLKEM_K == 3 -.global MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean) +.global MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean) -MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean): +MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean): push_stack - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #3327 + dup modulus_twisted.8h, wtmp // Computed bases of vector entries @@ -237,12 +218,15 @@ k3_loop_start: #endif /* MLKEM_K == 3 */ #if MLKEM_K == 4 -.global MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean) +.global MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean) -MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean): +MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean): push_stack - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #3327 + dup modulus_twisted.8h, wtmp // Computed bases of vector entries @@ -285,4 +269,39 @@ k4_loop_start: ret #endif /* MLKEM_K == 4 */ +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq out + .unreq a0_ptr + .unreq b0_ptr + .unreq b0_cache_ptr + .unreq a1_ptr + .unreq b1_ptr + .unreq b1_cache_ptr + .unreq a2_ptr + .unreq b2_ptr + .unreq b2_cache_ptr + .unreq a3_ptr + .unreq b3_ptr + .unreq b3_cache_ptr + .unreq count + .unreq modulus + .unreq modulus_twisted + .unreq aa0 + .unreq aa1 + .unreq bb0 + .unreq bb1 + .unreq bb1t + .unreq res0l + .unreq res1l + .unreq res0h + .unreq wtmp + .unreq res1h + .unreq tmp0 + .unreq tmp1 + .unreq q_tmp0 + .unreq q_tmp1 + .unreq out0 + .unreq out1 + .unreq t0 + #endif /* MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/polyvec_opt.S 
b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/polyvec_opt.S index 16ed77c3f..8300b682c 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/polyvec_opt.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/polyvec_opt.S @@ -12,31 +12,6 @@ #include "common.h" #if defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_OPT) -/* We use a single literal pool for all functions in this file. - * This is OK even when the file gets expanded through SLOTHY, - * since PC-relative offets are up to 1MB in AArch64. - * - * The use of dup8h to build constant vectors in memory - * is slightly wasteful and could be avoided with a GPR-load - * followed by Neon `dup`, but we're ultimately only talking - * about 64 bytes, so it seems OK. - */ - -.macro dup8h c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c -.endm - -.p2align 4 -c_modulus: dup8h 3329 // ML-KEM modulus -c_modulus_twisted: dup8h 3327 - // Input: // - Vectors al, ah of 32-bit entries // Output: @@ -136,11 +111,10 @@ c_modulus_twisted: dup8h 3327 b3_ptr .req x11 b3_cache_ptr .req x12 count .req x13 + wtmp .req w14 modulus .req v0 - q_modulus .req q0 modulus_twisted .req v2 - q_modulus_twisted .req q2 aa0 .req v3 aa1 .req v4 @@ -164,12 +138,16 @@ c_modulus_twisted: dup8h 3327 t0 .req v28 #if MLKEM_K == 2 -.global MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt) +.global MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt) -MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt): +MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt): push_stack - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #3327 + dup modulus_twisted.8h, wtmp // Computed bases of vector entries @@ -530,12 +508,15 @@ MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt): #endif /* MLKEM_K == 2 */ #if MLKEM_K == 3 -.global 
MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt) +.global MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt) -MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt): +MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt): push_stack - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #3327 + dup modulus_twisted.8h, wtmp // Computed bases of vector entries @@ -1001,12 +982,15 @@ MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt): #endif /* MLKEM_K == 3 */ #if MLKEM_K == 4 -.global MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt) +.global MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt) -MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt): +MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt): push_stack - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #3327 + dup modulus_twisted.8h, wtmp // Computed bases of vector entries @@ -1581,4 +1565,39 @@ MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt): ret #endif /* MLKEM_K == 4 */ +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq out + .unreq a0_ptr + .unreq b0_ptr + .unreq b0_cache_ptr + .unreq a1_ptr + .unreq b1_ptr + .unreq b1_cache_ptr + .unreq a2_ptr + .unreq b2_ptr + .unreq b2_cache_ptr + .unreq a3_ptr + .unreq b3_ptr + .unreq b3_cache_ptr + .unreq count + .unreq modulus + .unreq modulus_twisted + .unreq wtmp + .unreq aa0 + .unreq aa1 + .unreq bb0 + .unreq bb1 + .unreq bb1t + .unreq res0l + .unreq res1l + .unreq res0h + .unreq res1h + .unreq tmp0 + .unreq tmp1 + .unreq q_tmp0 + .unreq q_tmp1 + .unreq out0 + .unreq out1 + .unreq t0 + #endif /* MLKEM_NATIVE_ARITH_BACKEND_AARCH64_OPT */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/rej_uniform_asm_clean.S 
b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/rej_uniform_asm_clean.S index 722dc0f49..5151a05d0 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/rej_uniform_asm_clean.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/aarch64/src/rej_uniform_asm_clean.S @@ -45,6 +45,7 @@ len .req w4 /* Temporary output on the stack */ + xtmp .req x7 output_tmp .req x7 output_tmp_base .req x8 @@ -110,20 +111,26 @@ mlkem_q .req v30 bits .req v31 - bits_q .req q31 .text -/* Literal pool */ -.p2align 4 -c_bit_table: - .short 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 - .align 4 .global MLKEM_ASM_NAMESPACE(rej_uniform_asm_clean) MLKEM_ASM_NAMESPACE(rej_uniform_asm_clean): push_stack - ldr bits_q, c_bit_table + // Load 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + movz xtmp, 0x1 + movk xtmp, 0x2, lsl 16 + movk xtmp, 0x4, lsl 32 + movk xtmp, 0x8, lsl 48 + mov bits.d[0], xtmp + + movz xtmp, 0x10 + movk xtmp, 0x20, lsl 16 + movk xtmp, 0x40, lsl 32 + movk xtmp, 0x80, lsl 48 + mov bits.d[1], xtmp + movz tmp, #MLKEM_Q dup mlkem_q.8h, tmp @@ -337,5 +344,63 @@ return: pop_stack ret + +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq output + .unreq buf + .unreq buflen + .unreq table_idx + .unreq len + .unreq output_tmp + .unreq output_tmp_base + .unreq count + .unreq buf_consumed + .unreq tmp + .unreq xtmp + .unreq final_copy_count + .unreq rec_idx_0 + .unreq rec_idx_1 + .unreq rec_idx_2 + .unreq rec_idx_3 + .unreq ctr0 + .unreq ctr1 + .unreq ctr2 + .unreq ctr3 + .unreq ctr01 + .unreq ctr23 + .unreq buf0 + .unreq buf1 + .unreq buf2 + .unreq tmp0 + .unreq tmp1 + .unreq tmp2 + .unreq tmp3 + .unreq sign0 + .unreq sign1 + .unreq sign2 + .unreq sign3 + .unreq val0 + .unreq val0q + .unreq val1 + .unreq val1q + .unreq val2 + .unreq val2q + .unreq val3 + .unreq val3q + .unreq t0 + .unreq t1 + .unreq t2 + .unreq t3 + .unreq table0 + .unreq table0q + .unreq table1 + .unreq table1q + .unreq table2 + .unreq table2q + .unreq table3 + .unreq 
table3q + .unreq mlkem_q + .unreq bits + #endif /* defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN) || defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_OPT) */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/arith_backend.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/arith_backend.h index 09e30f207..0543b1bd1 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/arith_backend.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/arith_backend.h @@ -16,7 +16,9 @@ * * Keep this _after_ the inclusion of the backend; otherwise, * the sanity checks won't have an effect. */ +#if defined(MLKEM_NATIVE_CHECK_APIS) #include "api.h" #endif +#endif #endif /* MLKEM_NATIVE_ARITH_IMPL_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbd.c index 433bdc954..1e6b7c5d1 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbd.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbd.c @@ -2,8 +2,11 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ -#include "cbd.h" +#include "common.h" +#ifndef MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED + #include +#include "cbd.h" /* Static namespacing * This is to facilitate building multiple instances @@ -11,8 +14,6 @@ * within a single compilation unit. */ #define load32_littleendian MLKEM_NAMESPACE(load32_littleendian) #define load24_littleendian MLKEM_NAMESPACE(load24_littleendian) -#define cbd2 MLKEM_NAMESPACE(cbd2) -#define cbd3 MLKEM_NAMESPACE(cbd3) /* End of static namespacing */ /************************************************* @@ -35,44 +36,13 @@ static uint32_t load32_littleendian(const uint8_t x[4]) return r; } -#if MLKEM_ETA1 == 3 -/************************************************* - * Name: load24_littleendian - * - * Description: load 3 bytes into a 32-bit integer - * in little-endian order. 
- * This function is only needed for ML-KEM-512 - * - * Arguments: - const uint8_t *x: pointer to input byte array - * - * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) - **************************************************/ -static uint32_t load24_littleendian(const uint8_t x[3]) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - return r; -} -#endif /* MLKEM_ETA1 == 3 */ - -/************************************************* - * Name: cbd2 - * - * Description: Given an array of uniformly random bytes, compute - * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter eta=2 - * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *buf: pointer to input byte array - **************************************************/ -static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) +MLKEM_NATIVE_INTERNAL_API +void poly_cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) { unsigned i; for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_abs_bound(r->coeffs, 0, 8 * i, 3))) { unsigned j; @@ -82,7 +52,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i <= MLKEM_N / 8 && j <= 8) invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 3))) { const int16_t a = (d >> (4 * j + 0)) & 0x3; @@ -92,24 +62,34 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) } } -#if MLKEM_ETA1 == 3 +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == 3 /************************************************* - * Name: cbd3 + * Name: load24_littleendian * - * Description: Given an array of uniformly random bytes, compute - * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter 
eta=3. + * Description: load 3 bytes into a 32-bit integer + * in little-endian order. * This function is only needed for ML-KEM-512 * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *buf: pointer to input byte array + * Arguments: - const uint8_t *x: pointer to input byte array + * + * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) **************************************************/ -static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) +static uint32_t load24_littleendian(const uint8_t x[3]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + return r; +} + +MLKEM_NATIVE_INTERNAL_API +void poly_cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) { unsigned i; for (i = 0; i < MLKEM_N / 4; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(i <= MLKEM_N / 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i, 4))) { unsigned j; @@ -120,7 +100,7 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) for (j = 0; j < 4; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4) + invariant(i <= MLKEM_N / 4 && j <= 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 4))) { const int16_t a = (d >> (6 * j + 0)) & 0x7; @@ -129,28 +109,12 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) } } } -#endif /* MLKEM_ETA1 == 3 */ +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == \ + 3 */ -MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) -{ -#if MLKEM_ETA1 == 2 - cbd2(r, buf); -#elif MLKEM_ETA1 == 3 - cbd3(r, buf); -#else -#error "This implementation requires eta1 in {2,3}" -#endif -} +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ -#if MLKEM_K == 2 || MLKEM_K == 4 -MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) -{ -#if MLKEM_ETA2 == 2 - cbd2(r, buf); -#else -#error "This 
implementation requires eta2 = 2" -#endif -} -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ +#define empty_cu_cbd MLKEM_NAMESPACE_K(empty_cu_cbd) +int empty_cu_cbd; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbd.h index 15db89570..54c1f5b90 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbd.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbd.h @@ -9,46 +9,35 @@ #include "common.h" #include "poly.h" -#define poly_cbd_eta1 MLKEM_NAMESPACE(poly_cbd_eta1) +#define poly_cbd2 MLKEM_NAMESPACE(poly_cbd2) /************************************************* - * Name: poly_cbd_eta1 + * Name: poly_cbd2 * * Description: Given an array of uniformly random bytes, compute * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter MLKEM_ETA1. + * a centered binomial distribution with parameter eta=2 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *buf: pointer to input byte array **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)) -); +void poly_cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]); -#if MLKEM_K == 2 || MLKEM_K == 4 -#define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2) +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == 3 +#define poly_cbd3 MLKEM_NAMESPACE(poly_cbd3) /************************************************* - * Name: poly_cbd_eta1 + * Name: poly_cbd3 * * Description: Given an array of uniformly random bytes, compute * polynomial with coefficients distributed according to - * a centered binomial 
distribution with parameter MLKEM_ETA2. + * a centered binomial distribution with parameter eta=3. + * This function is only needed for ML-KEM-512 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *buf: pointer to input byte array **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)) -); -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ +void poly_cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]); +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED || MLKEM_ETA1 == 3 */ -#endif +#endif /* CBD_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbmc.h index baa0bfa9f..52b95bc3f 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbmc.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbmc.h @@ -13,7 +13,7 @@ #define __contract__(x) #define __loop__(x) -#define cassert(x, y) +#define cassert(x) #else /* CBMC _is_ defined, therefore we're doing proof */ @@ -30,7 +30,7 @@ #define invariant(...) __CPROVER_loop_invariant(__VA_ARGS__) #define decreases(...) __CPROVER_decreases(__VA_ARGS__) /* cassert to avoid confusion with in-built assert */ -#define cassert(...) __CPROVER_assert(__VA_ARGS__) +#define cassert(x) __CPROVER_assert(x, "cbmc assertion failed") #define assume(...)
__CPROVER_assume(__VA_ARGS__) /*************************************************** @@ -119,13 +119,13 @@ { \ unsigned qvar; \ ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> \ - (((value_lb) <= (array_var[(qvar)])) && \ - ((array_var[(qvar)]) < (value_ub))) \ + (((int)(value_lb) <= ((array_var)[(qvar)])) && \ + (((array_var)[(qvar)]) < (int)(value_ub))) \ } #define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \ array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \ - (qvar_ub), (array_var), (value_lb), (value_ub)) + (qvar_ub), (array_var), (value_lb), (value_ub)) /* clang-format on */ /* Wrapper around array_bound operating on absolute values. @@ -134,6 +134,6 @@ * bound in array_bound is inclusive, we have to raise it by 1. */ #define array_abs_bound(arr, lb, ub, k) \ - array_bound((arr), (lb), (ub), -(k) + 1, (k)) + array_bound((arr), (lb), (ub), -((int)(k)) + 1, (k)) #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/common.h index da886780c..4f326333e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/common.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/common.h @@ -43,23 +43,30 @@ #define MLKEM_NATIVE_MAKE_NAMESPACE_(x1, x2) x1##_##x2 #define MLKEM_NATIVE_MAKE_NAMESPACE(x1, x2) MLKEM_NATIVE_MAKE_NAMESPACE_(x1, x2) -#define FIPS202_NAMESPACE(s) \ - MLKEM_NATIVE_MAKE_NAMESPACE(FIPS202_NAMESPACE_PREFIX, s) - #define MLKEM_NAMESPACE(s) \ MLKEM_NATIVE_MAKE_NAMESPACE(MLKEM_NAMESPACE_PREFIX, s) +#if defined(MLKEM_NAMESPACE_PREFIX_ADD_LEVEL) +#define MLKEM_NATIVE_MAKE_NAMESPACE_K_(x1, x2, x3) x1##x2##_##x3 +#define MLKEM_NATIVE_MAKE_NAMESPACE_K(x1, x2, x3) \ + MLKEM_NATIVE_MAKE_NAMESPACE_K_(x1, x2, x3) +#define MLKEM_NAMESPACE_K(s) \ + MLKEM_NATIVE_MAKE_NAMESPACE_K(MLKEM_NAMESPACE_PREFIX, MLKEM_LVL, s) +#else +#define MLKEM_NAMESPACE_K(s) MLKEM_NAMESPACE(s) +#endif + /* On Apple platforms, we need to emit leading underscore * in front of assembly 
symbols. We thus introducee a separate * namespace wrapper for ASM symbols. */ #if !defined(__APPLE__) #define MLKEM_ASM_NAMESPACE(sym) MLKEM_NAMESPACE(sym) -#define FIPS202_ASM_NAMESPACE(sym) FIPS202_NAMESPACE(sym) +#define MLKEM_ASM_NAMESPACE_K(sym) MLKEM_NAMESPACE_K(sym) #else #define PREFIX_UNDERSCORE_(sym) _##sym #define PREFIX_UNDERSCORE(sym) PREFIX_UNDERSCORE_(sym) #define MLKEM_ASM_NAMESPACE(sym) PREFIX_UNDERSCORE(MLKEM_NAMESPACE(sym)) -#define FIPS202_ASM_NAMESPACE(sym) PREFIX_UNDERSCORE(FIPS202_NAMESPACE(sym)) +#define MLKEM_ASM_NAMESPACE_K(sym) PREFIX_UNDERSCORE(MLKEM_NAMESPACE_K(sym)) #endif #endif /* MLKEM_NATIVE_COMMON_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/config.h index d1441835b..fa89370ce 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/config.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/config.h @@ -40,10 +40,12 @@ /* #define MLKEM_NATIVE_CONFIG_FILE "config.h" */ /****************************************************************************** - * Name: MLKEM_NAMESPACE + * Name: MLKEM_NAMESPACE_PREFIX * - * Description: The prefix to use to namespace global symbols - * from mlkem/. + * Description: The prefix to use to namespace global symbols from mlkem/. + * + * Level-dependent symbols will additionally be prefixed with the + * security level if MLKEM_NAMESPACE_PREFIX_ADD_LEVEL is set. * * This can also be set using CFLAGS. * @@ -53,17 +55,71 @@ #endif /****************************************************************************** - * Name: FIPS202_NAMESPACE + * Name: MLKEM_NAMESPACE_PREFIX_ADD_LEVEL + * + * Description: If set, the level (512, 768, 1024) is added to the namespace + * prefix MLKEM_NAMESPACE_PREFIX for all functions which are + * level-dependent. Level-independent functions will have their + * symbol prefixed by MLKEM_NAMESPACE_PREFIX only.
* - * Description: The prefix to use to namespace global symbols - * from mlkem/fips202/. + * This is intended to be used for multi-level builds where + * level-independent code should be shared across levels. * * This can also be set using CFLAGS. * *****************************************************************************/ -#if !defined(FIPS202_NAMESPACE_PREFIX) -#define FIPS202_NAMESPACE_PREFIX FIPS202_DEFAULT_NAMESPACE_PREFIX -#endif +/* #define MLKEM_NAMESPACE_PREFIX_ADD_LEVEL */ + +/****************************************************************************** + * Name: MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED + * + * Description: This is for multi-level builds of mlkem-native only. If you + * need only a single security level build of mlkem-native, + * keep this unset. + * + * If this is set, all MLKEM_K-independent code will be included + * in the build, including code needed only for other security + * levels. + * + * Example: poly_cbd3 is only needed for MLKEM_K == 2. Yet, if + * this option is set for a build with MLKEM_K==3/4, it would + * be included. + * + * To build mlkem-native with support for all security levels, + * build it three times -- once per level -- and set the option + * MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED for exactly one of + * them, and MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED for the + * others. + * + * See examples/multilevel_build for an example. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +/* #define MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED */ + +/****************************************************************************** + * Name: MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED + * + * Description: This is for multi-level builds of mlkem-native only. If you + * need only a single security level build of mlkem-native, + * keep this unset. + * + * If this is set, no MLKEM_K-independent code will be included + * in the build. 
+ * + * To build mlkem-native with support for all security levels, + * build it three times -- once per level -- and set the option + * MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED for exactly one of + * them, and MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED for the + * others. + * + * See examples/multilevel_build for an example. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +/* #define MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ /****************************************************************************** * Name: MLKEM_USE_NATIVE @@ -112,25 +168,13 @@ /* Default namespace * * Don't change this. If you need a different namespace, re-define - * MLKEM_NAMESPACE above instead, and remove the following. - */ - -/* - * The default FIPS202 namespace is - * - * PQCP_MLKEM_NATIVE_FIPS202__ + * MLKEM_NAMESPACE_PREFIX above instead, and remove the following. * - * e.g., PQCP_MLKEM_NATIVE_FIPS202_C_ - */ - -#define FIPS202_DEFAULT_NAMESPACE_PREFIX PQCP_MLKEM_NATIVE_FIPS202 - -/* * The default MLKEM namespace is * - * PQCP_MLKEM_NATIVE_MLKEM__ + * PQCP_MLKEM_NATIVE_MLKEM_ * - * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_ + * e.g., PQCP_MLKEM_NATIVE_MLKEM512_ */ #if MLKEM_K == 2 diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/debug.c new file mode 100644 index 000000000..4b4857cbc --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/debug.c @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +/* NOTE: You can remove this file unless you compile with MLKEM_DEBUG. 
*/ + +#include "common.h" + +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) && defined(MLKEM_DEBUG) + + +#include +#include +#include "debug.h" + +#define MLKEM_NATIVE_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] " + +void mlkem_debug_assert(const char *file, int line, const int val) +{ + if (val == 0) + { + fprintf(stderr, + MLKEM_NATIVE_DEBUG_ERROR_HEADER "Assertion failed (value %d)\n", + file, line, val); + exit(1); + } +} + +void mlkem_debug_check_bounds(const char *file, int line, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive) +{ + int err = 0; + unsigned i; + for (i = 0; i < len; i++) + { + int16_t val = ptr[i]; + if (!(val > lower_bound_exclusive && val < upper_bound_exclusive)) + { + fprintf( + stderr, + MLKEM_NATIVE_DEBUG_ERROR_HEADER + "Bounds assertion failed: Index %u, value %d out of bounds (%d,%d)\n", + file, line, i, (int)val, lower_bound_exclusive, + upper_bound_exclusive); + err = 1; + } + } + + if (err == 1) + exit(1); +} + +#else /* !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED && MLKEM_DEBUG */ + +#define empty_cu_debug MLKEM_NAMESPACE_K(empty_cu_debug) +int empty_cu_debug; + +#endif /* !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED && MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/debug.h new file mode 100644 index 000000000..1103124db --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/debug.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef MLKEM_DEBUG_H +#define MLKEM_DEBUG_H +#include "common.h" + +#if defined(MLKEM_DEBUG) +#include + +/************************************************* + * Name: mlkem_debug_assert + * + * Description: Check debug assertion + * + * Prints an error message to stderr and calls + * exit(1) if not. 
+ * + * Arguments: - file: filename + * - line: line number + * - val: Value asserted to be non-zero + **************************************************/ +#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert) +void mlkem_debug_assert(const char *file, int line, const int val); + +/************************************************* + * Name: mlkem_debug_check_bounds + * + * Description: Check whether values in an array of int16_t + * are within specified bounds. + * + * Prints an error message to stderr and calls + * exit(1) if not. + * + * Arguments: - file: filename + * - line: line number + * - ptr: Base of array to be checked + * - len: Number of int16_t in ptr + * - lower_bound_exclusive: Exclusive lower bound + * - upper_bound_exclusive: Exclusive upper bound + **************************************************/ +#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds) +void mlkem_debug_check_bounds(const char *file, int line, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive); + +/* Check assertion, calling exit() upon failure + * + * val: Value that's asserted to be non-zero + */ +#define debug_assert(val) mlkem_debug_assert(__FILE__, __LINE__, (val)) + +/* Check bounds in array of int16_t's + * ptr: Base of int16_t array; will be explicitly cast to int16_t*, + * so you may pass a byte-compatible type such as poly or polyvec. 
+ * len: Number of int16_t in array + * value_lb: Inclusive lower value bound + * value_ub: Exclusive upper value bound */ +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + mlkem_debug_check_bounds(__FILE__, __LINE__, (const int16_t *)(ptr), (len), \ + (value_lb)-1, (value_ub)) + +/* Check absolute bounds in array of int16_t's + * ptr: Base of array, expression of type int16_t* + * len: Number of int16_t in array + * value_abs_bd: Exclusive absolute upper bound */ +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + debug_assert_bound((ptr), (len), (-(value_abs_bd) + 1), (value_abs_bd)) + +/* Version of bounds assertions for 2-dimensional arrays */ +#define debug_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \ + debug_assert_bound((ptr), ((len0) * (len1)), (value_lb), (value_ub)) + +#define debug_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \ + debug_assert_abs_bound((ptr), ((len0) * (len1)), (value_abs_bd)) + +/* When running CBMC, convert debug assertions into proof obligations */ +#elif defined(CBMC) + +#include "cbmc.h" + +#define debug_assert(val) cassert(val) + +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + cassert(array_bound(((int16_t *)(ptr)), 0, (len), (value_lb), (value_ub))) + +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + cassert(array_abs_bound(((int16_t *)(ptr)), 0, (len), (value_abs_bd))) + +/* Because of https://github.com/diffblue/cbmc/issues/8570, we can't + * just use a single flattened array_bound(...) here.
*/ +#define debug_assert_bound_2d(ptr, M, N, value_lb, value_ub) \ + cassert(forall(kN, 0, (M), \ + array_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \ + (value_lb), (value_ub)))) + +#define debug_assert_abs_bound_2d(ptr, M, N, value_abs_bd) \ + cassert(forall(kN, 0, (M), \ + array_abs_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \ + (value_abs_bd)))) + +#else /* MLKEM_DEBUG */ + +#define debug_assert(val) \ + do \ + { \ + } while (0) +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + do \ + { \ + } while (0) +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + do \ + { \ + } while (0) + +#define debug_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \ + do \ + { \ + } while (0) + +#define debug_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \ + do \ + { \ + } while (0) + + +#endif /* MLKEM_DEBUG */ +#endif /* MLKEM_DEBUG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/debug/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/debug/debug.c deleted file mode 100644 index 64294ebe1..000000000 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/debug/debug.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2024 The mlkem-native project authors - * SPDX-License-Identifier: Apache-2.0 - */ -#include "../common.h" - -#if defined(MLKEM_DEBUG) - -#include -#include "debug.h" - -#define MLKEM_NATIVE_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] " - -void mlkem_debug_assert(const char *file, int line, const char *description, - const int val) -{ - if (val == 0) - { - fprintf(stderr, - MLKEM_NATIVE_DEBUG_ERROR_HEADER "Assertion failed: %s (value %d)\n", - file, line, description, val); - exit(1); - } -} - -void mlkem_debug_check_bounds(const char *file, int line, - const char *description, const int16_t *ptr, - unsigned len, int lower_bound_exclusive, - int upper_bound_exclusive) -{ - int err = 0; - unsigned i; - for (i = 0; i < len; i++) - { - int16_t val = ptr[i]; - if (!(val > lower_bound_exclusive && val < 
upper_bound_exclusive)) - { - fprintf(stderr, - MLKEM_NATIVE_DEBUG_ERROR_HEADER - "%s, index %u, value %d out of bounds (%d,%d)\n", - file, line, description, i, (int)val, lower_bound_exclusive, - upper_bound_exclusive); - err = 1; - } - } - - if (err == 1) - exit(1); -} - -#else /* MLKEM_DEBUG */ - -#define empty_cu_debug MLKEM_NAMESPACE(empty_cu_debug) -int empty_cu_debug; - -#endif /* MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/debug/debug.h deleted file mode 100644 index 5ce320ea2..000000000 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/debug/debug.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2024 The mlkem-native project authors - * SPDX-License-Identifier: Apache-2.0 - */ -#ifndef MLKEM_DEBUG_H -#define MLKEM_DEBUG_H - -#include "../common.h" - -#if defined(MLKEM_DEBUG) -#include -#include -#include - -/************************************************* - * Name: mlkem_debug_assert - * - * Description: Check debug assertion - * - * Prints an error message to stderr and calls - * exit(1) if not. - * - * Arguments: - file: filename - * - line: line number - * - description: Textual description of assertion - * - val: Value asserted to be non-zero - **************************************************/ -#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert) -void mlkem_debug_assert(const char *file, int line, const char *description, - const int val); - -/************************************************* - * Name: mlkem_debug_check_bounds - * - * Description: Check whether values in an array of int16_t - * are within specified bounds. - * - * Prints an error message to stderr and calls - * exit(1) if not. 
- * - * Arguments: - file: filename - * - line: line number - * - description: Textual description of check - * - ptr: Base of array to be checked - * - len: Number of int16_t in ptr - * - lower_bound_exclusive: Exclusive lower bound - * - upper_bound_exclusive: Exclusive upper bound - **************************************************/ -#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds) -void mlkem_debug_check_bounds(const char *file, int line, - const char *description, const int16_t *ptr, - unsigned len, int lower_bound_exclusive, - int upper_bound_exclusive); - -/* Check assertion, calling exit() upon failure - * - * val: Value that's asserted to be non-zero - * msg: Message to print on failure - * - * Currently called CASSERT to avoid clash with CBMC assert. - */ -#define CASSERT(val, msg) \ - do \ - { \ - mlkem_debug_assert(__FILE__, __LINE__, (msg), (val)); \ - } while (0) - -/* Check absolute bounds of scalar - * val: Scalar to be checked - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define SCALAR_BOUND(val, abs_bound, msg) \ - CASSERT((val) > -(abs_bound) && (val) < (abs_bound), msg) - -/* Check that all coefficients in array of int16_t's are non-negative - * and below an exclusive upper bound. 
- * - * ptr: Base of array, expression of type int16_t* - * len: Number of int16_t in array - * high_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define UBOUND(ptr, len, high_bound, msg) \ - do \ - { \ - mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ - (len), -1, ((high_bound))); \ - } while (0) - -/* Check absolute bounds in array of int16_t's - * ptr: Base of array, expression of type int16_t* - * len: Number of int16_t in array - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define BOUND(ptr, len, abs_bound, msg) \ - do \ - { \ - mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ - (len), -(abs_bound), (abs_bound)); \ - } while (0) - -/* Check absolute bounds on coefficients in polynomial or mulcache - * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define POLY_BOUND_MSG(ptr, abs_bound, msg) \ - BOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (abs_bound), \ - msg) - -/* Check unsigned bounds on coefficients in polynomial or mulcache - * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
- * msg: Message to print on failure */ -#define POLY_UBOUND_MSG(ptr, ubound, msg) \ - UBOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (ubound), \ - msg) - -/* Check absolute bounds on coefficients in polynomial - * ptr: poly* of poly_mulcache* pointer to polynomial (cache) to check - * abs_bound: Exclusive upper bound on absolute value to check */ -#define POLY_BOUND(ptr, abs_bound) \ - POLY_BOUND_MSG((ptr), (abs_bound), "poly absolute bound for " #ptr) - -/* Check unsigned bounds on coefficients in polynomial - * ptr: poly* of poly_mulcache* pointer to polynomial (cache) to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. - */ -#define POLY_UBOUND(ptr, ubound) \ - POLY_UBOUND_MSG((ptr), (ubound), "poly unsigned bound for " #ptr) - -/* Check absolute bounds on coefficients in vector of polynomials - * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check - * abs_bound: Exclusive upper bound on absolute value to check */ -#define POLYVEC_BOUND(ptr, abs_bound) \ - do \ - { \ - unsigned _debug_polyvec_bound_idx; \ - for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ - _debug_polyvec_bound_idx++) \ - POLY_BOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (abs_bound), \ - "polyvec absolute bound for " #ptr ".vec[i]"); \ - } while (0) - -/* Check unsigned bounds on coefficients in vector of polynomials - * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
- */ -#define POLYVEC_UBOUND(ptr, ubound) \ - do \ - { \ - unsigned _debug_polyvec_bound_idx; \ - for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ - _debug_polyvec_bound_idx++) \ - POLY_UBOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (ubound), \ - "polyvec unsigned bound for " #ptr ".vec[i]"); \ - } while (0) - -#define MLKEM_CONCAT_(left, right) left##right -#define MLKEM_CONCAT(left, right) MLKEM_CONCAT_(left, right) - -/* Following AWS-LC to define a C99-compliant static assert */ -#define MLKEM_STATIC_ASSERT_DEFINE(cond, msg) \ - typedef struct \ - { \ - unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \ - } MLKEM_CONCAT(MLKEM_NAMESPACE(static_assertion_), msg) \ - __attribute__((unused)); - -#define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \ - MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix)) -#define MLKEM_STATIC_ASSERT_ADD_LINE1(cond, line, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE0(cond, MLKEM_CONCAT(line, suffix)) -#define MLKEM_STATIC_ASSERT_ADD_LINE2(cond, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE1(cond, __LINE__, suffix) -#define MLKEM_STATIC_ASSERT_ADD_ERROR(cond, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE2(cond, MLKEM_CONCAT(_error_is_, suffix)) -#define STATIC_ASSERT(cond, error) MLKEM_STATIC_ASSERT_ADD_ERROR(cond, error) - -#else /* MLKEM_DEBUG */ - -#define CASSERT(val, msg) \ - do \ - { \ - } while (0) -#define SCALAR_BOUND(val, abs_bound, msg) \ - do \ - { \ - } while (0) -#define BOUND(ptr, len, abs_bound, msg) \ - do \ - { \ - } while (0) -#define POLY_BOUND(ptr, abs_bound) \ - do \ - { \ - } while (0) -#define POLYVEC_BOUND(ptr, abs_bound) \ - do \ - { \ - } while (0) -#define POLY_BOUND_MSG(ptr, ubound, abs_bound) \ - do \ - { \ - } while (0) -#define UBOUND(ptr, len, high_bound, msg) \ - do \ - { \ - } while (0) -#define POLY_UBOUND(ptr, ubound) \ - do \ - { \ - } while (0) -#define POLYVEC_UBOUND(ptr, ubound) \ - do \ - { \ - } while (0) -#define POLY_UBOUND_MSG(ptr, ubound, msg) \ 
- do \ - { \ - } while (0) -#define STATIC_ASSERT(cond, error) - -#endif /* MLKEM_DEBUG */ - -#endif /* MLKEM_DEBUG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/indcpa.c index 4d3133e14..0cfcc3e9e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/indcpa.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/indcpa.c @@ -17,7 +17,7 @@ #include "symmetric.h" #include "arith_backend.h" -#include "debug/debug.h" +#include "debug.h" #include "cbmc.h" @@ -25,15 +25,13 @@ * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. */ -#define pack_pk MLKEM_NAMESPACE(pack_pk) -#define unpack_pk MLKEM_NAMESPACE(unpack_pk) -#define pack_sk MLKEM_NAMESPACE(pack_sk) -#define unpack_sk MLKEM_NAMESPACE(unpack_sk) -#define pack_ciphertext MLKEM_NAMESPACE(pack_ciphertext) -#define unpack_ciphertext MLKEM_NAMESPACE(unpack_ciphertext) -#define gen_matrix_entry_x4 MLKEM_NAMESPACE(gen_matrix_entry_x4) -#define gen_matrix_entry MLKEM_NAMESPACE(gen_matrix_entry) -#define matvec_mul MLKEM_NAMESPACE(matvec_mul) +#define pack_pk MLKEM_NAMESPACE_K(pack_pk) +#define unpack_pk MLKEM_NAMESPACE_K(unpack_pk) +#define pack_sk MLKEM_NAMESPACE_K(pack_sk) +#define unpack_sk MLKEM_NAMESPACE_K(unpack_sk) +#define pack_ciphertext MLKEM_NAMESPACE_K(pack_ciphertext) +#define unpack_ciphertext MLKEM_NAMESPACE_K(unpack_ciphertext) +#define matvec_mul MLKEM_NAMESPACE_K(matvec_mul) /* End of static namespacing */ /************************************************* @@ -51,7 +49,7 @@ static void pack_pk(uint8_t r[MLKEM_INDCPA_PUBLICKEYBYTES], polyvec *pk, const uint8_t seed[MLKEM_SYMBYTES]) { - POLYVEC_BOUND(pk, MLKEM_Q); + debug_assert_bound_2d(pk, MLKEM_K, MLKEM_N, 0, MLKEM_Q); polyvec_tobytes(r, pk); memcpy(r + MLKEM_POLYVECBYTES, seed, MLKEM_SYMBYTES); } @@ -77,7 +75,7 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], /* 
NOTE: If a modulus check was conducted on the PK, we know at this * point that the coefficients of `pk` are unsigned canonical. The * specifications and proofs, however, do _not_ assume this, and instead - * work with the easily provable bound by 4096. */ + * work with the easily provable bound by UINT12_LIMIT. */ } /************************************************* @@ -91,7 +89,7 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], **************************************************/ static void pack_sk(uint8_t r[MLKEM_INDCPA_SECRETKEYBYTES], polyvec *sk) { - POLYVEC_BOUND(sk, MLKEM_Q); + debug_assert_bound_2d(sk, MLKEM_K, MLKEM_N, 0, MLKEM_Q); polyvec_tobytes(r, sk); } @@ -145,131 +143,11 @@ static void unpack_ciphertext(polyvec *b, poly *v, poly_decompress_dv(v, c + MLKEM_POLYVECCOMPRESSEDBYTES_DU); } -#ifndef MLKEM_GEN_MATRIX_NBLOCKS -#define MLKEM_GEN_MATRIX_NBLOCKS \ - ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + XOF_RATE) / XOF_RATE) -#endif - -/* - * Generate four A matrix entries from a seed, using rejection - * sampling on the output of a XOF. 
- */ -static void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4]) -__contract__( - requires(memory_no_alias(vec, sizeof(poly) * 4)) - requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) - requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) - assigns(memory_slice(vec, sizeof(poly) * 4)) - ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) -{ - /* Temporary buffers for XOF output before rejection sampling */ - uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - - /* Tracks the number of coefficients we have already sampled */ - unsigned int ctr[KECCAK_WAY]; - xof_x4_ctx statex; - unsigned int buflen; - - shake128x4_inc_init(&statex); - - /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */ - xof_x4_absorb(&statex, seed[0], seed[1], seed[2], seed[3], - MLKEM_SYMBYTES + 2); - - /* - * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. - * This should generate the matrix entries with high probability. - */ - xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, - &statex); - buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; - ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); - ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); - ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); - ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); - - /* - * So long as not all matrix entries have been generated, squeeze - * one more block a time until we're done. 
- */ - buflen = XOF_RATE; - while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || - ctr[3] < MLKEM_N) - __loop__( - assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), - object_whole(buf1), object_whole(buf2), object_whole(buf3)) - invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) - invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) - invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, MLKEM_Q)) - invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, MLKEM_Q)) - invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, MLKEM_Q)) - invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, MLKEM_Q))) - { - xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); - ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); - ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); - ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); - ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); - } - - xof_x4_release(&statex); -} - -/* - * Generate a single A matrix entry from a seed, using rejection - * sampling on the output of a XOF. - */ -static void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) -__contract__( - requires(memory_no_alias(entry, sizeof(poly))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) - assigns(memory_slice(entry, sizeof(poly))) - ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) -{ - xof_ctx state; - uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - unsigned int ctr, buflen; - - shake128_inc_init(&state); - xof_absorb(&state, seed, MLKEM_SYMBYTES + 2); - - /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. - */ - /* This should generate the matrix entry with high probability. 
*/ - xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); - buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; - ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); - - /* Squeeze + sample one more block a time until we're done */ - buflen = XOF_RATE; - while (ctr < MLKEM_N) - __loop__( - assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) - invariant(0 <= ctr && ctr <= MLKEM_N) - invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr, - 0, MLKEM_Q))) - { - xof_squeezeblocks(buf, 1, &state); - ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen); - } - - xof_release(&state); -} - #if !defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER) /* This namespacing is not done at the top to avoid a naming conflict * with native backends, which are currently not yet namespaced. */ #define poly_permute_bitrev_to_custom \ - MLKEM_NAMESPACE(poly_permute_bitrev_to_custom) + MLKEM_NAMESPACE_K(poly_permute_bitrev_to_custom) static INLINE void poly_permute_bitrev_to_custom(poly *data) __contract__( @@ -332,7 +210,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) * This call writes across polyvec boundaries for K=2 and K=3. * This is intentional and safe. */ - gen_matrix_entry_x4(&a[0].vec[0] + i, seedxy); + poly_rej_uniform_x4(&a[0].vec[0] + i, seedxy); } /* For left over polynomial, we use single keccak. */ @@ -353,12 +231,11 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) seed0[MLKEM_SYMBYTES + 1] = x; } - gen_matrix_entry(&a[0].vec[0] + i, seed0); + poly_rej_uniform(&a[0].vec[0] + i, seed0); i++; } - cassert(i == MLKEM_K * MLKEM_K, - "gen_matrix: failed to generate whole matrix"); + debug_assert(i == MLKEM_K * MLKEM_K); /* * The public matrix is generated in NTT domain. 
If the native backend @@ -402,16 +279,12 @@ __contract__( for (i = 0; i < MLKEM_K; i++) __loop__( assigns(i, object_whole(out)) - invariant(i >= 0 && i <= MLKEM_K)) + invariant(i <= MLKEM_K)) { polyvec_basemul_acc_montgomery_cached(&out->vec[i], &a[i], v, vc); } } - - -STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0) - MLKEM_NATIVE_INTERNAL_API void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES], @@ -461,7 +334,6 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], matvec_mul(&pkpv, a, &skpv, &skpv_cache); polyvec_tomont(&pkpv); - /* Arithmetic cannot overflow, see static assertion at the top */ polyvec_add(&pkpv, &e); polyvec_reduce(&pkpv); polyvec_reduce(&skpv); @@ -471,11 +343,6 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], } -/* Check that the arithmetic in indcpa_enc() does not overflow */ -STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0) -STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX, - indcpa_enc_bound_1) - MLKEM_NATIVE_INTERNAL_API void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], const uint8_t m[MLKEM_INDCPA_MSGBYTES], @@ -522,7 +389,6 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], polyvec_invntt_tomont(&b); poly_invntt_tomont(&v); - /* Arithmetic cannot overflow, see static assertion at the top */ polyvec_add(&b, &ep); poly_add(&v, &epp); poly_add(&v, &k); @@ -533,9 +399,6 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], pack_ciphertext(c, &b, &v); } -/* Check that the arithmetic in indcpa_dec() does not overflow */ -STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0) - MLKEM_NATIVE_INTERNAL_API void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], const uint8_t c[MLKEM_INDCPA_BYTES], @@ -551,7 +414,6 @@ void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], polyvec_basemul_acc_montgomery(&sb, &skpv, &b); poly_invntt_tomont(&sb); - /* Arithmetic cannot overflow, see static assertion at the 
top */ poly_sub(&v, &sb); poly_reduce(&v); diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/indcpa.h index 011f1aa4f..2c4fda3c4 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/indcpa.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/indcpa.h @@ -10,7 +10,7 @@ #include "common.h" #include "polyvec.h" -#define gen_matrix MLKEM_NAMESPACE(gen_matrix) +#define gen_matrix MLKEM_NAMESPACE_K(gen_matrix) /************************************************* * Name: gen_matrix * @@ -34,7 +34,7 @@ __contract__( array_bound(a[x].vec[y].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))); ); -#define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand) +#define indcpa_keypair_derand MLKEM_NAMESPACE_K(indcpa_keypair_derand) /************************************************* * Name: indcpa_keypair_derand * @@ -60,7 +60,7 @@ __contract__( assigns(object_whole(sk)) ); -#define indcpa_enc MLKEM_NAMESPACE(indcpa_enc) +#define indcpa_enc MLKEM_NAMESPACE_K(indcpa_enc) /************************************************* * Name: indcpa_enc * @@ -89,7 +89,7 @@ __contract__( assigns(object_whole(c)) ); -#define indcpa_dec MLKEM_NAMESPACE(indcpa_dec) +#define indcpa_dec MLKEM_NAMESPACE_K(indcpa_dec) /************************************************* * Name: indcpa_dec * diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/kem.c index 5779d3273..88c3843be 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/kem.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/kem.c @@ -16,8 +16,8 @@ * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. 
*/ -#define check_pk MLKEM_NAMESPACE(check_pk) -#define check_sk MLKEM_NAMESPACE(check_sk) +#define check_pk MLKEM_NAMESPACE_K(check_pk) +#define check_sk MLKEM_NAMESPACE_K(check_sk) /* End of static namespacing */ #if defined(CBMC) diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/kem.h index 074e4771e..93caa796b 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/kem.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/kem.h @@ -9,6 +9,7 @@ #include "cbmc.h" #include "common.h" +#if defined(MLKEM_NATIVE_CHECK_APIS) /* Include to ensure consistency between internal kem.h * and external mlkem_native.h. */ #include "mlkem_native.h" @@ -25,6 +26,14 @@ #error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h #endif +#else +#define crypto_kem_keypair_derand MLKEM_NAMESPACE_K(keypair_derand) +#define crypto_kem_keypair MLKEM_NAMESPACE_K(keypair) +#define crypto_kem_enc_derand MLKEM_NAMESPACE_K(enc_derand) +#define crypto_kem_enc MLKEM_NAMESPACE_K(enc) +#define crypto_kem_dec MLKEM_NAMESPACE_K(dec) +#endif + /************************************************* * Name: crypto_kem_keypair_derand * diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/mlkem_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/mlkem_native.h index 4aed4efbb..12d1d12e6 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/mlkem_native.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/mlkem_native.h @@ -59,9 +59,17 @@ #error MLKEM_NAMESPACE_PREFIX not set by config file #endif -#define BUILD_INFO_CONCAT_(x, y) x##_##y -#define BUILD_INFO_CONCAT(x, y) BUILD_INFO_CONCAT_(x, y) -#define BUILD_INFO_NAMESPACE(sym) BUILD_INFO_CONCAT(MLKEM_NAMESPACE_PREFIX, sym) +#if defined(MLKEM_NATIVE_NAMESPACE_PREFIX_ADD_LEVEL) +#define BUILD_INFO_CONCAT3_(x, y, z) x##y##_##z +#define BUILD_INFO_CONCAT3(x, y, z) BUILD_INFO_CONCAT3_(x, y, z) +#define BUILD_INFO_NAMESPACE(sym) \ +
BUILD_INFO_CONCAT3(MLKEM_NAMESPACE_PREFIX, BUILD_INFO_LVL, sym) +#else +#define BUILD_INFO_CONCAT2_(x, y) x##_##y +#define BUILD_INFO_CONCAT2(x, y) BUILD_INFO_CONCAT2_(x, y) +#define BUILD_INFO_NAMESPACE(sym) \ + BUILD_INFO_CONCAT2(MLKEM_NAMESPACE_PREFIX, sym) +#endif #endif /* BUILD_INFO_LVL */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/ntt.c index 02b45215c..3651c8da9 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/ntt.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/ntt.c @@ -2,10 +2,12 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ -#include +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) +#include #include "arith_backend.h" -#include "debug/debug.h" +#include "debug.h" #include "ntt.h" #include "reduce.h" @@ -45,10 +47,10 @@ * 4 -- 6 * 5 -- 7 */ -static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, - int len, int bound) +static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, + unsigned start, unsigned len, int bound) __contract__( - requires(0 <= start && start < MLKEM_N) + requires(start < MLKEM_N) requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N) requires(0 <= bound && bound < INT16_MAX - MLKEM_Q) requires(-HALF_Q < zeta && zeta < HALF_Q) @@ -60,7 +62,7 @@ __contract__( ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound))) { /* `bound` is a ghost variable only needed in the CBMC specification */ - int j; + unsigned j; ((void)bound); for (j = start; j < start + len; j++) __loop__( @@ -93,7 +95,7 @@ __contract__( * official Kyber implementation here, merely adding `layer` as * a ghost variable for the specifications. 
*/ -static void ntt_layer(int16_t r[MLKEM_N], int len, int layer) +static void ntt_layer(int16_t r[MLKEM_N], unsigned len, unsigned layer) __contract__( requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer)) @@ -101,15 +103,15 @@ __contract__( assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q))) { - int start, k; + unsigned start, k; /* `layer` is a ghost variable only needed in the CBMC specification */ ((void)layer); /* Twiddle factors for layer n start at index 2^(layer-1) */ k = MLKEM_N / (2 * len); for (start = 0; start < MLKEM_N; start += 2 * len) __loop__( - invariant(0 <= start && start < MLKEM_N + 2 * len) - invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) + invariant(start < MLKEM_N + 2 * len) + invariant(k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) invariant(array_abs_bound(r, 0, start, layer * MLKEM_Q + MLKEM_Q)) invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q))) { @@ -130,9 +132,9 @@ __contract__( MLKEM_NATIVE_INTERNAL_API void poly_ntt(poly *p) { - int len, layer; + unsigned len, layer; int16_t *r; - POLY_BOUND_MSG(p, MLKEM_Q, "ref ntt input"); + debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q); r = p->coeffs; for (len = 128, layer = 1; len >= 2; len >>= 1, layer++) @@ -144,30 +146,23 @@ void poly_ntt(poly *p) } /* Check the stronger bound */ - POLY_BOUND_MSG(p, NTT_BOUND, "ref ntt output"); + debug_assert_abs_bound(p, MLKEM_N, NTT_BOUND); } #else /* MLKEM_USE_NATIVE_NTT */ -/* Check that bound for native NTT implies contractual bound */ -STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, invntt_bound) - MLKEM_NATIVE_INTERNAL_API void poly_ntt(poly *p) { - POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input"); + debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q); ntt_native(p); - POLY_BOUND_MSG(p, NTT_BOUND_NATIVE, "native ntt output"); + debug_assert_abs_bound(p, MLKEM_N, NTT_BOUND); } #endif /* 
MLKEM_USE_NATIVE_NTT */ #if !defined(MLKEM_USE_NATIVE_INTT) -/* Check that bound for reference invNTT implies contractual bound */ -#define INVNTT_BOUND_REF (3 * MLKEM_Q / 4) -STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound) - /* Compute one layer of inverse NTT */ -static void invntt_layer(int16_t *r, int len, int layer) +static void invntt_layer(int16_t *r, unsigned len, unsigned layer) __contract__( requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7) @@ -176,23 +171,23 @@ __contract__( assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { - int start, k; + unsigned start, k; /* `layer` is a ghost variable used only in the specification */ ((void)layer); k = MLKEM_N / len - 1; for (start = 0; start < MLKEM_N; start += 2 * len) __loop__( invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)) - invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(start <= MLKEM_N && k <= 127) /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */ invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len)) { - int j; + unsigned j; int16_t zeta = zetas[k--]; for (j = start; j < start + len; j++) __loop__( invariant(start <= j && j <= start + len) - invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(start <= MLKEM_N && k <= 127) invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { int16_t t = r[j]; @@ -211,13 +206,13 @@ void poly_invntt_tomont(poly *p) * and NTT twist. This also brings coefficients down to * absolute value < MLKEM_Q. 
*/ - int j, len, layer; + unsigned j, len, layer; const int16_t f = 1441; int16_t *r = p->coeffs; for (j = 0; j < MLKEM_N; j++) __loop__( - invariant(0 <= j && j <= MLKEM_N) + invariant(j <= MLKEM_N) invariant(array_abs_bound(r, 0, j, MLKEM_Q))) { r[j] = fqmul(r[j], f); @@ -226,24 +221,21 @@ void poly_invntt_tomont(poly *p) /* Run the invNTT layers */ for (len = 2, layer = 7; len <= 128; len <<= 1, layer--) __loop__( - invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer))) + invariant(2 <= len && len <= 256 && layer <= 7 && len == (1 << (8 - layer))) invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { invntt_layer(p->coeffs, len, layer); } - POLY_BOUND_MSG(p, INVNTT_BOUND_REF, "ref intt output"); + debug_assert_abs_bound(p, MLKEM_N, INVNTT_BOUND); } #else /* MLKEM_USE_NATIVE_INTT */ -/* Check that bound for native invNTT implies contractual bound */ -STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound) - MLKEM_NATIVE_INTERNAL_API void poly_invntt_tomont(poly *p) { intt_native(p); - POLY_BOUND_MSG(p, INVNTT_BOUND_NATIVE, "native intt output"); + debug_assert_abs_bound(p, MLKEM_N, INVNTT_BOUND); } #endif /* MLKEM_USE_NATIVE_INTT */ @@ -252,8 +244,7 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t b_cached) { int32_t t0, t1; - - BOUND(a, 2, 4096, "basemul input bound"); + debug_assert_bound(a, 2, 0, UINT12_LIMIT); t0 = (int32_t)a[1] * b_cached; t0 += (int32_t)a[0] * b[0]; @@ -264,5 +255,12 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], r[0] = montgomery_reduce(t0); r[1] = montgomery_reduce(t1); - BOUND(r, 2, 2 * MLKEM_Q, "basemul output bound"); + debug_assert_abs_bound(r, 2, 2 * MLKEM_Q); } + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_ntt MLKEM_NAMESPACE_K(empty_cu_ntt) +int empty_cu_ntt; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/ntt.h 
b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/ntt.h index 5592bb9a2..4e80d3ab3 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/ntt.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/ntt.h @@ -4,10 +4,10 @@ */ #ifndef NTT_H #define NTT_H +#include "common.h" #include #include "cbmc.h" -#include "common.h" #include "poly.h" #include "reduce.h" @@ -81,7 +81,7 @@ __contract__( * Upon return, coefficients are bound by * 2*MLKEM_Q in absolute value. * - a: Pointer to first input polynomial - * Must be coefficient-wise < 4096 in absolute value. + * Every coefficient must be in [0..4095] * - b: Pointer to second input polynomial * Can have arbitrary int16_t coefficients * - b_cached: Some precomputed value, typically derived from @@ -99,5 +99,4 @@ __contract__( ensures(array_abs_bound(r, 0, 2, 2 * MLKEM_Q)) ); - -#endif +#endif /* NTT_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/params.h index fa751f977..57ea4c8ba 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/params.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/params.h @@ -25,23 +25,34 @@ #define MLKEM_POLYBYTES 384 #define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES) +#define MLKEM_POLYCOMPRESSEDBYTES_D4 128 +#define MLKEM_POLYCOMPRESSEDBYTES_D5 160 +#define MLKEM_POLYCOMPRESSEDBYTES_D10 320 +#define MLKEM_POLYCOMPRESSEDBYTES_D11 352 + #if MLKEM_K == 2 #define MLKEM_LVL 512 #define MLKEM_ETA1 3 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_DU 10 +#define MLKEM_DV 4 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #elif MLKEM_K == 3 #define MLKEM_LVL 768 #define MLKEM_ETA1 2 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_DU 10 +#define 
MLKEM_DV 4 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #elif MLKEM_K == 4 #define MLKEM_LVL 1024 #define MLKEM_ETA1 2 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 160 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 352 +#define MLKEM_DU 11 +#define MLKEM_DV 5 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D5 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D11 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/poly.c index 5807879df..7483ebf6d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/poly.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/poly.c @@ -2,13 +2,15 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) + #include #include - #include "arith_backend.h" #include "cbd.h" #include "cbmc.h" -#include "debug/debug.h" +#include "debug.h" #include "fips202x4.h" #include "ntt.h" #include "poly.h" @@ -16,50 +18,46 @@ #include "symmetric.h" #include "verify.h" +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 || MLKEM_K == 3) MLKEM_NATIVE_INTERNAL_API -void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) +void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a) { - unsigned j; -#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) - for (j = 0; j < MLKEM_N / 8; j++) - __loop__(invariant(j >= 0 && j <= MLKEM_N / 8)) + unsigned i; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (i = 0; i < MLKEM_N / 8; i++) + __loop__(invariant(i <= MLKEM_N / 8)) { - unsigned k; - uint16_t t[8]; - for (k = 0; k < 8; k++) + unsigned j; + uint8_t t[8] 
= {0}; + for (j = 0; j < 8; j++) __loop__( - invariant(k >= 0 && k <= 8) - invariant(forall(r, 0, k, t[r] < (1u << 11)))) + invariant(i <= MLKEM_N / 8 && j <= 8) + invariant(array_bound(t, 0, j, 0, 16))) { - t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); } - /* - * Make all implicit truncation explicit. No data is being - * truncated for the LHS's since each t[i] is 11-bit in size. - */ - r[11 * j + 0] = (t[0] >> 0) & 0xFF; - r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); - r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); - r[11 * j + 3] = (t[2] >> 2) & 0xFF; - r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); - r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); - r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); - r[11 * j + 7] = (t[5] >> 1) & 0xFF; - r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); - r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); - r[11 * j + 10] = (t[7] >> 3); + r[i * 4] = t[0] | (t[1] << 4); + r[i * 4 + 1] = t[2] | (t[3] << 4); + r[i * 4 + 2] = t[4] | (t[5] << 4); + r[i * 4 + 3] = t[6] | (t[7] << 4); } +} -#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a) +{ + unsigned j; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (j = 0; j < MLKEM_N / 4; j++) - __loop__(invariant(j >= 0 && j <= MLKEM_N / 4)) + __loop__(invariant(j <= MLKEM_N / 4)) { unsigned k; uint16_t t[4]; for (k = 0; k < 4; k++) __loop__( - invariant(k >= 0 && k <= 4) + invariant(k <= 4) invariant(forall(r, 0, k, t[r] < (1u << 10)))) { t[k] = scalar_compress_d10(a->coeffs[4 * j + k]); @@ -75,51 +73,35 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) r[5 * j + 3] = (t[2] >> 4) | ((t[3] << 6) & 0xFF); r[5 * j + 4] = (t[3] >> 2); } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" -#endif } - MLKEM_NATIVE_INTERNAL_API -void poly_decompress_du(poly *r, const 
uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]) { - unsigned j; -#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) - for (j = 0; j < MLKEM_N / 8; j++) + unsigned i; + for (i = 0; i < MLKEM_N / 2; i++) __loop__( - invariant(0 <= j && j <= MLKEM_N / 8) - invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q))) + invariant(i <= MLKEM_N / 2) + invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q))) { - int k; - uint16_t t[8]; - uint8_t const *base = &a[11 * j]; - t[0] = 0x7FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8)); - t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); - t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | - ((uint16_t)base[4] << 10)); - t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); - t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); - t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | - ((uint16_t)base[8] << 9)); - t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); - t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); - - for (k = 0; k < 8; k++) - __loop__( - invariant(0 <= k && k <= 8) - invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q))) - { - r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); - } + r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); + r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} + +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d10(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]) +{ + unsigned j; for (j = 0; j < MLKEM_N / 4; j++) __loop__( - invariant(0 <= j && j <= MLKEM_N / 4) + invariant(j <= MLKEM_N / 4) invariant(array_bound(r->coeffs, 0, 4 * j, 0, MLKEM_Q))) { - int k; + unsigned k; uint16_t t[4]; uint8_t const *base = &a[5 * j]; @@ -130,51 +112,33 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) for (k = 0; 
k < 4; k++) __loop__( - invariant(0 <= k && k <= 4) + invariant(k <= 4) invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, MLKEM_Q))) { r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]); } } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" -#endif + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 \ + || MLKEM_K == 3) */ +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 MLKEM_NATIVE_INTERNAL_API -void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) +void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); -#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) - for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) - { - unsigned j; - uint8_t t[8] = {0}; - for (j = 0; j < 8; j++) - __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) - invariant(array_bound(t, 0, j, 0, 16))) - { - t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); - } - - r[i * 4] = t[0] | (t[1] << 4); - r[i * 4 + 1] = t[2] | (t[3] << 4); - r[i * 4 + 2] = t[4] | (t[5] << 4); - r[i * 4 + 3] = t[6] | (t[7] << 4); - } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + __loop__(invariant(i <= MLKEM_N / 8)) { unsigned j; uint8_t t[8] = {0}; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i <= MLKEM_N / 8 && j <= 8) invariant(array_bound(t, 0, j, 0, 32))) { t[j] = scalar_compress_d5(a->coeffs[8 * i + j]); @@ -191,33 +155,57 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) r[i * 5 + 3] = 0xFF & ((t[4] >> 4) | (t[5] << 1) | (t[6] << 6)); r[i * 5 + 4] = 0xFF & ((t[6] >> 2) | (t[7] << 3)); } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 
160}" -#endif } MLKEM_NATIVE_INTERNAL_API -void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a) { - unsigned i; -#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) - for (i = 0; i < MLKEM_N / 2; i++) - __loop__( - invariant(i >= 0 && i <= MLKEM_N / 2) - invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q))) + unsigned j; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (j = 0; j < MLKEM_N / 8; j++) + __loop__(invariant(j <= MLKEM_N / 8)) { - r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); - r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); + unsigned k; + uint16_t t[8]; + for (k = 0; k < 8; k++) + __loop__( + invariant(k <= 8) + invariant(forall(r, 0, k, t[r] < (1u << 11)))) + { + t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + } + + /* + * Make all implicit truncation explicit. No data is being + * truncated for the LHS's since each t[i] is 11-bit in size. 
+ */ + r[11 * j + 0] = (t[0] >> 0) & 0xFF; + r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); + r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); + r[11 * j + 3] = (t[2] >> 2) & 0xFF; + r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); + r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); + r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); + r[11 * j + 7] = (t[5] >> 1) & 0xFF; + r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); + r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); + r[11 * j + 10] = (t[7] >> 3); } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) +} + +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]) +{ + unsigned i; for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q))) { unsigned j; uint8_t t[8]; - const int offset = i * 5; + const unsigned offset = i * 5; /* * Explicitly truncate to avoid warning about * implicit truncation in CBMC and unwind loop for ease @@ -240,29 +228,62 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) /* and copy to the correct slice in r[] */ for (j = 0; j < 8; j++) __loop__( - invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8) + invariant(j <= 8 && i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q))) { r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]); } } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 160}" -#endif - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d11(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]) +{ + unsigned j; + for (j = 0; j < MLKEM_N / 8; j++) + __loop__( + invariant(j <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q))) + { + unsigned k; + uint16_t t[8]; + uint8_t const *base = &a[11 * j]; + t[0] = 0x7FF & ((base[0] >> 0) | 
((uint16_t)base[1] << 8)); + t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); + t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | + ((uint16_t)base[4] << 10)); + t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); + t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); + t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | + ((uint16_t)base[8] << 9)); + t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); + t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); + + for (k = 0; k < 8; k++) + __loop__( + invariant(k <= 8) + invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q))) + { + r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); + } + } + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 */ + #if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES) MLKEM_NATIVE_INTERNAL_API void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); - + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_N / 2; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 2)) + __loop__(invariant(i <= MLKEM_N / 2)) { const uint16_t t0 = a->coeffs[2 * i]; const uint16_t t1 = a->coeffs[2 * i + 1]; @@ -290,7 +311,7 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) MLKEM_NATIVE_INTERNAL_API void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) { - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); poly_tobytes_native(r, a); } #endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */ @@ -302,7 +323,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) unsigned i; for (i = 0; i < MLKEM_N / 2; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 2) + invariant(i <= MLKEM_N / 2) invariant(array_bound(r->coeffs, 0, 2 * i, 0, UINT12_LIMIT))) { const uint8_t t0 = a[3 * i + 0]; @@ -313,7 +334,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) } /* Note that the coefficients are not 
canonical */ - POLY_UBOUND(r, 4096); + debug_assert_bound(r, MLKEM_N, 0, UINT12_LIMIT); } #else /* MLKEM_USE_NATIVE_POLY_FROMBYTES */ MLKEM_NATIVE_INTERNAL_API @@ -333,13 +354,13 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q))) { unsigned j; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i < MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i < MLKEM_N / 8 && j <= 8) invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q))) { /* Prevent the compiler from recognizing this as a bit selection */ @@ -347,23 +368,23 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) r->coeffs[8 * i + j] = ct_sel_int16(HALF_Q, 0, msg[i] & mask); } } - POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output"); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + __loop__(invariant(i <= MLKEM_N / 8)) { unsigned j; msg[i] = 0; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)) + invariant(i <= MLKEM_N / 8 && j <= 8)) { uint32_t t = scalar_compress_d1(a->coeffs[8 * i + j]); msg[i] |= t << j; @@ -371,104 +392,17 @@ void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) } } -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, - uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) -{ - ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN 
uint8_t extkey0[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1]; - memcpy(extkey0, seed, MLKEM_SYMBYTES); - memcpy(extkey1, seed, MLKEM_SYMBYTES); - memcpy(extkey2, seed, MLKEM_SYMBYTES); - memcpy(extkey3, seed, MLKEM_SYMBYTES); - extkey0[MLKEM_SYMBYTES] = nonce0; - extkey1[MLKEM_SYMBYTES] = nonce1; - extkey2[MLKEM_SYMBYTES] = nonce2; - extkey3[MLKEM_SYMBYTES] = nonce3; - prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3); - poly_cbd_eta1(r0, buf0); - poly_cbd_eta1(r1, buf1); - poly_cbd_eta1(r2, buf2); - poly_cbd_eta1(r3, buf3); - - POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0"); - POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 1"); - POLY_BOUND_MSG(r2, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 2"); - POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3"); -} - -#if MLKEM_K == 2 || MLKEM_K == 4 -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce) -{ - ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; - ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1]; - - memcpy(extkey, seed, MLKEM_SYMBYTES); - extkey[MLKEM_SYMBYTES] = nonce; - prf_eta2(buf, extkey); - - poly_cbd_eta2(r, buf); - - POLY_BOUND_MSG(r, MLKEM_ETA1 + 1, "poly_getnoise_eta2 output"); -} -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ - -#if MLKEM_K == 2 -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, - uint8_t nonce3) -{ - ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; - ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; - memcpy(extkey[0], seed, MLKEM_SYMBYTES); - memcpy(extkey[1], seed, MLKEM_SYMBYTES); - memcpy(extkey[2], seed, MLKEM_SYMBYTES); - 
memcpy(extkey[3], seed, MLKEM_SYMBYTES); - extkey[0][MLKEM_SYMBYTES] = nonce0; - extkey[1][MLKEM_SYMBYTES] = nonce1; - extkey[2][MLKEM_SYMBYTES] = nonce2; - extkey[3][MLKEM_SYMBYTES] = nonce3; - - prf_eta1(buf1[0], extkey[0]); - prf_eta1(buf1[1], extkey[1]); - prf_eta2(buf2[0], extkey[2]); - prf_eta2(buf2[1], extkey[3]); - - poly_cbd_eta1(r0, buf1[0]); - poly_cbd_eta1(r1, buf1[1]); - poly_cbd_eta2(r2, buf2[0]); - poly_cbd_eta2(r3, buf2[1]); - - POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 0"); - POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 1"); - POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2"); - POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 3"); -} -#endif /* MLKEM_K == 2 */ - MLKEM_NATIVE_INTERNAL_API void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, const poly_mulcache *b_cache) { unsigned i; - POLY_BOUND(b_cache, 4096); + debug_assert_bound(a, MLKEM_N, 0, UINT12_LIMIT); for (i = 0; i < MLKEM_N / 4; i++) __loop__( assigns(i, object_whole(r)) - invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(i <= MLKEM_N / 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i, 2 * MLKEM_Q))) { basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i], @@ -476,6 +410,8 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, basemul_cached(&r->coeffs[4 * i + 2], &a->coeffs[4 * i + 2], &b->coeffs[4 * i + 2], b_cache->coeffs[2 * i + 1]); } + + debug_assert_abs_bound(r, MLKEM_N, 2 * MLKEM_Q); } #if !defined(MLKEM_USE_NATIVE_POLY_TOMONT) @@ -486,20 +422,20 @@ void poly_tomont(poly *r) const int16_t f = (1ULL << 32) % MLKEM_Q; /* 1353 */ for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) - invariant(array_abs_bound(r->coeffs ,0, i, MLKEM_Q))) + invariant(i <= MLKEM_N) + invariant(array_abs_bound(r->coeffs, 0, i, MLKEM_Q))) { r->coeffs[i] = fqmul(r->coeffs[i], f); } - POLY_BOUND(r, MLKEM_Q); + 
debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_TOMONT */ MLKEM_NATIVE_INTERNAL_API void poly_tomont(poly *r) { poly_tomont_native(r); - POLY_BOUND(r, MLKEM_Q); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } #endif /* MLKEM_USE_NATIVE_POLY_TOMONT */ @@ -510,7 +446,7 @@ void poly_reduce(poly *r) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(array_bound(r->coeffs, 0, i, 0, MLKEM_Q))) { /* Barrett reduction, giving signed canonical representative */ @@ -519,14 +455,14 @@ void poly_reduce(poly *r) r->coeffs[i] = scalar_signed_to_unsigned_q(t); } - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_REDUCE */ MLKEM_NATIVE_INTERNAL_API void poly_reduce(poly *r) { poly_reduce_native(r); - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } #endif /* MLKEM_USE_NATIVE_POLY_REDUCE */ @@ -536,7 +472,7 @@ void poly_add(poly *r, const poly *b) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1]))) { @@ -550,7 +486,7 @@ void poly_sub(poly *r, const poly *b) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1]))) { @@ -564,20 +500,36 @@ void poly_mulcache_compute(poly_mulcache *x, const poly *a) { unsigned i; for (i = 0; i < MLKEM_N / 4; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 4)) + __loop__( + invariant(i <= MLKEM_N / 4) + invariant(array_abs_bound(x->coeffs, 0, 2 * i, MLKEM_Q))) { x->coeffs[2 * i + 0] = fqmul(a->coeffs[4 * i + 1], zetas[64 + 
i]); x->coeffs[2 * i + 1] = fqmul(a->coeffs[4 * i + 3], -zetas[64 + i]); } - POLY_BOUND(x, MLKEM_Q); + + /* + * This bound is true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus omitted + * from the spec to not unnecessarily constrain native + * implementations, but checked here nonetheless. + */ + debug_assert_abs_bound(x, MLKEM_N / 2, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ MLKEM_NATIVE_INTERNAL_API void poly_mulcache_compute(poly_mulcache *x, const poly *a) { poly_mulcache_compute_native(x, a); - /* Omitting POLY_BOUND(x, MLKEM_Q) since native implementations may + /* Omitting bounds assertion since native implementations may * decide not to use a mulcache. Note that the C backend implementation * of poly_basemul_montgomery_cached() does still include the check. */ } #endif /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_poly MLKEM_NAMESPACE_K(empty_cu_poly) +int empty_cu_poly; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/poly.h index 1e8c109c6..6a14c785d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/poly.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/poly.h @@ -307,112 +307,164 @@ __contract__( ************************************************************/ static INLINE uint16_t scalar_signed_to_unsigned_q(int16_t c) __contract__( - requires(c >= -(MLKEM_Q - 1) && c <= (MLKEM_Q - 1)) - ensures(return_value >= 0 && return_value <= (MLKEM_Q - 1)) + requires(c > -MLKEM_Q && c < MLKEM_Q) + ensures(return_value >= 0 && return_value < MLKEM_Q) ensures(return_value == (int32_t)c + (((int32_t)c < 0) * MLKEM_Q))) { + debug_assert_abs_bound(&c, 1, MLKEM_Q); + /* Add Q if c is negative, but in constant time */ c = ct_sel_int16(c + MLKEM_Q, c, ct_cmask_neg_i16(c)); - cassert(c >= 0, 
"scalar_signed_to_unsigned_q result lower bound"); - cassert(c < MLKEM_Q, "scalar_signed_to_unsigned_q result upper bound"); - /* and therefore cast to uint16_t is safe. */ + debug_assert_bound(&c, 1, 0, MLKEM_Q); return (uint16_t)c; } -#define poly_compress_du MLKEM_NAMESPACE(poly_compress_du) +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || \ + (MLKEM_K == 2 || MLKEM_K == 3) +#define poly_compress_d4 MLKEM_NAMESPACE(poly_compress_d4) /************************************************* - * Name: poly_compress_du + * Name: poly_compress_d4 * - * Description: Compression (du bits) and subsequent serialization of a - *polynomial + * Description: Compression (4 bits) and subsequent serialization of a + * polynomial * * Arguments: - uint8_t *r: pointer to output byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes) * - const poly *a: pointer to input polynomial * Coefficients must be unsigned canonical, * i.e. in [0,1,..,MLKEM_Q-1]. **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) -__contract__( - requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) - requires(memory_no_alias(a, sizeof(poly))) - requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) -); +void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a); + +#define poly_compress_d10 MLKEM_NAMESPACE(poly_compress_d10) +/************************************************* + * Name: poly_compress_d10 + * + * Description: Compression (10 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a); -#define poly_decompress_du MLKEM_NAMESPACE(poly_decompress_du) +#define poly_decompress_d4 MLKEM_NAMESPACE(poly_decompress_d4) /************************************************* - * Name: poly_decompress_du + * Name: poly_decompress_d4 * - * Description: De-serialization and subsequent decompression (du bits) of a - *polynomial; approximate inverse of poly_compress_du + * Description: De-serialization and subsequent decompression (4 bits) of a + * polynomial; approximate inverse of poly_compress_d4 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *a: pointer to input byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES bytes) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes) * * Upon return, the coefficients of the output polynomial are unsigned-canonical * (non-negative and smaller than MLKEM_Q). * **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) -__contract__( - requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) - requires(memory_no_alias(r, sizeof(poly))) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) -); +void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]); -#define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv) +#define poly_decompress_d10 MLKEM_NAMESPACE(poly_decompress_d10) /************************************************* - * Name: poly_compress_dv + * Name: poly_decompress_d10 + * + * Description: De-serialization and subsequent decompression (10 bits) of a + * polynomial; approximate inverse of poly_compress_d10 * - * Description: Compression (dv bits) and subsequent serialization of a - *polynomial + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t 
*a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d10(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]); +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 \ + || MLKEM_K == 3) */ + +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 +#define poly_compress_d5 MLKEM_NAMESPACE(poly_compress_d5) +/************************************************* + * Name: poly_compress_d5 + * + * Description: Compression (5 bits) and subsequent serialization of a + * polynomial * * Arguments: - uint8_t *r: pointer to output byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES_DV) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes) * - const poly *a: pointer to input polynomial * Coefficients must be unsigned canonical, * i.e. in [0,1,..,MLKEM_Q-1]. 
**************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) -__contract__( - requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) - requires(memory_no_alias(a, sizeof(poly))) - requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - assigns(object_whole(r)) -); +void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a); -#define poly_decompress_dv MLKEM_NAMESPACE(poly_decompress_dv) +#define poly_compress_d11 MLKEM_NAMESPACE(poly_compress_d11) /************************************************* - * Name: poly_decompress_dv + * Name: poly_compress_d11 + * + * Description: Compression (11 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a); + +#define poly_decompress_d5 MLKEM_NAMESPACE(poly_decompress_d5) +/************************************************* + * Name: poly_decompress_d5 * * Description: De-serialization and subsequent decompression (dv bits) of a - *polynomial; approximate inverse of poly_compress + * polynomial; approximate inverse of poly_compress * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *a: pointer to input byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES_DV - *bytes) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes) * * Upon return, the coefficients of the output polynomial are unsigned-canonical * (non-negative and smaller than MLKEM_Q). 
* **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) -__contract__( - requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) - requires(memory_no_alias(r, sizeof(poly))) - assigns(object_whole(r)) - ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) -); +void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]); + +#define poly_decompress_d11 MLKEM_NAMESPACE(poly_decompress_d11) +/************************************************* + * Name: poly_decompress_d11 + * + * Description: De-serialization and subsequent decompression (11 bits) of a + * polynomial; approximate inverse of poly_compress_d11 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d11(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]); +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 \ + */ #define poly_tobytes MLKEM_NAMESPACE(poly_tobytes) /************************************************* @@ -500,144 +552,6 @@ __contract__( assigns(object_whole(msg)) ); -#define poly_getnoise_eta1_4x MLKEM_NAMESPACE(poly_getnoise_eta1_4x) -/************************************************* - * Name: poly_getnoise_eta1_4x - * - * Description: Batch sample four polynomials deterministically from a seed - * and nonces, with output polynomials close to centered binomial distribution - * with parameter MLKEM_ETA1. 
- * - * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce{0,1,2,3}: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, - uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) -/* Depending on MLKEM_K, the pointers passed to this function belong - to the same objects, so we cannot use memory_no_alias for r0-r3. - - NOTE: Somehow it is important to use memory_no_alias() first in the - conjunctions defining each case. -*/ -#if MLKEM_K == 2 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ - (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && - r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#elif MLKEM_K == 4 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case B: r0, r1, r2, r3 consecutive */ - (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 
1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#elif MLKEM_K == 3 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case C: r0, r1, r2 consecutive */ - (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && - r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#endif /* MLKEM_K */ - -#if MLKEM_ETA1 == MLKEM_ETA2 -/* - * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 - * where MLKEM_ETA2 = MLKEM_ETA1 = 2. - * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. - */ -#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x -#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ - -#if MLKEM_K == 2 || MLKEM_K == 4 -#define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2) -/************************************************* - * Name: poly_getnoise_eta2 - * - * Description: Sample a polynomial deterministically from a seed and a nonce, - * with output polynomial close to centered binomial distribution - * with parameter MLKEM_ETA2 - * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - assigns(object_whole(r)) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, 
MLKEM_ETA2 + 1)) -); -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ - -#if MLKEM_K == 2 -#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x) -/************************************************* - * Name: poly_getnoise_eta1122_4x - * - * Description: Batch sample four polynomials deterministically from a seed - * and a nonces, with output polynomials close to centered binomial - * distribution with parameter MLKEM_ETA1 and MLKEM_ETA2 - * - * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce{0,1,2,3}: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, - uint8_t nonce3) -__contract__( - requires( /* r0, r1 consecutive, r2, r3 consecutive */ - (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && - r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) - ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)); -); -#endif /* MLKEM_K == 2 */ - #define poly_basemul_montgomery_cached \ MLKEM_NAMESPACE(poly_basemul_montgomery_cached) /************************************************* @@ -649,8 +563,7 @@ __contract__( * Bounds: * - a is assumed to be coefficient-wise < q in absolute value. * - * The result is coefficient-wise bound by 3/2 q in absolute - * value. + * The result is coefficient-wise bound by 2*q in absolute value. 
* * Arguments: - poly *r: pointer to output polynomial * - const poly *a: pointer to first input polynomial @@ -802,4 +715,4 @@ __contract__( assigns(object_whole(r)) ); -#endif +#endif /* POLY_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/polyvec.c index 7d2016773..50ea1c34a 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/polyvec.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/polyvec.c @@ -4,18 +4,29 @@ */ #include "polyvec.h" #include +#include #include "arith_backend.h" +#include "cbd.h" #include "ntt.h" #include "poly.h" +#include "symmetric.h" -#include "debug/debug.h" +#include "debug.h" + +/* Static namespacing + * This is to facilitate building multiple instances + * of mlkem-native (e.g. with varying security levels) + * within a single compilation unit. */ +#define poly_cbd_eta1 MLKEM_NAMESPACE_K(poly_cbd_eta1) +#define poly_cbd_eta2 MLKEM_NAMESPACE_K(poly_cbd_eta2) +/* End of static namespacing */ MLKEM_NATIVE_INTERNAL_API void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU], const polyvec *a) { unsigned i; - POLYVEC_UBOUND(a, MLKEM_Q); + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_K; i++) { @@ -33,13 +44,15 @@ void polyvec_decompress_du(polyvec *r, poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU); } - POLYVEC_UBOUND(r, MLKEM_Q); + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a) { unsigned i; + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q); + for (i = 0; i < MLKEM_K; i++) { poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]); @@ -54,6 +67,8 @@ void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES]) { poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES); } + + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); } MLKEM_NATIVE_INTERNAL_API @@ -64,6 +79,8 @@ 
void polyvec_ntt(polyvec *r) { poly_ntt(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, NTT_BOUND); } MLKEM_NATIVE_INTERNAL_API @@ -74,6 +91,8 @@ void polyvec_invntt_tomont(polyvec *r) { poly_invntt_tomont(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, INVNTT_BOUND); } #if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) @@ -84,10 +103,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, { unsigned i; poly t; - - POLYVEC_BOUND(a, 4096); - POLYVEC_BOUND(b, NTT_BOUND); - POLYVEC_BOUND(b_cache, MLKEM_Q); + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); poly_basemul_montgomery_cached(r, &a->vec[0], &b->vec[0], &b_cache->vec[0]); for (i = 1; i < MLKEM_K; i++) @@ -95,18 +111,15 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, poly_basemul_montgomery_cached(&t, &a->vec[i], &b->vec[i], &b_cache->vec[i]); poly_add(r, &t); - /* abs bounds: < (i+1) * 3/2 * q */ } /* - * Those bounds are true for the C implementation, but not needed - * in the higher level bounds reasoning. It is thus best to omit - * them from the spec to not unnecessarily constraint native implementations. + * This bound is true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus omitted + * from the spec to not unnecessarily constrain native + * implementations, but checked here nonetheless. 
*/ - cassert(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_K * 2 * MLKEM_Q), - "polyvec_basemul_acc_montgomery_cached output bounds"); - /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */ - POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_K * 2 * MLKEM_Q); } #else /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */ MLKEM_NATIVE_INTERNAL_API @@ -114,9 +127,8 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, const polyvec *b, const polyvec_mulcache *b_cache) { - POLYVEC_BOUND(a, 4096); - POLYVEC_BOUND(b, NTT_BOUND); - /* Omitting POLYVEC_BOUND(b_cache, MLKEM_Q) since native implementations may + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); + /* Omitting bounds assertion for cache since native implementations may * decide not to use a mulcache. Note that the C backend implementation * of poly_basemul_montgomery_cached() does still include the check. */ polyvec_basemul_acc_montgomery_cached_native(r, a, b, b_cache); @@ -149,6 +161,8 @@ void polyvec_reduce(polyvec *r) { poly_reduce(&r->vec[i]); } + + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API @@ -169,4 +183,148 @@ void polyvec_tomont(polyvec *r) { poly_tomont(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, MLKEM_Q); +} + + +/************************************************* + * Name: poly_cbd_eta1 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA1. 
+ * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static INLINE void poly_cbd_eta1(poly *r, + const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)) +) +{ +#if MLKEM_ETA1 == 2 + poly_cbd2(r, buf); +#elif MLKEM_ETA1 == 3 + poly_cbd3(r, buf); +#else +#error "Invalid value of MLKEM_ETA1" +#endif +} + +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +{ + ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t extkey0[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1]; + memcpy(extkey0, seed, MLKEM_SYMBYTES); + memcpy(extkey1, seed, MLKEM_SYMBYTES); + memcpy(extkey2, seed, MLKEM_SYMBYTES); + memcpy(extkey3, seed, MLKEM_SYMBYTES); + extkey0[MLKEM_SYMBYTES] = nonce0; + extkey1[MLKEM_SYMBYTES] = nonce1; + extkey2[MLKEM_SYMBYTES] = nonce2; + extkey3[MLKEM_SYMBYTES] = nonce3; + prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3); + poly_cbd_eta1(r0, buf0); + poly_cbd_eta1(r1, buf1); + poly_cbd_eta1(r2, buf2); + poly_cbd_eta1(r3, buf3); + + debug_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA1 + 1); +} + +#if MLKEM_K == 2 || MLKEM_K == 4 
+/************************************************* + * Name: poly_cbd_eta2 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA2. + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static INLINE void poly_cbd_eta2(poly *r, + const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1))) +{ +#if MLKEM_ETA2 == 2 + poly_cbd2(r, buf); +#else +#error "Invalid value of MLKEM_ETA2" +#endif +} + +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +{ + ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1]; + + memcpy(extkey, seed, MLKEM_SYMBYTES); + extkey[MLKEM_SYMBYTES] = nonce; + prf_eta2(buf, extkey); + + poly_cbd_eta2(r, buf); + + debug_assert_abs_bound(r, MLKEM_N, MLKEM_ETA2 + 1); +} +#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ + + +#if MLKEM_K == 2 +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +{ + ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; + memcpy(extkey[0], seed, MLKEM_SYMBYTES); + memcpy(extkey[1], seed, MLKEM_SYMBYTES); + memcpy(extkey[2], seed, MLKEM_SYMBYTES); + memcpy(extkey[3], seed, MLKEM_SYMBYTES); + extkey[0][MLKEM_SYMBYTES] = nonce0; + extkey[1][MLKEM_SYMBYTES] = nonce1; + extkey[2][MLKEM_SYMBYTES] = nonce2; +
extkey[3][MLKEM_SYMBYTES] = nonce3; + + prf_eta1(buf1[0], extkey[0]); + prf_eta1(buf1[1], extkey[1]); + prf_eta2(buf2[0], extkey[2]); + prf_eta2(buf2[1], extkey[3]); + + poly_cbd_eta1(r0, buf1[0]); + poly_cbd_eta1(r1, buf1[1]); + poly_cbd_eta2(r2, buf2[0]); + poly_cbd_eta2(r3, buf2[1]); + + debug_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA2 + 1); + debug_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA2 + 1); } +#endif /* MLKEM_K == 2 */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/polyvec.h index 138724150..8be8579e0 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/polyvec.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/polyvec.h @@ -9,19 +9,144 @@ #include "common.h" #include "poly.h" -#define polyvec MLKEM_NAMESPACE(polyvec) +#define polyvec MLKEM_NAMESPACE_K(polyvec) typedef struct { poly vec[MLKEM_K]; } ALIGN polyvec; -#define polyvec_mulcache MLKEM_NAMESPACE(polyvec_mulcache) +#define polyvec_mulcache MLKEM_NAMESPACE_K(polyvec_mulcache) typedef struct { poly_mulcache vec[MLKEM_K]; } polyvec_mulcache; -#define polyvec_compress_du MLKEM_NAMESPACE(polyvec_compress_du) +#define poly_compress_du MLKEM_NAMESPACE_K(poly_compress_du) +/************************************************* + * Name: poly_compress_du + * + * Description: Compression (du bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +static INLINE void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], + const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU))) +{ +#if MLKEM_DU == 10 + poly_compress_d10(r, a); +#elif MLKEM_DU == 11 + poly_compress_d11(r, a); +#else +#error "Invalid value of MLKEM_DU" +#endif +} + +#define poly_decompress_du MLKEM_NAMESPACE_K(poly_decompress_du) +/************************************************* + * Name: poly_decompress_du + * + * Description: De-serialization and subsequent decompression (du bits) of a + * polynomial; approximate inverse of poly_compress_du + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). 
+ * + **************************************************/ +static INLINE void poly_decompress_du( + poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +{ +#if MLKEM_DU == 10 + poly_decompress_d10(r, a); +#elif MLKEM_DU == 11 + poly_decompress_d11(r, a); +#else +#error "Invalid value of MLKEM_DU" +#endif +} + +#define poly_compress_dv MLKEM_NAMESPACE_K(poly_compress_dv) +/************************************************* + * Name: poly_compress_dv + * + * Description: Compression (dv bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +static INLINE void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], + const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(object_whole(r))) +{ +#if MLKEM_DV == 4 + poly_compress_d4(r, a); +#elif MLKEM_DV == 5 + poly_compress_d5(r, a); +#else +#error "Invalid value of MLKEM_DV" +#endif +} + + +#define poly_decompress_dv MLKEM_NAMESPACE_K(poly_decompress_dv) +/************************************************* + * Name: poly_decompress_dv + * + * Description: De-serialization and subsequent decompression (dv bits) of a + * polynomial; approximate inverse of poly_compress + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). 
+ * + **************************************************/ +static INLINE void poly_decompress_dv( + poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(object_whole(r)) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +{ +#if MLKEM_DV == 4 + poly_decompress_d4(r, a); +#elif MLKEM_DV == 5 + poly_decompress_d5(r, a); +#else +#error "Invalid value of MLKEM_DV" +#endif +} + +#define polyvec_compress_du MLKEM_NAMESPACE_K(polyvec_compress_du) /************************************************* * Name: polyvec_compress_du * @@ -44,7 +169,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_decompress_du MLKEM_NAMESPACE(polyvec_decompress_du) +#define polyvec_decompress_du MLKEM_NAMESPACE_K(polyvec_decompress_du) /************************************************* * Name: polyvec_decompress_du * @@ -67,7 +192,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) ); -#define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes) +#define polyvec_tobytes MLKEM_NAMESPACE_K(polyvec_tobytes) /************************************************* * Name: polyvec_tobytes * @@ -88,7 +213,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_frombytes MLKEM_NAMESPACE(polyvec_frombytes) +#define polyvec_frombytes MLKEM_NAMESPACE_K(polyvec_frombytes) /************************************************* * Name: polyvec_frombytes * @@ -110,7 +235,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, UINT12_LIMIT))) ); -#define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt) +#define polyvec_ntt MLKEM_NAMESPACE_K(polyvec_ntt) /************************************************* * Name: polyvec_ntt * @@ -136,7 +261,7 @@ __contract__( array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, NTT_BOUND))) ); -#define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont) +#define polyvec_invntt_tomont 
MLKEM_NAMESPACE_K(polyvec_invntt_tomont) /************************************************* * Name: polyvec_invntt_tomont * @@ -162,7 +287,7 @@ __contract__( ); #define polyvec_basemul_acc_montgomery \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery) /************************************************* * Name: polyvec_basemul_acc_montgomery * @@ -186,7 +311,7 @@ __contract__( #define polyvec_basemul_acc_montgomery_cached \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached) /************************************************* * Name: polyvec_basemul_acc_montgomery_cached * @@ -194,7 +319,7 @@ __contract__( * using mulcache for second operand. * * Bounds: - * - a is assumed to be coefficient-wise < 4096 in absolute value. + * - Every coefficient of a is assumed to be in [0..4095] * - No bounds guarantees for the coefficients in the result. * * Arguments: - poly *r: pointer to output polynomial @@ -218,7 +343,7 @@ __contract__( assigns(memory_slice(r, sizeof(poly))) ); -#define polyvec_mulcache_compute MLKEM_NAMESPACE(polyvec_mulcache_compute) +#define polyvec_mulcache_compute MLKEM_NAMESPACE_K(polyvec_mulcache_compute) /************************************************************ * Name: polyvec_mulcache_compute * @@ -252,7 +377,7 @@ __contract__( assigns(object_whole(x)) ); -#define polyvec_reduce MLKEM_NAMESPACE(polyvec_reduce) +#define polyvec_reduce MLKEM_NAMESPACE_K(polyvec_reduce) /************************************************* * Name: polyvec_reduce * @@ -278,7 +403,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) ); -#define polyvec_add MLKEM_NAMESPACE(polyvec_add) +#define polyvec_add MLKEM_NAMESPACE_K(polyvec_add) /************************************************* * Name: polyvec_add * @@ -309,7 +434,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_tomont MLKEM_NAMESPACE(polyvec_tomont) +#define 
polyvec_tomont MLKEM_NAMESPACE_K(polyvec_tomont) /************************************************* * Name: polyvec_tomont * @@ -329,4 +454,142 @@ __contract__( array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, MLKEM_Q))) ); +#define poly_getnoise_eta1_4x MLKEM_NAMESPACE_K(poly_getnoise_eta1_4x) +/************************************************* + * Name: poly_getnoise_eta1_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial distribution + * with parameter MLKEM_ETA1. + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +/* Depending on MLKEM_K, the pointers passed to this function belong + to the same objects, so we cannot use memory_no_alias for r0-r3. + + NOTE: Somehow it is important to use memory_no_alias() first in the + conjunctions defining each case. 
+*/ +#if MLKEM_K == 2 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#elif MLKEM_K == 4 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case B: r0, r1, r2, r3 consecutive */ + (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#elif MLKEM_K == 3 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case C: r0, r1, r2 consecutive */ + (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && + r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) 
+ && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#endif /* MLKEM_K */ + +#if MLKEM_ETA1 == MLKEM_ETA2 +/* + * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 + * where MLKEM_ETA2 = MLKEM_ETA1 = 2. + * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. + */ +#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x +#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ + +#if MLKEM_K == 2 || MLKEM_K == 4 +#define poly_getnoise_eta2 MLKEM_NAMESPACE_K(poly_getnoise_eta2) +/************************************************* + * Name: poly_getnoise_eta2 + * + * Description: Sample a polynomial deterministically from a seed and a nonce, + * with output polynomial close to centered binomial distribution + * with parameter MLKEM_ETA2 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce: one-byte input nonce + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r)) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)) +); +#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ + +#if MLKEM_K == 2 +#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE_K(poly_getnoise_eta1122_4x) +/************************************************* + * Name: poly_getnoise_eta1122_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial + * distribution with parameter MLKEM_ETA1 and MLKEM_ETA2 + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce +
**************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +__contract__( + requires( /* r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) + ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)); +); +#endif /* MLKEM_K == 2 */ + #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/reduce.h index 1f502167e..b432a4201 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/reduce.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/reduce.h @@ -8,7 +8,7 @@ #include #include "cbmc.h" #include "common.h" -#include "debug/debug.h" +#include "debug.h" /* Static namespacing * This is to facilitate building multiple instances @@ -109,13 +109,13 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a) **************************************************/ static INLINE int16_t montgomery_reduce(int32_t a) __contract__( - requires(a > -(2 * 4096 * 32768)) - requires(a < (2 * 4096 * 32768)) + requires(a > -(2 * UINT12_LIMIT * 32768)) + requires(a < (2 * UINT12_LIMIT * 32768)) ensures(return_value > -2 * MLKEM_Q && return_value < 2 * MLKEM_Q) ) { int16_t res; - SCALAR_BOUND(a, 2 * UINT12_LIMIT * 32768, "montgomery_reduce input"); + debug_assert_abs_bound(&a, 1, 2 * UINT12_LIMIT * 32768); res = montgomery_reduce_generic(a); /* Bounds: @@ -124,7 +124,7 @@ 
__contract__( * <= UINT12_LIMIT + (MLKEM_Q + 1) / 2 * < 2 * MLKEM_Q */ - SCALAR_BOUND(res, 2 * MLKEM_Q, "montgomery_reduce output"); + debug_assert_abs_bound(&res, 1, 2 * MLKEM_Q); return res; } @@ -150,7 +150,7 @@ __contract__( ) { int16_t res; - SCALAR_BOUND(b, HALF_Q, "fqmul input"); + debug_assert_abs_bound(&b, 1, HALF_Q); res = montgomery_reduce((int32_t)a * (int32_t)b); /* Bounds: @@ -160,7 +160,7 @@ __contract__( * < MLKEM_Q */ - SCALAR_BOUND(res, MLKEM_Q, "fqmul output"); + debug_assert_abs_bound(&res, 1, MLKEM_Q); return res; } @@ -200,7 +200,10 @@ __contract__( * t is in -10 .. +10, so we need 32-bit math to * evaluate t * MLKEM_Q and the subsequent subtraction */ - return (int16_t)(a - t * MLKEM_Q); + int16_t res = (int16_t)(a - t * MLKEM_Q); + + debug_assert_abs_bound(&res, 1, HALF_Q); + return res; } #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/rej_uniform.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/rej_uniform.c index 918986e9b..cbbe4407f 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/rej_uniform.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/rej_uniform.c @@ -2,46 +2,24 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) -#include "rej_uniform.h" #include "arith_backend.h" +#include "debug.h" +#include "fips202.h" +#include "fips202x4.h" +#include "rej_uniform.h" +#include "symmetric.h" /* Static namespacing * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. 
*/ +#define rej_uniform MLKEM_NAMESPACE(rej_uniform) #define rej_uniform_scalar MLKEM_NAMESPACE(rej_uniform_scalar) /* End of static namespacing */ -/************************************************* - * Name: rej_uniform_scalar - * - * Description: Run rejection sampling on uniform random bytes to generate - * uniform random integers mod q - * - * Arguments: - int16_t *r: pointer to output buffer - * - unsigned int target: requested number of 16-bit integers - * (uniform mod q). - * Must be <= 4096. - * - unsigned int offset: number of 16-bit integers that have - * already been sampled. - * Must be <= target. - * - const uint8_t *buf: pointer to input buffer - * (assumed to be uniform random bytes) - * - unsigned int buflen: length of input buffer in bytes - * Must be <= 4096. - * Must be a multiple of 3. - * - * Note: Strictly speaking, only a few values of buflen near UINT_MAX need - * excluding. The limit of 4096 is somewhat arbitary but sufficient for all - * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. - * - * Returns the new offset of sampled 16-bit integers, at most target, - * and at least the initial offset. - * If the new offset is strictly less than len, all of the input buffers - * is guaranteed to have been consumed. If it is equal to len, no information - * is provided on how many bytes of the input buffer have been consumed. 
- **************************************************/ static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target, unsigned int offset, const uint8_t *buf, unsigned int buflen) @@ -58,6 +36,8 @@ __contract__( unsigned int ctr, pos; uint16_t val0, val1; + debug_assert_bound(r, offset, 0, MLKEM_Q); + ctr = offset; pos = 0; /* pos + 3 cannot overflow due to the assumption buflen <= 4096 */ @@ -79,28 +59,183 @@ __contract__( r[ctr++] = val1; } } + + debug_assert_bound(r, ctr, 0, MLKEM_Q); return ctr; } #if !defined(MLKEM_USE_NATIVE_REJ_UNIFORM) -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +/************************************************* + * Name: rej_uniform + * + * Description: Run rejection sampling on uniform random bytes to generate + * uniform random integers mod q + * + * Arguments: - int16_t *r: pointer to output buffer + * - unsigned int target: requested number of 16-bit integers + * (uniform mod q). + * Must be <= 4096. + * - unsigned int offset: number of 16-bit integers that have + * already been sampled. + * Must be <= target. + * - const uint8_t *buf: pointer to input buffer + * (assumed to be uniform random bytes) + * - unsigned int buflen: length of input buffer in bytes + * Must be <= 4096. + * Must be a multiple of 3. + * + * Note: Strictly speaking, only a few values of buflen near UINT_MAX need + * excluding. The limit of 4096 is somewhat arbitary but sufficient for all + * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. + * + * Returns the new offset of sampled 16-bit integers, at most target, + * and at least the initial offset. + * If the new offset is strictly less than len, all of the input buffers + * is guaranteed to have been consumed. If it is equal to len, no information + * is provided on how many bytes of the input buffer have been consumed. 
+ **************************************************/ + +/* + * NOTE: The signature differs from the Kyber reference implementation + * in that it adds the offset and always expects the base of the target + * buffer. This avoids shifting the buffer base in the caller, which appears + * tricky to reason about. + */ +static unsigned int rej_uniform(int16_t *r, unsigned int target, + unsigned int offset, const uint8_t *buf, + unsigned int buflen) +__contract__( + requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) + requires(memory_no_alias(r, sizeof(int16_t) * target)) + requires(memory_no_alias(buf, buflen)) + requires(offset > 0 ==> array_bound(r, 0, offset, 0, MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * target)) + ensures(offset <= return_value && return_value <= target) + ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, MLKEM_Q)) +) { return rej_uniform_scalar(r, target, offset, buf, buflen); } #else /* MLKEM_USE_NATIVE_REJ_UNIFORM */ - -MLKEM_NATIVE_INTERNAL_API -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +static unsigned int rej_uniform(int16_t *r, unsigned int target, + unsigned int offset, const uint8_t *buf, + unsigned int buflen) { int ret; /* Sample from large buffer with full lane as much as possible. 
*/ ret = rej_uniform_native(r + offset, target - offset, buf, buflen); if (ret != -1) - return offset + (unsigned)ret; + { + unsigned res = offset + (unsigned)ret; + debug_assert_bound(r, res, 0, MLKEM_Q); + return res; + } return rej_uniform_scalar(r, target, offset, buf, buflen); } #endif /* MLKEM_USE_NATIVE_REJ_UNIFORM */ + +#ifndef MLKEM_GEN_MATRIX_NBLOCKS +#define MLKEM_GEN_MATRIX_NBLOCKS \ + ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + XOF_RATE) / XOF_RATE) +#endif + +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform_x4(poly *vec, uint8_t *seed[4]) +{ + /* Temporary buffers for XOF output before rejection sampling */ + uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + + /* Tracks the number of coefficients we have already sampled */ + unsigned int ctr[KECCAK_WAY]; + xof_x4_ctx statex; + unsigned int buflen; + + shake128x4_inc_init(&statex); + + /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */ + xof_x4_absorb(&statex, seed[0], seed[1], seed[2], seed[3], + MLKEM_SYMBYTES + 2); + + /* + * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + * This should generate the matrix entries with high probability. + */ + xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, + &statex); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); + + /* + * So long as not all matrix entries have been generated, squeeze + * one more block a time until we're done. 
+ */ + buflen = XOF_RATE; + while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || + ctr[3] < MLKEM_N) + __loop__( + assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), + object_whole(buf1), object_whole(buf2), object_whole(buf3)) + invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) + invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) + invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, MLKEM_Q)) + invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, MLKEM_Q)) + invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, MLKEM_Q)) + invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, MLKEM_Q))) + { + xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); + } + + xof_x4_release(&statex); +} + +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) +{ + xof_ctx state; + uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + unsigned int ctr, buflen; + + shake128_inc_init(&state); + + xof_absorb(&state, seed, MLKEM_SYMBYTES + 2); + + /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + */ + /* This should generate the matrix entry with high probability. 
*/ + xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; + ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); + + /* Squeeze + sample one more block a time until we're done */ + buflen = XOF_RATE; + while (ctr < MLKEM_N) + __loop__( + assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) + invariant(ctr <= MLKEM_N) + invariant(array_bound(entry->coeffs, 0, ctr, 0, MLKEM_Q))) + { + xof_squeezeblocks(buf, 1, &state); + ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen); + } + + xof_release(&state); +} + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_rej_uniform MLKEM_NAMESPACE_K(empty_cu_rej_uniform) +int empty_cu_rej_uniform; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/rej_uniform.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/rej_uniform.h index 13db836bc..801287259 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/rej_uniform.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/rej_uniform.h @@ -9,54 +9,55 @@ #include #include "cbmc.h" #include "common.h" +#include "poly.h" -#define rej_uniform MLKEM_NAMESPACE(rej_uniform) +#define poly_rej_uniform_x4 MLKEM_NAMESPACE(poly_rej_uniform_x4) /************************************************* - * Name: rej_uniform + * Name: poly_rej_uniform_x4 * - * Description: Run rejection sampling on uniform random bytes to generate - * uniform random integers mod q + * Description: Generate four polynomials using rejection sampling + * on (pseudo-)uniformly random bytes sampled from a seed. * - * Arguments: - int16_t *r: pointer to output buffer - * - unsigned int target: requested number of 16-bit integers - * (uniform mod q). - * Must be <= 4096. - * - unsigned int offset: number of 16-bit integers that have - * already been sampled. - * Must be <= target. 
- * - const uint8_t *buf: pointer to input buffer - * (assumed to be uniform random bytes) - * - unsigned int buflen: length of input buffer in bytes - * Must be <= 4096. - * Must be a multiple of 3. + * Arguments: - poly *vec: Pointer to an array of 4 polynomials + * to be sampled. + * - uint8_t *seed[4]: Pointer to array of four pointers + * pointing to the seed buffers of size + * MLKEM_SYMBYTES + 2 each. * - * Note: Strictly speaking, only a few values of buflen near UINT_MAX need - * excluding. The limit of 4096 is somewhat arbitary but sufficient for all - * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. - * - * Returns the new offset of sampled 16-bit integers, at most target, - * and at least the initial offset. - * If the new offset is strictly less than len, all of the input buffers - * is guaranteed to have been consumed. If it is equal to len, no information - * is provided on how many bytes of the input buffer have been consumed. **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform_x4(poly *vec, uint8_t *seed[4]) +__contract__( + requires(memory_no_alias(vec, sizeof(poly) * 4)) + requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) + requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) + assigns(memory_slice(vec, sizeof(poly) * 4)) + ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, MLKEM_Q))); -/* - * NOTE: The signature differs from the Kyber reference implementation - * in that it adds the offset and always expects the base of the target - * buffer. 
This avoids shifting the buffer base in the caller, which appears - * tricky to reason about. - */ +#define poly_rej_uniform MLKEM_NAMESPACE(poly_rej_uniform) +/************************************************* + * Name: poly_rej_uniform + * + * Description: Generate polynomial using rejection sampling + * on (pseudo-)uniformly random bytes sampled from a seed. + * + * Arguments: - poly *vec: Pointer to polynomial to be sampled. + * - uint8_t *seed: Pointer to seed buffer of size + * MLKEM_SYMBYTES + 2 each. + * + **************************************************/ MLKEM_NATIVE_INTERNAL_API -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +void poly_rej_uniform(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) __contract__( - requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) - requires(memory_no_alias(r, sizeof(int16_t) * target)) - requires(memory_no_alias(buf, buflen)) - requires(offset > 0 ==> array_bound(r, 0, offset, 0, MLKEM_Q)) - assigns(memory_slice(r, sizeof(int16_t) * target)) - ensures(offset <= return_value && return_value <= target) - ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, MLKEM_Q)) -); -#endif + requires(memory_no_alias(entry, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) + assigns(memory_slice(entry, sizeof(poly))) + ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q))); + +#endif /* REJ_UNIFORM_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/symmetric.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/symmetric.h index 55ebbbd53..3563e5505 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/symmetric.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/symmetric.h @@ -10,6 +10,7 @@ #include "cbmc.h" #include "common.h" #include "fips202.h" +#include "fips202x4.h" /* Macros denoting FIPS-203 specific Hash functions */ diff --git 
a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/verify.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/verify.c index b7078fcc1..9f39dcd22 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/verify.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/verify.c @@ -4,7 +4,8 @@ */ #include "verify.h" -#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) +#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) && \ + !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) /* * Masking value used in constant-time functions from * verify.h to block the compiler's range analysis and @@ -12,9 +13,11 @@ */ volatile uint64_t ct_opt_blocker_u64 = 0; -#else /* MLKEM_USE_ASM_VALUE_BARRIER */ +#else /* MLKEM_USE_ASM_VALUE_BARRIER && \ + !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ -#define empty_cu_verify MLKEM_NAMESPACE(empty_cu_verify) +#define empty_cu_verify MLKEM_NAMESPACE_K(empty_cu_verify) int empty_cu_verify; -#endif /* MLKEM_USE_ASM_VALUE_BARRIER */ +#endif /* MLKEM_USE_ASM_VALUE_BARRIER && \ + !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/verify.h index 8c47155dc..f6ecf5eba 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/verify.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/verify.h @@ -268,7 +268,7 @@ __contract__( for (i = 0; i < len; i++) __loop__( - invariant(i >= 0 && i <= len) + invariant(i <= len) invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k]))))) { r |= a[i] ^ b[i]; @@ -314,4 +314,4 @@ __contract__( } } -#endif +#endif /* VERIFY_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/zetas.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/zetas.c index 1a26e0dd5..4ef887c62 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/zetas.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/zetas.c @@ -8,6 +8,8 @@ * Do not modify it directly. 
*/ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) #include "ntt.h" /* @@ -28,3 +30,10 @@ ALIGN const int16_t zetas[128] = { -1187, -1659, -1185, -1530, -1278, 794, -1510, -854, -870, 478, -108, -308, 996, 991, 958, -1460, 1522, 1628, }; + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_zetas MLKEM_NAMESPACE_K(empty_cu_zetas) +int empty_cu_zetas; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/arith_backend.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/arith_backend.h index 09e30f207..0543b1bd1 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/arith_backend.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/arith_backend.h @@ -16,7 +16,9 @@ * * Keep this _after_ the inclusion of the backend; otherwise, * the sanity checks won't have an effect. */ +#if defined(MLKEM_NATIVE_CHECK_APIS) #include "api.h" #endif +#endif #endif /* MLKEM_NATIVE_ARITH_IMPL_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.c index 433bdc954..1e6b7c5d1 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.c @@ -2,8 +2,11 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ -#include "cbd.h" +#include "common.h" +#ifndef MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED + #include +#include "cbd.h" /* Static namespacing * This is to facilitate building multiple instances @@ -11,8 +14,6 @@ * within a single compilation unit. 
*/ #define load32_littleendian MLKEM_NAMESPACE(load32_littleendian) #define load24_littleendian MLKEM_NAMESPACE(load24_littleendian) -#define cbd2 MLKEM_NAMESPACE(cbd2) -#define cbd3 MLKEM_NAMESPACE(cbd3) /* End of static namespacing */ /************************************************* @@ -35,44 +36,13 @@ static uint32_t load32_littleendian(const uint8_t x[4]) return r; } -#if MLKEM_ETA1 == 3 -/************************************************* - * Name: load24_littleendian - * - * Description: load 3 bytes into a 32-bit integer - * in little-endian order. - * This function is only needed for ML-KEM-512 - * - * Arguments: - const uint8_t *x: pointer to input byte array - * - * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) - **************************************************/ -static uint32_t load24_littleendian(const uint8_t x[3]) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - return r; -} -#endif /* MLKEM_ETA1 == 3 */ - -/************************************************* - * Name: cbd2 - * - * Description: Given an array of uniformly random bytes, compute - * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter eta=2 - * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *buf: pointer to input byte array - **************************************************/ -static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) +MLKEM_NATIVE_INTERNAL_API +void poly_cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) { unsigned i; for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_abs_bound(r->coeffs, 0, 8 * i, 3))) { unsigned j; @@ -82,7 +52,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i <= MLKEM_N / 8 && j <= 8) 
invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 3))) { const int16_t a = (d >> (4 * j + 0)) & 0x3; @@ -92,24 +62,34 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) } } -#if MLKEM_ETA1 == 3 +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == 3 /************************************************* - * Name: cbd3 + * Name: load24_littleendian * - * Description: Given an array of uniformly random bytes, compute - * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter eta=3. + * Description: load 3 bytes into a 32-bit integer + * in little-endian order. * This function is only needed for ML-KEM-512 * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *buf: pointer to input byte array + * Arguments: - const uint8_t *x: pointer to input byte array + * + * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) **************************************************/ -static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) +static uint32_t load24_littleendian(const uint8_t x[3]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + return r; +} + +MLKEM_NATIVE_INTERNAL_API +void poly_cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) { unsigned i; for (i = 0; i < MLKEM_N / 4; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(i <= MLKEM_N / 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i, 4))) { unsigned j; @@ -120,7 +100,7 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) for (j = 0; j < 4; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4) + invariant(i <= MLKEM_N / 4 && j <= 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 4))) { const int16_t a = (d >> (6 * j + 0)) & 0x7; @@ -129,28 +109,12 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) } } } -#endif /* MLKEM_ETA1 == 3 */ +#endif /* 
defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == \ + 3 */ -MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) -{ -#if MLKEM_ETA1 == 2 - cbd2(r, buf); -#elif MLKEM_ETA1 == 3 - cbd3(r, buf); -#else -#error "This implementation requires eta1 in {2,3}" -#endif -} +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ -#if MLKEM_K == 2 || MLKEM_K == 4 -MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) -{ -#if MLKEM_ETA2 == 2 - cbd2(r, buf); -#else -#error "This implementation requires eta2 = 2" -#endif -} -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ +#define empty_cu_cbd MLKEM_NAMESPACE_K(empty_cu_cbd) +int empty_cu_cbd; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.h index 15db89570..54c1f5b90 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.h @@ -9,46 +9,35 @@ #include "common.h" #include "poly.h" -#define poly_cbd_eta1 MLKEM_NAMESPACE(poly_cbd_eta1) +#define poly_cbd2 MLKEM_NAMESPACE(poly_cbd2) /************************************************* - * Name: poly_cbd_eta1 + * Name: poly_cbd2 * * Description: Given an array of uniformly random bytes, compute * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter MLKEM_ETA1. 
+ * a centered binomial distribution with parameter eta=2 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *buf: pointer to input byte array **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)) -); +void poly_cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]); -#if MLKEM_K == 2 || MLKEM_K == 4 -#define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2) +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == 3 +#define poly_cbd3 MLKEM_NAMESPACE(poly_cbd3) /************************************************* - * Name: poly_cbd_eta1 + * Name: poly_cbd3 * * Description: Given an array of uniformly random bytes, compute * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter MLKEM_ETA2. + * a centered binomial distribution with parameter eta=3. 
+ * This function is only needed for ML-KEM-512 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *buf: pointer to input byte array **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)) -); -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ +void poly_cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]); +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD || MLKEM_ETA1 == 3 */ -#endif +#endif /* CBD_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbmc.h index baa0bfa9f..52b95bc3f 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbmc.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbmc.h @@ -13,7 +13,7 @@ #define __contract__(x) #define __loop__(x) -#define cassert(x, y) +#define cassert(x) #else /* CBMC _is_ defined, therefore we're doing proof */ @@ -30,7 +30,7 @@ #define invariant(...) __CPROVER_loop_invariant(__VA_ARGS__) #define decreases(...) __CPROVER_decreases(__VA_ARGS__) /* cassert to avoid confusion with in-built assert */ -#define cassert(...) __CPROVER_assert(__VA_ARGS__) +#define cassert(x) __CPROVER_assert(x, "cbmc assertion failed") #define assume(...) 
__CPROVER_assume(__VA_ARGS__) /*************************************************** @@ -119,13 +119,13 @@ { \ unsigned qvar; \ ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> \ - (((value_lb) <= (array_var[(qvar)])) && \ - ((array_var[(qvar)]) < (value_ub))) \ + (((int)(value_lb) <= ((array_var)[(qvar)])) && \ + (((array_var)[(qvar)]) < (int)(value_ub))) \ } #define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \ array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \ - (qvar_ub), (array_var), (value_lb), (value_ub)) + (qvar_ub), (array_var), (value_lb), (value_ub)) /* clang-format on */ /* Wrapper around array_bound operating on absolute values. @@ -134,6 +134,6 @@ * bound in array_bound is inclusive, we have to raise it by 1. */ #define array_abs_bound(arr, lb, ub, k) \ - array_bound((arr), (lb), (ub), -(k) + 1, (k)) + array_bound((arr), (lb), (ub), -((int)(k)) + 1, (k)) #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/common.h index da886780c..4f326333e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/common.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/common.h @@ -43,23 +43,30 @@ #define MLKEM_NATIVE_MAKE_NAMESPACE_(x1, x2) x1##_##x2 #define MLKEM_NATIVE_MAKE_NAMESPACE(x1, x2) MLKEM_NATIVE_MAKE_NAMESPACE_(x1, x2) -#define FIPS202_NAMESPACE(s) \ - MLKEM_NATIVE_MAKE_NAMESPACE(FIPS202_NAMESPACE_PREFIX, s) - #define MLKEM_NAMESPACE(s) \ MLKEM_NATIVE_MAKE_NAMESPACE(MLKEM_NAMESPACE_PREFIX, s) +#if defined(MLKEM_NAMESPACE_PREFIX_ADD_LEVEL) +#define MLKEM_NATIVE_MAKE_NAMESPACE_K_(x1, x2, x3) x1##x2##_##x3 +#define MLKEM_NATIVE_MAKE_NAMESPACE_K(x1, x2, x3) \ + MLKEM_NATIVE_MAKE_NAMESPACE_K_(x1, x2, x3) +#define MLKEM_NAMESPACE_K(s) \ + MLKEM_NATIVE_MAKE_NAMESPACE_K(MLKEM_NAMESPACE_PREFIX, MLKEM_LVL, s) +#else +#define MLKEM_NAMESPACE_K(s) MLKEM_NAMESPACE(s) +#endif + /* On Apple platforms, we need to emit leading underscore * in front of assembly symbols. 
We thus introducee a separate * namespace wrapper for ASM symbols. */ #if !defined(__APPLE__) #define MLKEM_ASM_NAMESPACE(sym) MLKEM_NAMESPACE(sym) -#define FIPS202_ASM_NAMESPACE(sym) FIPS202_NAMESPACE(sym) +#define MLKEM_ASM_NAMESPACE_K(sym) MLKEM_NAMESPACE_K(sym) #else #define PREFIX_UNDERSCORE_(sym) _##sym #define PREFIX_UNDERSCORE(sym) PREFIX_UNDERSCORE_(sym) #define MLKEM_ASM_NAMESPACE(sym) PREFIX_UNDERSCORE(MLKEM_NAMESPACE(sym)) -#define FIPS202_ASM_NAMESPACE(sym) PREFIX_UNDERSCORE(FIPS202_NAMESPACE(sym)) +#define MLKEM_ASM_NAMESPACE_K(sym) PREFIX_UNDERSCORE(MLKEM_NAMESPACE_K(sym)) #endif #endif /* MLKEM_NATIVE_COMMON_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/config.h index d1441835b..fa89370ce 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/config.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/config.h @@ -40,10 +40,12 @@ /* #define MLKEM_NATIVE_CONFIG_FILE "config.h" */ /****************************************************************************** - * Name: MLKEM_NAMESPACE + * Name: MLKEM_NAMESPACE_PREFIX * - * Description: The prefix to use to namespace global symbols - * from mlkem/. + * Description: The prefix to use to namespace global symbols from mlkem/. + * + * Level-dependent symbols will additionally be prefixed with the + * security level if MLKEM_NAMESPACE_PREFIX_ADD_LEVEL is set. * * This can also be set using CFLAGS. * @@ -53,17 +55,71 @@ #endif /****************************************************************************** - * Name: FIPS202_NAMESPACE + * Name: MLKEM_NAMESPACE_PREFIX_ADD_LEVEL + * + * Description: If set, the level (512, 768, 1024) is added to the namespace + * prefix MLKEM_NAMESPACE_PREFIX for all functions which are + * level-dependent. Level-independent functions will have there + * symbol prefixed by MLKEM_NAMESPACE_PREFIX only. * - * Description: The prefix to use to namespace global symbols - * from mlkem/fips202/. 
+ * This is intended to be used for multi-level builds where + * level-independent code should be shared across levels. * * This can also be set using CFLAGS. * *****************************************************************************/ -#if !defined(FIPS202_NAMESPACE_PREFIX) -#define FIPS202_NAMESPACE_PREFIX FIPS202_DEFAULT_NAMESPACE_PREFIX -#endif +/* #define MLKEM_NAMESPACE_PREFIX_ADD_LEVEL */ + +/****************************************************************************** + * Name: MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED + * + * Description: This is for multi-level builds of mlkem-native only. If you + * need only a single security level build of mlkem-native, + * keep this unset. + * + * If this is set, all MLKEM_K-independent code will be included + * in the build, including code needed only for other security + * levels. + * + * Example: poly_cbd3 is only needed for MLKEM_K == 2. Yet, if + * this option is set for a build with MLKEM_K==3/4, it would + * be included. + * + * To build mlkem-native with support for all security levels, + * build it three times -- once per level -- and set the option + * MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED for exactly one of + * them, and MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED for the + * others. + * + * See examples/multilevel_build for an example. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +/* #define MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED */ + +/****************************************************************************** + * Name: MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED + * + * Description: This is for multi-level builds of mlkem-native only. If you + * need only a single security level build of mlkem-native, + * keep this unset. + * + * If this is set, no MLKEM_K-independent code will be included + * in the build. 
+ * + * To build mlkem-native with support for all security levels, + * build it three times -- once per level -- and set the option + * MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED for exactly one of + * them, and MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED for the + * others. + * + * See examples/multilevel_build for an example. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +/* #define MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ /****************************************************************************** * Name: MLKEM_USE_NATIVE @@ -112,25 +168,13 @@ /* Default namespace * * Don't change this. If you need a different namespace, re-define - * MLKEM_NAMESPACE above instead, and remove the following. - */ - -/* - * The default FIPS202 namespace is - * - * PQCP_MLKEM_NATIVE_FIPS202__ + * MLKEM_NAMESPACE_PREFIX above instead, and remove the following. * - * e.g., PQCP_MLKEM_NATIVE_FIPS202_C_ - */ - -#define FIPS202_DEFAULT_NAMESPACE_PREFIX PQCP_MLKEM_NATIVE_FIPS202 - -/* * The default MLKEM namespace is * - * PQCP_MLKEM_NATIVE_MLKEM__ + * PQCP_MLKEM_NATIVE_MLKEM_ * - * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_ + * e.g., PQCP_MLKEM_NATIVE_MLKEM512_ */ #if MLKEM_K == 2 diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug.c new file mode 100644 index 000000000..4b4857cbc --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug.c @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +/* NOTE: You can remove this file unless you compile with MLKEM_DEBUG. 
*/ + +#include "common.h" + +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) && defined(MLKEM_DEBUG) + + +#include <stdio.h> +#include <stdlib.h> +#include "debug.h" + +#define MLKEM_NATIVE_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] " + +void mlkem_debug_assert(const char *file, int line, const int val) +{ + if (val == 0) + { + fprintf(stderr, + MLKEM_NATIVE_DEBUG_ERROR_HEADER "Assertion failed (value %d)\n", + file, line, val); + exit(1); + } +} + +void mlkem_debug_check_bounds(const char *file, int line, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive) +{ + int err = 0; + unsigned i; + for (i = 0; i < len; i++) + { + int16_t val = ptr[i]; + if (!(val > lower_bound_exclusive && val < upper_bound_exclusive)) + { + fprintf( + stderr, + MLKEM_NATIVE_DEBUG_ERROR_HEADER + "Bounds assertion failed: Index %u, value %d out of bounds (%d,%d)\n", + file, line, i, (int)val, lower_bound_exclusive, + upper_bound_exclusive); + err = 1; + } + } + + if (err == 1) + exit(1); +} + +#else /* !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED && MLKEM_DEBUG */ + +#define empty_cu_debug MLKEM_NAMESPACE_K(empty_cu_debug) +int empty_cu_debug; + +#endif /* !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED && MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug.h new file mode 100644 index 000000000..1103124db --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef MLKEM_DEBUG_H +#define MLKEM_DEBUG_H +#include "common.h" + +#if defined(MLKEM_DEBUG) +#include <stdint.h> + +/************************************************* + * Name: mlkem_debug_assert + * + * Description: Check debug assertion + * + * Prints an error message to stderr and calls + * exit(1) if not. 
+ * + * Arguments: - file: filename + * - line: line number + * - val: Value asserted to be non-zero + **************************************************/ +#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert) +void mlkem_debug_assert(const char *file, int line, const int val); + +/************************************************* + * Name: mlkem_debug_check_bounds + * + * Description: Check whether values in an array of int16_t + * are within specified bounds. + * + * Prints an error message to stderr and calls + * exit(1) if not. + * + * Arguments: - file: filename + * - line: line number + * - ptr: Base of array to be checked + * - len: Number of int16_t in ptr + * - lower_bound_exclusive: Exclusive lower bound + * - upper_bound_exclusive: Exclusive upper bound + **************************************************/ +#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds) +void mlkem_debug_check_bounds(const char *file, int line, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive); + +/* Check assertion, calling exit() upon failure + * + * val: Value that's asserted to be non-zero + */ +#define debug_assert(val) mlkem_debug_assert(__FILE__, __LINE__, (val)) + +/* Check bounds in array of int16_t's + * ptr: Base of int16_t array; will be explicitly cast to int16_t*, + * so you may pass a byte-compatible type such as poly or polyvec. 
+ * len: Number of int16_t in array + * value_lb: Inclusive lower value bound + * value_ub: Exclusive upper value bound */ +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + mlkem_debug_check_bounds(__FILE__, __LINE__, (const int16_t *)(ptr), (len), \ + (value_lb)-1, (value_ub)) + +/* Check absolute bounds in array of int16_t's + * ptr: Base of array, expression of type int16_t* + * len: Number of int16_t in array + * value_abs_bd: Exclusive absolute upper bound */ +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + debug_assert_bound((ptr), (len), (-(value_abs_bd) + 1), (value_abs_bd)) + +/* Version of bounds assertions for 2-dimensional arrays */ +#define debug_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \ + debug_assert_bound((ptr), ((len0) * (len1)), (value_lb), (value_ub)) + +#define debug_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \ + debug_assert_abs_bound((ptr), ((len0) * (len1)), (value_abs_bd)) + +/* When running CBMC, convert debug assertions into proof obligations */ +#elif defined(CBMC) + +#include "cbmc.h" + +#define debug_assert(val) cassert(val) + +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + cassert(array_bound(((int16_t *)(ptr)), 0, (len), (value_lb), (value_ub))) + +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + cassert(array_abs_bound(((int16_t *)(ptr)), 0, (len), (value_abs_bd))) + +/* Because of https://github.com/diffblue/cbmc/issues/8570, we can't + * just use a single flattened array_bound(...) here. 
*/ +#define debug_assert_bound_2d(ptr, M, N, value_lb, value_ub) \ + cassert(forall(kN, 0, (M), \ + array_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \ + (value_lb), (value_ub)))) + +#define debug_assert_abs_bound_2d(ptr, M, N, value_abs_bd) \ + cassert(forall(kN, 0, (M), \ + array_abs_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \ + (value_abs_bd)))) + +#else /* MLKEM_DEBUG */ + +#define debug_assert(val) \ + do \ + { \ + } while (0) +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + do \ + { \ + } while (0) +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + do \ + { \ + } while (0) + +#define debug_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \ + do \ + { \ + } while (0) + +#define debug_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \ + do \ + { \ + } while (0) + + +#endif /* MLKEM_DEBUG */ +#endif /* MLKEM_DEBUG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug/debug.c deleted file mode 100644 index 64294ebe1..000000000 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug/debug.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2024 The mlkem-native project authors - * SPDX-License-Identifier: Apache-2.0 - */ -#include "../common.h" - -#if defined(MLKEM_DEBUG) - -#include -#include "debug.h" - -#define MLKEM_NATIVE_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] " - -void mlkem_debug_assert(const char *file, int line, const char *description, - const int val) -{ - if (val == 0) - { - fprintf(stderr, - MLKEM_NATIVE_DEBUG_ERROR_HEADER "Assertion failed: %s (value %d)\n", - file, line, description, val); - exit(1); - } -} - -void mlkem_debug_check_bounds(const char *file, int line, - const char *description, const int16_t *ptr, - unsigned len, int lower_bound_exclusive, - int upper_bound_exclusive) -{ - int err = 0; - unsigned i; - for (i = 0; i < len; i++) - { - int16_t val = ptr[i]; - if (!(val > lower_bound_exclusive && val < upper_bound_exclusive)) - { 
- fprintf(stderr, - MLKEM_NATIVE_DEBUG_ERROR_HEADER - "%s, index %u, value %d out of bounds (%d,%d)\n", - file, line, description, i, (int)val, lower_bound_exclusive, - upper_bound_exclusive); - err = 1; - } - } - - if (err == 1) - exit(1); -} - -#else /* MLKEM_DEBUG */ - -#define empty_cu_debug MLKEM_NAMESPACE(empty_cu_debug) -int empty_cu_debug; - -#endif /* MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug/debug.h deleted file mode 100644 index 5ce320ea2..000000000 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug/debug.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2024 The mlkem-native project authors - * SPDX-License-Identifier: Apache-2.0 - */ -#ifndef MLKEM_DEBUG_H -#define MLKEM_DEBUG_H - -#include "../common.h" - -#if defined(MLKEM_DEBUG) -#include -#include -#include - -/************************************************* - * Name: mlkem_debug_assert - * - * Description: Check debug assertion - * - * Prints an error message to stderr and calls - * exit(1) if not. - * - * Arguments: - file: filename - * - line: line number - * - description: Textual description of assertion - * - val: Value asserted to be non-zero - **************************************************/ -#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert) -void mlkem_debug_assert(const char *file, int line, const char *description, - const int val); - -/************************************************* - * Name: mlkem_debug_check_bounds - * - * Description: Check whether values in an array of int16_t - * are within specified bounds. - * - * Prints an error message to stderr and calls - * exit(1) if not. 
- * - * Arguments: - file: filename - * - line: line number - * - description: Textual description of check - * - ptr: Base of array to be checked - * - len: Number of int16_t in ptr - * - lower_bound_exclusive: Exclusive lower bound - * - upper_bound_exclusive: Exclusive upper bound - **************************************************/ -#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds) -void mlkem_debug_check_bounds(const char *file, int line, - const char *description, const int16_t *ptr, - unsigned len, int lower_bound_exclusive, - int upper_bound_exclusive); - -/* Check assertion, calling exit() upon failure - * - * val: Value that's asserted to be non-zero - * msg: Message to print on failure - * - * Currently called CASSERT to avoid clash with CBMC assert. - */ -#define CASSERT(val, msg) \ - do \ - { \ - mlkem_debug_assert(__FILE__, __LINE__, (msg), (val)); \ - } while (0) - -/* Check absolute bounds of scalar - * val: Scalar to be checked - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define SCALAR_BOUND(val, abs_bound, msg) \ - CASSERT((val) > -(abs_bound) && (val) < (abs_bound), msg) - -/* Check that all coefficients in array of int16_t's are non-negative - * and below an exclusive upper bound. 
- * - * ptr: Base of array, expression of type int16_t* - * len: Number of int16_t in array - * high_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define UBOUND(ptr, len, high_bound, msg) \ - do \ - { \ - mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ - (len), -1, ((high_bound))); \ - } while (0) - -/* Check absolute bounds in array of int16_t's - * ptr: Base of array, expression of type int16_t* - * len: Number of int16_t in array - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define BOUND(ptr, len, abs_bound, msg) \ - do \ - { \ - mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ - (len), -(abs_bound), (abs_bound)); \ - } while (0) - -/* Check absolute bounds on coefficients in polynomial or mulcache - * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define POLY_BOUND_MSG(ptr, abs_bound, msg) \ - BOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (abs_bound), \ - msg) - -/* Check unsigned bounds on coefficients in polynomial or mulcache - * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
- * msg: Message to print on failure */ -#define POLY_UBOUND_MSG(ptr, ubound, msg) \ - UBOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (ubound), \ - msg) - -/* Check absolute bounds on coefficients in polynomial - * ptr: poly* of poly_mulcache* pointer to polynomial (cache) to check - * abs_bound: Exclusive upper bound on absolute value to check */ -#define POLY_BOUND(ptr, abs_bound) \ - POLY_BOUND_MSG((ptr), (abs_bound), "poly absolute bound for " #ptr) - -/* Check unsigned bounds on coefficients in polynomial - * ptr: poly* of poly_mulcache* pointer to polynomial (cache) to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. - */ -#define POLY_UBOUND(ptr, ubound) \ - POLY_UBOUND_MSG((ptr), (ubound), "poly unsigned bound for " #ptr) - -/* Check absolute bounds on coefficients in vector of polynomials - * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check - * abs_bound: Exclusive upper bound on absolute value to check */ -#define POLYVEC_BOUND(ptr, abs_bound) \ - do \ - { \ - unsigned _debug_polyvec_bound_idx; \ - for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ - _debug_polyvec_bound_idx++) \ - POLY_BOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (abs_bound), \ - "polyvec absolute bound for " #ptr ".vec[i]"); \ - } while (0) - -/* Check unsigned bounds on coefficients in vector of polynomials - * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
- */ -#define POLYVEC_UBOUND(ptr, ubound) \ - do \ - { \ - unsigned _debug_polyvec_bound_idx; \ - for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ - _debug_polyvec_bound_idx++) \ - POLY_UBOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (ubound), \ - "polyvec unsigned bound for " #ptr ".vec[i]"); \ - } while (0) - -#define MLKEM_CONCAT_(left, right) left##right -#define MLKEM_CONCAT(left, right) MLKEM_CONCAT_(left, right) - -/* Following AWS-LC to define a C99-compliant static assert */ -#define MLKEM_STATIC_ASSERT_DEFINE(cond, msg) \ - typedef struct \ - { \ - unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \ - } MLKEM_CONCAT(MLKEM_NAMESPACE(static_assertion_), msg) \ - __attribute__((unused)); - -#define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \ - MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix)) -#define MLKEM_STATIC_ASSERT_ADD_LINE1(cond, line, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE0(cond, MLKEM_CONCAT(line, suffix)) -#define MLKEM_STATIC_ASSERT_ADD_LINE2(cond, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE1(cond, __LINE__, suffix) -#define MLKEM_STATIC_ASSERT_ADD_ERROR(cond, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE2(cond, MLKEM_CONCAT(_error_is_, suffix)) -#define STATIC_ASSERT(cond, error) MLKEM_STATIC_ASSERT_ADD_ERROR(cond, error) - -#else /* MLKEM_DEBUG */ - -#define CASSERT(val, msg) \ - do \ - { \ - } while (0) -#define SCALAR_BOUND(val, abs_bound, msg) \ - do \ - { \ - } while (0) -#define BOUND(ptr, len, abs_bound, msg) \ - do \ - { \ - } while (0) -#define POLY_BOUND(ptr, abs_bound) \ - do \ - { \ - } while (0) -#define POLYVEC_BOUND(ptr, abs_bound) \ - do \ - { \ - } while (0) -#define POLY_BOUND_MSG(ptr, ubound, abs_bound) \ - do \ - { \ - } while (0) -#define UBOUND(ptr, len, high_bound, msg) \ - do \ - { \ - } while (0) -#define POLY_UBOUND(ptr, ubound) \ - do \ - { \ - } while (0) -#define POLYVEC_UBOUND(ptr, ubound) \ - do \ - { \ - } while (0) -#define POLY_UBOUND_MSG(ptr, ubound, msg) \ 
- do \ - { \ - } while (0) -#define STATIC_ASSERT(cond, error) - -#endif /* MLKEM_DEBUG */ - -#endif /* MLKEM_DEBUG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.c index 4d3133e14..0cfcc3e9e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.c @@ -17,7 +17,7 @@ #include "symmetric.h" #include "arith_backend.h" -#include "debug/debug.h" +#include "debug.h" #include "cbmc.h" @@ -25,15 +25,13 @@ * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. */ -#define pack_pk MLKEM_NAMESPACE(pack_pk) -#define unpack_pk MLKEM_NAMESPACE(unpack_pk) -#define pack_sk MLKEM_NAMESPACE(pack_sk) -#define unpack_sk MLKEM_NAMESPACE(unpack_sk) -#define pack_ciphertext MLKEM_NAMESPACE(pack_ciphertext) -#define unpack_ciphertext MLKEM_NAMESPACE(unpack_ciphertext) -#define gen_matrix_entry_x4 MLKEM_NAMESPACE(gen_matrix_entry_x4) -#define gen_matrix_entry MLKEM_NAMESPACE(gen_matrix_entry) -#define matvec_mul MLKEM_NAMESPACE(matvec_mul) +#define pack_pk MLKEM_NAMESPACE_K(pack_pk) +#define unpack_pk MLKEM_NAMESPACE_K(unpack_pk) +#define pack_sk MLKEM_NAMESPACE_K(pack_sk) +#define unpack_sk MLKEM_NAMESPACE_K(unpack_sk) +#define pack_ciphertext MLKEM_NAMESPACE_K(pack_ciphertext) +#define unpack_ciphertext MLKEM_NAMESPACE_K(unpack_ciphertext) +#define matvec_mul MLKEM_NAMESPACE_K(matvec_mul) /* End of static namespacing */ /************************************************* @@ -51,7 +49,7 @@ static void pack_pk(uint8_t r[MLKEM_INDCPA_PUBLICKEYBYTES], polyvec *pk, const uint8_t seed[MLKEM_SYMBYTES]) { - POLYVEC_BOUND(pk, MLKEM_Q); + debug_assert_bound_2d(pk, MLKEM_K, MLKEM_N, 0, MLKEM_Q); polyvec_tobytes(r, pk); memcpy(r + MLKEM_POLYVECBYTES, seed, MLKEM_SYMBYTES); } @@ -77,7 +75,7 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], /* NOTE: If a 
modulus check was conducted on the PK, we know at this * point that the coefficients of `pk` are unsigned canonical. The * specifications and proofs, however, do _not_ assume this, and instead - * work with the easily provable bound by 4096. */ + * work with the easily provable bound by UINT12_LIMIT. */ } /************************************************* @@ -91,7 +89,7 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], **************************************************/ static void pack_sk(uint8_t r[MLKEM_INDCPA_SECRETKEYBYTES], polyvec *sk) { - POLYVEC_BOUND(sk, MLKEM_Q); + debug_assert_bound_2d(sk, MLKEM_K, MLKEM_N, 0, MLKEM_Q); polyvec_tobytes(r, sk); } @@ -145,131 +143,11 @@ static void unpack_ciphertext(polyvec *b, poly *v, poly_decompress_dv(v, c + MLKEM_POLYVECCOMPRESSEDBYTES_DU); } -#ifndef MLKEM_GEN_MATRIX_NBLOCKS -#define MLKEM_GEN_MATRIX_NBLOCKS \ - ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + XOF_RATE) / XOF_RATE) -#endif - -/* - * Generate four A matrix entries from a seed, using rejection - * sampling on the output of a XOF. 
- */ -static void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4]) -__contract__( - requires(memory_no_alias(vec, sizeof(poly) * 4)) - requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) - requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) - assigns(memory_slice(vec, sizeof(poly) * 4)) - ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) -{ - /* Temporary buffers for XOF output before rejection sampling */ - uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - - /* Tracks the number of coefficients we have already sampled */ - unsigned int ctr[KECCAK_WAY]; - xof_x4_ctx statex; - unsigned int buflen; - - shake128x4_inc_init(&statex); - - /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */ - xof_x4_absorb(&statex, seed[0], seed[1], seed[2], seed[3], - MLKEM_SYMBYTES + 2); - - /* - * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. - * This should generate the matrix entries with high probability. - */ - xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, - &statex); - buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; - ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); - ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); - ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); - ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); - - /* - * So long as not all matrix entries have been generated, squeeze - * one more block a time until we're done. 
- */ - buflen = XOF_RATE; - while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || - ctr[3] < MLKEM_N) - __loop__( - assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), - object_whole(buf1), object_whole(buf2), object_whole(buf3)) - invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) - invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) - invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, MLKEM_Q)) - invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, MLKEM_Q)) - invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, MLKEM_Q)) - invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, MLKEM_Q))) - { - xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); - ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); - ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); - ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); - ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); - } - - xof_x4_release(&statex); -} - -/* - * Generate a single A matrix entry from a seed, using rejection - * sampling on the output of a XOF. - */ -static void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) -__contract__( - requires(memory_no_alias(entry, sizeof(poly))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) - assigns(memory_slice(entry, sizeof(poly))) - ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) -{ - xof_ctx state; - uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - unsigned int ctr, buflen; - - shake128_inc_init(&state); - xof_absorb(&state, seed, MLKEM_SYMBYTES + 2); - - /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. - */ - /* This should generate the matrix entry with high probability. 
*/ - xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); - buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; - ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); - - /* Squeeze + sample one more block a time until we're done */ - buflen = XOF_RATE; - while (ctr < MLKEM_N) - __loop__( - assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) - invariant(0 <= ctr && ctr <= MLKEM_N) - invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr, - 0, MLKEM_Q))) - { - xof_squeezeblocks(buf, 1, &state); - ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen); - } - - xof_release(&state); -} - #if !defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER) /* This namespacing is not done at the top to avoid a naming conflict * with native backends, which are currently not yet namespaced. */ #define poly_permute_bitrev_to_custom \ - MLKEM_NAMESPACE(poly_permute_bitrev_to_custom) + MLKEM_NAMESPACE_K(poly_permute_bitrev_to_custom) static INLINE void poly_permute_bitrev_to_custom(poly *data) __contract__( @@ -332,7 +210,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) * This call writes across polyvec boundaries for K=2 and K=3. * This is intentional and safe. */ - gen_matrix_entry_x4(&a[0].vec[0] + i, seedxy); + poly_rej_uniform_x4(&a[0].vec[0] + i, seedxy); } /* For left over polynomial, we use single keccak. */ @@ -353,12 +231,11 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) seed0[MLKEM_SYMBYTES + 1] = x; } - gen_matrix_entry(&a[0].vec[0] + i, seed0); + poly_rej_uniform(&a[0].vec[0] + i, seed0); i++; } - cassert(i == MLKEM_K * MLKEM_K, - "gen_matrix: failed to generate whole matrix"); + debug_assert(i == MLKEM_K * MLKEM_K); /* * The public matrix is generated in NTT domain. 
If the native backend @@ -402,16 +279,12 @@ __contract__( for (i = 0; i < MLKEM_K; i++) __loop__( assigns(i, object_whole(out)) - invariant(i >= 0 && i <= MLKEM_K)) + invariant(i <= MLKEM_K)) { polyvec_basemul_acc_montgomery_cached(&out->vec[i], &a[i], v, vc); } } - - -STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0) - MLKEM_NATIVE_INTERNAL_API void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES], @@ -461,7 +334,6 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], matvec_mul(&pkpv, a, &skpv, &skpv_cache); polyvec_tomont(&pkpv); - /* Arithmetic cannot overflow, see static assertion at the top */ polyvec_add(&pkpv, &e); polyvec_reduce(&pkpv); polyvec_reduce(&skpv); @@ -471,11 +343,6 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], } -/* Check that the arithmetic in indcpa_enc() does not overflow */ -STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0) -STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX, - indcpa_enc_bound_1) - MLKEM_NATIVE_INTERNAL_API void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], const uint8_t m[MLKEM_INDCPA_MSGBYTES], @@ -522,7 +389,6 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], polyvec_invntt_tomont(&b); poly_invntt_tomont(&v); - /* Arithmetic cannot overflow, see static assertion at the top */ polyvec_add(&b, &ep); poly_add(&v, &epp); poly_add(&v, &k); @@ -533,9 +399,6 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], pack_ciphertext(c, &b, &v); } -/* Check that the arithmetic in indcpa_dec() does not overflow */ -STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0) - MLKEM_NATIVE_INTERNAL_API void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], const uint8_t c[MLKEM_INDCPA_BYTES], @@ -551,7 +414,6 @@ void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], polyvec_basemul_acc_montgomery(&sb, &skpv, &b); poly_invntt_tomont(&sb); - /* Arithmetic cannot overflow, see static assertion at the 
top */ poly_sub(&v, &sb); poly_reduce(&v); diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.h index 011f1aa4f..2c4fda3c4 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.h @@ -10,7 +10,7 @@ #include "common.h" #include "polyvec.h" -#define gen_matrix MLKEM_NAMESPACE(gen_matrix) +#define gen_matrix MLKEM_NAMESPACE_K(gen_matrix) /************************************************* * Name: gen_matrix * @@ -34,7 +34,7 @@ __contract__( array_bound(a[x].vec[y].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))); ); -#define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand) +#define indcpa_keypair_derand MLKEM_NAMESPACE_K(indcpa_keypair_derand) /************************************************* * Name: indcpa_keypair_derand * @@ -60,7 +60,7 @@ __contract__( assigns(object_whole(sk)) ); -#define indcpa_enc MLKEM_NAMESPACE(indcpa_enc) +#define indcpa_enc MLKEM_NAMESPACE_K(indcpa_enc) /************************************************* * Name: indcpa_enc * @@ -89,7 +89,7 @@ __contract__( assigns(object_whole(c)) ); -#define indcpa_dec MLKEM_NAMESPACE(indcpa_dec) +#define indcpa_dec MLKEM_NAMESPACE_K(indcpa_dec) /************************************************* * Name: indcpa_dec * diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.c index 5779d3273..88c3843be 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.c @@ -16,8 +16,8 @@ * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. 
*/ -#define check_pk MLKEM_NAMESPACE(check_pk) -#define check_sk MLKEM_NAMESPACE(check_sk) +#define check_pk MLKEM_NAMESPACE_K(check_pk) +#define check_sk MLKEM_NAMESPACE_K(check_sk) /* End of static namespacing */ #if defined(CBMC) diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.h index 074e4771e..93caa796b 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.h @@ -9,6 +9,7 @@ #include "cbmc.h" #include "common.h" +#if defined(MLKEM_NATIVE_CHECK_APIS) /* Include to ensure consistency between internal kem.h * and external mlkem_native.h. */ #include "mlkem_native.h" @@ -25,6 +26,14 @@ #error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h #endif +#else +#define crypto_kem_keypair_derand MLKEM_NAMESPACE_K(keypair_derand) +#define crypto_kem_keypair MLKEM_NAMESPACE_K(keypair) +#define crypto_kem_enc_derand MLKEM_NAMESPACE_K(enc_derand) +#define crypto_kem_enc MLKEM_NAMESPACE_K(enc) +#define crypto_kem_dec MLKEM_NAMESPACE_K(dec) +#endif + /************************************************* * Name: crypto_kem_keypair_derand * diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/mlkem_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/mlkem_native.h index 4aed4efbb..12d1d12e6 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/mlkem_native.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/mlkem_native.h @@ -59,9 +59,17 @@ #error MLKEM_NAMESPACE_PREFIX not set by config file #endif -#define BUILD_INFO_CONCAT_(x, y) x##_##y -#define BUILD_INFO_CONCAT(x, y) BUILD_INFO_CONCAT_(x, y) -#define BUILD_INFO_NAMESPACE(sym) BUILD_INFO_CONCAT(MLKEM_NAMESPACE_PREFIX, sym) +#if defined(MLKEM_NAMESPACE_PREFIX_ADD_LEVEL) +#define BUILD_INFO_CONCAT3_(x, y, z) x##y##_##z +#define BUILD_INFO_CONCAT3(x, y, z) BUILD_INFO_CONCAT3_(x, y, z) +#define BUILD_INFO_NAMESPACE(sym) \ + BUILD_INFO_CONCAT3(MLKEM_NAMESPACE_PREFIX, BUILD_INFO_LVL, 
sym) +#else +#define BUILD_INFO_CONCAT2_(x, y) x##_##y +#define BUILD_INFO_CONCAT2(x, y) BUILD_INFO_CONCAT2_(x, y) +#define BUILD_INFO_NAMESPACE(sym) \ + BUILD_INFO_CONCAT2(MLKEM_NAMESPACE_PREFIX, sym) +#endif #endif /* BUILD_INFO_LVL */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.c index 02b45215c..3651c8da9 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.c @@ -2,10 +2,12 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ -#include +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) +#include #include "arith_backend.h" -#include "debug/debug.h" +#include "debug.h" #include "ntt.h" #include "reduce.h" @@ -45,10 +47,10 @@ * 4 -- 6 * 5 -- 7 */ -static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, - int len, int bound) +static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, + unsigned start, unsigned len, int bound) __contract__( - requires(0 <= start && start < MLKEM_N) + requires(start < MLKEM_N) requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N) requires(0 <= bound && bound < INT16_MAX - MLKEM_Q) requires(-HALF_Q < zeta && zeta < HALF_Q) @@ -60,7 +62,7 @@ __contract__( ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound))) { /* `bound` is a ghost variable only needed in the CBMC specification */ - int j; + unsigned j; ((void)bound); for (j = start; j < start + len; j++) __loop__( @@ -93,7 +95,7 @@ __contract__( * official Kyber implementation here, merely adding `layer` as * a ghost variable for the specifications. 
*/ -static void ntt_layer(int16_t r[MLKEM_N], int len, int layer) +static void ntt_layer(int16_t r[MLKEM_N], unsigned len, unsigned layer) __contract__( requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer)) @@ -101,15 +103,15 @@ __contract__( assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q))) { - int start, k; + unsigned start, k; /* `layer` is a ghost variable only needed in the CBMC specification */ ((void)layer); /* Twiddle factors for layer n start at index 2^(layer-1) */ k = MLKEM_N / (2 * len); for (start = 0; start < MLKEM_N; start += 2 * len) __loop__( - invariant(0 <= start && start < MLKEM_N + 2 * len) - invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) + invariant(start < MLKEM_N + 2 * len) + invariant(k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) invariant(array_abs_bound(r, 0, start, layer * MLKEM_Q + MLKEM_Q)) invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q))) { @@ -130,9 +132,9 @@ __contract__( MLKEM_NATIVE_INTERNAL_API void poly_ntt(poly *p) { - int len, layer; + unsigned len, layer; int16_t *r; - POLY_BOUND_MSG(p, MLKEM_Q, "ref ntt input"); + debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q); r = p->coeffs; for (len = 128, layer = 1; len >= 2; len >>= 1, layer++) @@ -144,30 +146,23 @@ void poly_ntt(poly *p) } /* Check the stronger bound */ - POLY_BOUND_MSG(p, NTT_BOUND, "ref ntt output"); + debug_assert_abs_bound(p, MLKEM_N, NTT_BOUND); } #else /* MLKEM_USE_NATIVE_NTT */ -/* Check that bound for native NTT implies contractual bound */ -STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, invntt_bound) - MLKEM_NATIVE_INTERNAL_API void poly_ntt(poly *p) { - POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input"); + debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q); ntt_native(p); - POLY_BOUND_MSG(p, NTT_BOUND_NATIVE, "native ntt output"); + debug_assert_abs_bound(p, MLKEM_N, NTT_BOUND); } #endif /* 
MLKEM_USE_NATIVE_NTT */ #if !defined(MLKEM_USE_NATIVE_INTT) -/* Check that bound for reference invNTT implies contractual bound */ -#define INVNTT_BOUND_REF (3 * MLKEM_Q / 4) -STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound) - /* Compute one layer of inverse NTT */ -static void invntt_layer(int16_t *r, int len, int layer) +static void invntt_layer(int16_t *r, unsigned len, unsigned layer) __contract__( requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7) @@ -176,23 +171,23 @@ __contract__( assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { - int start, k; + unsigned start, k; /* `layer` is a ghost variable used only in the specification */ ((void)layer); k = MLKEM_N / len - 1; for (start = 0; start < MLKEM_N; start += 2 * len) __loop__( invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)) - invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(start <= MLKEM_N && k <= 127) /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */ invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len)) { - int j; + unsigned j; int16_t zeta = zetas[k--]; for (j = start; j < start + len; j++) __loop__( invariant(start <= j && j <= start + len) - invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(start <= MLKEM_N && k <= 127) invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { int16_t t = r[j]; @@ -211,13 +206,13 @@ void poly_invntt_tomont(poly *p) * and NTT twist. This also brings coefficients down to * absolute value < MLKEM_Q. 
*/ - int j, len, layer; + unsigned j, len, layer; const int16_t f = 1441; int16_t *r = p->coeffs; for (j = 0; j < MLKEM_N; j++) __loop__( - invariant(0 <= j && j <= MLKEM_N) + invariant(j <= MLKEM_N) invariant(array_abs_bound(r, 0, j, MLKEM_Q))) { r[j] = fqmul(r[j], f); @@ -226,24 +221,21 @@ void poly_invntt_tomont(poly *p) /* Run the invNTT layers */ for (len = 2, layer = 7; len <= 128; len <<= 1, layer--) __loop__( - invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer))) + invariant(2 <= len && len <= 256 && layer <= 7 && len == (1 << (8 - layer))) invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { invntt_layer(p->coeffs, len, layer); } - POLY_BOUND_MSG(p, INVNTT_BOUND_REF, "ref intt output"); + debug_assert_abs_bound(p, MLKEM_N, INVNTT_BOUND); } #else /* MLKEM_USE_NATIVE_INTT */ -/* Check that bound for native invNTT implies contractual bound */ -STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound) - MLKEM_NATIVE_INTERNAL_API void poly_invntt_tomont(poly *p) { intt_native(p); - POLY_BOUND_MSG(p, INVNTT_BOUND_NATIVE, "native intt output"); + debug_assert_abs_bound(p, MLKEM_N, INVNTT_BOUND); } #endif /* MLKEM_USE_NATIVE_INTT */ @@ -252,8 +244,7 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t b_cached) { int32_t t0, t1; - - BOUND(a, 2, 4096, "basemul input bound"); + debug_assert_bound(a, 2, 0, UINT12_LIMIT); t0 = (int32_t)a[1] * b_cached; t0 += (int32_t)a[0] * b[0]; @@ -264,5 +255,12 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], r[0] = montgomery_reduce(t0); r[1] = montgomery_reduce(t1); - BOUND(r, 2, 2 * MLKEM_Q, "basemul output bound"); + debug_assert_abs_bound(r, 2, 2 * MLKEM_Q); } + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_ntt MLKEM_NAMESPACE_K(empty_cu_ntt) +int empty_cu_ntt; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.h 
b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.h index 5592bb9a2..4e80d3ab3 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.h @@ -4,10 +4,10 @@ */ #ifndef NTT_H #define NTT_H +#include "common.h" #include #include "cbmc.h" -#include "common.h" #include "poly.h" #include "reduce.h" @@ -81,7 +81,7 @@ __contract__( * Upon return, coefficients are bound by * 2*MLKEM_Q in absolute value. * - a: Pointer to first input polynomial - * Must be coefficient-wise < 4096 in absolute value. + * Every coefficient must be in [0..4095] * - b: Pointer to second input polynomial * Can have arbitrary int16_t coefficients * - b_cached: Some precomputed value, typically derived from @@ -99,5 +99,4 @@ __contract__( ensures(array_abs_bound(r, 0, 2, 2 * MLKEM_Q)) ); - -#endif +#endif /* NTT_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/params.h index fa751f977..57ea4c8ba 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/params.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/params.h @@ -25,23 +25,34 @@ #define MLKEM_POLYBYTES 384 #define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES) +#define MLKEM_POLYCOMPRESSEDBYTES_D4 128 +#define MLKEM_POLYCOMPRESSEDBYTES_D5 160 +#define MLKEM_POLYCOMPRESSEDBYTES_D10 320 +#define MLKEM_POLYCOMPRESSEDBYTES_D11 352 + #if MLKEM_K == 2 #define MLKEM_LVL 512 #define MLKEM_ETA1 3 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_DU 10 +#define MLKEM_DV 4 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #elif MLKEM_K == 3 #define MLKEM_LVL 768 #define MLKEM_ETA1 2 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_DU 10 +#define MLKEM_DV 4 +#define 
MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #elif MLKEM_K == 4 #define MLKEM_LVL 1024 #define MLKEM_ETA1 2 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 160 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 352 +#define MLKEM_DU 11 +#define MLKEM_DV 5 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D5 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D11 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.c index 5807879df..7483ebf6d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.c @@ -2,13 +2,15 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) + #include #include - #include "arith_backend.h" #include "cbd.h" #include "cbmc.h" -#include "debug/debug.h" +#include "debug.h" #include "fips202x4.h" #include "ntt.h" #include "poly.h" @@ -16,50 +18,46 @@ #include "symmetric.h" #include "verify.h" +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 || MLKEM_K == 3) MLKEM_NATIVE_INTERNAL_API -void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) +void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a) { - unsigned j; -#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) - for (j = 0; j < MLKEM_N / 8; j++) - __loop__(invariant(j >= 0 && j <= MLKEM_N / 8)) + unsigned i; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (i = 0; i < MLKEM_N / 8; i++) + __loop__(invariant(i <= MLKEM_N / 8)) { - unsigned k; - uint16_t t[8]; - for (k = 0; k < 8; k++) + unsigned j; + uint8_t t[8] = {0}; + for (j = 0; j < 8; j++) 
__loop__( - invariant(k >= 0 && k <= 8) - invariant(forall(r, 0, k, t[r] < (1u << 11)))) + invariant(i <= MLKEM_N / 8 && j <= 8) + invariant(array_bound(t, 0, j, 0, 16))) { - t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); } - /* - * Make all implicit truncation explicit. No data is being - * truncated for the LHS's since each t[i] is 11-bit in size. - */ - r[11 * j + 0] = (t[0] >> 0) & 0xFF; - r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); - r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); - r[11 * j + 3] = (t[2] >> 2) & 0xFF; - r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); - r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); - r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); - r[11 * j + 7] = (t[5] >> 1) & 0xFF; - r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); - r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); - r[11 * j + 10] = (t[7] >> 3); + r[i * 4] = t[0] | (t[1] << 4); + r[i * 4 + 1] = t[2] | (t[3] << 4); + r[i * 4 + 2] = t[4] | (t[5] << 4); + r[i * 4 + 3] = t[6] | (t[7] << 4); } +} -#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a) +{ + unsigned j; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (j = 0; j < MLKEM_N / 4; j++) - __loop__(invariant(j >= 0 && j <= MLKEM_N / 4)) + __loop__(invariant(j <= MLKEM_N / 4)) { unsigned k; uint16_t t[4]; for (k = 0; k < 4; k++) __loop__( - invariant(k >= 0 && k <= 4) + invariant(k <= 4) invariant(forall(r, 0, k, t[r] < (1u << 10)))) { t[k] = scalar_compress_d10(a->coeffs[4 * j + k]); @@ -75,51 +73,35 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) r[5 * j + 3] = (t[2] >> 4) | ((t[3] << 6) & 0xFF); r[5 * j + 4] = (t[3] >> 2); } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" -#endif } - MLKEM_NATIVE_INTERNAL_API -void poly_decompress_du(poly *r, const uint8_t 
a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]) { - unsigned j; -#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) - for (j = 0; j < MLKEM_N / 8; j++) + unsigned i; + for (i = 0; i < MLKEM_N / 2; i++) __loop__( - invariant(0 <= j && j <= MLKEM_N / 8) - invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q))) + invariant(i <= MLKEM_N / 2) + invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q))) { - int k; - uint16_t t[8]; - uint8_t const *base = &a[11 * j]; - t[0] = 0x7FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8)); - t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); - t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | - ((uint16_t)base[4] << 10)); - t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); - t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); - t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | - ((uint16_t)base[8] << 9)); - t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); - t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); - - for (k = 0; k < 8; k++) - __loop__( - invariant(0 <= k && k <= 8) - invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q))) - { - r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); - } + r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); + r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} + +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d10(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]) +{ + unsigned j; for (j = 0; j < MLKEM_N / 4; j++) __loop__( - invariant(0 <= j && j <= MLKEM_N / 4) + invariant(j <= MLKEM_N / 4) invariant(array_bound(r->coeffs, 0, 4 * j, 0, MLKEM_Q))) { - int k; + unsigned k; uint16_t t[4]; uint8_t const *base = &a[5 * j]; @@ -130,51 +112,33 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) for (k = 0; k < 4; 
k++) __loop__( - invariant(0 <= k && k <= 4) + invariant(k <= 4) invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, MLKEM_Q))) { r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]); } } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" -#endif + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 \ + || MLKEM_K == 3) */ +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 MLKEM_NATIVE_INTERNAL_API -void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) +void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); -#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) - for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) - { - unsigned j; - uint8_t t[8] = {0}; - for (j = 0; j < 8; j++) - __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) - invariant(array_bound(t, 0, j, 0, 16))) - { - t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); - } - - r[i * 4] = t[0] | (t[1] << 4); - r[i * 4 + 1] = t[2] | (t[3] << 4); - r[i * 4 + 2] = t[4] | (t[5] << 4); - r[i * 4 + 3] = t[6] | (t[7] << 4); - } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + __loop__(invariant(i <= MLKEM_N / 8)) { unsigned j; uint8_t t[8] = {0}; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i <= MLKEM_N / 8 && j <= 8) invariant(array_bound(t, 0, j, 0, 32))) { t[j] = scalar_compress_d5(a->coeffs[8 * i + j]); @@ -191,33 +155,57 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) r[i * 5 + 3] = 0xFF & ((t[4] >> 4) | (t[5] << 1) | (t[6] << 6)); r[i * 5 + 4] = 0xFF & ((t[6] >> 2) | (t[7] << 3)); } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 160}" 
-#endif } MLKEM_NATIVE_INTERNAL_API -void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a) { - unsigned i; -#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) - for (i = 0; i < MLKEM_N / 2; i++) - __loop__( - invariant(i >= 0 && i <= MLKEM_N / 2) - invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q))) + unsigned j; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (j = 0; j < MLKEM_N / 8; j++) + __loop__(invariant(j <= MLKEM_N / 8)) { - r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); - r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); + unsigned k; + uint16_t t[8]; + for (k = 0; k < 8; k++) + __loop__( + invariant(k <= 8) + invariant(forall(r, 0, k, t[r] < (1u << 11)))) + { + t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + } + + /* + * Make all implicit truncation explicit. No data is being + * truncated for the LHS's since each t[i] is 11-bit in size. 
+ */ + r[11 * j + 0] = (t[0] >> 0) & 0xFF; + r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); + r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); + r[11 * j + 3] = (t[2] >> 2) & 0xFF; + r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); + r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); + r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); + r[11 * j + 7] = (t[5] >> 1) & 0xFF; + r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); + r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); + r[11 * j + 10] = (t[7] >> 3); } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) +} + +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]) +{ + unsigned i; for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q))) { unsigned j; uint8_t t[8]; - const int offset = i * 5; + const unsigned offset = i * 5; /* * Explicitly truncate to avoid warning about * implicit truncation in CBMC and unwind loop for ease @@ -240,29 +228,62 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) /* and copy to the correct slice in r[] */ for (j = 0; j < 8; j++) __loop__( - invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8) + invariant(j <= 8 && i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q))) { r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]); } } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 160}" -#endif - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d11(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]) +{ + unsigned j; + for (j = 0; j < MLKEM_N / 8; j++) + __loop__( + invariant(j <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q))) + { + unsigned k; + uint16_t t[8]; + uint8_t const *base = &a[11 * j]; + t[0] = 0x7FF & ((base[0] >> 0) | 
((uint16_t)base[1] << 8)); + t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); + t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | + ((uint16_t)base[4] << 10)); + t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); + t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); + t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | + ((uint16_t)base[8] << 9)); + t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); + t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); + + for (k = 0; k < 8; k++) + __loop__( + invariant(k <= 8) + invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q))) + { + r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); + } + } + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD) || MLKEM_K == 4 */ + #if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES) MLKEM_NATIVE_INTERNAL_API void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); - + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_N / 2; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 2)) + __loop__(invariant(i <= MLKEM_N / 2)) { const uint16_t t0 = a->coeffs[2 * i]; const uint16_t t1 = a->coeffs[2 * i + 1]; @@ -290,7 +311,7 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) MLKEM_NATIVE_INTERNAL_API void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) { - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); poly_tobytes_native(r, a); } #endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */ @@ -302,7 +323,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) unsigned i; for (i = 0; i < MLKEM_N / 2; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 2) + invariant(i <= MLKEM_N / 2) invariant(array_bound(r->coeffs, 0, 2 * i, 0, UINT12_LIMIT))) { const uint8_t t0 = a[3 * i + 0]; @@ -313,7 +334,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) } /* Note that the coefficients are not 
canonical */ - POLY_UBOUND(r, 4096); + debug_assert_bound(r, MLKEM_N, 0, UINT12_LIMIT); } #else /* MLKEM_USE_NATIVE_POLY_FROMBYTES */ MLKEM_NATIVE_INTERNAL_API @@ -333,13 +354,13 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q))) { unsigned j; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i < MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i < MLKEM_N / 8 && j <= 8) invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q))) { /* Prevent the compiler from recognizing this as a bit selection */ @@ -347,23 +368,23 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) r->coeffs[8 * i + j] = ct_sel_int16(HALF_Q, 0, msg[i] & mask); } } - POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output"); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + __loop__(invariant(i <= MLKEM_N / 8)) { unsigned j; msg[i] = 0; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)) + invariant(i <= MLKEM_N / 8 && j <= 8)) { uint32_t t = scalar_compress_d1(a->coeffs[8 * i + j]); msg[i] |= t << j; @@ -371,104 +392,17 @@ void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) } } -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, - uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) -{ - ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN 
uint8_t extkey0[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1]; - memcpy(extkey0, seed, MLKEM_SYMBYTES); - memcpy(extkey1, seed, MLKEM_SYMBYTES); - memcpy(extkey2, seed, MLKEM_SYMBYTES); - memcpy(extkey3, seed, MLKEM_SYMBYTES); - extkey0[MLKEM_SYMBYTES] = nonce0; - extkey1[MLKEM_SYMBYTES] = nonce1; - extkey2[MLKEM_SYMBYTES] = nonce2; - extkey3[MLKEM_SYMBYTES] = nonce3; - prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3); - poly_cbd_eta1(r0, buf0); - poly_cbd_eta1(r1, buf1); - poly_cbd_eta1(r2, buf2); - poly_cbd_eta1(r3, buf3); - - POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0"); - POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 1"); - POLY_BOUND_MSG(r2, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 2"); - POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3"); -} - -#if MLKEM_K == 2 || MLKEM_K == 4 -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce) -{ - ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; - ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1]; - - memcpy(extkey, seed, MLKEM_SYMBYTES); - extkey[MLKEM_SYMBYTES] = nonce; - prf_eta2(buf, extkey); - - poly_cbd_eta2(r, buf); - - POLY_BOUND_MSG(r, MLKEM_ETA1 + 1, "poly_getnoise_eta2 output"); -} -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ - -#if MLKEM_K == 2 -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, - uint8_t nonce3) -{ - ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; - ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; - memcpy(extkey[0], seed, MLKEM_SYMBYTES); - memcpy(extkey[1], seed, MLKEM_SYMBYTES); - memcpy(extkey[2], seed, MLKEM_SYMBYTES); - 
memcpy(extkey[3], seed, MLKEM_SYMBYTES); - extkey[0][MLKEM_SYMBYTES] = nonce0; - extkey[1][MLKEM_SYMBYTES] = nonce1; - extkey[2][MLKEM_SYMBYTES] = nonce2; - extkey[3][MLKEM_SYMBYTES] = nonce3; - - prf_eta1(buf1[0], extkey[0]); - prf_eta1(buf1[1], extkey[1]); - prf_eta2(buf2[0], extkey[2]); - prf_eta2(buf2[1], extkey[3]); - - poly_cbd_eta1(r0, buf1[0]); - poly_cbd_eta1(r1, buf1[1]); - poly_cbd_eta2(r2, buf2[0]); - poly_cbd_eta2(r3, buf2[1]); - - POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 0"); - POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 1"); - POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2"); - POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 3"); -} -#endif /* MLKEM_K == 2 */ - MLKEM_NATIVE_INTERNAL_API void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, const poly_mulcache *b_cache) { unsigned i; - POLY_BOUND(b_cache, 4096); + debug_assert_bound(a, MLKEM_N, 0, UINT12_LIMIT); for (i = 0; i < MLKEM_N / 4; i++) __loop__( assigns(i, object_whole(r)) - invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(i <= MLKEM_N / 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i, 2 * MLKEM_Q))) { basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i], @@ -476,6 +410,8 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, basemul_cached(&r->coeffs[4 * i + 2], &a->coeffs[4 * i + 2], &b->coeffs[4 * i + 2], b_cache->coeffs[2 * i + 1]); } + + debug_assert_abs_bound(r, MLKEM_N, 2 * MLKEM_Q); } #if !defined(MLKEM_USE_NATIVE_POLY_TOMONT) @@ -486,20 +422,20 @@ void poly_tomont(poly *r) const int16_t f = (1ULL << 32) % MLKEM_Q; /* 1353 */ for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) - invariant(array_abs_bound(r->coeffs ,0, i, MLKEM_Q))) + invariant(i <= MLKEM_N) + invariant(array_abs_bound(r->coeffs, 0, i, MLKEM_Q))) { r->coeffs[i] = fqmul(r->coeffs[i], f); } - POLY_BOUND(r, MLKEM_Q); + 
debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_TOMONT */ MLKEM_NATIVE_INTERNAL_API void poly_tomont(poly *r) { poly_tomont_native(r); - POLY_BOUND(r, MLKEM_Q); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } #endif /* MLKEM_USE_NATIVE_POLY_TOMONT */ @@ -510,7 +446,7 @@ void poly_reduce(poly *r) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(array_bound(r->coeffs, 0, i, 0, MLKEM_Q))) { /* Barrett reduction, giving signed canonical representative */ @@ -519,14 +455,14 @@ void poly_reduce(poly *r) r->coeffs[i] = scalar_signed_to_unsigned_q(t); } - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_REDUCE */ MLKEM_NATIVE_INTERNAL_API void poly_reduce(poly *r) { poly_reduce_native(r); - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } #endif /* MLKEM_USE_NATIVE_POLY_REDUCE */ @@ -536,7 +472,7 @@ void poly_add(poly *r, const poly *b) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1]))) { @@ -550,7 +486,7 @@ void poly_sub(poly *r, const poly *b) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1]))) { @@ -564,20 +500,36 @@ void poly_mulcache_compute(poly_mulcache *x, const poly *a) { unsigned i; for (i = 0; i < MLKEM_N / 4; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 4)) + __loop__( + invariant(i <= MLKEM_N / 4) + invariant(array_abs_bound(x->coeffs, 0, 2 * i, MLKEM_Q))) { x->coeffs[2 * i + 0] = fqmul(a->coeffs[4 * i + 1], zetas[64 + 
i]); x->coeffs[2 * i + 1] = fqmul(a->coeffs[4 * i + 3], -zetas[64 + i]); } - POLY_BOUND(x, MLKEM_Q); + + /* + * This bound is true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus omitted + * them from the spec to not unnecessarily constrain native + * implementations, but checked here nonetheless. + */ + debug_assert_abs_bound(x, MLKEM_N / 2, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ MLKEM_NATIVE_INTERNAL_API void poly_mulcache_compute(poly_mulcache *x, const poly *a) { poly_mulcache_compute_native(x, a); - /* Omitting POLY_BOUND(x, MLKEM_Q) since native implementations may + /* Omitting bounds assertion since native implementations may * decide not to use a mulcache. Note that the C backend implementation * of poly_basemul_montgomery_cached() does still include the check. */ } #endif /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_poly MLKEM_NAMESPACE_K(empty_cu_poly) +int empty_cu_poly; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.h index 1e8c109c6..6a14c785d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.h @@ -307,112 +307,164 @@ __contract__( ************************************************************/ static INLINE uint16_t scalar_signed_to_unsigned_q(int16_t c) __contract__( - requires(c >= -(MLKEM_Q - 1) && c <= (MLKEM_Q - 1)) - ensures(return_value >= 0 && return_value <= (MLKEM_Q - 1)) + requires(c > -MLKEM_Q && c < MLKEM_Q) + ensures(return_value >= 0 && return_value < MLKEM_Q) ensures(return_value == (int32_t)c + (((int32_t)c < 0) * MLKEM_Q))) { + debug_assert_abs_bound(&c, 1, MLKEM_Q); + /* Add Q if c is negative, but in constant time */ c = ct_sel_int16(c + MLKEM_Q, c, ct_cmask_neg_i16(c)); - cassert(c >= 0, 
"scalar_signed_to_unsigned_q result lower bound"); - cassert(c < MLKEM_Q, "scalar_signed_to_unsigned_q result upper bound"); - /* and therefore cast to uint16_t is safe. */ + debug_assert_bound(&c, 1, 0, MLKEM_Q); return (uint16_t)c; } -#define poly_compress_du MLKEM_NAMESPACE(poly_compress_du) +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || \ + (MLKEM_K == 2 || MLKEM_K == 3) +#define poly_compress_d4 MLKEM_NAMESPACE(poly_compress_d4) /************************************************* - * Name: poly_compress_du + * Name: poly_compress_d4 * - * Description: Compression (du bits) and subsequent serialization of a - *polynomial + * Description: Compression (4 bits) and subsequent serialization of a + * polynomial * * Arguments: - uint8_t *r: pointer to output byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes) * - const poly *a: pointer to input polynomial * Coefficients must be unsigned canonical, * i.e. in [0,1,..,MLKEM_Q-1]. **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) -__contract__( - requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) - requires(memory_no_alias(a, sizeof(poly))) - requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) -); +void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a); + +#define poly_compress_d10 MLKEM_NAMESPACE(poly_compress_d10) +/************************************************* + * Name: poly_compress_d10 + * + * Description: Compression (10 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a); -#define poly_decompress_du MLKEM_NAMESPACE(poly_decompress_du) +#define poly_decompress_d4 MLKEM_NAMESPACE(poly_decompress_d4) /************************************************* - * Name: poly_decompress_du + * Name: poly_decompress_d4 * - * Description: De-serialization and subsequent decompression (du bits) of a - *polynomial; approximate inverse of poly_compress_du + * Description: De-serialization and subsequent decompression (4 bits) of a + * polynomial; approximate inverse of poly_compress_d4 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *a: pointer to input byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES bytes) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes) * * Upon return, the coefficients of the output polynomial are unsigned-canonical * (non-negative and smaller than MLKEM_Q). * **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) -__contract__( - requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) - requires(memory_no_alias(r, sizeof(poly))) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) -); +void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]); -#define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv) +#define poly_decompress_d10 MLKEM_NAMESPACE(poly_decompress_d10) /************************************************* - * Name: poly_compress_dv + * Name: poly_decompress_d10 + * + * Description: De-serialization and subsequent decompression (10 bits) of a + * polynomial; approximate inverse of poly_compress_d10 * - * Description: Compression (dv bits) and subsequent serialization of a - *polynomial + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t 
*a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d10(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]); +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 \ + || MLKEM_K == 3) */ + +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 +#define poly_compress_d5 MLKEM_NAMESPACE(poly_compress_d5) +/************************************************* + * Name: poly_compress_d5 + * + * Description: Compression (5 bits) and subsequent serialization of a + * polynomial * * Arguments: - uint8_t *r: pointer to output byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES_DV) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes) * - const poly *a: pointer to input polynomial * Coefficients must be unsigned canonical, * i.e. in [0,1,..,MLKEM_Q-1]. 
**************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) -__contract__( - requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) - requires(memory_no_alias(a, sizeof(poly))) - requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - assigns(object_whole(r)) -); +void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a); -#define poly_decompress_dv MLKEM_NAMESPACE(poly_decompress_dv) +#define poly_compress_d11 MLKEM_NAMESPACE(poly_compress_d11) /************************************************* - * Name: poly_decompress_dv + * Name: poly_compress_d11 + * + * Description: Compression (11 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a); + +#define poly_decompress_d5 MLKEM_NAMESPACE(poly_decompress_d5) +/************************************************* + * Name: poly_decompress_d5 * * Description: De-serialization and subsequent decompression (dv bits) of a - *polynomial; approximate inverse of poly_compress + * polynomial; approximate inverse of poly_compress * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *a: pointer to input byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES_DV - *bytes) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes) * * Upon return, the coefficients of the output polynomial are unsigned-canonical * (non-negative and smaller than MLKEM_Q). 
* **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) -__contract__( - requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) - requires(memory_no_alias(r, sizeof(poly))) - assigns(object_whole(r)) - ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) -); +void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]); + +#define poly_decompress_d11 MLKEM_NAMESPACE(poly_decompress_d11) +/************************************************* + * Name: poly_decompress_d11 + * + * Description: De-serialization and subsequent decompression (11 bits) of a + * polynomial; approximate inverse of poly_compress_d11 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d11(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]); +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 \ + */ #define poly_tobytes MLKEM_NAMESPACE(poly_tobytes) /************************************************* @@ -500,144 +552,6 @@ __contract__( assigns(object_whole(msg)) ); -#define poly_getnoise_eta1_4x MLKEM_NAMESPACE(poly_getnoise_eta1_4x) -/************************************************* - * Name: poly_getnoise_eta1_4x - * - * Description: Batch sample four polynomials deterministically from a seed - * and nonces, with output polynomials close to centered binomial distribution - * with parameter MLKEM_ETA1. 
- * - * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce{0,1,2,3}: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, - uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) -/* Depending on MLKEM_K, the pointers passed to this function belong - to the same objects, so we cannot use memory_no_alias for r0-r3. - - NOTE: Somehow it is important to use memory_no_alias() first in the - conjunctions defining each case. -*/ -#if MLKEM_K == 2 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ - (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && - r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#elif MLKEM_K == 4 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case B: r0, r1, r2, r3 consecutive */ - (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 
1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#elif MLKEM_K == 3 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case C: r0, r1, r2 consecutive */ - (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && - r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#endif /* MLKEM_K */ - -#if MLKEM_ETA1 == MLKEM_ETA2 -/* - * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 - * where MLKEM_ETA2 = MLKEM_ETA1 = 2. - * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. - */ -#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x -#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ - -#if MLKEM_K == 2 || MLKEM_K == 4 -#define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2) -/************************************************* - * Name: poly_getnoise_eta2 - * - * Description: Sample a polynomial deterministically from a seed and a nonce, - * with output polynomial close to centered binomial distribution - * with parameter MLKEM_ETA2 - * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - assigns(object_whole(r)) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, 
MLKEM_ETA2 + 1)) -); -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ - -#if MLKEM_K == 2 -#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x) -/************************************************* - * Name: poly_getnoise_eta1122_4x - * - * Description: Batch sample four polynomials deterministically from a seed - * and a nonces, with output polynomials close to centered binomial - * distribution with parameter MLKEM_ETA1 and MLKEM_ETA2 - * - * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce{0,1,2,3}: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, - uint8_t nonce3) -__contract__( - requires( /* r0, r1 consecutive, r2, r3 consecutive */ - (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && - r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) - ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)); -); -#endif /* MLKEM_K == 2 */ - #define poly_basemul_montgomery_cached \ MLKEM_NAMESPACE(poly_basemul_montgomery_cached) /************************************************* @@ -649,8 +563,7 @@ __contract__( * Bounds: * - a is assumed to be coefficient-wise < q in absolute value. * - * The result is coefficient-wise bound by 3/2 q in absolute - * value. + * The result is coefficient-wise bound by 2*q in absolute value. 
* * Arguments: - poly *r: pointer to output polynomial * - const poly *a: pointer to first input polynomial @@ -802,4 +715,4 @@ __contract__( assigns(object_whole(r)) ); -#endif +#endif /* POLY_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.c index 7d2016773..50ea1c34a 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.c @@ -4,18 +4,29 @@ */ #include "polyvec.h" #include +#include #include "arith_backend.h" +#include "cbd.h" #include "ntt.h" #include "poly.h" +#include "symmetric.h" -#include "debug/debug.h" +#include "debug.h" + +/* Static namespacing + * This is to facilitate building multiple instances + * of mlkem-native (e.g. with varying security levels) + * within a single compilation unit. */ +#define poly_cbd_eta1 MLKEM_NAMESPACE_K(poly_cbd_eta1) +#define poly_cbd_eta2 MLKEM_NAMESPACE_K(poly_cbd_eta2) +/* End of static namespacing */ MLKEM_NATIVE_INTERNAL_API void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU], const polyvec *a) { unsigned i; - POLYVEC_UBOUND(a, MLKEM_Q); + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_K; i++) { @@ -33,13 +44,15 @@ void polyvec_decompress_du(polyvec *r, poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU); } - POLYVEC_UBOUND(r, MLKEM_Q); + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a) { unsigned i; + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q); + for (i = 0; i < MLKEM_K; i++) { poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]); @@ -54,6 +67,8 @@ void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES]) { poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES); } + + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); } MLKEM_NATIVE_INTERNAL_API @@ -64,6 +79,8 @@ void 
polyvec_ntt(polyvec *r) { poly_ntt(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, NTT_BOUND); } MLKEM_NATIVE_INTERNAL_API @@ -74,6 +91,8 @@ void polyvec_invntt_tomont(polyvec *r) { poly_invntt_tomont(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, INVNTT_BOUND); } #if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) @@ -84,10 +103,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, { unsigned i; poly t; - - POLYVEC_BOUND(a, 4096); - POLYVEC_BOUND(b, NTT_BOUND); - POLYVEC_BOUND(b_cache, MLKEM_Q); + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); poly_basemul_montgomery_cached(r, &a->vec[0], &b->vec[0], &b_cache->vec[0]); for (i = 1; i < MLKEM_K; i++) @@ -95,18 +111,15 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, poly_basemul_montgomery_cached(&t, &a->vec[i], &b->vec[i], &b_cache->vec[i]); poly_add(r, &t); - /* abs bounds: < (i+1) * 3/2 * q */ } /* - * Those bounds are true for the C implementation, but not needed - * in the higher level bounds reasoning. It is thus best to omit - * them from the spec to not unnecessarily constraint native implementations. + * This bound is true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus omitted + * from the spec to not unnecessarily constrain native + * implementations, but checked here nonetheless. 
*/ - cassert(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_K * 2 * MLKEM_Q), - "polyvec_basemul_acc_montgomery_cached output bounds"); - /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */ - POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_K * 2 * MLKEM_Q); } #else /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */ MLKEM_NATIVE_INTERNAL_API @@ -114,9 +127,8 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, const polyvec *b, const polyvec_mulcache *b_cache) { - POLYVEC_BOUND(a, 4096); - POLYVEC_BOUND(b, NTT_BOUND); - /* Omitting POLYVEC_BOUND(b_cache, MLKEM_Q) since native implementations may + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); + /* Omitting bounds assertion for cache since native implementations may * decide not to use a mulcache. Note that the C backend implementation * of poly_basemul_montgomery_cached() does still include the check. */ polyvec_basemul_acc_montgomery_cached_native(r, a, b, b_cache); @@ -149,6 +161,8 @@ void polyvec_reduce(polyvec *r) { poly_reduce(&r->vec[i]); } + + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API @@ -169,4 +183,148 @@ void polyvec_tomont(polyvec *r) { poly_tomont(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, MLKEM_Q); +} + + +/************************************************* + * Name: poly_cbd_eta1 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA1. 
+ * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static INLINE void poly_cbd_eta1(poly *r, + const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)) +) +{ +#if MLKEM_ETA1 == 2 + poly_cbd2(r, buf); +#elif MLKEM_ETA1 == 3 + poly_cbd3(r, buf); +#else +#error "Invalid value of MLKEM_ETA1" +#endif +} + +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +{ + ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t extkey0[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1]; + memcpy(extkey0, seed, MLKEM_SYMBYTES); + memcpy(extkey1, seed, MLKEM_SYMBYTES); + memcpy(extkey2, seed, MLKEM_SYMBYTES); + memcpy(extkey3, seed, MLKEM_SYMBYTES); + extkey0[MLKEM_SYMBYTES] = nonce0; + extkey1[MLKEM_SYMBYTES] = nonce1; + extkey2[MLKEM_SYMBYTES] = nonce2; + extkey3[MLKEM_SYMBYTES] = nonce3; + prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3); + poly_cbd_eta1(r0, buf0); + poly_cbd_eta1(r1, buf1); + poly_cbd_eta1(r2, buf2); + poly_cbd_eta1(r3, buf3); + + debug_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA1 + 1); +} + +#if MLKEM_K == 2 || MLKEM_K == 4 
+/************************************************* + * Name: poly_cbd_eta2 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA2. + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static INLINE void poly_cbd_eta2(poly *r, + const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1))) +{ +#if MLKEM_ETA2 == 2 + poly_cbd2(r, buf); +#else +#error "Invalid value of MLKEM_ETA2" +#endif +} + +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +{ + ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1]; + + memcpy(extkey, seed, MLKEM_SYMBYTES); + extkey[MLKEM_SYMBYTES] = nonce; + prf_eta2(buf, extkey); + + poly_cbd_eta2(r, buf); + + debug_assert_abs_bound(r, MLKEM_N, MLKEM_ETA2 + 1); +} +#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ + + +#if MLKEM_K == 2 +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +{ + ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; + memcpy(extkey[0], seed, MLKEM_SYMBYTES); + memcpy(extkey[1], seed, MLKEM_SYMBYTES); + memcpy(extkey[2], seed, MLKEM_SYMBYTES); + memcpy(extkey[3], seed, MLKEM_SYMBYTES); + extkey[0][MLKEM_SYMBYTES] = nonce0; + extkey[1][MLKEM_SYMBYTES] = nonce1; + extkey[2][MLKEM_SYMBYTES] = nonce2; + 
extkey[3][MLKEM_SYMBYTES] = nonce3; + + prf_eta1(buf1[0], extkey[0]); + prf_eta1(buf1[1], extkey[1]); + prf_eta2(buf2[0], extkey[2]); + prf_eta2(buf2[1], extkey[3]); + + poly_cbd_eta1(r0, buf1[0]); + poly_cbd_eta1(r1, buf1[1]); + poly_cbd_eta2(r2, buf2[0]); + poly_cbd_eta2(r3, buf2[1]); + + debug_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA2 + 1); + debug_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA2 + 1); } +#endif /* MLKEM_K == 2 */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.h index 138724150..8be8579e0 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.h @@ -9,19 +9,144 @@ #include "common.h" #include "poly.h" -#define polyvec MLKEM_NAMESPACE(polyvec) +#define polyvec MLKEM_NAMESPACE_K(polyvec) typedef struct { poly vec[MLKEM_K]; } ALIGN polyvec; -#define polyvec_mulcache MLKEM_NAMESPACE(polyvec_mulcache) +#define polyvec_mulcache MLKEM_NAMESPACE_K(polyvec_mulcache) typedef struct { poly_mulcache vec[MLKEM_K]; } polyvec_mulcache; -#define polyvec_compress_du MLKEM_NAMESPACE(polyvec_compress_du) +#define poly_compress_du MLKEM_NAMESPACE_K(poly_compress_du) +/************************************************* + * Name: poly_compress_du + * + * Description: Compression (du bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +static INLINE void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], + const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU))) +{ +#if MLKEM_DU == 10 + poly_compress_d10(r, a); +#elif MLKEM_DU == 11 + poly_compress_d11(r, a); +#else +#error "Invalid value of MLKEM_DU" +#endif +} + +#define poly_decompress_du MLKEM_NAMESPACE_K(poly_decompress_du) +/************************************************* + * Name: poly_decompress_du + * + * Description: De-serialization and subsequent decompression (du bits) of a + * polynomial; approximate inverse of poly_compress_du + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). 
+ * + **************************************************/ +static INLINE void poly_decompress_du( + poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +{ +#if MLKEM_DU == 10 + poly_decompress_d10(r, a); +#elif MLKEM_DU == 11 + poly_decompress_d11(r, a); +#else +#error "Invalid value of MLKEM_DU" +#endif +} + +#define poly_compress_dv MLKEM_NAMESPACE_K(poly_compress_dv) +/************************************************* + * Name: poly_compress_dv + * + * Description: Compression (dv bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +static INLINE void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], + const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(object_whole(r))) +{ +#if MLKEM_DV == 4 + poly_compress_d4(r, a); +#elif MLKEM_DV == 5 + poly_compress_d5(r, a); +#else +#error "Invalid value of MLKEM_DV" +#endif +} + + +#define poly_decompress_dv MLKEM_NAMESPACE_K(poly_decompress_dv) +/************************************************* + * Name: poly_decompress_dv + * + * Description: De-serialization and subsequent decompression (dv bits) of a + * polynomial; approximate inverse of poly_compress + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). 
+ * + **************************************************/ +static INLINE void poly_decompress_dv( + poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(object_whole(r)) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +{ +#if MLKEM_DV == 4 + poly_decompress_d4(r, a); +#elif MLKEM_DV == 5 + poly_decompress_d5(r, a); +#else +#error "Invalid value of MLKEM_DV" +#endif +} + +#define polyvec_compress_du MLKEM_NAMESPACE_K(polyvec_compress_du) /************************************************* * Name: polyvec_compress_du * @@ -44,7 +169,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_decompress_du MLKEM_NAMESPACE(polyvec_decompress_du) +#define polyvec_decompress_du MLKEM_NAMESPACE_K(polyvec_decompress_du) /************************************************* * Name: polyvec_decompress_du * @@ -67,7 +192,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) ); -#define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes) +#define polyvec_tobytes MLKEM_NAMESPACE_K(polyvec_tobytes) /************************************************* * Name: polyvec_tobytes * @@ -88,7 +213,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_frombytes MLKEM_NAMESPACE(polyvec_frombytes) +#define polyvec_frombytes MLKEM_NAMESPACE_K(polyvec_frombytes) /************************************************* * Name: polyvec_frombytes * @@ -110,7 +235,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, UINT12_LIMIT))) ); -#define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt) +#define polyvec_ntt MLKEM_NAMESPACE_K(polyvec_ntt) /************************************************* * Name: polyvec_ntt * @@ -136,7 +261,7 @@ __contract__( array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, NTT_BOUND))) ); -#define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont) +#define polyvec_invntt_tomont 
MLKEM_NAMESPACE_K(polyvec_invntt_tomont) /************************************************* * Name: polyvec_invntt_tomont * @@ -162,7 +287,7 @@ __contract__( ); #define polyvec_basemul_acc_montgomery \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery) /************************************************* * Name: polyvec_basemul_acc_montgomery * @@ -186,7 +311,7 @@ __contract__( #define polyvec_basemul_acc_montgomery_cached \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached) /************************************************* * Name: polyvec_basemul_acc_montgomery_cached * @@ -194,7 +319,7 @@ __contract__( * using mulcache for second operand. * * Bounds: - * - a is assumed to be coefficient-wise < 4096 in absolute value. + * - Every coefficient of a is assumed to be in [0..4095] * - No bounds guarantees for the coefficients in the result. * * Arguments: - poly *r: pointer to output polynomial @@ -218,7 +343,7 @@ __contract__( assigns(memory_slice(r, sizeof(poly))) ); -#define polyvec_mulcache_compute MLKEM_NAMESPACE(polyvec_mulcache_compute) +#define polyvec_mulcache_compute MLKEM_NAMESPACE_K(polyvec_mulcache_compute) /************************************************************ * Name: polyvec_mulcache_compute * @@ -252,7 +377,7 @@ __contract__( assigns(object_whole(x)) ); -#define polyvec_reduce MLKEM_NAMESPACE(polyvec_reduce) +#define polyvec_reduce MLKEM_NAMESPACE_K(polyvec_reduce) /************************************************* * Name: polyvec_reduce * @@ -278,7 +403,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) ); -#define polyvec_add MLKEM_NAMESPACE(polyvec_add) +#define polyvec_add MLKEM_NAMESPACE_K(polyvec_add) /************************************************* * Name: polyvec_add * @@ -309,7 +434,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_tomont MLKEM_NAMESPACE(polyvec_tomont) +#define 
polyvec_tomont MLKEM_NAMESPACE_K(polyvec_tomont) /************************************************* * Name: polyvec_tomont * @@ -329,4 +454,142 @@ __contract__( array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, MLKEM_Q))) ); +#define poly_getnoise_eta1_4x MLKEM_NAMESPACE_K(poly_getnoise_eta1_4x) +/************************************************* + * Name: poly_getnoise_eta1_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial distribution + * with parameter MLKEM_ETA1. + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +/* Depending on MLKEM_K, the pointers passed to this function belong + to the same objects, so we cannot use memory_no_alias for r0-r3. + + NOTE: Somehow it is important to use memory_no_alias() first in the + conjunctions defining each case. 
+*/ +#if MLKEM_K == 2 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#elif MLKEM_K == 4 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case B: r0, r1, r2, r3 consecutive */ + (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#elif MLKEM_K == 3 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case C: r0, r1, r2 consecutive */ + (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && + r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) 
+ && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#endif /* MLKEM_K */ + +#if MLKEM_ETA1 == MLKEM_ETA2 +/* + * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 + * where MLKEM_ETA2 = MLKEM_ETA1 = 2. + * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. + */ +#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x +#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ + +#if MLKEM_K == 2 || MLKEM_K == 4 +#define poly_getnoise_eta2 MLKEM_NAMESPACE_K(poly_getnoise_eta2) +/************************************************* + * Name: poly_getnoise_eta2 + * + * Description: Sample a polynomial deterministically from a seed and a nonce, + * with output polynomial close to centered binomial distribution + * with parameter MLKEM_ETA2 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce: one-byte input nonce + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r)) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)) +); +#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ + +#if MLKEM_K == 2 +#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE_K(poly_getnoise_eta1122_4x) +/************************************************* + * Name: poly_getnoise_eta1122_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and a nonces, with output polynomials close to centered binomial + * distribution with parameter MLKEM_ETA1 and MLKEM_ETA2 + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + 
**************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +__contract__( + requires( /* r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) + ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)); +); +#endif /* MLKEM_K == 2 */ + #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/reduce.h index 1f502167e..b432a4201 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/reduce.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/reduce.h @@ -8,7 +8,7 @@ #include #include "cbmc.h" #include "common.h" -#include "debug/debug.h" +#include "debug.h" /* Static namespacing * This is to facilitate building multiple instances @@ -109,13 +109,13 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a) **************************************************/ static INLINE int16_t montgomery_reduce(int32_t a) __contract__( - requires(a > -(2 * 4096 * 32768)) - requires(a < (2 * 4096 * 32768)) + requires(a > -(2 * UINT12_LIMIT * 32768)) + requires(a < (2 * UINT12_LIMIT * 32768)) ensures(return_value > -2 * MLKEM_Q && return_value < 2 * MLKEM_Q) ) { int16_t res; - SCALAR_BOUND(a, 2 * UINT12_LIMIT * 32768, "montgomery_reduce input"); + debug_assert_abs_bound(&a, 1, 2 * UINT12_LIMIT * 32768); res = montgomery_reduce_generic(a); /* Bounds: @@ -124,7 +124,7 @@ __contract__( * <= 
UINT12_LIMIT + (MLKEM_Q + 1) / 2 * < 2 * MLKEM_Q */ - SCALAR_BOUND(res, 2 * MLKEM_Q, "montgomery_reduce output"); + debug_assert_abs_bound(&res, 1, 2 * MLKEM_Q); return res; } @@ -150,7 +150,7 @@ __contract__( ) { int16_t res; - SCALAR_BOUND(b, HALF_Q, "fqmul input"); + debug_assert_abs_bound(&b, 1, HALF_Q); res = montgomery_reduce((int32_t)a * (int32_t)b); /* Bounds: @@ -160,7 +160,7 @@ __contract__( * < MLKEM_Q */ - SCALAR_BOUND(res, MLKEM_Q, "fqmul output"); + debug_assert_abs_bound(&res, 1, MLKEM_Q); return res; } @@ -200,7 +200,10 @@ __contract__( * t is in -10 .. +10, so we need 32-bit math to * evaluate t * MLKEM_Q and the subsequent subtraction */ - return (int16_t)(a - t * MLKEM_Q); + int16_t res = (int16_t)(a - t * MLKEM_Q); + + debug_assert_abs_bound(&res, 1, HALF_Q); + return res; } #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.c index 918986e9b..cbbe4407f 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.c @@ -2,46 +2,24 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) -#include "rej_uniform.h" #include "arith_backend.h" +#include "debug.h" +#include "fips202.h" +#include "fips202x4.h" +#include "rej_uniform.h" +#include "symmetric.h" /* Static namespacing * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. 
*/ +#define rej_uniform MLKEM_NAMESPACE(rej_uniform) #define rej_uniform_scalar MLKEM_NAMESPACE(rej_uniform_scalar) /* End of static namespacing */ -/************************************************* - * Name: rej_uniform_scalar - * - * Description: Run rejection sampling on uniform random bytes to generate - * uniform random integers mod q - * - * Arguments: - int16_t *r: pointer to output buffer - * - unsigned int target: requested number of 16-bit integers - * (uniform mod q). - * Must be <= 4096. - * - unsigned int offset: number of 16-bit integers that have - * already been sampled. - * Must be <= target. - * - const uint8_t *buf: pointer to input buffer - * (assumed to be uniform random bytes) - * - unsigned int buflen: length of input buffer in bytes - * Must be <= 4096. - * Must be a multiple of 3. - * - * Note: Strictly speaking, only a few values of buflen near UINT_MAX need - * excluding. The limit of 4096 is somewhat arbitary but sufficient for all - * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. - * - * Returns the new offset of sampled 16-bit integers, at most target, - * and at least the initial offset. - * If the new offset is strictly less than len, all of the input buffers - * is guaranteed to have been consumed. If it is equal to len, no information - * is provided on how many bytes of the input buffer have been consumed. 
- **************************************************/ static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target, unsigned int offset, const uint8_t *buf, unsigned int buflen) @@ -58,6 +36,8 @@ __contract__( unsigned int ctr, pos; uint16_t val0, val1; + debug_assert_bound(r, offset, 0, MLKEM_Q); + ctr = offset; pos = 0; /* pos + 3 cannot overflow due to the assumption buflen <= 4096 */ @@ -79,28 +59,183 @@ __contract__( r[ctr++] = val1; } } + + debug_assert_bound(r, ctr, 0, MLKEM_Q); return ctr; } #if !defined(MLKEM_USE_NATIVE_REJ_UNIFORM) -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +/************************************************* + * Name: rej_uniform + * + * Description: Run rejection sampling on uniform random bytes to generate + * uniform random integers mod q + * + * Arguments: - int16_t *r: pointer to output buffer + * - unsigned int target: requested number of 16-bit integers + * (uniform mod q). + * Must be <= 4096. + * - unsigned int offset: number of 16-bit integers that have + * already been sampled. + * Must be <= target. + * - const uint8_t *buf: pointer to input buffer + * (assumed to be uniform random bytes) + * - unsigned int buflen: length of input buffer in bytes + * Must be <= 4096. + * Must be a multiple of 3. + * + * Note: Strictly speaking, only a few values of buflen near UINT_MAX need + * excluding. The limit of 4096 is somewhat arbitary but sufficient for all + * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. + * + * Returns the new offset of sampled 16-bit integers, at most target, + * and at least the initial offset. + * If the new offset is strictly less than len, all of the input buffers + * is guaranteed to have been consumed. If it is equal to len, no information + * is provided on how many bytes of the input buffer have been consumed. 
+ **************************************************/ + +/* + * NOTE: The signature differs from the Kyber reference implementation + * in that it adds the offset and always expects the base of the target + * buffer. This avoids shifting the buffer base in the caller, which appears + * tricky to reason about. + */ +static unsigned int rej_uniform(int16_t *r, unsigned int target, + unsigned int offset, const uint8_t *buf, + unsigned int buflen) +__contract__( + requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) + requires(memory_no_alias(r, sizeof(int16_t) * target)) + requires(memory_no_alias(buf, buflen)) + requires(offset > 0 ==> array_bound(r, 0, offset, 0, MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * target)) + ensures(offset <= return_value && return_value <= target) + ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, MLKEM_Q)) +) { return rej_uniform_scalar(r, target, offset, buf, buflen); } #else /* MLKEM_USE_NATIVE_REJ_UNIFORM */ - -MLKEM_NATIVE_INTERNAL_API -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +static unsigned int rej_uniform(int16_t *r, unsigned int target, + unsigned int offset, const uint8_t *buf, + unsigned int buflen) { int ret; /* Sample from large buffer with full lane as much as possible. 
*/ ret = rej_uniform_native(r + offset, target - offset, buf, buflen); if (ret != -1) - return offset + (unsigned)ret; + { + unsigned res = offset + (unsigned)ret; + debug_assert_bound(r, res, 0, MLKEM_Q); + return res; + } return rej_uniform_scalar(r, target, offset, buf, buflen); } #endif /* MLKEM_USE_NATIVE_REJ_UNIFORM */ + +#ifndef MLKEM_GEN_MATRIX_NBLOCKS +#define MLKEM_GEN_MATRIX_NBLOCKS \ + ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + XOF_RATE) / XOF_RATE) +#endif + +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform_x4(poly *vec, uint8_t *seed[4]) +{ + /* Temporary buffers for XOF output before rejection sampling */ + uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + + /* Tracks the number of coefficients we have already sampled */ + unsigned int ctr[KECCAK_WAY]; + xof_x4_ctx statex; + unsigned int buflen; + + shake128x4_inc_init(&statex); + + /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */ + xof_x4_absorb(&statex, seed[0], seed[1], seed[2], seed[3], + MLKEM_SYMBYTES + 2); + + /* + * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + * This should generate the matrix entries with high probability. + */ + xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, + &statex); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); + + /* + * So long as not all matrix entries have been generated, squeeze + * one more block a time until we're done. 
+ */ + buflen = XOF_RATE; + while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || + ctr[3] < MLKEM_N) + __loop__( + assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), + object_whole(buf1), object_whole(buf2), object_whole(buf3)) + invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) + invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) + invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, MLKEM_Q)) + invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, MLKEM_Q)) + invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, MLKEM_Q)) + invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, MLKEM_Q))) + { + xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); + } + + xof_x4_release(&statex); +} + +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) +{ + xof_ctx state; + uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + unsigned int ctr, buflen; + + shake128_inc_init(&state); + + xof_absorb(&state, seed, MLKEM_SYMBYTES + 2); + + /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + */ + /* This should generate the matrix entry with high probability. 
*/ + xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; + ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); + + /* Squeeze + sample one more block a time until we're done */ + buflen = XOF_RATE; + while (ctr < MLKEM_N) + __loop__( + assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) + invariant(ctr <= MLKEM_N) + invariant(array_bound(entry->coeffs, 0, ctr, 0, MLKEM_Q))) + { + xof_squeezeblocks(buf, 1, &state); + ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen); + } + + xof_release(&state); +} + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_rej_uniform MLKEM_NAMESPACE_K(empty_cu_rej_uniform) +int empty_cu_rej_uniform; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.h index 13db836bc..801287259 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.h @@ -9,54 +9,55 @@ #include #include "cbmc.h" #include "common.h" +#include "poly.h" -#define rej_uniform MLKEM_NAMESPACE(rej_uniform) +#define poly_rej_uniform_x4 MLKEM_NAMESPACE(poly_rej_uniform_x4) /************************************************* - * Name: rej_uniform + * Name: poly_rej_uniform_x4 * - * Description: Run rejection sampling on uniform random bytes to generate - * uniform random integers mod q + * Description: Generate four polynomials using rejection sampling + * on (pseudo-)uniformly random bytes sampled from a seed. * - * Arguments: - int16_t *r: pointer to output buffer - * - unsigned int target: requested number of 16-bit integers - * (uniform mod q). - * Must be <= 4096. - * - unsigned int offset: number of 16-bit integers that have - * already been sampled. - * Must be <= target. 
- * - const uint8_t *buf: pointer to input buffer - * (assumed to be uniform random bytes) - * - unsigned int buflen: length of input buffer in bytes - * Must be <= 4096. - * Must be a multiple of 3. + * Arguments: - poly *vec: Pointer to an array of 4 polynomials + * to be sampled. + * - uint8_t *seed[4]: Pointer to array of four pointers + * pointing to the seed buffers of size + * MLKEM_SYMBYTES + 2 each. * - * Note: Strictly speaking, only a few values of buflen near UINT_MAX need - * excluding. The limit of 4096 is somewhat arbitary but sufficient for all - * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. - * - * Returns the new offset of sampled 16-bit integers, at most target, - * and at least the initial offset. - * If the new offset is strictly less than len, all of the input buffers - * is guaranteed to have been consumed. If it is equal to len, no information - * is provided on how many bytes of the input buffer have been consumed. **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform_x4(poly *vec, uint8_t *seed[4]) +__contract__( + requires(memory_no_alias(vec, sizeof(poly) * 4)) + requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) + requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) + assigns(memory_slice(vec, sizeof(poly) * 4)) + ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, MLKEM_Q))); -/* - * NOTE: The signature differs from the Kyber reference implementation - * in that it adds the offset and always expects the base of the target - * buffer. 
This avoids shifting the buffer base in the caller, which appears - * tricky to reason about. - */ +#define poly_rej_uniform MLKEM_NAMESPACE(poly_rej_uniform) +/************************************************* + * Name: poly_rej_uniform + * + * Description: Generate polynomial using rejection sampling + * on (pseudo-)uniformly random bytes sampled from a seed. + * + * Arguments: - poly *vec: Pointer to polynomial to be sampled. + * - uint8_t *seed: Pointer to seed buffer of size + * MLKEM_SYMBYTES + 2 each. + * + **************************************************/ MLKEM_NATIVE_INTERNAL_API -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +void poly_rej_uniform(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) __contract__( - requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) - requires(memory_no_alias(r, sizeof(int16_t) * target)) - requires(memory_no_alias(buf, buflen)) - requires(offset > 0 ==> array_bound(r, 0, offset, 0, MLKEM_Q)) - assigns(memory_slice(r, sizeof(int16_t) * target)) - ensures(offset <= return_value && return_value <= target) - ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, MLKEM_Q)) -); -#endif + requires(memory_no_alias(entry, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) + assigns(memory_slice(entry, sizeof(poly))) + ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q))); + +#endif /* REJ_UNIFORM_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/symmetric.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/symmetric.h index 55ebbbd53..3563e5505 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/symmetric.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/symmetric.h @@ -10,6 +10,7 @@ #include "cbmc.h" #include "common.h" #include "fips202.h" +#include "fips202x4.h" /* Macros denoting FIPS-203 specific Hash functions */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/verify.c 
b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/verify.c index b7078fcc1..9f39dcd22 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/verify.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/verify.c @@ -4,7 +4,8 @@ */ #include "verify.h" -#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) +#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) && \ + !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) /* * Masking value used in constant-time functions from * verify.h to block the compiler's range analysis and @@ -12,9 +13,11 @@ */ volatile uint64_t ct_opt_blocker_u64 = 0; -#else /* MLKEM_USE_ASM_VALUE_BARRIER */ +#else /* MLKEM_USE_ASM_VALUE_BARRIER && \ + !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ -#define empty_cu_verify MLKEM_NAMESPACE(empty_cu_verify) +#define empty_cu_verify MLKEM_NAMESPACE_K(empty_cu_verify) int empty_cu_verify; -#endif /* MLKEM_USE_ASM_VALUE_BARRIER */ +#endif /* MLKEM_USE_ASM_VALUE_BARRIER && \ + !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/verify.h index 8c47155dc..f6ecf5eba 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/verify.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/verify.h @@ -268,7 +268,7 @@ __contract__( for (i = 0; i < len; i++) __loop__( - invariant(i >= 0 && i <= len) + invariant(i <= len) invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k]))))) { r |= a[i] ^ b[i]; @@ -314,4 +314,4 @@ __contract__( } } -#endif +#endif /* VERIFY_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/zetas.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/zetas.c index 1a26e0dd5..4ef887c62 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/zetas.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/zetas.c @@ -8,6 +8,8 @@ * Do not modify it directly. 
*/ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) #include "ntt.h" /* @@ -28,3 +30,10 @@ ALIGN const int16_t zetas[128] = { -1187, -1659, -1185, -1530, -1278, 794, -1510, -854, -870, 478, -108, -308, 996, 991, 958, -1460, 1522, 1628, }; + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_zetas MLKEM_NAMESPACE_K(empty_cu_zetas) +int empty_cu_zetas; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/arith_backend.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/arith_backend.h index 09e30f207..0543b1bd1 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/arith_backend.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/arith_backend.h @@ -16,7 +16,9 @@ * * Keep this _after_ the inclusion of the backend; otherwise, * the sanity checks won't have an effect. */ +#if defined(MLKEM_NATIVE_CHECK_APIS) #include "api.h" #endif +#endif #endif /* MLKEM_NATIVE_ARITH_IMPL_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbd.c index 433bdc954..1e6b7c5d1 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbd.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbd.c @@ -2,8 +2,11 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ -#include "cbd.h" +#include "common.h" +#ifndef MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED + #include +#include "cbd.h" /* Static namespacing * This is to facilitate building multiple instances @@ -11,8 +14,6 @@ * within a single compilation unit. 
*/ #define load32_littleendian MLKEM_NAMESPACE(load32_littleendian) #define load24_littleendian MLKEM_NAMESPACE(load24_littleendian) -#define cbd2 MLKEM_NAMESPACE(cbd2) -#define cbd3 MLKEM_NAMESPACE(cbd3) /* End of static namespacing */ /************************************************* @@ -35,44 +36,13 @@ static uint32_t load32_littleendian(const uint8_t x[4]) return r; } -#if MLKEM_ETA1 == 3 -/************************************************* - * Name: load24_littleendian - * - * Description: load 3 bytes into a 32-bit integer - * in little-endian order. - * This function is only needed for ML-KEM-512 - * - * Arguments: - const uint8_t *x: pointer to input byte array - * - * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) - **************************************************/ -static uint32_t load24_littleendian(const uint8_t x[3]) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - return r; -} -#endif /* MLKEM_ETA1 == 3 */ - -/************************************************* - * Name: cbd2 - * - * Description: Given an array of uniformly random bytes, compute - * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter eta=2 - * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *buf: pointer to input byte array - **************************************************/ -static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) +MLKEM_NATIVE_INTERNAL_API +void poly_cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) { unsigned i; for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_abs_bound(r->coeffs, 0, 8 * i, 3))) { unsigned j; @@ -82,7 +52,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i <= MLKEM_N / 8 && j <= 8) 
invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 3))) { const int16_t a = (d >> (4 * j + 0)) & 0x3; @@ -92,24 +62,34 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) } } -#if MLKEM_ETA1 == 3 +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == 3 /************************************************* - * Name: cbd3 + * Name: load24_littleendian * - * Description: Given an array of uniformly random bytes, compute - * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter eta=3. + * Description: load 3 bytes into a 32-bit integer + * in little-endian order. * This function is only needed for ML-KEM-512 * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *buf: pointer to input byte array + * Arguments: - const uint8_t *x: pointer to input byte array + * + * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) **************************************************/ -static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) +static uint32_t load24_littleendian(const uint8_t x[3]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + return r; +} + +MLKEM_NATIVE_INTERNAL_API +void poly_cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) { unsigned i; for (i = 0; i < MLKEM_N / 4; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(i <= MLKEM_N / 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i, 4))) { unsigned j; @@ -120,7 +100,7 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) for (j = 0; j < 4; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4) + invariant(i <= MLKEM_N / 4 && j <= 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 4))) { const int16_t a = (d >> (6 * j + 0)) & 0x7; @@ -129,28 +109,12 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) } } } -#endif /* MLKEM_ETA1 == 3 */ +#endif /* 
defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == \ + 3 */ -MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) -{ -#if MLKEM_ETA1 == 2 - cbd2(r, buf); -#elif MLKEM_ETA1 == 3 - cbd3(r, buf); -#else -#error "This implementation requires eta1 in {2,3}" -#endif -} +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ -#if MLKEM_K == 2 || MLKEM_K == 4 -MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) -{ -#if MLKEM_ETA2 == 2 - cbd2(r, buf); -#else -#error "This implementation requires eta2 = 2" -#endif -} -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ +#define empty_cu_cbd MLKEM_NAMESPACE_K(empty_cu_cbd) +int empty_cu_cbd; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbd.h index 15db89570..54c1f5b90 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbd.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbd.h @@ -9,46 +9,35 @@ #include "common.h" #include "poly.h" -#define poly_cbd_eta1 MLKEM_NAMESPACE(poly_cbd_eta1) +#define poly_cbd2 MLKEM_NAMESPACE(poly_cbd2) /************************************************* - * Name: poly_cbd_eta1 + * Name: poly_cbd2 * * Description: Given an array of uniformly random bytes, compute * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter MLKEM_ETA1. 
+ * a centered binomial distribution with parameter eta=2 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *buf: pointer to input byte array **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)) -); +void poly_cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]); -#if MLKEM_K == 2 || MLKEM_K == 4 -#define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2) +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == 3 +#define poly_cbd3 MLKEM_NAMESPACE(poly_cbd3) /************************************************* - * Name: poly_cbd_eta1 + * Name: poly_cbd3 * * Description: Given an array of uniformly random bytes, compute * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter MLKEM_ETA2. + * a centered binomial distribution with parameter eta=3. 
+ * This function is only needed for ML-KEM-512 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *buf: pointer to input byte array **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)) -); -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ +void poly_cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]); +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD || MLKEM_ETA1 == 3 */ -#endif +#endif /* CBD_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbmc.h index baa0bfa9f..52b95bc3f 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbmc.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbmc.h @@ -13,7 +13,7 @@ #define __contract__(x) #define __loop__(x) -#define cassert(x, y) +#define cassert(x) #else /* CBMC _is_ defined, therefore we're doing proof */ @@ -30,7 +30,7 @@ #define invariant(...) __CPROVER_loop_invariant(__VA_ARGS__) #define decreases(...) __CPROVER_decreases(__VA_ARGS__) /* cassert to avoid confusion with in-built assert */ -#define cassert(...) __CPROVER_assert(__VA_ARGS__) +#define cassert(x) __CPROVER_assert(x, "cbmc assertion failed") #define assume(...) 
__CPROVER_assume(__VA_ARGS__) /*************************************************** @@ -119,13 +119,13 @@ { \ unsigned qvar; \ ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> \ - (((value_lb) <= (array_var[(qvar)])) && \ - ((array_var[(qvar)]) < (value_ub))) \ + (((int)(value_lb) <= ((array_var)[(qvar)])) && \ + (((array_var)[(qvar)]) < (int)(value_ub))) \ } #define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \ array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \ - (qvar_ub), (array_var), (value_lb), (value_ub)) + (qvar_ub), (array_var), (value_lb), (value_ub)) /* clang-format on */ /* Wrapper around array_bound operating on absolute values. @@ -134,6 +134,6 @@ * bound in array_bound is inclusive, we have to raise it by 1. */ #define array_abs_bound(arr, lb, ub, k) \ - array_bound((arr), (lb), (ub), -(k) + 1, (k)) + array_bound((arr), (lb), (ub), -((int)(k)) + 1, (k)) #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/common.h index da886780c..4f326333e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/common.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/common.h @@ -43,23 +43,30 @@ #define MLKEM_NATIVE_MAKE_NAMESPACE_(x1, x2) x1##_##x2 #define MLKEM_NATIVE_MAKE_NAMESPACE(x1, x2) MLKEM_NATIVE_MAKE_NAMESPACE_(x1, x2) -#define FIPS202_NAMESPACE(s) \ - MLKEM_NATIVE_MAKE_NAMESPACE(FIPS202_NAMESPACE_PREFIX, s) - #define MLKEM_NAMESPACE(s) \ MLKEM_NATIVE_MAKE_NAMESPACE(MLKEM_NAMESPACE_PREFIX, s) +#if defined(MLKEM_NAMESPACE_PREFIX_ADD_LEVEL) +#define MLKEM_NATIVE_MAKE_NAMESPACE_K_(x1, x2, x3) x1##x2##_##x3 +#define MLKEM_NATIVE_MAKE_NAMESPACE_K(x1, x2, x3) \ + MLKEM_NATIVE_MAKE_NAMESPACE_K_(x1, x2, x3) +#define MLKEM_NAMESPACE_K(s) \ + MLKEM_NATIVE_MAKE_NAMESPACE_K(MLKEM_NAMESPACE_PREFIX, MLKEM_LVL, s) +#else +#define MLKEM_NAMESPACE_K(s) MLKEM_NAMESPACE(s) +#endif + /* On Apple platforms, we need to emit leading underscore * in front of assembly symbols. 
We thus introducee a separate * namespace wrapper for ASM symbols. */ #if !defined(__APPLE__) #define MLKEM_ASM_NAMESPACE(sym) MLKEM_NAMESPACE(sym) -#define FIPS202_ASM_NAMESPACE(sym) FIPS202_NAMESPACE(sym) +#define MLKEM_ASM_NAMESPACE_K(sym) MLKEM_NAMESPACE_K(sym) #else #define PREFIX_UNDERSCORE_(sym) _##sym #define PREFIX_UNDERSCORE(sym) PREFIX_UNDERSCORE_(sym) #define MLKEM_ASM_NAMESPACE(sym) PREFIX_UNDERSCORE(MLKEM_NAMESPACE(sym)) -#define FIPS202_ASM_NAMESPACE(sym) PREFIX_UNDERSCORE(FIPS202_NAMESPACE(sym)) +#define MLKEM_ASM_NAMESPACE_K(sym) PREFIX_UNDERSCORE(MLKEM_NAMESPACE_K(sym)) #endif #endif /* MLKEM_NATIVE_COMMON_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/config.h index d1441835b..fa89370ce 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/config.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/config.h @@ -40,10 +40,12 @@ /* #define MLKEM_NATIVE_CONFIG_FILE "config.h" */ /****************************************************************************** - * Name: MLKEM_NAMESPACE + * Name: MLKEM_NAMESPACE_PREFIX * - * Description: The prefix to use to namespace global symbols - * from mlkem/. + * Description: The prefix to use to namespace global symbols from mlkem/. + * + * Level-dependent symbols will additionally be prefixed with the + * security level if MLKEM_NAMESPACE_PREFIX_ADD_LEVEL is set. * * This can also be set using CFLAGS. * @@ -53,17 +55,71 @@ #endif /****************************************************************************** - * Name: FIPS202_NAMESPACE + * Name: MLKEM_NAMESPACE_PREFIX_ADD_LEVEL + * + * Description: If set, the level (512, 768, 1024) is added to the namespace + * prefix MLKEM_NAMESPACE_PREFIX for all functions which are + * level-dependent. Level-independent functions will have their + * symbol prefixed by MLKEM_NAMESPACE_PREFIX only. * - * Description: The prefix to use to namespace global symbols - * from mlkem/fips202/.
+ * This is intended to be used for multi-level builds where + * level-independent code should be shared across levels. * * This can also be set using CFLAGS. * *****************************************************************************/ -#if !defined(FIPS202_NAMESPACE_PREFIX) -#define FIPS202_NAMESPACE_PREFIX FIPS202_DEFAULT_NAMESPACE_PREFIX -#endif +/* #define MLKEM_NAMESPACE_PREFIX_ADD_LEVEL */ + +/****************************************************************************** + * Name: MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED + * + * Description: This is for multi-level builds of mlkem-native only. If you + * need only a single security level build of mlkem-native, + * keep this unset. + * + * If this is set, all MLKEM_K-independent code will be included + * in the build, including code needed only for other security + * levels. + * + * Example: poly_cbd3 is only needed for MLKEM_K == 2. Yet, if + * this option is set for a build with MLKEM_K==3/4, it would + * be included. + * + * To build mlkem-native with support for all security levels, + * build it three times -- once per level -- and set the option + * MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED for exactly one of + * them, and MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED for the + * others. + * + * See examples/multilevel_build for an example. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +/* #define MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED */ + +/****************************************************************************** + * Name: MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED + * + * Description: This is for multi-level builds of mlkem-native only. If you + * need only a single security level build of mlkem-native, + * keep this unset. + * + * If this is set, no MLKEM_K-independent code will be included + * in the build. 
+ * + * To build mlkem-native with support for all security levels, + * build it three times -- once per level -- and set the option + * MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED for exactly one of + * them, and MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED for the + * others. + * + * See examples/multilevel_build for an example. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +/* #define MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ /****************************************************************************** * Name: MLKEM_USE_NATIVE @@ -112,25 +168,13 @@ /* Default namespace * * Don't change this. If you need a different namespace, re-define - * MLKEM_NAMESPACE above instead, and remove the following. - */ - -/* - * The default FIPS202 namespace is - * - * PQCP_MLKEM_NATIVE_FIPS202__ + * MLKEM_NAMESPACE_PREFIX above instead, and remove the following. * - * e.g., PQCP_MLKEM_NATIVE_FIPS202_C_ - */ - -#define FIPS202_DEFAULT_NAMESPACE_PREFIX PQCP_MLKEM_NATIVE_FIPS202 - -/* * The default MLKEM namespace is * - * PQCP_MLKEM_NATIVE_MLKEM__ + * PQCP_MLKEM_NATIVE_MLKEM_ * - * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_ + * e.g., PQCP_MLKEM_NATIVE_MLKEM512_ */ #if MLKEM_K == 2 diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/debug.c new file mode 100644 index 000000000..4b4857cbc --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/debug.c @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +/* NOTE: You can remove this file unless you compile with MLKEM_DEBUG. 
*/ + +#include "common.h" + +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) && defined(MLKEM_DEBUG) + + +#include <stdio.h> +#include <stdlib.h> +#include "debug.h" + +#define MLKEM_NATIVE_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] " + +void mlkem_debug_assert(const char *file, int line, const int val) +{ + if (val == 0) + { + fprintf(stderr, + MLKEM_NATIVE_DEBUG_ERROR_HEADER "Assertion failed (value %d)\n", + file, line, val); + exit(1); + } +} + +void mlkem_debug_check_bounds(const char *file, int line, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive) +{ + int err = 0; + unsigned i; + for (i = 0; i < len; i++) + { + int16_t val = ptr[i]; + if (!(val > lower_bound_exclusive && val < upper_bound_exclusive)) + { + fprintf( + stderr, + MLKEM_NATIVE_DEBUG_ERROR_HEADER + "Bounds assertion failed: Index %u, value %d out of bounds (%d,%d)\n", + file, line, i, (int)val, lower_bound_exclusive, + upper_bound_exclusive); + err = 1; + } + } + + if (err == 1) + exit(1); +} + +#else /* !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED && MLKEM_DEBUG */ + +#define empty_cu_debug MLKEM_NAMESPACE_K(empty_cu_debug) +int empty_cu_debug; + +#endif /* !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED && MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/debug.h new file mode 100644 index 000000000..1103124db --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/debug.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef MLKEM_DEBUG_H +#define MLKEM_DEBUG_H +#include "common.h" + +#if defined(MLKEM_DEBUG) +#include <stdint.h> + +/************************************************* + * Name: mlkem_debug_assert + * + * Description: Check debug assertion + * + * Prints an error message to stderr and calls + * exit(1) if not.
+ * + * Arguments: - file: filename + * - line: line number + * - val: Value asserted to be non-zero + **************************************************/ +#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert) +void mlkem_debug_assert(const char *file, int line, const int val); + +/************************************************* + * Name: mlkem_debug_check_bounds + * + * Description: Check whether values in an array of int16_t + * are within specified bounds. + * + * Prints an error message to stderr and calls + * exit(1) if not. + * + * Arguments: - file: filename + * - line: line number + * - ptr: Base of array to be checked + * - len: Number of int16_t in ptr + * - lower_bound_exclusive: Exclusive lower bound + * - upper_bound_exclusive: Exclusive upper bound + **************************************************/ +#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds) +void mlkem_debug_check_bounds(const char *file, int line, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive); + +/* Check assertion, calling exit() upon failure + * + * val: Value that's asserted to be non-zero + */ +#define debug_assert(val) mlkem_debug_assert(__FILE__, __LINE__, (val)) + +/* Check bounds in array of int16_t's + * ptr: Base of int16_t array; will be explicitly cast to int16_t*, + * so you may pass a byte-compatible type such as poly or polyvec. 
+ * len: Number of int16_t in array + * value_lb: Inclusive lower value bound + * value_ub: Exclusive upper value bound */ +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + mlkem_debug_check_bounds(__FILE__, __LINE__, (const int16_t *)(ptr), (len), \ + (value_lb)-1, (value_ub)) + +/* Check absolute bounds in array of int16_t's + * ptr: Base of array, expression of type int16_t* + * len: Number of int16_t in array + * value_abs_bd: Exclusive absolute upper bound */ +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + debug_assert_bound((ptr), (len), (-(value_abs_bd) + 1), (value_abs_bd)) + +/* Version of bounds assertions for 2-dimensional arrays */ +#define debug_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \ + debug_assert_bound((ptr), ((len0) * (len1)), (value_lb), (value_ub)) + +#define debug_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \ + debug_assert_abs_bound((ptr), ((len0) * (len1)), (value_abs_bd)) + +/* When running CBMC, convert debug assertions into proof obligations */ +#elif defined(CBMC) + +#include "cbmc.h" + +#define debug_assert(val) cassert(val) + +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + cassert(array_bound(((int16_t *)(ptr)), 0, (len), (value_lb), (value_ub))) + +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + cassert(array_abs_bound(((int16_t *)(ptr)), 0, (len), (value_abs_bd))) + +/* Because of https://github.com/diffblue/cbmc/issues/8570, we can't + * just use a single flattened array_bound(...) here.
*/ +#define debug_assert_bound_2d(ptr, M, N, value_lb, value_ub) \ + cassert(forall(kN, 0, (M), \ + array_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \ + (value_lb), (value_ub)))) + +#define debug_assert_abs_bound_2d(ptr, M, N, value_abs_bd) \ + cassert(forall(kN, 0, (M), \ + array_abs_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \ + (value_abs_bd)))) + +#else /* MLKEM_DEBUG */ + +#define debug_assert(val) \ + do \ + { \ + } while (0) +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + do \ + { \ + } while (0) +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + do \ + { \ + } while (0) + +#define debug_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \ + do \ + { \ + } while (0) + +#define debug_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \ + do \ + { \ + } while (0) + + +#endif /* MLKEM_DEBUG */ +#endif /* MLKEM_DEBUG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/debug/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/debug/debug.c deleted file mode 100644 index 64294ebe1..000000000 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/debug/debug.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2024 The mlkem-native project authors - * SPDX-License-Identifier: Apache-2.0 - */ -#include "../common.h" - -#if defined(MLKEM_DEBUG) - -#include -#include "debug.h" - -#define MLKEM_NATIVE_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] " - -void mlkem_debug_assert(const char *file, int line, const char *description, - const int val) -{ - if (val == 0) - { - fprintf(stderr, - MLKEM_NATIVE_DEBUG_ERROR_HEADER "Assertion failed: %s (value %d)\n", - file, line, description, val); - exit(1); - } -} - -void mlkem_debug_check_bounds(const char *file, int line, - const char *description, const int16_t *ptr, - unsigned len, int lower_bound_exclusive, - int upper_bound_exclusive) -{ - int err = 0; - unsigned i; - for (i = 0; i < len; i++) - { - int16_t val = ptr[i]; - if (!(val > lower_bound_exclusive && val < 
upper_bound_exclusive)) - { - fprintf(stderr, - MLKEM_NATIVE_DEBUG_ERROR_HEADER - "%s, index %u, value %d out of bounds (%d,%d)\n", - file, line, description, i, (int)val, lower_bound_exclusive, - upper_bound_exclusive); - err = 1; - } - } - - if (err == 1) - exit(1); -} - -#else /* MLKEM_DEBUG */ - -#define empty_cu_debug MLKEM_NAMESPACE(empty_cu_debug) -int empty_cu_debug; - -#endif /* MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/debug/debug.h deleted file mode 100644 index 5ce320ea2..000000000 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/debug/debug.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2024 The mlkem-native project authors - * SPDX-License-Identifier: Apache-2.0 - */ -#ifndef MLKEM_DEBUG_H -#define MLKEM_DEBUG_H - -#include "../common.h" - -#if defined(MLKEM_DEBUG) -#include -#include -#include - -/************************************************* - * Name: mlkem_debug_assert - * - * Description: Check debug assertion - * - * Prints an error message to stderr and calls - * exit(1) if not. - * - * Arguments: - file: filename - * - line: line number - * - description: Textual description of assertion - * - val: Value asserted to be non-zero - **************************************************/ -#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert) -void mlkem_debug_assert(const char *file, int line, const char *description, - const int val); - -/************************************************* - * Name: mlkem_debug_check_bounds - * - * Description: Check whether values in an array of int16_t - * are within specified bounds. - * - * Prints an error message to stderr and calls - * exit(1) if not. 
- * - * Arguments: - file: filename - * - line: line number - * - description: Textual description of check - * - ptr: Base of array to be checked - * - len: Number of int16_t in ptr - * - lower_bound_exclusive: Exclusive lower bound - * - upper_bound_exclusive: Exclusive upper bound - **************************************************/ -#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds) -void mlkem_debug_check_bounds(const char *file, int line, - const char *description, const int16_t *ptr, - unsigned len, int lower_bound_exclusive, - int upper_bound_exclusive); - -/* Check assertion, calling exit() upon failure - * - * val: Value that's asserted to be non-zero - * msg: Message to print on failure - * - * Currently called CASSERT to avoid clash with CBMC assert. - */ -#define CASSERT(val, msg) \ - do \ - { \ - mlkem_debug_assert(__FILE__, __LINE__, (msg), (val)); \ - } while (0) - -/* Check absolute bounds of scalar - * val: Scalar to be checked - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define SCALAR_BOUND(val, abs_bound, msg) \ - CASSERT((val) > -(abs_bound) && (val) < (abs_bound), msg) - -/* Check that all coefficients in array of int16_t's are non-negative - * and below an exclusive upper bound. 
- * - * ptr: Base of array, expression of type int16_t* - * len: Number of int16_t in array - * high_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define UBOUND(ptr, len, high_bound, msg) \ - do \ - { \ - mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ - (len), -1, ((high_bound))); \ - } while (0) - -/* Check absolute bounds in array of int16_t's - * ptr: Base of array, expression of type int16_t* - * len: Number of int16_t in array - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define BOUND(ptr, len, abs_bound, msg) \ - do \ - { \ - mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ - (len), -(abs_bound), (abs_bound)); \ - } while (0) - -/* Check absolute bounds on coefficients in polynomial or mulcache - * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define POLY_BOUND_MSG(ptr, abs_bound, msg) \ - BOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (abs_bound), \ - msg) - -/* Check unsigned bounds on coefficients in polynomial or mulcache - * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
- * msg: Message to print on failure */ -#define POLY_UBOUND_MSG(ptr, ubound, msg) \ - UBOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (ubound), \ - msg) - -/* Check absolute bounds on coefficients in polynomial - * ptr: poly* of poly_mulcache* pointer to polynomial (cache) to check - * abs_bound: Exclusive upper bound on absolute value to check */ -#define POLY_BOUND(ptr, abs_bound) \ - POLY_BOUND_MSG((ptr), (abs_bound), "poly absolute bound for " #ptr) - -/* Check unsigned bounds on coefficients in polynomial - * ptr: poly* of poly_mulcache* pointer to polynomial (cache) to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. - */ -#define POLY_UBOUND(ptr, ubound) \ - POLY_UBOUND_MSG((ptr), (ubound), "poly unsigned bound for " #ptr) - -/* Check absolute bounds on coefficients in vector of polynomials - * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check - * abs_bound: Exclusive upper bound on absolute value to check */ -#define POLYVEC_BOUND(ptr, abs_bound) \ - do \ - { \ - unsigned _debug_polyvec_bound_idx; \ - for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ - _debug_polyvec_bound_idx++) \ - POLY_BOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (abs_bound), \ - "polyvec absolute bound for " #ptr ".vec[i]"); \ - } while (0) - -/* Check unsigned bounds on coefficients in vector of polynomials - * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
- */ -#define POLYVEC_UBOUND(ptr, ubound) \ - do \ - { \ - unsigned _debug_polyvec_bound_idx; \ - for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ - _debug_polyvec_bound_idx++) \ - POLY_UBOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (ubound), \ - "polyvec unsigned bound for " #ptr ".vec[i]"); \ - } while (0) - -#define MLKEM_CONCAT_(left, right) left##right -#define MLKEM_CONCAT(left, right) MLKEM_CONCAT_(left, right) - -/* Following AWS-LC to define a C99-compliant static assert */ -#define MLKEM_STATIC_ASSERT_DEFINE(cond, msg) \ - typedef struct \ - { \ - unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \ - } MLKEM_CONCAT(MLKEM_NAMESPACE(static_assertion_), msg) \ - __attribute__((unused)); - -#define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \ - MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix)) -#define MLKEM_STATIC_ASSERT_ADD_LINE1(cond, line, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE0(cond, MLKEM_CONCAT(line, suffix)) -#define MLKEM_STATIC_ASSERT_ADD_LINE2(cond, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE1(cond, __LINE__, suffix) -#define MLKEM_STATIC_ASSERT_ADD_ERROR(cond, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE2(cond, MLKEM_CONCAT(_error_is_, suffix)) -#define STATIC_ASSERT(cond, error) MLKEM_STATIC_ASSERT_ADD_ERROR(cond, error) - -#else /* MLKEM_DEBUG */ - -#define CASSERT(val, msg) \ - do \ - { \ - } while (0) -#define SCALAR_BOUND(val, abs_bound, msg) \ - do \ - { \ - } while (0) -#define BOUND(ptr, len, abs_bound, msg) \ - do \ - { \ - } while (0) -#define POLY_BOUND(ptr, abs_bound) \ - do \ - { \ - } while (0) -#define POLYVEC_BOUND(ptr, abs_bound) \ - do \ - { \ - } while (0) -#define POLY_BOUND_MSG(ptr, ubound, abs_bound) \ - do \ - { \ - } while (0) -#define UBOUND(ptr, len, high_bound, msg) \ - do \ - { \ - } while (0) -#define POLY_UBOUND(ptr, ubound) \ - do \ - { \ - } while (0) -#define POLYVEC_UBOUND(ptr, ubound) \ - do \ - { \ - } while (0) -#define POLY_UBOUND_MSG(ptr, ubound, msg) \ 
- do \ - { \ - } while (0) -#define STATIC_ASSERT(cond, error) - -#endif /* MLKEM_DEBUG */ - -#endif /* MLKEM_DEBUG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/indcpa.c index 4d3133e14..0cfcc3e9e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/indcpa.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/indcpa.c @@ -17,7 +17,7 @@ #include "symmetric.h" #include "arith_backend.h" -#include "debug/debug.h" +#include "debug.h" #include "cbmc.h" @@ -25,15 +25,13 @@ * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. */ -#define pack_pk MLKEM_NAMESPACE(pack_pk) -#define unpack_pk MLKEM_NAMESPACE(unpack_pk) -#define pack_sk MLKEM_NAMESPACE(pack_sk) -#define unpack_sk MLKEM_NAMESPACE(unpack_sk) -#define pack_ciphertext MLKEM_NAMESPACE(pack_ciphertext) -#define unpack_ciphertext MLKEM_NAMESPACE(unpack_ciphertext) -#define gen_matrix_entry_x4 MLKEM_NAMESPACE(gen_matrix_entry_x4) -#define gen_matrix_entry MLKEM_NAMESPACE(gen_matrix_entry) -#define matvec_mul MLKEM_NAMESPACE(matvec_mul) +#define pack_pk MLKEM_NAMESPACE_K(pack_pk) +#define unpack_pk MLKEM_NAMESPACE_K(unpack_pk) +#define pack_sk MLKEM_NAMESPACE_K(pack_sk) +#define unpack_sk MLKEM_NAMESPACE_K(unpack_sk) +#define pack_ciphertext MLKEM_NAMESPACE_K(pack_ciphertext) +#define unpack_ciphertext MLKEM_NAMESPACE_K(unpack_ciphertext) +#define matvec_mul MLKEM_NAMESPACE_K(matvec_mul) /* End of static namespacing */ /************************************************* @@ -51,7 +49,7 @@ static void pack_pk(uint8_t r[MLKEM_INDCPA_PUBLICKEYBYTES], polyvec *pk, const uint8_t seed[MLKEM_SYMBYTES]) { - POLYVEC_BOUND(pk, MLKEM_Q); + debug_assert_bound_2d(pk, MLKEM_K, MLKEM_N, 0, MLKEM_Q); polyvec_tobytes(r, pk); memcpy(r + MLKEM_POLYVECBYTES, seed, MLKEM_SYMBYTES); } @@ -77,7 +75,7 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], /* NOTE: 
If a modulus check was conducted on the PK, we know at this * point that the coefficients of `pk` are unsigned canonical. The * specifications and proofs, however, do _not_ assume this, and instead - * work with the easily provable bound by 4096. */ + * work with the easily provable bound by UINT12_LIMIT. */ } /************************************************* @@ -91,7 +89,7 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], **************************************************/ static void pack_sk(uint8_t r[MLKEM_INDCPA_SECRETKEYBYTES], polyvec *sk) { - POLYVEC_BOUND(sk, MLKEM_Q); + debug_assert_bound_2d(sk, MLKEM_K, MLKEM_N, 0, MLKEM_Q); polyvec_tobytes(r, sk); } @@ -145,131 +143,11 @@ static void unpack_ciphertext(polyvec *b, poly *v, poly_decompress_dv(v, c + MLKEM_POLYVECCOMPRESSEDBYTES_DU); } -#ifndef MLKEM_GEN_MATRIX_NBLOCKS -#define MLKEM_GEN_MATRIX_NBLOCKS \ - ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + XOF_RATE) / XOF_RATE) -#endif - -/* - * Generate four A matrix entries from a seed, using rejection - * sampling on the output of a XOF. 
- */ -static void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4]) -__contract__( - requires(memory_no_alias(vec, sizeof(poly) * 4)) - requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) - requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) - assigns(memory_slice(vec, sizeof(poly) * 4)) - ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) -{ - /* Temporary buffers for XOF output before rejection sampling */ - uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - - /* Tracks the number of coefficients we have already sampled */ - unsigned int ctr[KECCAK_WAY]; - xof_x4_ctx statex; - unsigned int buflen; - - shake128x4_inc_init(&statex); - - /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */ - xof_x4_absorb(&statex, seed[0], seed[1], seed[2], seed[3], - MLKEM_SYMBYTES + 2); - - /* - * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. - * This should generate the matrix entries with high probability. - */ - xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, - &statex); - buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; - ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); - ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); - ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); - ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); - - /* - * So long as not all matrix entries have been generated, squeeze - * one more block a time until we're done. 
- */ - buflen = XOF_RATE; - while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || - ctr[3] < MLKEM_N) - __loop__( - assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), - object_whole(buf1), object_whole(buf2), object_whole(buf3)) - invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) - invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) - invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, MLKEM_Q)) - invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, MLKEM_Q)) - invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, MLKEM_Q)) - invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, MLKEM_Q))) - { - xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); - ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); - ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); - ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); - ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); - } - - xof_x4_release(&statex); -} - -/* - * Generate a single A matrix entry from a seed, using rejection - * sampling on the output of a XOF. - */ -static void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) -__contract__( - requires(memory_no_alias(entry, sizeof(poly))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) - assigns(memory_slice(entry, sizeof(poly))) - ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) -{ - xof_ctx state; - uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - unsigned int ctr, buflen; - - shake128_inc_init(&state); - xof_absorb(&state, seed, MLKEM_SYMBYTES + 2); - - /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. - */ - /* This should generate the matrix entry with high probability. 
*/ - xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); - buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; - ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); - - /* Squeeze + sample one more block a time until we're done */ - buflen = XOF_RATE; - while (ctr < MLKEM_N) - __loop__( - assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) - invariant(0 <= ctr && ctr <= MLKEM_N) - invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr, - 0, MLKEM_Q))) - { - xof_squeezeblocks(buf, 1, &state); - ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen); - } - - xof_release(&state); -} - #if !defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER) /* This namespacing is not done at the top to avoid a naming conflict * with native backends, which are currently not yet namespaced. */ #define poly_permute_bitrev_to_custom \ - MLKEM_NAMESPACE(poly_permute_bitrev_to_custom) + MLKEM_NAMESPACE_K(poly_permute_bitrev_to_custom) static INLINE void poly_permute_bitrev_to_custom(poly *data) __contract__( @@ -332,7 +210,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) * This call writes across polyvec boundaries for K=2 and K=3. * This is intentional and safe. */ - gen_matrix_entry_x4(&a[0].vec[0] + i, seedxy); + poly_rej_uniform_x4(&a[0].vec[0] + i, seedxy); } /* For left over polynomial, we use single keccak. */ @@ -353,12 +231,11 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) seed0[MLKEM_SYMBYTES + 1] = x; } - gen_matrix_entry(&a[0].vec[0] + i, seed0); + poly_rej_uniform(&a[0].vec[0] + i, seed0); i++; } - cassert(i == MLKEM_K * MLKEM_K, - "gen_matrix: failed to generate whole matrix"); + debug_assert(i == MLKEM_K * MLKEM_K); /* * The public matrix is generated in NTT domain. 
If the native backend @@ -402,16 +279,12 @@ __contract__( for (i = 0; i < MLKEM_K; i++) __loop__( assigns(i, object_whole(out)) - invariant(i >= 0 && i <= MLKEM_K)) + invariant(i <= MLKEM_K)) { polyvec_basemul_acc_montgomery_cached(&out->vec[i], &a[i], v, vc); } } - - -STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0) - MLKEM_NATIVE_INTERNAL_API void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES], @@ -461,7 +334,6 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], matvec_mul(&pkpv, a, &skpv, &skpv_cache); polyvec_tomont(&pkpv); - /* Arithmetic cannot overflow, see static assertion at the top */ polyvec_add(&pkpv, &e); polyvec_reduce(&pkpv); polyvec_reduce(&skpv); @@ -471,11 +343,6 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], } -/* Check that the arithmetic in indcpa_enc() does not overflow */ -STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0) -STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX, - indcpa_enc_bound_1) - MLKEM_NATIVE_INTERNAL_API void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], const uint8_t m[MLKEM_INDCPA_MSGBYTES], @@ -522,7 +389,6 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], polyvec_invntt_tomont(&b); poly_invntt_tomont(&v); - /* Arithmetic cannot overflow, see static assertion at the top */ polyvec_add(&b, &ep); poly_add(&v, &epp); poly_add(&v, &k); @@ -533,9 +399,6 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], pack_ciphertext(c, &b, &v); } -/* Check that the arithmetic in indcpa_dec() does not overflow */ -STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0) - MLKEM_NATIVE_INTERNAL_API void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], const uint8_t c[MLKEM_INDCPA_BYTES], @@ -551,7 +414,6 @@ void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], polyvec_basemul_acc_montgomery(&sb, &skpv, &b); poly_invntt_tomont(&sb); - /* Arithmetic cannot overflow, see static assertion at the 
top */ poly_sub(&v, &sb); poly_reduce(&v); diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/indcpa.h index 011f1aa4f..2c4fda3c4 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/indcpa.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/indcpa.h @@ -10,7 +10,7 @@ #include "common.h" #include "polyvec.h" -#define gen_matrix MLKEM_NAMESPACE(gen_matrix) +#define gen_matrix MLKEM_NAMESPACE_K(gen_matrix) /************************************************* * Name: gen_matrix * @@ -34,7 +34,7 @@ __contract__( array_bound(a[x].vec[y].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))); ); -#define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand) +#define indcpa_keypair_derand MLKEM_NAMESPACE_K(indcpa_keypair_derand) /************************************************* * Name: indcpa_keypair_derand * @@ -60,7 +60,7 @@ __contract__( assigns(object_whole(sk)) ); -#define indcpa_enc MLKEM_NAMESPACE(indcpa_enc) +#define indcpa_enc MLKEM_NAMESPACE_K(indcpa_enc) /************************************************* * Name: indcpa_enc * @@ -89,7 +89,7 @@ __contract__( assigns(object_whole(c)) ); -#define indcpa_dec MLKEM_NAMESPACE(indcpa_dec) +#define indcpa_dec MLKEM_NAMESPACE_K(indcpa_dec) /************************************************* * Name: indcpa_dec * diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/kem.c index 5779d3273..88c3843be 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/kem.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/kem.c @@ -16,8 +16,8 @@ * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. 
*/ -#define check_pk MLKEM_NAMESPACE(check_pk) -#define check_sk MLKEM_NAMESPACE(check_sk) +#define check_pk MLKEM_NAMESPACE_K(check_pk) +#define check_sk MLKEM_NAMESPACE_K(check_sk) /* End of static namespacing */ #if defined(CBMC) diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/kem.h index 074e4771e..93caa796b 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/kem.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/kem.h @@ -9,6 +9,7 @@ #include "cbmc.h" #include "common.h" +#if defined(MLKEM_NATIVE_CHECK_APIS) /* Include to ensure consistency between internal kem.h * and external mlkem_native.h. */ #include "mlkem_native.h" @@ -25,6 +26,14 @@ #error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h #endif +#else +#define crypto_kem_keypair_derand MLKEM_NAMESPACE_K(keypair_derand) +#define crypto_kem_keypair MLKEM_NAMESPACE_K(keypair) +#define crypto_kem_enc_derand MLKEM_NAMESPACE_K(enc_derand) +#define crypto_kem_enc MLKEM_NAMESPACE_K(enc) +#define crypto_kem_dec MLKEM_NAMESPACE_K(dec) +#endif + /************************************************* * Name: crypto_kem_keypair_derand * diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/mlkem_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/mlkem_native.h index 4aed4efbb..12d1d12e6 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/mlkem_native.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/mlkem_native.h @@ -59,9 +59,17 @@ #error MLKEM_NAMESPACE_PREFIX not set by config file #endif -#define BUILD_INFO_CONCAT_(x, y) x##_##y -#define BUILD_INFO_CONCAT(x, y) BUILD_INFO_CONCAT_(x, y) -#define BUILD_INFO_NAMESPACE(sym) BUILD_INFO_CONCAT(MLKEM_NAMESPACE_PREFIX, sym) +#if defined(MLKEM_NATIVE_NAMESPACE_PREFIX_ADD_LEVEL) +#define BUILD_INFO_CONCAT3_(x, y, z) x##y##_##z +#define BUILD_INFO_CONCAT3(x, y, z) BUILD_INFO_CONCAT3_(x, y, z) +#define BUILD_INFO_NAMESPACE(sym) \ + 
BUILD_INFO_CONCAT3(MLKEM_NAMESPACE_PREFIX, BUILD_INFO_LVL, sym) +#else +#define BUILD_INFO_CONCAT2_(x, y) x##_##y +#define BUILD_INFO_CONCAT2(x, y) BUILD_INFO_CONCAT2_(x, y) +#define BUILD_INFO_NAMESPACE(sym) \ + BUILD_INFO_CONCAT2(MLKEM_NAMESPACE_PREFIX, sym) +#endif #endif /* BUILD_INFO_LVL */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/ntt.c index 02b45215c..3651c8da9 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/ntt.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/ntt.c @@ -2,10 +2,12 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ -#include +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) +#include #include "arith_backend.h" -#include "debug/debug.h" +#include "debug.h" #include "ntt.h" #include "reduce.h" @@ -45,10 +47,10 @@ * 4 -- 6 * 5 -- 7 */ -static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, - int len, int bound) +static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, + unsigned start, unsigned len, int bound) __contract__( - requires(0 <= start && start < MLKEM_N) + requires(start < MLKEM_N) requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N) requires(0 <= bound && bound < INT16_MAX - MLKEM_Q) requires(-HALF_Q < zeta && zeta < HALF_Q) @@ -60,7 +62,7 @@ __contract__( ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound))) { /* `bound` is a ghost variable only needed in the CBMC specification */ - int j; + unsigned j; ((void)bound); for (j = start; j < start + len; j++) __loop__( @@ -93,7 +95,7 @@ __contract__( * official Kyber implementation here, merely adding `layer` as * a ghost variable for the specifications. 
*/ -static void ntt_layer(int16_t r[MLKEM_N], int len, int layer) +static void ntt_layer(int16_t r[MLKEM_N], unsigned len, unsigned layer) __contract__( requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer)) @@ -101,15 +103,15 @@ __contract__( assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q))) { - int start, k; + unsigned start, k; /* `layer` is a ghost variable only needed in the CBMC specification */ ((void)layer); /* Twiddle factors for layer n start at index 2^(layer-1) */ k = MLKEM_N / (2 * len); for (start = 0; start < MLKEM_N; start += 2 * len) __loop__( - invariant(0 <= start && start < MLKEM_N + 2 * len) - invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) + invariant(start < MLKEM_N + 2 * len) + invariant(k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) invariant(array_abs_bound(r, 0, start, layer * MLKEM_Q + MLKEM_Q)) invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q))) { @@ -130,9 +132,9 @@ __contract__( MLKEM_NATIVE_INTERNAL_API void poly_ntt(poly *p) { - int len, layer; + unsigned len, layer; int16_t *r; - POLY_BOUND_MSG(p, MLKEM_Q, "ref ntt input"); + debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q); r = p->coeffs; for (len = 128, layer = 1; len >= 2; len >>= 1, layer++) @@ -144,30 +146,23 @@ void poly_ntt(poly *p) } /* Check the stronger bound */ - POLY_BOUND_MSG(p, NTT_BOUND, "ref ntt output"); + debug_assert_abs_bound(p, MLKEM_N, NTT_BOUND); } #else /* MLKEM_USE_NATIVE_NTT */ -/* Check that bound for native NTT implies contractual bound */ -STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, invntt_bound) - MLKEM_NATIVE_INTERNAL_API void poly_ntt(poly *p) { - POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input"); + debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q); ntt_native(p); - POLY_BOUND_MSG(p, NTT_BOUND_NATIVE, "native ntt output"); + debug_assert_abs_bound(p, MLKEM_N, NTT_BOUND); } #endif /* 
MLKEM_USE_NATIVE_NTT */ #if !defined(MLKEM_USE_NATIVE_INTT) -/* Check that bound for reference invNTT implies contractual bound */ -#define INVNTT_BOUND_REF (3 * MLKEM_Q / 4) -STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound) - /* Compute one layer of inverse NTT */ -static void invntt_layer(int16_t *r, int len, int layer) +static void invntt_layer(int16_t *r, unsigned len, unsigned layer) __contract__( requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7) @@ -176,23 +171,23 @@ __contract__( assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { - int start, k; + unsigned start, k; /* `layer` is a ghost variable used only in the specification */ ((void)layer); k = MLKEM_N / len - 1; for (start = 0; start < MLKEM_N; start += 2 * len) __loop__( invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)) - invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(start <= MLKEM_N && k <= 127) /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */ invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len)) { - int j; + unsigned j; int16_t zeta = zetas[k--]; for (j = start; j < start + len; j++) __loop__( invariant(start <= j && j <= start + len) - invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(start <= MLKEM_N && k <= 127) invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { int16_t t = r[j]; @@ -211,13 +206,13 @@ void poly_invntt_tomont(poly *p) * and NTT twist. This also brings coefficients down to * absolute value < MLKEM_Q. 
*/ - int j, len, layer; + unsigned j, len, layer; const int16_t f = 1441; int16_t *r = p->coeffs; for (j = 0; j < MLKEM_N; j++) __loop__( - invariant(0 <= j && j <= MLKEM_N) + invariant(j <= MLKEM_N) invariant(array_abs_bound(r, 0, j, MLKEM_Q))) { r[j] = fqmul(r[j], f); @@ -226,24 +221,21 @@ void poly_invntt_tomont(poly *p) /* Run the invNTT layers */ for (len = 2, layer = 7; len <= 128; len <<= 1, layer--) __loop__( - invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer))) + invariant(2 <= len && len <= 256 && layer <= 7 && len == (1 << (8 - layer))) invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { invntt_layer(p->coeffs, len, layer); } - POLY_BOUND_MSG(p, INVNTT_BOUND_REF, "ref intt output"); + debug_assert_abs_bound(p, MLKEM_N, INVNTT_BOUND); } #else /* MLKEM_USE_NATIVE_INTT */ -/* Check that bound for native invNTT implies contractual bound */ -STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound) - MLKEM_NATIVE_INTERNAL_API void poly_invntt_tomont(poly *p) { intt_native(p); - POLY_BOUND_MSG(p, INVNTT_BOUND_NATIVE, "native intt output"); + debug_assert_abs_bound(p, MLKEM_N, INVNTT_BOUND); } #endif /* MLKEM_USE_NATIVE_INTT */ @@ -252,8 +244,7 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t b_cached) { int32_t t0, t1; - - BOUND(a, 2, 4096, "basemul input bound"); + debug_assert_bound(a, 2, 0, UINT12_LIMIT); t0 = (int32_t)a[1] * b_cached; t0 += (int32_t)a[0] * b[0]; @@ -264,5 +255,12 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], r[0] = montgomery_reduce(t0); r[1] = montgomery_reduce(t1); - BOUND(r, 2, 2 * MLKEM_Q, "basemul output bound"); + debug_assert_abs_bound(r, 2, 2 * MLKEM_Q); } + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_ntt MLKEM_NAMESPACE_K(empty_cu_ntt) +int empty_cu_ntt; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/ntt.h 
b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/ntt.h index 5592bb9a2..4e80d3ab3 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/ntt.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/ntt.h @@ -4,10 +4,10 @@ */ #ifndef NTT_H #define NTT_H +#include "common.h" #include #include "cbmc.h" -#include "common.h" #include "poly.h" #include "reduce.h" @@ -81,7 +81,7 @@ __contract__( * Upon return, coefficients are bound by * 2*MLKEM_Q in absolute value. * - a: Pointer to first input polynomial - * Must be coefficient-wise < 4096 in absolute value. + * Every coefficient must be in [0..4095] * - b: Pointer to second input polynomial * Can have arbitrary int16_t coefficients * - b_cached: Some precomputed value, typically derived from @@ -99,5 +99,4 @@ __contract__( ensures(array_abs_bound(r, 0, 2, 2 * MLKEM_Q)) ); - -#endif +#endif /* NTT_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/params.h index fa751f977..57ea4c8ba 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/params.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/params.h @@ -25,23 +25,34 @@ #define MLKEM_POLYBYTES 384 #define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES) +#define MLKEM_POLYCOMPRESSEDBYTES_D4 128 +#define MLKEM_POLYCOMPRESSEDBYTES_D5 160 +#define MLKEM_POLYCOMPRESSEDBYTES_D10 320 +#define MLKEM_POLYCOMPRESSEDBYTES_D11 352 + #if MLKEM_K == 2 #define MLKEM_LVL 512 #define MLKEM_ETA1 3 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_DU 10 +#define MLKEM_DV 4 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #elif MLKEM_K == 3 #define MLKEM_LVL 768 #define MLKEM_ETA1 2 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_DU 10 +#define 
MLKEM_DV 4 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #elif MLKEM_K == 4 #define MLKEM_LVL 1024 #define MLKEM_ETA1 2 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 160 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 352 +#define MLKEM_DU 11 +#define MLKEM_DV 5 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D5 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D11 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/poly.c index 5807879df..7483ebf6d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/poly.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/poly.c @@ -2,13 +2,15 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) + #include #include - #include "arith_backend.h" #include "cbd.h" #include "cbmc.h" -#include "debug/debug.h" +#include "debug.h" #include "fips202x4.h" #include "ntt.h" #include "poly.h" @@ -16,50 +18,46 @@ #include "symmetric.h" #include "verify.h" +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 || MLKEM_K == 3) MLKEM_NATIVE_INTERNAL_API -void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) +void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a) { - unsigned j; -#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) - for (j = 0; j < MLKEM_N / 8; j++) - __loop__(invariant(j >= 0 && j <= MLKEM_N / 8)) + unsigned i; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (i = 0; i < MLKEM_N / 8; i++) + __loop__(invariant(i <= MLKEM_N / 8)) { - unsigned k; - uint16_t t[8]; - for (k = 0; k < 8; k++) + unsigned j; + uint8_t t[8] = 
{0}; + for (j = 0; j < 8; j++) __loop__( - invariant(k >= 0 && k <= 8) - invariant(forall(r, 0, k, t[r] < (1u << 11)))) + invariant(i <= MLKEM_N / 8 && j <= 8) + invariant(array_bound(t, 0, j, 0, 16))) { - t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); } - /* - * Make all implicit truncation explicit. No data is being - * truncated for the LHS's since each t[i] is 11-bit in size. - */ - r[11 * j + 0] = (t[0] >> 0) & 0xFF; - r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); - r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); - r[11 * j + 3] = (t[2] >> 2) & 0xFF; - r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); - r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); - r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); - r[11 * j + 7] = (t[5] >> 1) & 0xFF; - r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); - r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); - r[11 * j + 10] = (t[7] >> 3); + r[i * 4] = t[0] | (t[1] << 4); + r[i * 4 + 1] = t[2] | (t[3] << 4); + r[i * 4 + 2] = t[4] | (t[5] << 4); + r[i * 4 + 3] = t[6] | (t[7] << 4); } +} -#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a) +{ + unsigned j; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (j = 0; j < MLKEM_N / 4; j++) - __loop__(invariant(j >= 0 && j <= MLKEM_N / 4)) + __loop__(invariant(j <= MLKEM_N / 4)) { unsigned k; uint16_t t[4]; for (k = 0; k < 4; k++) __loop__( - invariant(k >= 0 && k <= 4) + invariant(k <= 4) invariant(forall(r, 0, k, t[r] < (1u << 10)))) { t[k] = scalar_compress_d10(a->coeffs[4 * j + k]); @@ -75,51 +73,35 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) r[5 * j + 3] = (t[2] >> 4) | ((t[3] << 6) & 0xFF); r[5 * j + 4] = (t[3] >> 2); } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" -#endif } - MLKEM_NATIVE_INTERNAL_API -void poly_decompress_du(poly *r, const 
uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]) { - unsigned j; -#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) - for (j = 0; j < MLKEM_N / 8; j++) + unsigned i; + for (i = 0; i < MLKEM_N / 2; i++) __loop__( - invariant(0 <= j && j <= MLKEM_N / 8) - invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q))) + invariant(i <= MLKEM_N / 2) + invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q))) { - int k; - uint16_t t[8]; - uint8_t const *base = &a[11 * j]; - t[0] = 0x7FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8)); - t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); - t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | - ((uint16_t)base[4] << 10)); - t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); - t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); - t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | - ((uint16_t)base[8] << 9)); - t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); - t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); - - for (k = 0; k < 8; k++) - __loop__( - invariant(0 <= k && k <= 8) - invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q))) - { - r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); - } + r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); + r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} + +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d10(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]) +{ + unsigned j; for (j = 0; j < MLKEM_N / 4; j++) __loop__( - invariant(0 <= j && j <= MLKEM_N / 4) + invariant(j <= MLKEM_N / 4) invariant(array_bound(r->coeffs, 0, 4 * j, 0, MLKEM_Q))) { - int k; + unsigned k; uint16_t t[4]; uint8_t const *base = &a[5 * j]; @@ -130,51 +112,33 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) for (k = 0; 
k < 4; k++) __loop__( - invariant(0 <= k && k <= 4) + invariant(k <= 4) invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, MLKEM_Q))) { r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]); } } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" -#endif + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 \ + || MLKEM_K == 3) */ +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 MLKEM_NATIVE_INTERNAL_API -void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) +void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); -#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) - for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) - { - unsigned j; - uint8_t t[8] = {0}; - for (j = 0; j < 8; j++) - __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) - invariant(array_bound(t, 0, j, 0, 16))) - { - t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); - } - - r[i * 4] = t[0] | (t[1] << 4); - r[i * 4 + 1] = t[2] | (t[3] << 4); - r[i * 4 + 2] = t[4] | (t[5] << 4); - r[i * 4 + 3] = t[6] | (t[7] << 4); - } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + __loop__(invariant(i <= MLKEM_N / 8)) { unsigned j; uint8_t t[8] = {0}; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i <= MLKEM_N / 8 && j <= 8) invariant(array_bound(t, 0, j, 0, 32))) { t[j] = scalar_compress_d5(a->coeffs[8 * i + j]); @@ -191,33 +155,57 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) r[i * 5 + 3] = 0xFF & ((t[4] >> 4) | (t[5] << 1) | (t[6] << 6)); r[i * 5 + 4] = 0xFF & ((t[6] >> 2) | (t[7] << 3)); } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 
160}" -#endif } MLKEM_NATIVE_INTERNAL_API -void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a) { - unsigned i; -#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) - for (i = 0; i < MLKEM_N / 2; i++) - __loop__( - invariant(i >= 0 && i <= MLKEM_N / 2) - invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q))) + unsigned j; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (j = 0; j < MLKEM_N / 8; j++) + __loop__(invariant(j <= MLKEM_N / 8)) { - r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); - r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); + unsigned k; + uint16_t t[8]; + for (k = 0; k < 8; k++) + __loop__( + invariant(k <= 8) + invariant(forall(r, 0, k, t[r] < (1u << 11)))) + { + t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + } + + /* + * Make all implicit truncation explicit. No data is being + * truncated for the LHS's since each t[i] is 11-bit in size. 
+ */ + r[11 * j + 0] = (t[0] >> 0) & 0xFF; + r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); + r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); + r[11 * j + 3] = (t[2] >> 2) & 0xFF; + r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); + r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); + r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); + r[11 * j + 7] = (t[5] >> 1) & 0xFF; + r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); + r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); + r[11 * j + 10] = (t[7] >> 3); } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) +} + +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]) +{ + unsigned i; for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q))) { unsigned j; uint8_t t[8]; - const int offset = i * 5; + const unsigned offset = i * 5; /* * Explicitly truncate to avoid warning about * implicit truncation in CBMC and unwind loop for ease @@ -240,29 +228,62 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) /* and copy to the correct slice in r[] */ for (j = 0; j < 8; j++) __loop__( - invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8) + invariant(j <= 8 && i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q))) { r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]); } } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 160}" -#endif - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d11(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]) +{ + unsigned j; + for (j = 0; j < MLKEM_N / 8; j++) + __loop__( + invariant(j <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q))) + { + unsigned k; + uint16_t t[8]; + uint8_t const *base = &a[11 * j]; + t[0] = 0x7FF & ((base[0] >> 0) | 
((uint16_t)base[1] << 8)); + t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); + t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | + ((uint16_t)base[4] << 10)); + t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); + t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); + t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | + ((uint16_t)base[8] << 9)); + t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); + t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); + + for (k = 0; k < 8; k++) + __loop__( + invariant(k <= 8) + invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q))) + { + r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); + } + } + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 */ + #if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES) MLKEM_NATIVE_INTERNAL_API void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); - + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_N / 2; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 2)) + __loop__(invariant(i <= MLKEM_N / 2)) { const uint16_t t0 = a->coeffs[2 * i]; const uint16_t t1 = a->coeffs[2 * i + 1]; @@ -290,7 +311,7 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) MLKEM_NATIVE_INTERNAL_API void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) { - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); poly_tobytes_native(r, a); } #endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */ @@ -302,7 +323,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) unsigned i; for (i = 0; i < MLKEM_N / 2; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 2) + invariant(i <= MLKEM_N / 2) invariant(array_bound(r->coeffs, 0, 2 * i, 0, UINT12_LIMIT))) { const uint8_t t0 = a[3 * i + 0]; @@ -313,7 +334,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) } /* Note that the coefficients are not 
canonical */ - POLY_UBOUND(r, 4096); + debug_assert_bound(r, MLKEM_N, 0, UINT12_LIMIT); } #else /* MLKEM_USE_NATIVE_POLY_FROMBYTES */ MLKEM_NATIVE_INTERNAL_API @@ -333,13 +354,13 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q))) { unsigned j; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i < MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i < MLKEM_N / 8 && j <= 8) invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q))) { /* Prevent the compiler from recognizing this as a bit selection */ @@ -347,23 +368,23 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) r->coeffs[8 * i + j] = ct_sel_int16(HALF_Q, 0, msg[i] & mask); } } - POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output"); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + __loop__(invariant(i <= MLKEM_N / 8)) { unsigned j; msg[i] = 0; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)) + invariant(i <= MLKEM_N / 8 && j <= 8)) { uint32_t t = scalar_compress_d1(a->coeffs[8 * i + j]); msg[i] |= t << j; @@ -371,104 +392,17 @@ void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) } } -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, - uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) -{ - ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN 
uint8_t extkey0[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1]; - memcpy(extkey0, seed, MLKEM_SYMBYTES); - memcpy(extkey1, seed, MLKEM_SYMBYTES); - memcpy(extkey2, seed, MLKEM_SYMBYTES); - memcpy(extkey3, seed, MLKEM_SYMBYTES); - extkey0[MLKEM_SYMBYTES] = nonce0; - extkey1[MLKEM_SYMBYTES] = nonce1; - extkey2[MLKEM_SYMBYTES] = nonce2; - extkey3[MLKEM_SYMBYTES] = nonce3; - prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3); - poly_cbd_eta1(r0, buf0); - poly_cbd_eta1(r1, buf1); - poly_cbd_eta1(r2, buf2); - poly_cbd_eta1(r3, buf3); - - POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0"); - POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 1"); - POLY_BOUND_MSG(r2, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 2"); - POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3"); -} - -#if MLKEM_K == 2 || MLKEM_K == 4 -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce) -{ - ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; - ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1]; - - memcpy(extkey, seed, MLKEM_SYMBYTES); - extkey[MLKEM_SYMBYTES] = nonce; - prf_eta2(buf, extkey); - - poly_cbd_eta2(r, buf); - - POLY_BOUND_MSG(r, MLKEM_ETA1 + 1, "poly_getnoise_eta2 output"); -} -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ - -#if MLKEM_K == 2 -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, - uint8_t nonce3) -{ - ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; - ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; - memcpy(extkey[0], seed, MLKEM_SYMBYTES); - memcpy(extkey[1], seed, MLKEM_SYMBYTES); - memcpy(extkey[2], seed, MLKEM_SYMBYTES); - 
memcpy(extkey[3], seed, MLKEM_SYMBYTES); - extkey[0][MLKEM_SYMBYTES] = nonce0; - extkey[1][MLKEM_SYMBYTES] = nonce1; - extkey[2][MLKEM_SYMBYTES] = nonce2; - extkey[3][MLKEM_SYMBYTES] = nonce3; - - prf_eta1(buf1[0], extkey[0]); - prf_eta1(buf1[1], extkey[1]); - prf_eta2(buf2[0], extkey[2]); - prf_eta2(buf2[1], extkey[3]); - - poly_cbd_eta1(r0, buf1[0]); - poly_cbd_eta1(r1, buf1[1]); - poly_cbd_eta2(r2, buf2[0]); - poly_cbd_eta2(r3, buf2[1]); - - POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 0"); - POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 1"); - POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2"); - POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 3"); -} -#endif /* MLKEM_K == 2 */ - MLKEM_NATIVE_INTERNAL_API void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, const poly_mulcache *b_cache) { unsigned i; - POLY_BOUND(b_cache, 4096); + debug_assert_bound(a, MLKEM_N, 0, UINT12_LIMIT); for (i = 0; i < MLKEM_N / 4; i++) __loop__( assigns(i, object_whole(r)) - invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(i <= MLKEM_N / 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i, 2 * MLKEM_Q))) { basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i], @@ -476,6 +410,8 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, basemul_cached(&r->coeffs[4 * i + 2], &a->coeffs[4 * i + 2], &b->coeffs[4 * i + 2], b_cache->coeffs[2 * i + 1]); } + + debug_assert_abs_bound(r, MLKEM_N, 2 * MLKEM_Q); } #if !defined(MLKEM_USE_NATIVE_POLY_TOMONT) @@ -486,20 +422,20 @@ void poly_tomont(poly *r) const int16_t f = (1ULL << 32) % MLKEM_Q; /* 1353 */ for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) - invariant(array_abs_bound(r->coeffs ,0, i, MLKEM_Q))) + invariant(i <= MLKEM_N) + invariant(array_abs_bound(r->coeffs, 0, i, MLKEM_Q))) { r->coeffs[i] = fqmul(r->coeffs[i], f); } - POLY_BOUND(r, MLKEM_Q); + 
debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_TOMONT */ MLKEM_NATIVE_INTERNAL_API void poly_tomont(poly *r) { poly_tomont_native(r); - POLY_BOUND(r, MLKEM_Q); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } #endif /* MLKEM_USE_NATIVE_POLY_TOMONT */ @@ -510,7 +446,7 @@ void poly_reduce(poly *r) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(array_bound(r->coeffs, 0, i, 0, MLKEM_Q))) { /* Barrett reduction, giving signed canonical representative */ @@ -519,14 +455,14 @@ void poly_reduce(poly *r) r->coeffs[i] = scalar_signed_to_unsigned_q(t); } - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_REDUCE */ MLKEM_NATIVE_INTERNAL_API void poly_reduce(poly *r) { poly_reduce_native(r); - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } #endif /* MLKEM_USE_NATIVE_POLY_REDUCE */ @@ -536,7 +472,7 @@ void poly_add(poly *r, const poly *b) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1]))) { @@ -550,7 +486,7 @@ void poly_sub(poly *r, const poly *b) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1]))) { @@ -564,20 +500,36 @@ void poly_mulcache_compute(poly_mulcache *x, const poly *a) { unsigned i; for (i = 0; i < MLKEM_N / 4; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 4)) + __loop__( + invariant(i <= MLKEM_N / 4) + invariant(array_abs_bound(x->coeffs, 0, 2 * i, MLKEM_Q))) { x->coeffs[2 * i + 0] = fqmul(a->coeffs[4 * i + 1], zetas[64 + 
i]); x->coeffs[2 * i + 1] = fqmul(a->coeffs[4 * i + 3], -zetas[64 + i]); } - POLY_BOUND(x, MLKEM_Q); + + /* + * This bound is true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus omitted + * from the spec to not unnecessarily constrain native + * implementations, but checked here nonetheless. + */ + debug_assert_abs_bound(x, MLKEM_N / 2, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ MLKEM_NATIVE_INTERNAL_API void poly_mulcache_compute(poly_mulcache *x, const poly *a) { poly_mulcache_compute_native(x, a); - /* Omitting POLY_BOUND(x, MLKEM_Q) since native implementations may + /* Omitting bounds assertion since native implementations may * decide not to use a mulcache. Note that the C backend implementation * of poly_basemul_montgomery_cached() does still include the check. */ } #endif /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_poly MLKEM_NAMESPACE_K(empty_cu_poly) +int empty_cu_poly; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/poly.h index 1e8c109c6..6a14c785d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/poly.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/poly.h @@ -307,112 +307,164 @@ __contract__( ************************************************************/ static INLINE uint16_t scalar_signed_to_unsigned_q(int16_t c) __contract__( - requires(c >= -(MLKEM_Q - 1) && c <= (MLKEM_Q - 1)) - ensures(return_value >= 0 && return_value <= (MLKEM_Q - 1)) + requires(c > -MLKEM_Q && c < MLKEM_Q) + ensures(return_value >= 0 && return_value < MLKEM_Q) ensures(return_value == (int32_t)c + (((int32_t)c < 0) * MLKEM_Q))) { + debug_assert_abs_bound(&c, 1, MLKEM_Q); + /* Add Q if c is negative, but in constant time */ c = ct_sel_int16(c + MLKEM_Q, c, ct_cmask_neg_i16(c)); - cassert(c >= 0, 
"scalar_signed_to_unsigned_q result lower bound"); - cassert(c < MLKEM_Q, "scalar_signed_to_unsigned_q result upper bound"); - /* and therefore cast to uint16_t is safe. */ + debug_assert_bound(&c, 1, 0, MLKEM_Q); return (uint16_t)c; } -#define poly_compress_du MLKEM_NAMESPACE(poly_compress_du) +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || \ + (MLKEM_K == 2 || MLKEM_K == 3) +#define poly_compress_d4 MLKEM_NAMESPACE(poly_compress_d4) /************************************************* - * Name: poly_compress_du + * Name: poly_compress_d4 * - * Description: Compression (du bits) and subsequent serialization of a - *polynomial + * Description: Compression (4 bits) and subsequent serialization of a + * polynomial * * Arguments: - uint8_t *r: pointer to output byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes) * - const poly *a: pointer to input polynomial * Coefficients must be unsigned canonical, * i.e. in [0,1,..,MLKEM_Q-1]. **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) -__contract__( - requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) - requires(memory_no_alias(a, sizeof(poly))) - requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) -); +void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a); + +#define poly_compress_d10 MLKEM_NAMESPACE(poly_compress_d10) +/************************************************* + * Name: poly_compress_d10 + * + * Description: Compression (10 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a); -#define poly_decompress_du MLKEM_NAMESPACE(poly_decompress_du) +#define poly_decompress_d4 MLKEM_NAMESPACE(poly_decompress_d4) /************************************************* - * Name: poly_decompress_du + * Name: poly_decompress_d4 * - * Description: De-serialization and subsequent decompression (du bits) of a - *polynomial; approximate inverse of poly_compress_du + * Description: De-serialization and subsequent decompression (4 bits) of a + * polynomial; approximate inverse of poly_compress_d4 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *a: pointer to input byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES bytes) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes) * * Upon return, the coefficients of the output polynomial are unsigned-canonical * (non-negative and smaller than MLKEM_Q). * **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) -__contract__( - requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) - requires(memory_no_alias(r, sizeof(poly))) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) -); +void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]); -#define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv) +#define poly_decompress_d10 MLKEM_NAMESPACE(poly_decompress_d10) /************************************************* - * Name: poly_compress_dv + * Name: poly_decompress_d10 + * + * Description: De-serialization and subsequent decompression (10 bits) of a + * polynomial; approximate inverse of poly_compress_d10 * - * Description: Compression (dv bits) and subsequent serialization of a - *polynomial + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t 
*a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d10(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]); +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 \ + || MLKEM_K == 3) */ + +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 +#define poly_compress_d5 MLKEM_NAMESPACE(poly_compress_d5) +/************************************************* + * Name: poly_compress_d5 + * + * Description: Compression (5 bits) and subsequent serialization of a + * polynomial * * Arguments: - uint8_t *r: pointer to output byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES_DV) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes) * - const poly *a: pointer to input polynomial * Coefficients must be unsigned canonical, * i.e. in [0,1,..,MLKEM_Q-1]. 
**************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) -__contract__( - requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) - requires(memory_no_alias(a, sizeof(poly))) - requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - assigns(object_whole(r)) -); +void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a); -#define poly_decompress_dv MLKEM_NAMESPACE(poly_decompress_dv) +#define poly_compress_d11 MLKEM_NAMESPACE(poly_compress_d11) /************************************************* - * Name: poly_decompress_dv + * Name: poly_compress_d11 + * + * Description: Compression (11 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a); + +#define poly_decompress_d5 MLKEM_NAMESPACE(poly_decompress_d5) +/************************************************* + * Name: poly_decompress_d5 * * Description: De-serialization and subsequent decompression (dv bits) of a - *polynomial; approximate inverse of poly_compress + * polynomial; approximate inverse of poly_compress * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *a: pointer to input byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES_DV - *bytes) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes) * * Upon return, the coefficients of the output polynomial are unsigned-canonical * (non-negative and smaller than MLKEM_Q). 
* **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) -__contract__( - requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) - requires(memory_no_alias(r, sizeof(poly))) - assigns(object_whole(r)) - ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) -); +void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]); + +#define poly_decompress_d11 MLKEM_NAMESPACE(poly_decompress_d11) +/************************************************* + * Name: poly_decompress_d11 + * + * Description: De-serialization and subsequent decompression (11 bits) of a + * polynomial; approximate inverse of poly_compress_d11 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d11(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]); +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 \ + */ #define poly_tobytes MLKEM_NAMESPACE(poly_tobytes) /************************************************* @@ -500,144 +552,6 @@ __contract__( assigns(object_whole(msg)) ); -#define poly_getnoise_eta1_4x MLKEM_NAMESPACE(poly_getnoise_eta1_4x) -/************************************************* - * Name: poly_getnoise_eta1_4x - * - * Description: Batch sample four polynomials deterministically from a seed - * and nonces, with output polynomials close to centered binomial distribution - * with parameter MLKEM_ETA1. 
- * - * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce{0,1,2,3}: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, - uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) -/* Depending on MLKEM_K, the pointers passed to this function belong - to the same objects, so we cannot use memory_no_alias for r0-r3. - - NOTE: Somehow it is important to use memory_no_alias() first in the - conjunctions defining each case. -*/ -#if MLKEM_K == 2 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ - (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && - r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#elif MLKEM_K == 4 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case B: r0, r1, r2, r3 consecutive */ - (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 
1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#elif MLKEM_K == 3 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case C: r0, r1, r2 consecutive */ - (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && - r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#endif /* MLKEM_K */ - -#if MLKEM_ETA1 == MLKEM_ETA2 -/* - * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 - * where MLKEM_ETA2 = MLKEM_ETA1 = 2. - * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. - */ -#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x -#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ - -#if MLKEM_K == 2 || MLKEM_K == 4 -#define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2) -/************************************************* - * Name: poly_getnoise_eta2 - * - * Description: Sample a polynomial deterministically from a seed and a nonce, - * with output polynomial close to centered binomial distribution - * with parameter MLKEM_ETA2 - * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - assigns(object_whole(r)) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, 
MLKEM_ETA2 + 1)) -); -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ - -#if MLKEM_K == 2 -#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x) -/************************************************* - * Name: poly_getnoise_eta1122_4x - * - * Description: Batch sample four polynomials deterministically from a seed - * and a nonces, with output polynomials close to centered binomial - * distribution with parameter MLKEM_ETA1 and MLKEM_ETA2 - * - * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce{0,1,2,3}: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, - uint8_t nonce3) -__contract__( - requires( /* r0, r1 consecutive, r2, r3 consecutive */ - (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && - r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) - ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)); -); -#endif /* MLKEM_K == 2 */ - #define poly_basemul_montgomery_cached \ MLKEM_NAMESPACE(poly_basemul_montgomery_cached) /************************************************* @@ -649,8 +563,7 @@ __contract__( * Bounds: * - a is assumed to be coefficient-wise < q in absolute value. * - * The result is coefficient-wise bound by 3/2 q in absolute - * value. + * The result is coefficient-wise bound by 2*q in absolute value. 
* * Arguments: - poly *r: pointer to output polynomial * - const poly *a: pointer to first input polynomial @@ -802,4 +715,4 @@ __contract__( assigns(object_whole(r)) ); -#endif +#endif /* POLY_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/polyvec.c index 7d2016773..50ea1c34a 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/polyvec.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/polyvec.c @@ -4,18 +4,29 @@ */ #include "polyvec.h" #include +#include #include "arith_backend.h" +#include "cbd.h" #include "ntt.h" #include "poly.h" +#include "symmetric.h" -#include "debug/debug.h" +#include "debug.h" + +/* Static namespacing + * This is to facilitate building multiple instances + * of mlkem-native (e.g. with varying security levels) + * within a single compilation unit. */ +#define poly_cbd_eta1 MLKEM_NAMESPACE_K(poly_cbd_eta1) +#define poly_cbd_eta2 MLKEM_NAMESPACE_K(poly_cbd_eta2) +/* End of static namespacing */ MLKEM_NATIVE_INTERNAL_API void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU], const polyvec *a) { unsigned i; - POLYVEC_UBOUND(a, MLKEM_Q); + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_K; i++) { @@ -33,13 +44,15 @@ void polyvec_decompress_du(polyvec *r, poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU); } - POLYVEC_UBOUND(r, MLKEM_Q); + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a) { unsigned i; + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q); + for (i = 0; i < MLKEM_K; i++) { poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]); @@ -54,6 +67,8 @@ void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES]) { poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES); } + + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); } MLKEM_NATIVE_INTERNAL_API @@ -64,6 +79,8 @@ void 
polyvec_ntt(polyvec *r) { poly_ntt(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, NTT_BOUND); } MLKEM_NATIVE_INTERNAL_API @@ -74,6 +91,8 @@ void polyvec_invntt_tomont(polyvec *r) { poly_invntt_tomont(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, INVNTT_BOUND); } #if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) @@ -84,10 +103,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, { unsigned i; poly t; - - POLYVEC_BOUND(a, 4096); - POLYVEC_BOUND(b, NTT_BOUND); - POLYVEC_BOUND(b_cache, MLKEM_Q); + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); poly_basemul_montgomery_cached(r, &a->vec[0], &b->vec[0], &b_cache->vec[0]); for (i = 1; i < MLKEM_K; i++) @@ -95,18 +111,15 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, poly_basemul_montgomery_cached(&t, &a->vec[i], &b->vec[i], &b_cache->vec[i]); poly_add(r, &t); - /* abs bounds: < (i+1) * 3/2 * q */ } /* - * Those bounds are true for the C implementation, but not needed - * in the higher level bounds reasoning. It is thus best to omit - * them from the spec to not unnecessarily constraint native implementations. + * This bound is true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus omitted + * from the spec to not unnecessarily constrain native + * implementations, but checked here nonetheless. 
*/ - cassert(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_K * 2 * MLKEM_Q), - "polyvec_basemul_acc_montgomery_cached output bounds"); - /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */ - POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_K * 2 * MLKEM_Q); } #else /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */ MLKEM_NATIVE_INTERNAL_API @@ -114,9 +127,8 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, const polyvec *b, const polyvec_mulcache *b_cache) { - POLYVEC_BOUND(a, 4096); - POLYVEC_BOUND(b, NTT_BOUND); - /* Omitting POLYVEC_BOUND(b_cache, MLKEM_Q) since native implementations may + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); + /* Omitting bounds assertion for cache since native implementations may * decide not to use a mulcache. Note that the C backend implementation * of poly_basemul_montgomery_cached() does still include the check. */ polyvec_basemul_acc_montgomery_cached_native(r, a, b, b_cache); @@ -149,6 +161,8 @@ void polyvec_reduce(polyvec *r) { poly_reduce(&r->vec[i]); } + + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API @@ -169,4 +183,148 @@ void polyvec_tomont(polyvec *r) { poly_tomont(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, MLKEM_Q); +} + + +/************************************************* + * Name: poly_cbd_eta1 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA1. 
+ * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static INLINE void poly_cbd_eta1(poly *r, + const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)) +) +{ +#if MLKEM_ETA1 == 2 + poly_cbd2(r, buf); +#elif MLKEM_ETA1 == 3 + poly_cbd3(r, buf); +#else +#error "Invalid value of MLKEM_ETA1" +#endif +} + +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +{ + ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t extkey0[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1]; + memcpy(extkey0, seed, MLKEM_SYMBYTES); + memcpy(extkey1, seed, MLKEM_SYMBYTES); + memcpy(extkey2, seed, MLKEM_SYMBYTES); + memcpy(extkey3, seed, MLKEM_SYMBYTES); + extkey0[MLKEM_SYMBYTES] = nonce0; + extkey1[MLKEM_SYMBYTES] = nonce1; + extkey2[MLKEM_SYMBYTES] = nonce2; + extkey3[MLKEM_SYMBYTES] = nonce3; + prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3); + poly_cbd_eta1(r0, buf0); + poly_cbd_eta1(r1, buf1); + poly_cbd_eta1(r2, buf2); + poly_cbd_eta1(r3, buf3); + + debug_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA1 + 1); +} + +#if MLKEM_K == 2 || MLKEM_K == 4 
+/************************************************* + * Name: poly_cbd_eta2 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA2. + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static INLINE void poly_cbd_eta2(poly *r, + const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1))) +{ +#if MLKEM_ETA2 == 2 + poly_cbd2(r, buf); +#else +#error "Invalid value of MLKEM_ETA2" +#endif +} + +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +{ + ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1]; + + memcpy(extkey, seed, MLKEM_SYMBYTES); + extkey[MLKEM_SYMBYTES] = nonce; + prf_eta2(buf, extkey); + + poly_cbd_eta2(r, buf); + + debug_assert_abs_bound(r, MLKEM_N, MLKEM_ETA2 + 1); +} +#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ + + +#if MLKEM_K == 2 +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +{ + ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; + memcpy(extkey[0], seed, MLKEM_SYMBYTES); + memcpy(extkey[1], seed, MLKEM_SYMBYTES); + memcpy(extkey[2], seed, MLKEM_SYMBYTES); + memcpy(extkey[3], seed, MLKEM_SYMBYTES); + extkey[0][MLKEM_SYMBYTES] = nonce0; + extkey[1][MLKEM_SYMBYTES] = nonce1; + extkey[2][MLKEM_SYMBYTES] = nonce2; + 
extkey[3][MLKEM_SYMBYTES] = nonce3; + + prf_eta1(buf1[0], extkey[0]); + prf_eta1(buf1[1], extkey[1]); + prf_eta2(buf2[0], extkey[2]); + prf_eta2(buf2[1], extkey[3]); + + poly_cbd_eta1(r0, buf1[0]); + poly_cbd_eta1(r1, buf1[1]); + poly_cbd_eta2(r2, buf2[0]); + poly_cbd_eta2(r3, buf2[1]); + + debug_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA2 + 1); + debug_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA2 + 1); } +#endif /* MLKEM_K == 2 */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/polyvec.h index 138724150..8be8579e0 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/polyvec.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/polyvec.h @@ -9,19 +9,144 @@ #include "common.h" #include "poly.h" -#define polyvec MLKEM_NAMESPACE(polyvec) +#define polyvec MLKEM_NAMESPACE_K(polyvec) typedef struct { poly vec[MLKEM_K]; } ALIGN polyvec; -#define polyvec_mulcache MLKEM_NAMESPACE(polyvec_mulcache) +#define polyvec_mulcache MLKEM_NAMESPACE_K(polyvec_mulcache) typedef struct { poly_mulcache vec[MLKEM_K]; } polyvec_mulcache; -#define polyvec_compress_du MLKEM_NAMESPACE(polyvec_compress_du) +#define poly_compress_du MLKEM_NAMESPACE_K(poly_compress_du) +/************************************************* + * Name: poly_compress_du + * + * Description: Compression (du bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +static INLINE void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], + const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU))) +{ +#if MLKEM_DU == 10 + poly_compress_d10(r, a); +#elif MLKEM_DU == 11 + poly_compress_d11(r, a); +#else +#error "Invalid value of MLKEM_DU" +#endif +} + +#define poly_decompress_du MLKEM_NAMESPACE_K(poly_decompress_du) +/************************************************* + * Name: poly_decompress_du + * + * Description: De-serialization and subsequent decompression (du bits) of a + * polynomial; approximate inverse of poly_compress_du + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). 
+ * + **************************************************/ +static INLINE void poly_decompress_du( + poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +{ +#if MLKEM_DU == 10 + poly_decompress_d10(r, a); +#elif MLKEM_DU == 11 + poly_decompress_d11(r, a); +#else +#error "Invalid value of MLKEM_DU" +#endif +} + +#define poly_compress_dv MLKEM_NAMESPACE_K(poly_compress_dv) +/************************************************* + * Name: poly_compress_dv + * + * Description: Compression (dv bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +static INLINE void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], + const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(object_whole(r))) +{ +#if MLKEM_DV == 4 + poly_compress_d4(r, a); +#elif MLKEM_DV == 5 + poly_compress_d5(r, a); +#else +#error "Invalid value of MLKEM_DV" +#endif +} + + +#define poly_decompress_dv MLKEM_NAMESPACE_K(poly_decompress_dv) +/************************************************* + * Name: poly_decompress_dv + * + * Description: De-serialization and subsequent decompression (dv bits) of a + * polynomial; approximate inverse of poly_compress + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). 
+ * + **************************************************/ +static INLINE void poly_decompress_dv( + poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(object_whole(r)) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +{ +#if MLKEM_DV == 4 + poly_decompress_d4(r, a); +#elif MLKEM_DV == 5 + poly_decompress_d5(r, a); +#else +#error "Invalid value of MLKEM_DV" +#endif +} + +#define polyvec_compress_du MLKEM_NAMESPACE_K(polyvec_compress_du) /************************************************* * Name: polyvec_compress_du * @@ -44,7 +169,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_decompress_du MLKEM_NAMESPACE(polyvec_decompress_du) +#define polyvec_decompress_du MLKEM_NAMESPACE_K(polyvec_decompress_du) /************************************************* * Name: polyvec_decompress_du * @@ -67,7 +192,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) ); -#define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes) +#define polyvec_tobytes MLKEM_NAMESPACE_K(polyvec_tobytes) /************************************************* * Name: polyvec_tobytes * @@ -88,7 +213,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_frombytes MLKEM_NAMESPACE(polyvec_frombytes) +#define polyvec_frombytes MLKEM_NAMESPACE_K(polyvec_frombytes) /************************************************* * Name: polyvec_frombytes * @@ -110,7 +235,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, UINT12_LIMIT))) ); -#define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt) +#define polyvec_ntt MLKEM_NAMESPACE_K(polyvec_ntt) /************************************************* * Name: polyvec_ntt * @@ -136,7 +261,7 @@ __contract__( array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, NTT_BOUND))) ); -#define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont) +#define polyvec_invntt_tomont 
MLKEM_NAMESPACE_K(polyvec_invntt_tomont) /************************************************* * Name: polyvec_invntt_tomont * @@ -162,7 +287,7 @@ __contract__( ); #define polyvec_basemul_acc_montgomery \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery) /************************************************* * Name: polyvec_basemul_acc_montgomery * @@ -186,7 +311,7 @@ __contract__( #define polyvec_basemul_acc_montgomery_cached \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached) /************************************************* * Name: polyvec_basemul_acc_montgomery_cached * @@ -194,7 +319,7 @@ __contract__( * using mulcache for second operand. * * Bounds: - * - a is assumed to be coefficient-wise < 4096 in absolute value. + * - Every coefficient of a is assumed to be in [0..4095] * - No bounds guarantees for the coefficients in the result. * * Arguments: - poly *r: pointer to output polynomial @@ -218,7 +343,7 @@ __contract__( assigns(memory_slice(r, sizeof(poly))) ); -#define polyvec_mulcache_compute MLKEM_NAMESPACE(polyvec_mulcache_compute) +#define polyvec_mulcache_compute MLKEM_NAMESPACE_K(polyvec_mulcache_compute) /************************************************************ * Name: polyvec_mulcache_compute * @@ -252,7 +377,7 @@ __contract__( assigns(object_whole(x)) ); -#define polyvec_reduce MLKEM_NAMESPACE(polyvec_reduce) +#define polyvec_reduce MLKEM_NAMESPACE_K(polyvec_reduce) /************************************************* * Name: polyvec_reduce * @@ -278,7 +403,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) ); -#define polyvec_add MLKEM_NAMESPACE(polyvec_add) +#define polyvec_add MLKEM_NAMESPACE_K(polyvec_add) /************************************************* * Name: polyvec_add * @@ -309,7 +434,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_tomont MLKEM_NAMESPACE(polyvec_tomont) +#define 
polyvec_tomont MLKEM_NAMESPACE_K(polyvec_tomont) /************************************************* * Name: polyvec_tomont * @@ -329,4 +454,142 @@ __contract__( array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, MLKEM_Q))) ); +#define poly_getnoise_eta1_4x MLKEM_NAMESPACE_K(poly_getnoise_eta1_4x) +/************************************************* + * Name: poly_getnoise_eta1_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial distribution + * with parameter MLKEM_ETA1. + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +/* Depending on MLKEM_K, the pointers passed to this function belong + to the same objects, so we cannot use memory_no_alias for r0-r3. + + NOTE: Somehow it is important to use memory_no_alias() first in the + conjunctions defining each case. 
+*/ +#if MLKEM_K == 2 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#elif MLKEM_K == 4 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case B: r0, r1, r2, r3 consecutive */ + (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#elif MLKEM_K == 3 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case C: r0, r1, r2 consecutive */ + (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && + r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) 
+ && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#endif /* MLKEM_K */ + +#if MLKEM_ETA1 == MLKEM_ETA2 +/* + * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 + * where MLKEM_ETA2 = MLKEM_ETA1 = 2. + * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. + */ +#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x +#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ + +#if MLKEM_K == 2 || MLKEM_K == 4 +#define poly_getnoise_eta2 MLKEM_NAMESPACE_K(poly_getnoise_eta2) +/************************************************* + * Name: poly_getnoise_eta2 + * + * Description: Sample a polynomial deterministically from a seed and a nonce, + * with output polynomial close to centered binomial distribution + * with parameter MLKEM_ETA2 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce: one-byte input nonce + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r)) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)) +); +#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ + +#if MLKEM_K == 2 +#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE_K(poly_getnoise_eta1122_4x) +/************************************************* + * Name: poly_getnoise_eta1122_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and a nonces, with output polynomials close to centered binomial + * distribution with parameter MLKEM_ETA1 and MLKEM_ETA2 + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + 
**************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +__contract__( + requires( /* r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) + ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)); +); +#endif /* MLKEM_K == 2 */ + #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/reduce.h index 1f502167e..b432a4201 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/reduce.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/reduce.h @@ -8,7 +8,7 @@ #include #include "cbmc.h" #include "common.h" -#include "debug/debug.h" +#include "debug.h" /* Static namespacing * This is to facilitate building multiple instances @@ -109,13 +109,13 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a) **************************************************/ static INLINE int16_t montgomery_reduce(int32_t a) __contract__( - requires(a > -(2 * 4096 * 32768)) - requires(a < (2 * 4096 * 32768)) + requires(a > -(2 * UINT12_LIMIT * 32768)) + requires(a < (2 * UINT12_LIMIT * 32768)) ensures(return_value > -2 * MLKEM_Q && return_value < 2 * MLKEM_Q) ) { int16_t res; - SCALAR_BOUND(a, 2 * UINT12_LIMIT * 32768, "montgomery_reduce input"); + debug_assert_abs_bound(&a, 1, 2 * UINT12_LIMIT * 32768); res = montgomery_reduce_generic(a); /* Bounds: @@ -124,7 +124,7 @@ 
__contract__( * <= UINT12_LIMIT + (MLKEM_Q + 1) / 2 * < 2 * MLKEM_Q */ - SCALAR_BOUND(res, 2 * MLKEM_Q, "montgomery_reduce output"); + debug_assert_abs_bound(&res, 1, 2 * MLKEM_Q); return res; } @@ -150,7 +150,7 @@ __contract__( ) { int16_t res; - SCALAR_BOUND(b, HALF_Q, "fqmul input"); + debug_assert_abs_bound(&b, 1, HALF_Q); res = montgomery_reduce((int32_t)a * (int32_t)b); /* Bounds: @@ -160,7 +160,7 @@ __contract__( * < MLKEM_Q */ - SCALAR_BOUND(res, MLKEM_Q, "fqmul output"); + debug_assert_abs_bound(&res, 1, MLKEM_Q); return res; } @@ -200,7 +200,10 @@ __contract__( * t is in -10 .. +10, so we need 32-bit math to * evaluate t * MLKEM_Q and the subsequent subtraction */ - return (int16_t)(a - t * MLKEM_Q); + int16_t res = (int16_t)(a - t * MLKEM_Q); + + debug_assert_abs_bound(&res, 1, HALF_Q); + return res; } #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/rej_uniform.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/rej_uniform.c index 918986e9b..cbbe4407f 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/rej_uniform.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/rej_uniform.c @@ -2,46 +2,24 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) -#include "rej_uniform.h" #include "arith_backend.h" +#include "debug.h" +#include "fips202.h" +#include "fips202x4.h" +#include "rej_uniform.h" +#include "symmetric.h" /* Static namespacing * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. 
*/ +#define rej_uniform MLKEM_NAMESPACE(rej_uniform) #define rej_uniform_scalar MLKEM_NAMESPACE(rej_uniform_scalar) /* End of static namespacing */ -/************************************************* - * Name: rej_uniform_scalar - * - * Description: Run rejection sampling on uniform random bytes to generate - * uniform random integers mod q - * - * Arguments: - int16_t *r: pointer to output buffer - * - unsigned int target: requested number of 16-bit integers - * (uniform mod q). - * Must be <= 4096. - * - unsigned int offset: number of 16-bit integers that have - * already been sampled. - * Must be <= target. - * - const uint8_t *buf: pointer to input buffer - * (assumed to be uniform random bytes) - * - unsigned int buflen: length of input buffer in bytes - * Must be <= 4096. - * Must be a multiple of 3. - * - * Note: Strictly speaking, only a few values of buflen near UINT_MAX need - * excluding. The limit of 4096 is somewhat arbitary but sufficient for all - * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. - * - * Returns the new offset of sampled 16-bit integers, at most target, - * and at least the initial offset. - * If the new offset is strictly less than len, all of the input buffers - * is guaranteed to have been consumed. If it is equal to len, no information - * is provided on how many bytes of the input buffer have been consumed. 
- **************************************************/ static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target, unsigned int offset, const uint8_t *buf, unsigned int buflen) @@ -58,6 +36,8 @@ __contract__( unsigned int ctr, pos; uint16_t val0, val1; + debug_assert_bound(r, offset, 0, MLKEM_Q); + ctr = offset; pos = 0; /* pos + 3 cannot overflow due to the assumption buflen <= 4096 */ @@ -79,28 +59,183 @@ __contract__( r[ctr++] = val1; } } + + debug_assert_bound(r, ctr, 0, MLKEM_Q); return ctr; } #if !defined(MLKEM_USE_NATIVE_REJ_UNIFORM) -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +/************************************************* + * Name: rej_uniform + * + * Description: Run rejection sampling on uniform random bytes to generate + * uniform random integers mod q + * + * Arguments: - int16_t *r: pointer to output buffer + * - unsigned int target: requested number of 16-bit integers + * (uniform mod q). + * Must be <= 4096. + * - unsigned int offset: number of 16-bit integers that have + * already been sampled. + * Must be <= target. + * - const uint8_t *buf: pointer to input buffer + * (assumed to be uniform random bytes) + * - unsigned int buflen: length of input buffer in bytes + * Must be <= 4096. + * Must be a multiple of 3. + * + * Note: Strictly speaking, only a few values of buflen near UINT_MAX need + * excluding. The limit of 4096 is somewhat arbitrary but sufficient for all + * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. + * + * Returns the new offset of sampled 16-bit integers, at most target, + * and at least the initial offset. + * If the new offset is strictly less than target, all of the input buffer + * is guaranteed to have been consumed. If it equals target, no information + * is provided on how many bytes of the input buffer have been consumed.
+ **************************************************/ + +/* + * NOTE: The signature differs from the Kyber reference implementation + * in that it adds the offset and always expects the base of the target + * buffer. This avoids shifting the buffer base in the caller, which appears + * tricky to reason about. + */ +static unsigned int rej_uniform(int16_t *r, unsigned int target, + unsigned int offset, const uint8_t *buf, + unsigned int buflen) +__contract__( + requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) + requires(memory_no_alias(r, sizeof(int16_t) * target)) + requires(memory_no_alias(buf, buflen)) + requires(offset > 0 ==> array_bound(r, 0, offset, 0, MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * target)) + ensures(offset <= return_value && return_value <= target) + ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, MLKEM_Q)) +) { return rej_uniform_scalar(r, target, offset, buf, buflen); } #else /* MLKEM_USE_NATIVE_REJ_UNIFORM */ - -MLKEM_NATIVE_INTERNAL_API -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +static unsigned int rej_uniform(int16_t *r, unsigned int target, + unsigned int offset, const uint8_t *buf, + unsigned int buflen) { int ret; /* Sample from large buffer with full lane as much as possible. 
*/ ret = rej_uniform_native(r + offset, target - offset, buf, buflen); if (ret != -1) - return offset + (unsigned)ret; + { + unsigned res = offset + (unsigned)ret; + debug_assert_bound(r, res, 0, MLKEM_Q); + return res; + } return rej_uniform_scalar(r, target, offset, buf, buflen); } #endif /* MLKEM_USE_NATIVE_REJ_UNIFORM */ + +#ifndef MLKEM_GEN_MATRIX_NBLOCKS +#define MLKEM_GEN_MATRIX_NBLOCKS \ + ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + XOF_RATE) / XOF_RATE) +#endif + +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform_x4(poly *vec, uint8_t *seed[4]) +{ + /* Temporary buffers for XOF output before rejection sampling */ + uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + + /* Tracks the number of coefficients we have already sampled */ + unsigned int ctr[KECCAK_WAY]; + xof_x4_ctx statex; + unsigned int buflen; + + shake128x4_inc_init(&statex); + + /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */ + xof_x4_absorb(&statex, seed[0], seed[1], seed[2], seed[3], + MLKEM_SYMBYTES + 2); + + /* + * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + * This should generate the matrix entries with high probability. + */ + xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, + &statex); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); + + /* + * So long as not all matrix entries have been generated, squeeze + * one more block a time until we're done. 
+ */ + buflen = XOF_RATE; + while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || + ctr[3] < MLKEM_N) + __loop__( + assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), + object_whole(buf1), object_whole(buf2), object_whole(buf3)) + invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) + invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) + invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, MLKEM_Q)) + invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, MLKEM_Q)) + invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, MLKEM_Q)) + invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, MLKEM_Q))) + { + xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); + } + + xof_x4_release(&statex); +} + +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) +{ + xof_ctx state; + uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + unsigned int ctr, buflen; + + shake128_inc_init(&state); + + xof_absorb(&state, seed, MLKEM_SYMBYTES + 2); + + /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + */ + /* This should generate the matrix entry with high probability. 
*/ + xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; + ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); + + /* Squeeze + sample one more block a time until we're done */ + buflen = XOF_RATE; + while (ctr < MLKEM_N) + __loop__( + assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) + invariant(ctr <= MLKEM_N) + invariant(array_bound(entry->coeffs, 0, ctr, 0, MLKEM_Q))) + { + xof_squeezeblocks(buf, 1, &state); + ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen); + } + + xof_release(&state); +} + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_rej_uniform MLKEM_NAMESPACE_K(empty_cu_rej_uniform) +int empty_cu_rej_uniform; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/rej_uniform.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/rej_uniform.h index 13db836bc..801287259 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/rej_uniform.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/rej_uniform.h @@ -9,54 +9,55 @@ #include #include "cbmc.h" #include "common.h" +#include "poly.h" -#define rej_uniform MLKEM_NAMESPACE(rej_uniform) +#define poly_rej_uniform_x4 MLKEM_NAMESPACE(poly_rej_uniform_x4) /************************************************* - * Name: rej_uniform + * Name: poly_rej_uniform_x4 * - * Description: Run rejection sampling on uniform random bytes to generate - * uniform random integers mod q + * Description: Generate four polynomials using rejection sampling + * on (pseudo-)uniformly random bytes sampled from a seed. * - * Arguments: - int16_t *r: pointer to output buffer - * - unsigned int target: requested number of 16-bit integers - * (uniform mod q). - * Must be <= 4096. - * - unsigned int offset: number of 16-bit integers that have - * already been sampled. - * Must be <= target. 
- * - const uint8_t *buf: pointer to input buffer - * (assumed to be uniform random bytes) - * - unsigned int buflen: length of input buffer in bytes - * Must be <= 4096. - * Must be a multiple of 3. + * Arguments: - poly *vec: Pointer to an array of 4 polynomials + * to be sampled. + * - uint8_t *seed[4]: Pointer to array of four pointers + * pointing to the seed buffers of size + * MLKEM_SYMBYTES + 2 each. * - * Note: Strictly speaking, only a few values of buflen near UINT_MAX need - * excluding. The limit of 4096 is somewhat arbitary but sufficient for all - * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. - * - * Returns the new offset of sampled 16-bit integers, at most target, - * and at least the initial offset. - * If the new offset is strictly less than len, all of the input buffers - * is guaranteed to have been consumed. If it is equal to len, no information - * is provided on how many bytes of the input buffer have been consumed. **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform_x4(poly *vec, uint8_t *seed[4]) +__contract__( + requires(memory_no_alias(vec, sizeof(poly) * 4)) + requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) + requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) + assigns(memory_slice(vec, sizeof(poly) * 4)) + ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, MLKEM_Q))); -/* - * NOTE: The signature differs from the Kyber reference implementation - * in that it adds the offset and always expects the base of the target - * buffer. 
This avoids shifting the buffer base in the caller, which appears - * tricky to reason about. - */ +#define poly_rej_uniform MLKEM_NAMESPACE(poly_rej_uniform) +/************************************************* + * Name: poly_rej_uniform + * + * Description: Generate polynomial using rejection sampling + * on (pseudo-)uniformly random bytes sampled from a seed. + * + * Arguments: - poly *entry: Pointer to polynomial to be sampled. + * - uint8_t *seed: Pointer to seed buffer of size + * MLKEM_SYMBYTES + 2 bytes. + * + **************************************************/ MLKEM_NATIVE_INTERNAL_API -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +void poly_rej_uniform(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) __contract__( - requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) - requires(memory_no_alias(r, sizeof(int16_t) * target)) - requires(memory_no_alias(buf, buflen)) - requires(offset > 0 ==> array_bound(r, 0, offset, 0, MLKEM_Q)) - assigns(memory_slice(r, sizeof(int16_t) * target)) - ensures(offset <= return_value && return_value <= target) - ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, MLKEM_Q)) -); -#endif + requires(memory_no_alias(entry, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) + assigns(memory_slice(entry, sizeof(poly))) + ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q))); + +#endif /* REJ_UNIFORM_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/symmetric.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/symmetric.h index 55ebbbd53..3563e5505 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/symmetric.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/symmetric.h @@ -10,6 +10,7 @@ #include "cbmc.h" #include "common.h" #include "fips202.h" +#include "fips202x4.h" /* Macros denoting FIPS-203 specific Hash functions */ diff --git
a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/verify.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/verify.c index b7078fcc1..9f39dcd22 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/verify.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/verify.c @@ -4,7 +4,8 @@ */ #include "verify.h" -#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) +#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) && \ + !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) /* * Masking value used in constant-time functions from * verify.h to block the compiler's range analysis and @@ -12,9 +13,11 @@ */ volatile uint64_t ct_opt_blocker_u64 = 0; -#else /* MLKEM_USE_ASM_VALUE_BARRIER */ +#else /* MLKEM_USE_ASM_VALUE_BARRIER || \ + MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ -#define empty_cu_verify MLKEM_NAMESPACE(empty_cu_verify) +#define empty_cu_verify MLKEM_NAMESPACE_K(empty_cu_verify) int empty_cu_verify; -#endif /* MLKEM_USE_ASM_VALUE_BARRIER */ +#endif /* MLKEM_USE_ASM_VALUE_BARRIER || \ + MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/verify.h index 8c47155dc..f6ecf5eba 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/verify.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/verify.h @@ -268,7 +268,7 @@ __contract__( for (i = 0; i < len; i++) __loop__( - invariant(i >= 0 && i <= len) + invariant(i <= len) invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k]))))) { r |= a[i] ^ b[i]; @@ -314,4 +314,4 @@ __contract__( } } -#endif +#endif /* VERIFY_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/arith_native_x86_64.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/arith_native_x86_64.h index ce13e7911..25e00a930 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/arith_native_x86_64.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/arith_native_x86_64.h @@ -42,7 +42,7 @@ void basemul_avx2(__m256i
*r, const __m256i *a, const __m256i *b, const __m256i *qdata); #define polyvec_basemul_acc_montgomery_cached_avx2 \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_avx2) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_avx2) void polyvec_basemul_acc_montgomery_cached_avx2( poly *r, const polyvec *a, const polyvec *b, const polyvec_mulcache *b_cache); diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/default_impl.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/default_impl.h index 66de8c85f..029111c17 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/default_impl.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/default_impl.h @@ -28,9 +28,6 @@ #define MLKEM_USE_NATIVE_POLY_TOBYTES #define MLKEM_USE_NATIVE_POLY_FROMBYTES -#define INVNTT_BOUND_NATIVE (8 * MLKEM_Q) -#define NTT_BOUND_NATIVE (8 * MLKEM_Q) - static INLINE void poly_permute_bitrev_to_custom(poly *data) { nttunpack_avx2((__m256i *)(data->coeffs), qdata.vec); diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/zetas.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/zetas.c index 1a26e0dd5..4ef887c62 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/zetas.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/zetas.c @@ -8,6 +8,8 @@ * Do not modify it directly. 
*/ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) #include "ntt.h" /* @@ -28,3 +30,10 @@ ALIGN const int16_t zetas[128] = { -1187, -1659, -1185, -1530, -1278, 794, -1510, -854, -870, 478, -108, -308, 996, 991, 958, -1460, 1522, 1628, }; + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_zetas MLKEM_NAMESPACE_K(empty_cu_zetas) +int empty_cu_zetas; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/arith_native_aarch64.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/arith_native_aarch64.h index 6a5ee8a7d..fc4e7dd38 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/arith_native_aarch64.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/arith_native_aarch64.h @@ -75,14 +75,14 @@ void poly_tobytes_asm_clean(uint8_t *r, const int16_t *a); void poly_tobytes_asm_opt(uint8_t *r, const int16_t *a); #define polyvec_basemul_acc_montgomery_cached_asm_clean \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean) void polyvec_basemul_acc_montgomery_cached_asm_clean(int16_t *r, const int16_t *a, const int16_t *b, const int16_t *b_cache); #define polyvec_basemul_acc_montgomery_cached_asm_opt \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt) void polyvec_basemul_acc_montgomery_cached_asm_opt(int16_t *r, const int16_t *a, const int16_t *b, const int16_t *b_cache); diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/clean_impl.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/clean_impl.h index b0ff3d597..548b1eebb 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/clean_impl.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/clean_impl.h @@ -31,7 +31,6 @@ static INLINE 
void ntt_native(poly *data) aarch64_ntt_zetas_layer56); } -#define INVNTT_BOUND_NATIVE (8 * MLKEM_Q) static INLINE void intt_native(poly *data) { intt_asm_clean(data->coeffs, aarch64_invntt_zetas_layer01234, diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/intt_clean.S b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/intt_clean.S index 623a82ae9..b243a569d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/intt_clean.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/intt_clean.S @@ -149,7 +149,7 @@ inp .req x3 count .req x4 - xtmp .req x5 + wtmp .req w5 data0 .req v8 data1 .req v9 @@ -193,40 +193,20 @@ t3 .req v28 ninv .req v29 - q_ninv .req q29 ninv_tw .req v30 - q_ninv_tw .req q30 - -/* Literal pool */ -.macro dup8h c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c -.endm - -.p2align 4 -c_consts: .short 3329 - .short 20159 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 -c_ninv: dup8h 512 -c_ninv_tw: dup8h 5040 MLKEM_ASM_NAMESPACE(intt_asm_clean): push_stack - ldr q_consts, c_consts - ldr q_ninv, c_ninv - ldr q_ninv_tw, c_ninv_tw + // Setup constants + mov wtmp, #3329 + mov consts.h[0], wtmp + mov wtmp, #20159 + mov consts.h[1], wtmp + mov wtmp, #512 + dup ninv.8h, wtmp + mov wtmp, #5040 + dup ninv_tw.8h, wtmp mov inp, in mov count, #8 @@ -361,4 +341,49 @@ layer012_start: pop_stack ret +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq in + .unreq r01234_ptr + .unreq r56_ptr + .unreq inp + .unreq count + .unreq wtmp + .unreq data0 + .unreq data1 + .unreq data2 + .unreq data3 + .unreq data4 + .unreq data5 + .unreq data6 + .unreq data7 + .unreq q_data0 + .unreq q_data1 + .unreq q_data2 + .unreq q_data3 + .unreq q_data4 + .unreq q_data5 + .unreq q_data6 + .unreq q_data7 + .unreq root0 + .unreq root1 + .unreq root2 + .unreq root0_tw + .unreq root1_tw + .unreq root2_tw + .unreq consts + .unreq q_consts + 
.unreq q_root0 + .unreq q_root1 + .unreq q_root2 + .unreq q_root0_tw + .unreq q_root1_tw + .unreq q_root2_tw + .unreq tmp + .unreq t0 + .unreq t1 + .unreq t2 + .unreq t3 + .unreq ninv + .unreq ninv_tw + #endif /* MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/intt_opt.S b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/intt_opt.S index e332efef8..c94746e17 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/intt_opt.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/intt_opt.S @@ -149,7 +149,7 @@ inp .req x3 count .req x4 - xtmp .req x5 + wtmp .req w5 data0 .req v8 data1 .req v9 @@ -193,40 +193,20 @@ t3 .req v28 ninv .req v29 - q_ninv .req q29 ninv_tw .req v30 - q_ninv_tw .req q30 - -/* Literal pool */ -.macro dup8h c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c -.endm - -.p2align 4 -c_consts: .short 3329 - .short 20159 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 -c_ninv: dup8h 512 -c_ninv_tw: dup8h 5040 MLKEM_ASM_NAMESPACE(intt_asm_opt): push_stack - ldr q_consts, c_consts - ldr q_ninv, c_ninv - ldr q_ninv_tw, c_ninv_tw + // Setup constants + mov wtmp, #3329 + mov consts.h[0], wtmp + mov wtmp, #20159 + mov consts.h[1], wtmp + mov wtmp, #512 + dup ninv.8h, wtmp + mov wtmp, #5040 + dup ninv_tw.8h, wtmp mov inp, in mov count, #8 @@ -1017,4 +997,49 @@ layer012_start: pop_stack ret +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq in + .unreq r01234_ptr + .unreq r56_ptr + .unreq inp + .unreq count + .unreq wtmp + .unreq data0 + .unreq data1 + .unreq data2 + .unreq data3 + .unreq data4 + .unreq data5 + .unreq data6 + .unreq data7 + .unreq q_data0 + .unreq q_data1 + .unreq q_data2 + .unreq q_data3 + .unreq q_data4 + .unreq q_data5 + .unreq q_data6 + .unreq q_data7 + .unreq root0 + .unreq root1 + .unreq root2 + .unreq root0_tw + .unreq root1_tw + .unreq root2_tw 
+ .unreq consts + .unreq q_consts + .unreq q_root0 + .unreq q_root1 + .unreq q_root2 + .unreq q_root0_tw + .unreq q_root1_tw + .unreq q_root2_tw + .unreq tmp + .unreq t0 + .unreq t1 + .unreq t2 + .unreq t3 + .unreq ninv + .unreq ninv_tw + #endif /* MLKEM_NATIVE_ARITH_BACKEND_AARCH64_OPT */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/ntt_clean.S b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/ntt_clean.S index 877a5f689..cd63cc4d6 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/ntt_clean.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/ntt_clean.S @@ -121,7 +121,7 @@ inp .req x3 count .req x4 - xtmp .req x5 + wtmp .req w5 data0 .req v8 data1 .req v9 @@ -156,7 +156,6 @@ q_root2_tw .req q6 consts .req v7 - q_consts .req q7 tmp .req v24 t0 .req v25 @@ -167,21 +166,13 @@ .text .global MLKEM_ASM_NAMESPACE(ntt_asm_clean) -/* Literal pool */ -.p2align 4 -c_consts: - .short 3329 - .short 20159 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 - MLKEM_ASM_NAMESPACE(ntt_asm_clean): push_stack - ldr q_consts, c_consts + + mov wtmp, #3329 + mov consts.h[0], wtmp + mov wtmp, #20159 + mov consts.h[1], wtmp mov inp, in mov count, #4 @@ -280,4 +271,46 @@ layer3456_start: pop_stack ret +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq in + .unreq r01234_ptr + .unreq r56_ptr + .unreq inp + .unreq count + .unreq wtmp + .unreq data0 + .unreq data1 + .unreq data2 + .unreq data3 + .unreq data4 + .unreq data5 + .unreq data6 + .unreq data7 + .unreq q_data0 + .unreq q_data1 + .unreq q_data2 + .unreq q_data3 + .unreq q_data4 + .unreq q_data5 + .unreq q_data6 + .unreq q_data7 + .unreq root0 + .unreq root1 + .unreq root2 + .unreq root0_tw + .unreq root1_tw + .unreq root2_tw + .unreq q_root0 + .unreq q_root1 + .unreq q_root2 + .unreq q_root0_tw + .unreq q_root1_tw + .unreq q_root2_tw + .unreq consts + .unreq tmp + .unreq t0 + .unreq t1 + .unreq t2 + .unreq t3 + #endif /* 
MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/ntt_opt.S b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/ntt_opt.S index 15103a595..8705615b7 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/ntt_opt.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/ntt_opt.S @@ -121,7 +121,7 @@ inp .req x3 count .req x4 - xtmp .req x5 + wtmp .req w5 data0 .req v8 data1 .req v9 @@ -167,21 +167,13 @@ .text .global MLKEM_ASM_NAMESPACE(ntt_asm_opt) -/* Literal pool */ -.p2align 4 -c_consts: - .short 3329 - .short 20159 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 - .short 0 - MLKEM_ASM_NAMESPACE(ntt_asm_opt): push_stack - ldr q_consts, c_consts + + mov wtmp, #3329 + mov consts.h[0], wtmp + mov wtmp, #20159 + mov consts.h[1], wtmp mov inp, in mov count, #4 @@ -916,4 +908,47 @@ MLKEM_ASM_NAMESPACE(ntt_asm_opt): pop_stack ret +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq in + .unreq r01234_ptr + .unreq r56_ptr + .unreq inp + .unreq count + .unreq wtmp + .unreq data0 + .unreq data1 + .unreq data2 + .unreq data3 + .unreq data4 + .unreq data5 + .unreq data6 + .unreq data7 + .unreq q_data0 + .unreq q_data1 + .unreq q_data2 + .unreq q_data3 + .unreq q_data4 + .unreq q_data5 + .unreq q_data6 + .unreq q_data7 + .unreq root0 + .unreq root1 + .unreq root2 + .unreq root0_tw + .unreq root1_tw + .unreq root2_tw + .unreq q_root0 + .unreq q_root1 + .unreq q_root2 + .unreq q_root0_tw + .unreq q_root1_tw + .unreq q_root2_tw + .unreq consts + .unreq q_consts + .unreq tmp + .unreq t0 + .unreq t1 + .unreq t2 + .unreq t3 + #endif /* MLKEM_NATIVE_ARITH_BACKEND_AARCH64_OPT */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/opt_impl.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/opt_impl.h index b22674026..ec1bf6587 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/opt_impl.h +++ 
b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/opt_impl.h @@ -25,14 +25,12 @@ #define MLKEM_USE_NATIVE_POLY_TOBYTES #define MLKEM_USE_NATIVE_REJ_UNIFORM -#define NTT_BOUND_NATIVE (6 * MLKEM_Q) static INLINE void ntt_native(poly *data) { ntt_asm_opt(data->coeffs, aarch64_ntt_zetas_layer01234, aarch64_ntt_zetas_layer56); } -#define INVNTT_BOUND_NATIVE (8 * MLKEM_Q) static INLINE void intt_native(poly *data) { intt_asm_opt(data->coeffs, aarch64_invntt_zetas_layer01234, diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/poly_clean.S b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/poly_clean.S index f70a40221..809f9667e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/poly_clean.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/poly_clean.S @@ -6,33 +6,6 @@ #include "common.h" #if defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN) -/* We use a single literal pool for all functions in this file. - * This is OK even when the file gets expanded through SLOTHY, - * since PC-relative offets are up to 1MB in AArch64. - * - * The use of dup8h to build constant vectors in memory - * is slightly wasteful and could be avoided with a GPR-load - * followed by Neon `dup`, but we're ultimately only talking - * about 64 bytes, so it seems OK. 
- */ - -.macro dup8h c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c -.endm - -.p2align 4 -c_modulus: dup8h 3329 // ML-KEM modulus -c_modulus_twisted: dup8h 20159 // Barrett twist of 1 wrt 2^27 -c_mont_constant: dup8h -1044 // 2^16 % 3329 -c_barrett_twist: dup8h -10276 // Barrett twist of -1044 (wrt 2^16) - /* * Some modular arithmetic macros */ @@ -70,6 +43,7 @@ c_barrett_twist: dup8h -10276 // Barrett twist of -1044 (wrt 2^16) ptr .req x0 count .req x1 + wtmp .req w2 data .req v0 q_data .req q0 @@ -77,14 +51,15 @@ c_barrett_twist: dup8h -10276 // Barrett twist of -1044 (wrt 2^16) tmp .req v1 mask .req v2 modulus .req v3 - q_modulus .req q3 modulus_twisted .req v4 - q_modulus_twisted .req q4 MLKEM_ASM_NAMESPACE(poly_reduce_asm_clean): - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 // ML-KEM modulus + dup modulus.8h, wtmp + + mov wtmp, #20159 // Barrett twist of 1 wrt 2^27 + dup modulus_twisted.8h, wtmp mov count, #8 loop_start: @@ -115,6 +90,7 @@ loop_start: .unreq ptr .unreq count + .unreq wtmp .unreq data .unreq q_data @@ -122,9 +98,7 @@ loop_start: .unreq tmp .unreq mask .unreq modulus - .unreq q_modulus .unreq modulus_twisted - .unreq q_modulus_twisted /******************************************** * poly_mulcache_compute() * @@ -137,6 +111,7 @@ loop_start: zeta_ptr .req x2 zeta_twisted_ptr .req x3 count .req x4 + wtmp .req w5 data_odd .req v0 zeta .req v1 @@ -152,13 +127,14 @@ loop_start: q_dst .req q5 modulus .req v6 - q_modulus .req q6 modulus_twisted .req v7 - q_modulus_twisted .req q7 MLKEM_ASM_NAMESPACE(poly_mulcache_compute_asm_clean): - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #20159 + dup modulus_twisted.8h, wtmp mov count, #16 mulcache_compute_loop_start: @@ -185,6 +161,7 @@ mulcache_compute_loop_start: .unreq zeta_ptr .unreq zeta_twisted_ptr .unreq count + .unreq wtmp .unreq 
data_odd .unreq zeta @@ -200,9 +177,7 @@ mulcache_compute_loop_start: .unreq q_dst .unreq modulus - .unreq q_modulus .unreq modulus_twisted - .unreq q_modulus_twisted /******************************************** * poly_tobytes() * @@ -261,6 +236,7 @@ poly_tobytes_asm_clean_asm_loop_start: src .req x0 count .req x1 + wtmp .req w2 data .req v0 q_data .req q0 @@ -268,22 +244,25 @@ poly_tobytes_asm_clean_asm_loop_start: q_res .req q1 factor .req v2 - q_factor .req q2 factor_t .req v3 - q_factor_t .req q3 modulus .req v4 - q_modulus .req q4 modulus_twisted .req v5 - q_modulus_twisted .req q5 tmp0 .req v6 MLKEM_ASM_NAMESPACE(poly_tomont_asm_clean): - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted - ldr q_factor, c_mont_constant - ldr q_factor_t, c_barrett_twist + mov wtmp, #3329 // ML-KEM modulus + dup modulus.8h, wtmp + + mov wtmp, #20159 // Barrett twist of 1 wrt 2^27 + dup modulus_twisted.8h, wtmp + + mov wtmp, #-1044 // 2^16 % 3329 + dup factor.8h, wtmp + + mov wtmp, #-10276 // Barrett twist of -1044 (wrt 2^16) + dup factor_t.8h, wtmp mov count, #8 poly_tomont_asm_loop: @@ -311,6 +290,7 @@ poly_tomont_asm_loop: .unreq src .unreq count + .unreq wtmp .unreq data .unreq q_data @@ -318,13 +298,9 @@ poly_tomont_asm_loop: .unreq q_res .unreq factor - .unreq q_factor .unreq factor_t - .unreq q_factor_t .unreq modulus - .unreq q_modulus .unreq modulus_twisted - .unreq q_modulus_twisted .unreq tmp0 diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/poly_opt.S b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/poly_opt.S index e58ee77c4..815a9dd1a 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/poly_opt.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/poly_opt.S @@ -6,33 +6,6 @@ #include "common.h" #if defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_OPT) -/* We use a single literal pool for all functions in this file. 
- * This is OK even when the file gets expanded through SLOTHY, - * since PC-relative offets are up to 1MB in AArch64. - * - * The use of dup8h to build constant vectors in memory - * is slightly wasteful and could be avoided with a GPR-load - * followed by Neon `dup`, but we're ultimately only talking - * about 64 bytes, so it seems OK. - */ - -.macro dup8h c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c -.endm - -.p2align 4 -c_modulus: dup8h 3329 // ML-KEM modulus -c_modulus_twisted: dup8h 20159 // Barrett twist of 1 wrt 2^27 -c_mont_constant: dup8h -1044 // 2^16 % 3329 -c_barrett_twist: dup8h -10276 // Barrett twist of -1044 (wrt 2^16) - /* * Some modular arithmetic macros */ @@ -70,6 +43,7 @@ c_barrett_twist: dup8h -10276 // Barrett twist of -1044 (wrt 2^16) ptr .req x0 count .req x1 + wtmp .req w2 data .req v0 q_data .req q0 @@ -77,14 +51,15 @@ c_barrett_twist: dup8h -10276 // Barrett twist of -1044 (wrt 2^16) tmp .req v1 mask .req v2 modulus .req v3 - q_modulus .req q3 modulus_twisted .req v4 - q_modulus_twisted .req q4 MLKEM_ASM_NAMESPACE(poly_reduce_asm_opt): - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 // ML-KEM modulus + dup modulus.8h, wtmp + + mov wtmp, #20159 // Barrett twist of 1 wrt 2^27 + dup modulus_twisted.8h, wtmp mov count, #8 // Instructions: 15 @@ -278,6 +253,7 @@ MLKEM_ASM_NAMESPACE(poly_reduce_asm_opt): .unreq ptr .unreq count + .unreq wtmp .unreq data .unreq q_data @@ -285,9 +261,7 @@ MLKEM_ASM_NAMESPACE(poly_reduce_asm_opt): .unreq tmp .unreq mask .unreq modulus - .unreq q_modulus .unreq modulus_twisted - .unreq q_modulus_twisted /******************************************** * poly_mulcache_compute() * @@ -300,6 +274,7 @@ MLKEM_ASM_NAMESPACE(poly_reduce_asm_opt): zeta_ptr .req x2 zeta_twisted_ptr .req x3 count .req x4 + wtmp .req w5 data_odd .req v0 zeta .req v1 @@ -315,13 +290,14 @@ MLKEM_ASM_NAMESPACE(poly_reduce_asm_opt): q_dst .req q5 modulus 
.req v6 - q_modulus .req q6 modulus_twisted .req v7 - q_modulus_twisted .req q7 MLKEM_ASM_NAMESPACE(poly_mulcache_compute_asm_opt): - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #20159 + dup modulus_twisted.8h, wtmp mov count, #16 // Instructions: 7 @@ -426,6 +402,7 @@ MLKEM_ASM_NAMESPACE(poly_mulcache_compute_asm_opt): .unreq zeta_ptr .unreq zeta_twisted_ptr .unreq count + .unreq wtmp .unreq data_odd .unreq zeta @@ -441,9 +418,7 @@ MLKEM_ASM_NAMESPACE(poly_mulcache_compute_asm_opt): .unreq q_dst .unreq modulus - .unreq q_modulus .unreq modulus_twisted - .unreq q_modulus_twisted /******************************************** * poly_tobytes() * @@ -502,6 +477,7 @@ poly_tobytes_asm_opt_asm_loop_start: src .req x0 count .req x1 + wtmp .req w2 data .req v0 q_data .req q0 @@ -509,22 +485,25 @@ poly_tobytes_asm_opt_asm_loop_start: q_res .req q1 factor .req v2 - q_factor .req q2 factor_t .req v3 - q_factor_t .req q3 modulus .req v4 - q_modulus .req q4 modulus_twisted .req v5 - q_modulus_twisted .req q5 tmp0 .req v6 MLKEM_ASM_NAMESPACE(poly_tomont_asm_opt): - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted - ldr q_factor, c_mont_constant - ldr q_factor_t, c_barrett_twist + mov wtmp, #3329 // ML-KEM modulus + dup modulus.8h, wtmp + + mov wtmp, #20159 // Barrett twist of 1 wrt 2^27 + dup modulus_twisted.8h, wtmp + + mov wtmp, #-1044 // 2^16 % 3329 + dup factor.8h, wtmp + + mov wtmp, #-10276 // Barrett twist of -1044 (wrt 2^16) + dup factor_t.8h, wtmp mov count, #8 // Instructions: 5 @@ -670,6 +649,7 @@ MLKEM_ASM_NAMESPACE(poly_tomont_asm_opt): .unreq src .unreq count + .unreq wtmp .unreq data .unreq q_data @@ -677,13 +657,9 @@ MLKEM_ASM_NAMESPACE(poly_tomont_asm_opt): .unreq q_res .unreq factor - .unreq q_factor .unreq factor_t - .unreq q_factor_t .unreq modulus - .unreq q_modulus .unreq modulus_twisted - .unreq q_modulus_twisted .unreq tmp0 diff --git 
a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/polyvec_clean.S b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/polyvec_clean.S index 99fb05de5..c91675b44 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/polyvec_clean.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/polyvec_clean.S @@ -12,31 +12,6 @@ #include "common.h" #if defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN) -/* We use a single literal pool for all functions in this file. - * This is OK even when the file gets expanded through SLOTHY, - * since PC-relative offets are up to 1MB in AArch64. - * - * The use of dup8h to build constant vectors in memory - * is slightly wasteful and could be avoided with a GPR-load - * followed by Neon `dup`, but we're ultimately only talking - * about 64 bytes, so it seems OK. - */ - -.macro dup8h c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c -.endm - -.p2align 4 -c_modulus: dup8h 3329 // ML-KEM modulus -c_modulus_twisted: dup8h 3327 - // Input: // - Vectors al, ah of 32-bit entries // Output: @@ -136,11 +111,10 @@ c_modulus_twisted: dup8h 3327 b3_ptr .req x11 b3_cache_ptr .req x12 count .req x13 + wtmp .req w14 modulus .req v0 - q_modulus .req q0 modulus_twisted .req v2 - q_modulus_twisted .req q2 aa0 .req v3 aa1 .req v4 @@ -164,12 +138,16 @@ c_modulus_twisted: dup8h 3327 t0 .req v28 #if MLKEM_K == 2 -.global MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean) +.global MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean) -MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean): +MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean): push_stack - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #3327 + dup modulus_twisted.8h, wtmp // Computed bases of vector entries @@ -198,12 +176,15 @@ k2_loop_start: #endif /* 
MLKEM_K == 2 */ #if MLKEM_K == 3 -.global MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean) +.global MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean) -MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean): +MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean): push_stack - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #3327 + dup modulus_twisted.8h, wtmp // Computed bases of vector entries @@ -237,12 +218,15 @@ k3_loop_start: #endif /* MLKEM_K == 3 */ #if MLKEM_K == 4 -.global MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean) +.global MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean) -MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean): +MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_clean): push_stack - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #3327 + dup modulus_twisted.8h, wtmp // Computed bases of vector entries @@ -285,4 +269,39 @@ k4_loop_start: ret #endif /* MLKEM_K == 4 */ +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq out + .unreq a0_ptr + .unreq b0_ptr + .unreq b0_cache_ptr + .unreq a1_ptr + .unreq b1_ptr + .unreq b1_cache_ptr + .unreq a2_ptr + .unreq b2_ptr + .unreq b2_cache_ptr + .unreq a3_ptr + .unreq b3_ptr + .unreq b3_cache_ptr + .unreq count + .unreq modulus + .unreq modulus_twisted + .unreq aa0 + .unreq aa1 + .unreq bb0 + .unreq bb1 + .unreq bb1t + .unreq res0l + .unreq res1l + .unreq res0h + .unreq wtmp + .unreq res1h + .unreq tmp0 + .unreq tmp1 + .unreq q_tmp0 + .unreq q_tmp1 + .unreq out0 + .unreq out1 + .unreq t0 + #endif /* MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/polyvec_opt.S 
b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/polyvec_opt.S index 16ed77c3f..8300b682c 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/polyvec_opt.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/polyvec_opt.S @@ -12,31 +12,6 @@ #include "common.h" #if defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_OPT) -/* We use a single literal pool for all functions in this file. - * This is OK even when the file gets expanded through SLOTHY, - * since PC-relative offets are up to 1MB in AArch64. - * - * The use of dup8h to build constant vectors in memory - * is slightly wasteful and could be avoided with a GPR-load - * followed by Neon `dup`, but we're ultimately only talking - * about 64 bytes, so it seems OK. - */ - -.macro dup8h c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c - .short \c -.endm - -.p2align 4 -c_modulus: dup8h 3329 // ML-KEM modulus -c_modulus_twisted: dup8h 3327 - // Input: // - Vectors al, ah of 32-bit entries // Output: @@ -136,11 +111,10 @@ c_modulus_twisted: dup8h 3327 b3_ptr .req x11 b3_cache_ptr .req x12 count .req x13 + wtmp .req w14 modulus .req v0 - q_modulus .req q0 modulus_twisted .req v2 - q_modulus_twisted .req q2 aa0 .req v3 aa1 .req v4 @@ -164,12 +138,16 @@ c_modulus_twisted: dup8h 3327 t0 .req v28 #if MLKEM_K == 2 -.global MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt) +.global MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt) -MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt): +MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt): push_stack - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #3327 + dup modulus_twisted.8h, wtmp // Computed bases of vector entries @@ -530,12 +508,15 @@ MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt): #endif /* MLKEM_K == 2 */ #if MLKEM_K == 3 -.global 
MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt) +.global MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt) -MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt): +MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt): push_stack - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #3327 + dup modulus_twisted.8h, wtmp // Computed bases of vector entries @@ -1001,12 +982,15 @@ MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt): #endif /* MLKEM_K == 3 */ #if MLKEM_K == 4 -.global MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt) +.global MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt) -MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt): +MLKEM_ASM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_asm_opt): push_stack - ldr q_modulus, c_modulus - ldr q_modulus_twisted, c_modulus_twisted + mov wtmp, #3329 + dup modulus.8h, wtmp + + mov wtmp, #3327 + dup modulus_twisted.8h, wtmp // Computed bases of vector entries @@ -1581,4 +1565,39 @@ MLKEM_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt): ret #endif /* MLKEM_K == 4 */ +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq out + .unreq a0_ptr + .unreq b0_ptr + .unreq b0_cache_ptr + .unreq a1_ptr + .unreq b1_ptr + .unreq b1_cache_ptr + .unreq a2_ptr + .unreq b2_ptr + .unreq b2_cache_ptr + .unreq a3_ptr + .unreq b3_ptr + .unreq b3_cache_ptr + .unreq count + .unreq modulus + .unreq modulus_twisted + .unreq wtmp + .unreq aa0 + .unreq aa1 + .unreq bb0 + .unreq bb1 + .unreq bb1t + .unreq res0l + .unreq res1l + .unreq res0h + .unreq res1h + .unreq tmp0 + .unreq tmp1 + .unreq q_tmp0 + .unreq q_tmp1 + .unreq out0 + .unreq out1 + .unreq t0 + #endif /* MLKEM_NATIVE_ARITH_BACKEND_AARCH64_OPT */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/rej_uniform_asm_clean.S 
b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/rej_uniform_asm_clean.S index 722dc0f49..5151a05d0 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/rej_uniform_asm_clean.S +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/aarch64/src/rej_uniform_asm_clean.S @@ -45,6 +45,7 @@ len .req w4 /* Temporary output on the stack */ + xtmp .req x7 output_tmp .req x7 output_tmp_base .req x8 @@ -110,20 +111,26 @@ mlkem_q .req v30 bits .req v31 - bits_q .req q31 .text -/* Literal pool */ -.p2align 4 -c_bit_table: - .short 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 - .align 4 .global MLKEM_ASM_NAMESPACE(rej_uniform_asm_clean) MLKEM_ASM_NAMESPACE(rej_uniform_asm_clean): push_stack - ldr bits_q, c_bit_table + // Load 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + movz xtmp, 0x1 + movk xtmp, 0x2, lsl 16 + movk xtmp, 0x4, lsl 32 + movk xtmp, 0x8, lsl 48 + mov bits.d[0], xtmp + + movz xtmp, 0x10 + movk xtmp, 0x20, lsl 16 + movk xtmp, 0x40, lsl 32 + movk xtmp, 0x80, lsl 48 + mov bits.d[1], xtmp + movz tmp, #MLKEM_Q dup mlkem_q.8h, tmp @@ -337,5 +344,63 @@ return: pop_stack ret + +/****************** REGISTER DEALLOCATIONS *******************/ + .unreq output + .unreq buf + .unreq buflen + .unreq table_idx + .unreq len + .unreq output_tmp + .unreq output_tmp_base + .unreq count + .unreq buf_consumed + .unreq tmp + .unreq xtmp + .unreq final_copy_count + .unreq rec_idx_0 + .unreq rec_idx_1 + .unreq rec_idx_2 + .unreq rec_idx_3 + .unreq ctr0 + .unreq ctr1 + .unreq ctr2 + .unreq ctr3 + .unreq ctr01 + .unreq ctr23 + .unreq buf0 + .unreq buf1 + .unreq buf2 + .unreq tmp0 + .unreq tmp1 + .unreq tmp2 + .unreq tmp3 + .unreq sign0 + .unreq sign1 + .unreq sign2 + .unreq sign3 + .unreq val0 + .unreq val0q + .unreq val1 + .unreq val1q + .unreq val2 + .unreq val2q + .unreq val3 + .unreq val3q + .unreq t0 + .unreq t1 + .unreq t2 + .unreq t3 + .unreq table0 + .unreq table0q + .unreq table1 + .unreq table1q + .unreq table2 + .unreq table2q + .unreq table3 + .unreq 
table3q + .unreq mlkem_q + .unreq bits + #endif /* defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN) || defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_OPT) */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/arith_backend.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/arith_backend.h index 09e30f207..0543b1bd1 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/arith_backend.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/arith_backend.h @@ -16,7 +16,9 @@ * * Keep this _after_ the inclusion of the backend; otherwise, * the sanity checks won't have an effect. */ +#if defined(MLKEM_NATIVE_CHECK_APIS) #include "api.h" #endif +#endif #endif /* MLKEM_NATIVE_ARITH_IMPL_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbd.c index 433bdc954..1e6b7c5d1 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbd.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbd.c @@ -2,8 +2,11 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ -#include "cbd.h" +#include "common.h" +#ifndef MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED + #include +#include "cbd.h" /* Static namespacing * This is to facilitate building multiple instances @@ -11,8 +14,6 @@ * within a single compilation unit. */ #define load32_littleendian MLKEM_NAMESPACE(load32_littleendian) #define load24_littleendian MLKEM_NAMESPACE(load24_littleendian) -#define cbd2 MLKEM_NAMESPACE(cbd2) -#define cbd3 MLKEM_NAMESPACE(cbd3) /* End of static namespacing */ /************************************************* @@ -35,44 +36,13 @@ static uint32_t load32_littleendian(const uint8_t x[4]) return r; } -#if MLKEM_ETA1 == 3 -/************************************************* - * Name: load24_littleendian - * - * Description: load 3 bytes into a 32-bit integer - * in little-endian order. 
- * This function is only needed for ML-KEM-512 - * - * Arguments: - const uint8_t *x: pointer to input byte array - * - * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) - **************************************************/ -static uint32_t load24_littleendian(const uint8_t x[3]) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - return r; -} -#endif /* MLKEM_ETA1 == 3 */ - -/************************************************* - * Name: cbd2 - * - * Description: Given an array of uniformly random bytes, compute - * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter eta=2 - * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *buf: pointer to input byte array - **************************************************/ -static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) +MLKEM_NATIVE_INTERNAL_API +void poly_cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) { unsigned i; for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_abs_bound(r->coeffs, 0, 8 * i, 3))) { unsigned j; @@ -82,7 +52,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i <= MLKEM_N / 8 && j <= 8) invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 3))) { const int16_t a = (d >> (4 * j + 0)) & 0x3; @@ -92,24 +62,34 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) } } -#if MLKEM_ETA1 == 3 +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == 3 /************************************************* - * Name: cbd3 + * Name: load24_littleendian * - * Description: Given an array of uniformly random bytes, compute - * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter 
eta=3. + * Description: load 3 bytes into a 32-bit integer + * in little-endian order. * This function is only needed for ML-KEM-512 * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *buf: pointer to input byte array + * Arguments: - const uint8_t *x: pointer to input byte array + * + * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) **************************************************/ -static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) +static uint32_t load24_littleendian(const uint8_t x[3]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + return r; +} + +MLKEM_NATIVE_INTERNAL_API +void poly_cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) { unsigned i; for (i = 0; i < MLKEM_N / 4; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(i <= MLKEM_N / 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i, 4))) { unsigned j; @@ -120,7 +100,7 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) for (j = 0; j < 4; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4) + invariant(i <= MLKEM_N / 4 && j <= 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 4))) { const int16_t a = (d >> (6 * j + 0)) & 0x7; @@ -129,28 +109,12 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) } } } -#endif /* MLKEM_ETA1 == 3 */ +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == \ + 3 */ -MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) -{ -#if MLKEM_ETA1 == 2 - cbd2(r, buf); -#elif MLKEM_ETA1 == 3 - cbd3(r, buf); -#else -#error "This implementation requires eta1 in {2,3}" -#endif -} +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ -#if MLKEM_K == 2 || MLKEM_K == 4 -MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) -{ -#if MLKEM_ETA2 == 2 - cbd2(r, buf); -#else -#error "This 
implementation requires eta2 = 2" -#endif -} -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ +#define empty_cu_cbd MLKEM_NAMESPACE_K(empty_cu_cbd) +int empty_cu_cbd; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbd.h index 15db89570..54c1f5b90 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbd.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbd.h @@ -9,46 +9,35 @@ #include "common.h" #include "poly.h" -#define poly_cbd_eta1 MLKEM_NAMESPACE(poly_cbd_eta1) +#define poly_cbd2 MLKEM_NAMESPACE(poly_cbd2) /************************************************* - * Name: poly_cbd_eta1 + * Name: poly_cbd2 * * Description: Given an array of uniformly random bytes, compute * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter MLKEM_ETA1. + * a centered binomial distribution with parameter eta=2 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *buf: pointer to input byte array **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)) -); +void poly_cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]); -#if MLKEM_K == 2 || MLKEM_K == 4 -#define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2) +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == 3 +#define poly_cbd3 MLKEM_NAMESPACE(poly_cbd3) /************************************************* - * Name: poly_cbd_eta1 + * Name: poly_cbd3 * * Description: Given an array of uniformly random bytes, compute * polynomial with coefficients distributed according to - * a centered binomial 
distribution with parameter MLKEM_ETA2. + * a centered binomial distribution with parameter eta=3. + * This function is only needed for ML-KEM-512 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *buf: pointer to input byte array **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)) -); -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ +void poly_cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]); +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD || MLKEM_ETA1 == 3 */ -#endif +#endif /* CBD_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbmc.h index baa0bfa9f..52b95bc3f 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbmc.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbmc.h @@ -13,7 +13,7 @@ #define __contract__(x) #define __loop__(x) -#define cassert(x, y) +#define cassert(x) #else /* CBMC _is_ defined, therefore we're doing proof */ @@ -30,7 +30,7 @@ #define invariant(...) __CPROVER_loop_invariant(__VA_ARGS__) #define decreases(...) __CPROVER_decreases(__VA_ARGS__) /* cassert to avoid confusion with in-built assert */ -#define cassert(...) __CPROVER_assert(__VA_ARGS__) +#define cassert(x) __CPROVER_assert(x, "cbmc assertion failed") #define assume(...) 
__CPROVER_assume(__VA_ARGS__) /*************************************************** @@ -119,13 +119,13 @@ { \ unsigned qvar; \ ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> \ - (((value_lb) <= (array_var[(qvar)])) && \ - ((array_var[(qvar)]) < (value_ub))) \ + (((int)(value_lb) <= ((array_var)[(qvar)])) && \ + (((array_var)[(qvar)]) < (int)(value_ub))) \ } #define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \ array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \ - (qvar_ub), (array_var), (value_lb), (value_ub)) + (qvar_ub), (array_var), (value_lb), (value_ub)) /* clang-format on */ /* Wrapper around array_bound operating on absolute values. @@ -134,6 +134,6 @@ * bound in array_bound is inclusive, we have to raise it by 1. */ #define array_abs_bound(arr, lb, ub, k) \ - array_bound((arr), (lb), (ub), -(k) + 1, (k)) + array_bound((arr), (lb), (ub), -((int)(k)) + 1, (k)) #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/common.h index da886780c..4f326333e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/common.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/common.h @@ -43,23 +43,30 @@ #define MLKEM_NATIVE_MAKE_NAMESPACE_(x1, x2) x1##_##x2 #define MLKEM_NATIVE_MAKE_NAMESPACE(x1, x2) MLKEM_NATIVE_MAKE_NAMESPACE_(x1, x2) -#define FIPS202_NAMESPACE(s) \ - MLKEM_NATIVE_MAKE_NAMESPACE(FIPS202_NAMESPACE_PREFIX, s) - #define MLKEM_NAMESPACE(s) \ MLKEM_NATIVE_MAKE_NAMESPACE(MLKEM_NAMESPACE_PREFIX, s) +#if defined(MLKEM_NAMESPACE_PREFIX_ADD_LEVEL) +#define MLKEM_NATIVE_MAKE_NAMESPACE_K_(x1, x2, x3) x1##x2##_##x3 +#define MLKEM_NATIVE_MAKE_NAMESPACE_K(x1, x2, x3) \ + MLKEM_NATIVE_MAKE_NAMESPACE_K_(x1, x2, x3) +#define MLKEM_NAMESPACE_K(s) \ + MLKEM_NATIVE_MAKE_NAMESPACE_K(MLKEM_NAMESPACE_PREFIX, MLKEM_LVL, s) +#else +#define MLKEM_NAMESPACE_K(s) MLKEM_NAMESPACE(s) +#endif + /* On Apple platforms, we need to emit leading underscore * in front of assembly 
symbols. We thus introducee a separate * namespace wrapper for ASM symbols. */ #if !defined(__APPLE__) #define MLKEM_ASM_NAMESPACE(sym) MLKEM_NAMESPACE(sym) -#define FIPS202_ASM_NAMESPACE(sym) FIPS202_NAMESPACE(sym) +#define MLKEM_ASM_NAMESPACE_K(sym) MLKEM_NAMESPACE_K(sym) #else #define PREFIX_UNDERSCORE_(sym) _##sym #define PREFIX_UNDERSCORE(sym) PREFIX_UNDERSCORE_(sym) #define MLKEM_ASM_NAMESPACE(sym) PREFIX_UNDERSCORE(MLKEM_NAMESPACE(sym)) -#define FIPS202_ASM_NAMESPACE(sym) PREFIX_UNDERSCORE(FIPS202_NAMESPACE(sym)) +#define MLKEM_ASM_NAMESPACE_K(sym) PREFIX_UNDERSCORE(MLKEM_NAMESPACE_K(sym)) #endif #endif /* MLKEM_NATIVE_COMMON_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/config.h index d1441835b..fa89370ce 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/config.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/config.h @@ -40,10 +40,12 @@ /* #define MLKEM_NATIVE_CONFIG_FILE "config.h" */ /****************************************************************************** - * Name: MLKEM_NAMESPACE + * Name: MLKEM_NAMESPACE_PREFIX * - * Description: The prefix to use to namespace global symbols - * from mlkem/. + * Description: The prefix to use to namespace global symbols from mlkem/. + * + * Level-dependent symbols will additionally be prefixed with the + * security level if MLKEM_NAMESPACE_PREFIX_ADD_LEVEL is set. * * This can also be set using CFLAGS. * @@ -53,17 +55,71 @@ #endif /****************************************************************************** - * Name: FIPS202_NAMESPACE + * Name: MLKEM_NAMESPACE_PREFIX_ADD_LEVEL + * + * Description: If set, the level (512, 768, 1024) is added to the namespace + * prefix MLKEM_NAMESPACE_PREFIX for all functions which are + * level-dependent. Level-independent functions will have their + * symbol prefixed by MLKEM_NAMESPACE_PREFIX only.
* - * Description: The prefix to use to namespace global symbols - * from mlkem/fips202/. + * This is intended to be used for multi-level builds where + * level-independent code should be shared across levels. * * This can also be set using CFLAGS. * *****************************************************************************/ -#if !defined(FIPS202_NAMESPACE_PREFIX) -#define FIPS202_NAMESPACE_PREFIX FIPS202_DEFAULT_NAMESPACE_PREFIX -#endif +/* #define MLKEM_NAMESPACE_PREFIX_ADD_LEVEL */ + +/****************************************************************************** + * Name: MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED + * + * Description: This is for multi-level builds of mlkem-native only. If you + * need only a single security level build of mlkem-native, + * keep this unset. + * + * If this is set, all MLKEM_K-independent code will be included + * in the build, including code needed only for other security + * levels. + * + * Example: poly_cbd3 is only needed for MLKEM_K == 2. Yet, if + * this option is set for a build with MLKEM_K==3/4, it would + * be included. + * + * To build mlkem-native with support for all security levels, + * build it three times -- once per level -- and set the option + * MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED for exactly one of + * them, and MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED for the + * others. + * + * See examples/multilevel_build for an example. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +/* #define MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED */ + +/****************************************************************************** + * Name: MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED + * + * Description: This is for multi-level builds of mlkem-native only. If you + * need only a single security level build of mlkem-native, + * keep this unset. + * + * If this is set, no MLKEM_K-independent code will be included + * in the build. 
+ * + * To build mlkem-native with support for all security levels, + * build it three times -- once per level -- and set the option + * MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED for exactly one of + * them, and MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED for the + * others. + * + * See examples/multilevel_build for an example. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +/* #define MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ /****************************************************************************** * Name: MLKEM_USE_NATIVE @@ -112,25 +168,13 @@ /* Default namespace * * Don't change this. If you need a different namespace, re-define - * MLKEM_NAMESPACE above instead, and remove the following. - */ - -/* - * The default FIPS202 namespace is - * - * PQCP_MLKEM_NATIVE_FIPS202__ + * MLKEM_NAMESPACE_PREFIX above instead, and remove the following. * - * e.g., PQCP_MLKEM_NATIVE_FIPS202_C_ - */ - -#define FIPS202_DEFAULT_NAMESPACE_PREFIX PQCP_MLKEM_NATIVE_FIPS202 - -/* * The default MLKEM namespace is * - * PQCP_MLKEM_NATIVE_MLKEM__ + * PQCP_MLKEM_NATIVE_MLKEM_ * - * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_ + * e.g., PQCP_MLKEM_NATIVE_MLKEM512_ */ #if MLKEM_K == 2 diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/debug.c new file mode 100644 index 000000000..4b4857cbc --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/debug.c @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +/* NOTE: You can remove this file unless you compile with MLKEM_DEBUG. 
*/ + +#include "common.h" + +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) && defined(MLKEM_DEBUG) + + +#include +#include +#include "debug.h" + +#define MLKEM_NATIVE_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] " + +void mlkem_debug_assert(const char *file, int line, const int val) +{ + if (val == 0) + { + fprintf(stderr, + MLKEM_NATIVE_DEBUG_ERROR_HEADER "Assertion failed (value %d)\n", + file, line, val); + exit(1); + } +} + +void mlkem_debug_check_bounds(const char *file, int line, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive) +{ + int err = 0; + unsigned i; + for (i = 0; i < len; i++) + { + int16_t val = ptr[i]; + if (!(val > lower_bound_exclusive && val < upper_bound_exclusive)) + { + fprintf( + stderr, + MLKEM_NATIVE_DEBUG_ERROR_HEADER + "Bounds assertion failed: Index %u, value %d out of bounds (%d,%d)\n", + file, line, i, (int)val, lower_bound_exclusive, + upper_bound_exclusive); + err = 1; + } + } + + if (err == 1) + exit(1); +} + +#else /* !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED && MLKEM_DEBUG */ + +#define empty_cu_debug MLKEM_NAMESPACE_K(empty_cu_debug) +int empty_cu_debug; + +#endif /* !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED && MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/debug.h new file mode 100644 index 000000000..1103124db --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/debug.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef MLKEM_DEBUG_H +#define MLKEM_DEBUG_H +#include "common.h" + +#if defined(MLKEM_DEBUG) +#include + +/************************************************* + * Name: mlkem_debug_assert + * + * Description: Check debug assertion + * + * Prints an error message to stderr and calls + * exit(1) if not. 
+ * + * Arguments: - file: filename + * - line: line number + * - val: Value asserted to be non-zero + **************************************************/ +#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert) +void mlkem_debug_assert(const char *file, int line, const int val); + +/************************************************* + * Name: mlkem_debug_check_bounds + * + * Description: Check whether values in an array of int16_t + * are within specified bounds. + * + * Prints an error message to stderr and calls + * exit(1) if not. + * + * Arguments: - file: filename + * - line: line number + * - ptr: Base of array to be checked + * - len: Number of int16_t in ptr + * - lower_bound_exclusive: Exclusive lower bound + * - upper_bound_exclusive: Exclusive upper bound + **************************************************/ +#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds) +void mlkem_debug_check_bounds(const char *file, int line, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive); + +/* Check assertion, calling exit() upon failure + * + * val: Value that's asserted to be non-zero + */ +#define debug_assert(val) mlkem_debug_assert(__FILE__, __LINE__, (val)) + +/* Check bounds in array of int16_t's + * ptr: Base of int16_t array; will be explicitly cast to int16_t*, + * so you may pass a byte-compatible type such as poly or polyvec. 
+ * len: Number of int16_t in array + * value_lb: Inclusive lower value bound + * value_ub: Exclusive upper value bound */ +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + mlkem_debug_check_bounds(__FILE__, __LINE__, (const int16_t *)(ptr), (len), \ + (value_lb)-1, (value_ub)) + +/* Check absolute bounds in array of int16_t's + * ptr: Base of array, expression of type int16_t* + * len: Number of int16_t in array + * value_abs_bd: Exclusive absolute upper bound */ +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + debug_assert_bound((ptr), (len), (-(value_abs_bd) + 1), (value_abs_bd)) + +/* Version of bounds assertions for 2-dimensional arrays */ +#define debug_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \ + debug_assert_bound((ptr), ((len0) * (len1)), (value_lb), (value_ub)) + +#define debug_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \ + debug_assert_abs_bound((ptr), ((len0) * (len1)), (value_abs_bd)) + +/* When running CBMC, convert debug assertions into proof obligations */ +#elif defined(CBMC) + +#include "../cbmc.h" + +#define debug_assert(val) cassert(val) + +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + cassert(array_bound(((int16_t *)(ptr)), 0, (len), (value_lb), (value_ub))) + +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + cassert(array_abs_bound(((int16_t *)(ptr)), 0, (len), (value_abs_bd))) + +/* Because of https://github.com/diffblue/cbmc/issues/8570, we can't + * just use a single flattened array_bound(...) here. 
*/ +#define debug_assert_bound_2d(ptr, M, N, value_lb, value_ub) \ + cassert(forall(kN, 0, (M), \ + array_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \ + (value_lb), (value_ub)))) + +#define debug_assert_abs_bound_2d(ptr, M, N, value_abs_bd) \ + cassert(forall(kN, 0, (M), \ + array_abs_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \ + (value_abs_bd)))) + +#else /* MLKEM_DEBUG */ + +#define debug_assert(val) \ + do \ + { \ + } while (0) +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + do \ + { \ + } while (0) +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + do \ + { \ + } while (0) + +#define debug_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \ + do \ + { \ + } while (0) + +#define debug_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \ + do \ + { \ + } while (0) + + +#endif /* MLKEM_DEBUG */ +#endif /* MLKEM_DEBUG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/debug/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/debug/debug.c deleted file mode 100644 index 64294ebe1..000000000 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/debug/debug.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2024 The mlkem-native project authors - * SPDX-License-Identifier: Apache-2.0 - */ -#include "../common.h" - -#if defined(MLKEM_DEBUG) - -#include -#include "debug.h" - -#define MLKEM_NATIVE_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] " - -void mlkem_debug_assert(const char *file, int line, const char *description, - const int val) -{ - if (val == 0) - { - fprintf(stderr, - MLKEM_NATIVE_DEBUG_ERROR_HEADER "Assertion failed: %s (value %d)\n", - file, line, description, val); - exit(1); - } -} - -void mlkem_debug_check_bounds(const char *file, int line, - const char *description, const int16_t *ptr, - unsigned len, int lower_bound_exclusive, - int upper_bound_exclusive) -{ - int err = 0; - unsigned i; - for (i = 0; i < len; i++) - { - int16_t val = ptr[i]; - if (!(val > lower_bound_exclusive && val < 
upper_bound_exclusive)) - { - fprintf(stderr, - MLKEM_NATIVE_DEBUG_ERROR_HEADER - "%s, index %u, value %d out of bounds (%d,%d)\n", - file, line, description, i, (int)val, lower_bound_exclusive, - upper_bound_exclusive); - err = 1; - } - } - - if (err == 1) - exit(1); -} - -#else /* MLKEM_DEBUG */ - -#define empty_cu_debug MLKEM_NAMESPACE(empty_cu_debug) -int empty_cu_debug; - -#endif /* MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/debug/debug.h deleted file mode 100644 index 5ce320ea2..000000000 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/debug/debug.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2024 The mlkem-native project authors - * SPDX-License-Identifier: Apache-2.0 - */ -#ifndef MLKEM_DEBUG_H -#define MLKEM_DEBUG_H - -#include "../common.h" - -#if defined(MLKEM_DEBUG) -#include -#include -#include - -/************************************************* - * Name: mlkem_debug_assert - * - * Description: Check debug assertion - * - * Prints an error message to stderr and calls - * exit(1) if not. - * - * Arguments: - file: filename - * - line: line number - * - description: Textual description of assertion - * - val: Value asserted to be non-zero - **************************************************/ -#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert) -void mlkem_debug_assert(const char *file, int line, const char *description, - const int val); - -/************************************************* - * Name: mlkem_debug_check_bounds - * - * Description: Check whether values in an array of int16_t - * are within specified bounds. - * - * Prints an error message to stderr and calls - * exit(1) if not. 
- * - * Arguments: - file: filename - * - line: line number - * - description: Textual description of check - * - ptr: Base of array to be checked - * - len: Number of int16_t in ptr - * - lower_bound_exclusive: Exclusive lower bound - * - upper_bound_exclusive: Exclusive upper bound - **************************************************/ -#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds) -void mlkem_debug_check_bounds(const char *file, int line, - const char *description, const int16_t *ptr, - unsigned len, int lower_bound_exclusive, - int upper_bound_exclusive); - -/* Check assertion, calling exit() upon failure - * - * val: Value that's asserted to be non-zero - * msg: Message to print on failure - * - * Currently called CASSERT to avoid clash with CBMC assert. - */ -#define CASSERT(val, msg) \ - do \ - { \ - mlkem_debug_assert(__FILE__, __LINE__, (msg), (val)); \ - } while (0) - -/* Check absolute bounds of scalar - * val: Scalar to be checked - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define SCALAR_BOUND(val, abs_bound, msg) \ - CASSERT((val) > -(abs_bound) && (val) < (abs_bound), msg) - -/* Check that all coefficients in array of int16_t's are non-negative - * and below an exclusive upper bound. 
- * - * ptr: Base of array, expression of type int16_t* - * len: Number of int16_t in array - * high_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define UBOUND(ptr, len, high_bound, msg) \ - do \ - { \ - mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ - (len), -1, ((high_bound))); \ - } while (0) - -/* Check absolute bounds in array of int16_t's - * ptr: Base of array, expression of type int16_t* - * len: Number of int16_t in array - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define BOUND(ptr, len, abs_bound, msg) \ - do \ - { \ - mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ - (len), -(abs_bound), (abs_bound)); \ - } while (0) - -/* Check absolute bounds on coefficients in polynomial or mulcache - * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define POLY_BOUND_MSG(ptr, abs_bound, msg) \ - BOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (abs_bound), \ - msg) - -/* Check unsigned bounds on coefficients in polynomial or mulcache - * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
- * msg: Message to print on failure */ -#define POLY_UBOUND_MSG(ptr, ubound, msg) \ - UBOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (ubound), \ - msg) - -/* Check absolute bounds on coefficients in polynomial - * ptr: poly* of poly_mulcache* pointer to polynomial (cache) to check - * abs_bound: Exclusive upper bound on absolute value to check */ -#define POLY_BOUND(ptr, abs_bound) \ - POLY_BOUND_MSG((ptr), (abs_bound), "poly absolute bound for " #ptr) - -/* Check unsigned bounds on coefficients in polynomial - * ptr: poly* of poly_mulcache* pointer to polynomial (cache) to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. - */ -#define POLY_UBOUND(ptr, ubound) \ - POLY_UBOUND_MSG((ptr), (ubound), "poly unsigned bound for " #ptr) - -/* Check absolute bounds on coefficients in vector of polynomials - * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check - * abs_bound: Exclusive upper bound on absolute value to check */ -#define POLYVEC_BOUND(ptr, abs_bound) \ - do \ - { \ - unsigned _debug_polyvec_bound_idx; \ - for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ - _debug_polyvec_bound_idx++) \ - POLY_BOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (abs_bound), \ - "polyvec absolute bound for " #ptr ".vec[i]"); \ - } while (0) - -/* Check unsigned bounds on coefficients in vector of polynomials - * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
- */ -#define POLYVEC_UBOUND(ptr, ubound) \ - do \ - { \ - unsigned _debug_polyvec_bound_idx; \ - for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ - _debug_polyvec_bound_idx++) \ - POLY_UBOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (ubound), \ - "polyvec unsigned bound for " #ptr ".vec[i]"); \ - } while (0) - -#define MLKEM_CONCAT_(left, right) left##right -#define MLKEM_CONCAT(left, right) MLKEM_CONCAT_(left, right) - -/* Following AWS-LC to define a C99-compliant static assert */ -#define MLKEM_STATIC_ASSERT_DEFINE(cond, msg) \ - typedef struct \ - { \ - unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \ - } MLKEM_CONCAT(MLKEM_NAMESPACE(static_assertion_), msg) \ - __attribute__((unused)); - -#define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \ - MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix)) -#define MLKEM_STATIC_ASSERT_ADD_LINE1(cond, line, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE0(cond, MLKEM_CONCAT(line, suffix)) -#define MLKEM_STATIC_ASSERT_ADD_LINE2(cond, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE1(cond, __LINE__, suffix) -#define MLKEM_STATIC_ASSERT_ADD_ERROR(cond, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE2(cond, MLKEM_CONCAT(_error_is_, suffix)) -#define STATIC_ASSERT(cond, error) MLKEM_STATIC_ASSERT_ADD_ERROR(cond, error) - -#else /* MLKEM_DEBUG */ - -#define CASSERT(val, msg) \ - do \ - { \ - } while (0) -#define SCALAR_BOUND(val, abs_bound, msg) \ - do \ - { \ - } while (0) -#define BOUND(ptr, len, abs_bound, msg) \ - do \ - { \ - } while (0) -#define POLY_BOUND(ptr, abs_bound) \ - do \ - { \ - } while (0) -#define POLYVEC_BOUND(ptr, abs_bound) \ - do \ - { \ - } while (0) -#define POLY_BOUND_MSG(ptr, ubound, abs_bound) \ - do \ - { \ - } while (0) -#define UBOUND(ptr, len, high_bound, msg) \ - do \ - { \ - } while (0) -#define POLY_UBOUND(ptr, ubound) \ - do \ - { \ - } while (0) -#define POLYVEC_UBOUND(ptr, ubound) \ - do \ - { \ - } while (0) -#define POLY_UBOUND_MSG(ptr, ubound, msg) \ 
- do \ - { \ - } while (0) -#define STATIC_ASSERT(cond, error) - -#endif /* MLKEM_DEBUG */ - -#endif /* MLKEM_DEBUG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/indcpa.c index 4d3133e14..0cfcc3e9e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/indcpa.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/indcpa.c @@ -17,7 +17,7 @@ #include "symmetric.h" #include "arith_backend.h" -#include "debug/debug.h" +#include "debug.h" #include "cbmc.h" @@ -25,15 +25,13 @@ * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. */ -#define pack_pk MLKEM_NAMESPACE(pack_pk) -#define unpack_pk MLKEM_NAMESPACE(unpack_pk) -#define pack_sk MLKEM_NAMESPACE(pack_sk) -#define unpack_sk MLKEM_NAMESPACE(unpack_sk) -#define pack_ciphertext MLKEM_NAMESPACE(pack_ciphertext) -#define unpack_ciphertext MLKEM_NAMESPACE(unpack_ciphertext) -#define gen_matrix_entry_x4 MLKEM_NAMESPACE(gen_matrix_entry_x4) -#define gen_matrix_entry MLKEM_NAMESPACE(gen_matrix_entry) -#define matvec_mul MLKEM_NAMESPACE(matvec_mul) +#define pack_pk MLKEM_NAMESPACE_K(pack_pk) +#define unpack_pk MLKEM_NAMESPACE_K(unpack_pk) +#define pack_sk MLKEM_NAMESPACE_K(pack_sk) +#define unpack_sk MLKEM_NAMESPACE_K(unpack_sk) +#define pack_ciphertext MLKEM_NAMESPACE_K(pack_ciphertext) +#define unpack_ciphertext MLKEM_NAMESPACE_K(unpack_ciphertext) +#define matvec_mul MLKEM_NAMESPACE_K(matvec_mul) /* End of static namespacing */ /************************************************* @@ -51,7 +49,7 @@ static void pack_pk(uint8_t r[MLKEM_INDCPA_PUBLICKEYBYTES], polyvec *pk, const uint8_t seed[MLKEM_SYMBYTES]) { - POLYVEC_BOUND(pk, MLKEM_Q); + debug_assert_bound_2d(pk, MLKEM_K, MLKEM_N, 0, MLKEM_Q); polyvec_tobytes(r, pk); memcpy(r + MLKEM_POLYVECBYTES, seed, MLKEM_SYMBYTES); } @@ -77,7 +75,7 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], /* 
NOTE: If a modulus check was conducted on the PK, we know at this * point that the coefficients of `pk` are unsigned canonical. The * specifications and proofs, however, do _not_ assume this, and instead - * work with the easily provable bound by 4096. */ + * work with the easily provable bound by UINT12_LIMIT. */ } /************************************************* @@ -91,7 +89,7 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], **************************************************/ static void pack_sk(uint8_t r[MLKEM_INDCPA_SECRETKEYBYTES], polyvec *sk) { - POLYVEC_BOUND(sk, MLKEM_Q); + debug_assert_bound_2d(sk, MLKEM_K, MLKEM_N, 0, MLKEM_Q); polyvec_tobytes(r, sk); } @@ -145,131 +143,11 @@ static void unpack_ciphertext(polyvec *b, poly *v, poly_decompress_dv(v, c + MLKEM_POLYVECCOMPRESSEDBYTES_DU); } -#ifndef MLKEM_GEN_MATRIX_NBLOCKS -#define MLKEM_GEN_MATRIX_NBLOCKS \ - ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + XOF_RATE) / XOF_RATE) -#endif - -/* - * Generate four A matrix entries from a seed, using rejection - * sampling on the output of a XOF. 
- */ -static void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4]) -__contract__( - requires(memory_no_alias(vec, sizeof(poly) * 4)) - requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) - requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) - assigns(memory_slice(vec, sizeof(poly) * 4)) - ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) -{ - /* Temporary buffers for XOF output before rejection sampling */ - uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - - /* Tracks the number of coefficients we have already sampled */ - unsigned int ctr[KECCAK_WAY]; - xof_x4_ctx statex; - unsigned int buflen; - - shake128x4_inc_init(&statex); - - /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */ - xof_x4_absorb(&statex, seed[0], seed[1], seed[2], seed[3], - MLKEM_SYMBYTES + 2); - - /* - * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. - * This should generate the matrix entries with high probability. - */ - xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, - &statex); - buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; - ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); - ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); - ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); - ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); - - /* - * So long as not all matrix entries have been generated, squeeze - * one more block a time until we're done. 
- */ - buflen = XOF_RATE; - while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || - ctr[3] < MLKEM_N) - __loop__( - assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), - object_whole(buf1), object_whole(buf2), object_whole(buf3)) - invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) - invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) - invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, MLKEM_Q)) - invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, MLKEM_Q)) - invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, MLKEM_Q)) - invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, MLKEM_Q))) - { - xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); - ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); - ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); - ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); - ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); - } - - xof_x4_release(&statex); -} - -/* - * Generate a single A matrix entry from a seed, using rejection - * sampling on the output of a XOF. - */ -static void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) -__contract__( - requires(memory_no_alias(entry, sizeof(poly))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) - assigns(memory_slice(entry, sizeof(poly))) - ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) -{ - xof_ctx state; - uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - unsigned int ctr, buflen; - - shake128_inc_init(&state); - xof_absorb(&state, seed, MLKEM_SYMBYTES + 2); - - /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. - */ - /* This should generate the matrix entry with high probability. 
*/ - xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); - buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; - ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); - - /* Squeeze + sample one more block a time until we're done */ - buflen = XOF_RATE; - while (ctr < MLKEM_N) - __loop__( - assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) - invariant(0 <= ctr && ctr <= MLKEM_N) - invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr, - 0, MLKEM_Q))) - { - xof_squeezeblocks(buf, 1, &state); - ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen); - } - - xof_release(&state); -} - #if !defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER) /* This namespacing is not done at the top to avoid a naming conflict * with native backends, which are currently not yet namespaced. */ #define poly_permute_bitrev_to_custom \ - MLKEM_NAMESPACE(poly_permute_bitrev_to_custom) + MLKEM_NAMESPACE_K(poly_permute_bitrev_to_custom) static INLINE void poly_permute_bitrev_to_custom(poly *data) __contract__( @@ -332,7 +210,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) * This call writes across polyvec boundaries for K=2 and K=3. * This is intentional and safe. */ - gen_matrix_entry_x4(&a[0].vec[0] + i, seedxy); + poly_rej_uniform_x4(&a[0].vec[0] + i, seedxy); } /* For left over polynomial, we use single keccak. */ @@ -353,12 +231,11 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) seed0[MLKEM_SYMBYTES + 1] = x; } - gen_matrix_entry(&a[0].vec[0] + i, seed0); + poly_rej_uniform(&a[0].vec[0] + i, seed0); i++; } - cassert(i == MLKEM_K * MLKEM_K, - "gen_matrix: failed to generate whole matrix"); + debug_assert(i == MLKEM_K * MLKEM_K); /* * The public matrix is generated in NTT domain. 
If the native backend @@ -402,16 +279,12 @@ __contract__( for (i = 0; i < MLKEM_K; i++) __loop__( assigns(i, object_whole(out)) - invariant(i >= 0 && i <= MLKEM_K)) + invariant(i <= MLKEM_K)) { polyvec_basemul_acc_montgomery_cached(&out->vec[i], &a[i], v, vc); } } - - -STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0) - MLKEM_NATIVE_INTERNAL_API void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES], @@ -461,7 +334,6 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], matvec_mul(&pkpv, a, &skpv, &skpv_cache); polyvec_tomont(&pkpv); - /* Arithmetic cannot overflow, see static assertion at the top */ polyvec_add(&pkpv, &e); polyvec_reduce(&pkpv); polyvec_reduce(&skpv); @@ -471,11 +343,6 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], } -/* Check that the arithmetic in indcpa_enc() does not overflow */ -STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0) -STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX, - indcpa_enc_bound_1) - MLKEM_NATIVE_INTERNAL_API void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], const uint8_t m[MLKEM_INDCPA_MSGBYTES], @@ -522,7 +389,6 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], polyvec_invntt_tomont(&b); poly_invntt_tomont(&v); - /* Arithmetic cannot overflow, see static assertion at the top */ polyvec_add(&b, &ep); poly_add(&v, &epp); poly_add(&v, &k); @@ -533,9 +399,6 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], pack_ciphertext(c, &b, &v); } -/* Check that the arithmetic in indcpa_dec() does not overflow */ -STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0) - MLKEM_NATIVE_INTERNAL_API void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], const uint8_t c[MLKEM_INDCPA_BYTES], @@ -551,7 +414,6 @@ void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], polyvec_basemul_acc_montgomery(&sb, &skpv, &b); poly_invntt_tomont(&sb); - /* Arithmetic cannot overflow, see static assertion at the 
top */ poly_sub(&v, &sb); poly_reduce(&v); diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/indcpa.h index 011f1aa4f..2c4fda3c4 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/indcpa.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/indcpa.h @@ -10,7 +10,7 @@ #include "common.h" #include "polyvec.h" -#define gen_matrix MLKEM_NAMESPACE(gen_matrix) +#define gen_matrix MLKEM_NAMESPACE_K(gen_matrix) /************************************************* * Name: gen_matrix * @@ -34,7 +34,7 @@ __contract__( array_bound(a[x].vec[y].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))); ); -#define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand) +#define indcpa_keypair_derand MLKEM_NAMESPACE_K(indcpa_keypair_derand) /************************************************* * Name: indcpa_keypair_derand * @@ -60,7 +60,7 @@ __contract__( assigns(object_whole(sk)) ); -#define indcpa_enc MLKEM_NAMESPACE(indcpa_enc) +#define indcpa_enc MLKEM_NAMESPACE_K(indcpa_enc) /************************************************* * Name: indcpa_enc * @@ -89,7 +89,7 @@ __contract__( assigns(object_whole(c)) ); -#define indcpa_dec MLKEM_NAMESPACE(indcpa_dec) +#define indcpa_dec MLKEM_NAMESPACE_K(indcpa_dec) /************************************************* * Name: indcpa_dec * diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/kem.c index 5779d3273..88c3843be 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/kem.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/kem.c @@ -16,8 +16,8 @@ * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. 
*/ -#define check_pk MLKEM_NAMESPACE(check_pk) -#define check_sk MLKEM_NAMESPACE(check_sk) +#define check_pk MLKEM_NAMESPACE_K(check_pk) +#define check_sk MLKEM_NAMESPACE_K(check_sk) /* End of static namespacing */ #if defined(CBMC) diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/kem.h index 074e4771e..93caa796b 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/kem.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/kem.h @@ -9,6 +9,7 @@ #include "cbmc.h" #include "common.h" +#if defined(MLKEM_NATIVE_CHECK_APIS) /* Include to ensure consistency between internal kem.h * and external mlkem_native.h. */ #include "mlkem_native.h" @@ -25,6 +26,14 @@ #error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h #endif +#else +#define crypto_kem_keypair_derand MLKEM_NAMESPACE_K(keypair_derand) +#define crypto_kem_keypair MLKEM_NAMESPACE_K(keypair) +#define crypto_kem_enc_derand MLKEM_NAMESPACE_K(enc_derand) +#define crypto_kem_enc MLKEM_NAMESPACE_K(enc) +#define crypto_kem_dec MLKEM_NAMESPACE_K(dec) +#endif + /************************************************* * Name: crypto_kem_keypair_derand * diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/mlkem_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/mlkem_native.h index 4aed4efbb..12d1d12e6 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/mlkem_native.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/mlkem_native.h @@ -59,9 +59,17 @@ #error MLKEM_NAMESPACE_PREFIX not set by config file #endif -#define BUILD_INFO_CONCAT_(x, y) x##_##y -#define BUILD_INFO_CONCAT(x, y) BUILD_INFO_CONCAT_(x, y) -#define BUILD_INFO_NAMESPACE(sym) BUILD_INFO_CONCAT(MLKEM_NAMESPACE_PREFIX, sym) +#if defined(MLKEM_NATIVE_NAMESPACE_PREFIX_ADD_LEVEL) +#define BUILD_INFO_CONCAT3_(x, y, z) x##y##_##z +#define BUILD_INFO_CONCAT3(x, y, z) BUILD_INFO_CONCAT3_(x, y, z) +#define BUILD_INFO_NAMESPACE(sym) \ + 
BUILD_INFO_CONCAT3(MLKEM_NAMESPACE_PREFIX, BUILD_INFO_LVL, sym) +#else +#define BUILD_INFO_CONCAT2_(x, y) x##_##y +#define BUILD_INFO_CONCAT2(x, y) BUILD_INFO_CONCAT2_(x, y) +#define BUILD_INFO_NAMESPACE(sym) \ + BUILD_INFO_CONCAT2(MLKEM_NAMESPACE_PREFIX, sym) +#endif #endif /* BUILD_INFO_LVL */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/ntt.c index 02b45215c..3651c8da9 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/ntt.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/ntt.c @@ -2,10 +2,12 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ -#include <stdint.h> +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) +#include <stdint.h> #include "arith_backend.h" -#include "debug/debug.h" +#include "debug.h" #include "ntt.h" #include "reduce.h" @@ -45,10 +47,10 @@ * 4 -- 6 * 5 -- 7 */ -static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, - int len, int bound) +static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, + unsigned start, unsigned len, int bound) __contract__( - requires(0 <= start && start < MLKEM_N) + requires(start < MLKEM_N) requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N) requires(0 <= bound && bound < INT16_MAX - MLKEM_Q) requires(-HALF_Q < zeta && zeta < HALF_Q) @@ -60,7 +62,7 @@ __contract__( ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound))) { /* `bound` is a ghost variable only needed in the CBMC specification */ - int j; + unsigned j; ((void)bound); for (j = start; j < start + len; j++) __loop__( @@ -93,7 +95,7 @@ __contract__( * official Kyber implementation here, merely adding `layer` as * a ghost variable for the specifications. 
*/ -static void ntt_layer(int16_t r[MLKEM_N], int len, int layer) +static void ntt_layer(int16_t r[MLKEM_N], unsigned len, unsigned layer) __contract__( requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer)) @@ -101,15 +103,15 @@ __contract__( assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q))) { - int start, k; + unsigned start, k; /* `layer` is a ghost variable only needed in the CBMC specification */ ((void)layer); /* Twiddle factors for layer n start at index 2^(layer-1) */ k = MLKEM_N / (2 * len); for (start = 0; start < MLKEM_N; start += 2 * len) __loop__( - invariant(0 <= start && start < MLKEM_N + 2 * len) - invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) + invariant(start < MLKEM_N + 2 * len) + invariant(k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) invariant(array_abs_bound(r, 0, start, layer * MLKEM_Q + MLKEM_Q)) invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q))) { @@ -130,9 +132,9 @@ __contract__( MLKEM_NATIVE_INTERNAL_API void poly_ntt(poly *p) { - int len, layer; + unsigned len, layer; int16_t *r; - POLY_BOUND_MSG(p, MLKEM_Q, "ref ntt input"); + debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q); r = p->coeffs; for (len = 128, layer = 1; len >= 2; len >>= 1, layer++) @@ -144,30 +146,23 @@ void poly_ntt(poly *p) } /* Check the stronger bound */ - POLY_BOUND_MSG(p, NTT_BOUND, "ref ntt output"); + debug_assert_abs_bound(p, MLKEM_N, NTT_BOUND); } #else /* MLKEM_USE_NATIVE_NTT */ -/* Check that bound for native NTT implies contractual bound */ -STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, invntt_bound) - MLKEM_NATIVE_INTERNAL_API void poly_ntt(poly *p) { - POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input"); + debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q); ntt_native(p); - POLY_BOUND_MSG(p, NTT_BOUND_NATIVE, "native ntt output"); + debug_assert_abs_bound(p, MLKEM_N, NTT_BOUND); } #endif /* 
MLKEM_USE_NATIVE_NTT */ #if !defined(MLKEM_USE_NATIVE_INTT) -/* Check that bound for reference invNTT implies contractual bound */ -#define INVNTT_BOUND_REF (3 * MLKEM_Q / 4) -STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound) - /* Compute one layer of inverse NTT */ -static void invntt_layer(int16_t *r, int len, int layer) +static void invntt_layer(int16_t *r, unsigned len, unsigned layer) __contract__( requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7) @@ -176,23 +171,23 @@ __contract__( assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { - int start, k; + unsigned start, k; /* `layer` is a ghost variable used only in the specification */ ((void)layer); k = MLKEM_N / len - 1; for (start = 0; start < MLKEM_N; start += 2 * len) __loop__( invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)) - invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(start <= MLKEM_N && k <= 127) /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */ invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len)) { - int j; + unsigned j; int16_t zeta = zetas[k--]; for (j = start; j < start + len; j++) __loop__( invariant(start <= j && j <= start + len) - invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(start <= MLKEM_N && k <= 127) invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { int16_t t = r[j]; @@ -211,13 +206,13 @@ void poly_invntt_tomont(poly *p) * and NTT twist. This also brings coefficients down to * absolute value < MLKEM_Q. 
*/ - int j, len, layer; + unsigned j, len, layer; const int16_t f = 1441; int16_t *r = p->coeffs; for (j = 0; j < MLKEM_N; j++) __loop__( - invariant(0 <= j && j <= MLKEM_N) + invariant(j <= MLKEM_N) invariant(array_abs_bound(r, 0, j, MLKEM_Q))) { r[j] = fqmul(r[j], f); @@ -226,24 +221,21 @@ void poly_invntt_tomont(poly *p) /* Run the invNTT layers */ for (len = 2, layer = 7; len <= 128; len <<= 1, layer--) __loop__( - invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer))) + invariant(2 <= len && len <= 256 && layer <= 7 && len == (1 << (8 - layer))) invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { invntt_layer(p->coeffs, len, layer); } - POLY_BOUND_MSG(p, INVNTT_BOUND_REF, "ref intt output"); + debug_assert_abs_bound(p, MLKEM_N, INVNTT_BOUND); } #else /* MLKEM_USE_NATIVE_INTT */ -/* Check that bound for native invNTT implies contractual bound */ -STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound) - MLKEM_NATIVE_INTERNAL_API void poly_invntt_tomont(poly *p) { intt_native(p); - POLY_BOUND_MSG(p, INVNTT_BOUND_NATIVE, "native intt output"); + debug_assert_abs_bound(p, MLKEM_N, INVNTT_BOUND); } #endif /* MLKEM_USE_NATIVE_INTT */ @@ -252,8 +244,7 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t b_cached) { int32_t t0, t1; - - BOUND(a, 2, 4096, "basemul input bound"); + debug_assert_bound(a, 2, 0, UINT12_LIMIT); t0 = (int32_t)a[1] * b_cached; t0 += (int32_t)a[0] * b[0]; @@ -264,5 +255,12 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], r[0] = montgomery_reduce(t0); r[1] = montgomery_reduce(t1); - BOUND(r, 2, 2 * MLKEM_Q, "basemul output bound"); + debug_assert_abs_bound(r, 2, 2 * MLKEM_Q); } + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_ntt MLKEM_NAMESPACE_K(empty_cu_ntt) +int empty_cu_ntt; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/ntt.h 
b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/ntt.h index 5592bb9a2..4e80d3ab3 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/ntt.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/ntt.h @@ -4,10 +4,10 @@ */ #ifndef NTT_H #define NTT_H +#include "common.h" #include <stdint.h> #include "cbmc.h" -#include "common.h" #include "poly.h" #include "reduce.h" @@ -81,7 +81,7 @@ __contract__( * Upon return, coefficients are bound by * 2*MLKEM_Q in absolute value. * - a: Pointer to first input polynomial - * Must be coefficient-wise < 4096 in absolute value. + * Every coefficient must be in [0..4095] * - b: Pointer to second input polynomial * Can have arbitrary int16_t coefficients * - b_cached: Some precomputed value, typically derived from @@ -99,5 +99,4 @@ __contract__( ensures(array_abs_bound(r, 0, 2, 2 * MLKEM_Q)) ); - -#endif +#endif /* NTT_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/params.h index fa751f977..57ea4c8ba 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/params.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/params.h @@ -25,23 +25,34 @@ #define MLKEM_POLYBYTES 384 #define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES) +#define MLKEM_POLYCOMPRESSEDBYTES_D4 128 +#define MLKEM_POLYCOMPRESSEDBYTES_D5 160 +#define MLKEM_POLYCOMPRESSEDBYTES_D10 320 +#define MLKEM_POLYCOMPRESSEDBYTES_D11 352 + #if MLKEM_K == 2 #define MLKEM_LVL 512 #define MLKEM_ETA1 3 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_DU 10 +#define MLKEM_DV 4 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #elif MLKEM_K == 3 #define MLKEM_LVL 768 #define MLKEM_ETA1 2 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_DU 10 +#define 
MLKEM_DV 4 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #elif MLKEM_K == 4 #define MLKEM_LVL 1024 #define MLKEM_ETA1 2 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 160 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 352 +#define MLKEM_DU 11 +#define MLKEM_DV 5 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D5 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D11 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/poly.c index 5807879df..7483ebf6d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/poly.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/poly.c @@ -2,13 +2,15 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) + #include <stdint.h> #include <string.h> - #include "arith_backend.h" #include "cbd.h" #include "cbmc.h" -#include "debug/debug.h" +#include "debug.h" #include "fips202x4.h" #include "ntt.h" #include "poly.h" @@ -16,50 +18,46 @@ #include "symmetric.h" #include "verify.h" +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 || MLKEM_K == 3) MLKEM_NATIVE_INTERNAL_API -void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) +void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a) { - unsigned j; -#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) - for (j = 0; j < MLKEM_N / 8; j++) - __loop__(invariant(j >= 0 && j <= MLKEM_N / 8)) + unsigned i; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (i = 0; i < MLKEM_N / 8; i++) + __loop__(invariant(i <= MLKEM_N / 8)) { - unsigned k; - uint16_t t[8]; - for (k = 0; k < 8; k++) + unsigned j; + uint8_t t[8] 
= {0}; + for (j = 0; j < 8; j++) __loop__( - invariant(k >= 0 && k <= 8) - invariant(forall(r, 0, k, t[r] < (1u << 11)))) + invariant(i <= MLKEM_N / 8 && j <= 8) + invariant(array_bound(t, 0, j, 0, 16))) { - t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); } - /* - * Make all implicit truncation explicit. No data is being - * truncated for the LHS's since each t[i] is 11-bit in size. - */ - r[11 * j + 0] = (t[0] >> 0) & 0xFF; - r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); - r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); - r[11 * j + 3] = (t[2] >> 2) & 0xFF; - r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); - r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); - r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); - r[11 * j + 7] = (t[5] >> 1) & 0xFF; - r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); - r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); - r[11 * j + 10] = (t[7] >> 3); + r[i * 4] = t[0] | (t[1] << 4); + r[i * 4 + 1] = t[2] | (t[3] << 4); + r[i * 4 + 2] = t[4] | (t[5] << 4); + r[i * 4 + 3] = t[6] | (t[7] << 4); } +} -#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a) +{ + unsigned j; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (j = 0; j < MLKEM_N / 4; j++) - __loop__(invariant(j >= 0 && j <= MLKEM_N / 4)) + __loop__(invariant(j <= MLKEM_N / 4)) { unsigned k; uint16_t t[4]; for (k = 0; k < 4; k++) __loop__( - invariant(k >= 0 && k <= 4) + invariant(k <= 4) invariant(forall(r, 0, k, t[r] < (1u << 10)))) { t[k] = scalar_compress_d10(a->coeffs[4 * j + k]); @@ -75,51 +73,35 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) r[5 * j + 3] = (t[2] >> 4) | ((t[3] << 6) & 0xFF); r[5 * j + 4] = (t[3] >> 2); } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" -#endif } - MLKEM_NATIVE_INTERNAL_API -void poly_decompress_du(poly *r, const 
uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]) { - unsigned j; -#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) - for (j = 0; j < MLKEM_N / 8; j++) + unsigned i; + for (i = 0; i < MLKEM_N / 2; i++) __loop__( - invariant(0 <= j && j <= MLKEM_N / 8) - invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q))) + invariant(i <= MLKEM_N / 2) + invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q))) { - int k; - uint16_t t[8]; - uint8_t const *base = &a[11 * j]; - t[0] = 0x7FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8)); - t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); - t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | - ((uint16_t)base[4] << 10)); - t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); - t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); - t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | - ((uint16_t)base[8] << 9)); - t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); - t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); - - for (k = 0; k < 8; k++) - __loop__( - invariant(0 <= k && k <= 8) - invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q))) - { - r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); - } + r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); + r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} + +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d10(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]) +{ + unsigned j; for (j = 0; j < MLKEM_N / 4; j++) __loop__( - invariant(0 <= j && j <= MLKEM_N / 4) + invariant(j <= MLKEM_N / 4) invariant(array_bound(r->coeffs, 0, 4 * j, 0, MLKEM_Q))) { - int k; + unsigned k; uint16_t t[4]; uint8_t const *base = &a[5 * j]; @@ -130,51 +112,33 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) for (k = 0; 
k < 4; k++) __loop__( - invariant(0 <= k && k <= 4) + invariant(k <= 4) invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, MLKEM_Q))) { r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]); } } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" -#endif + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 \ + || MLKEM_K == 3) */ +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 MLKEM_NATIVE_INTERNAL_API -void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) +void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); -#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) - for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) - { - unsigned j; - uint8_t t[8] = {0}; - for (j = 0; j < 8; j++) - __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) - invariant(array_bound(t, 0, j, 0, 16))) - { - t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); - } - - r[i * 4] = t[0] | (t[1] << 4); - r[i * 4 + 1] = t[2] | (t[3] << 4); - r[i * 4 + 2] = t[4] | (t[5] << 4); - r[i * 4 + 3] = t[6] | (t[7] << 4); - } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + __loop__(invariant(i <= MLKEM_N / 8)) { unsigned j; uint8_t t[8] = {0}; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i <= MLKEM_N / 8 && j <= 8) invariant(array_bound(t, 0, j, 0, 32))) { t[j] = scalar_compress_d5(a->coeffs[8 * i + j]); @@ -191,33 +155,57 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) r[i * 5 + 3] = 0xFF & ((t[4] >> 4) | (t[5] << 1) | (t[6] << 6)); r[i * 5 + 4] = 0xFF & ((t[6] >> 2) | (t[7] << 3)); } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 
160}" -#endif } MLKEM_NATIVE_INTERNAL_API -void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a) { - unsigned i; -#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) - for (i = 0; i < MLKEM_N / 2; i++) - __loop__( - invariant(i >= 0 && i <= MLKEM_N / 2) - invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q))) + unsigned j; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (j = 0; j < MLKEM_N / 8; j++) + __loop__(invariant(j <= MLKEM_N / 8)) { - r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); - r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); + unsigned k; + uint16_t t[8]; + for (k = 0; k < 8; k++) + __loop__( + invariant(k <= 8) + invariant(forall(r, 0, k, t[r] < (1u << 11)))) + { + t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + } + + /* + * Make all implicit truncation explicit. No data is being + * truncated for the LHS's since each t[i] is 11-bit in size. 
+ */ + r[11 * j + 0] = (t[0] >> 0) & 0xFF; + r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); + r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); + r[11 * j + 3] = (t[2] >> 2) & 0xFF; + r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); + r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); + r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); + r[11 * j + 7] = (t[5] >> 1) & 0xFF; + r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); + r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); + r[11 * j + 10] = (t[7] >> 3); } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) +} + +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]) +{ + unsigned i; for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q))) { unsigned j; uint8_t t[8]; - const int offset = i * 5; + const unsigned offset = i * 5; /* * Explicitly truncate to avoid warning about * implicit truncation in CBMC and unwind loop for ease @@ -240,29 +228,62 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) /* and copy to the correct slice in r[] */ for (j = 0; j < 8; j++) __loop__( - invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8) + invariant(j <= 8 && i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q))) { r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]); } } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 160}" -#endif - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d11(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]) +{ + unsigned j; + for (j = 0; j < MLKEM_N / 8; j++) + __loop__( + invariant(j <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q))) + { + unsigned k; + uint16_t t[8]; + uint8_t const *base = &a[11 * j]; + t[0] = 0x7FF & ((base[0] >> 0) | 
((uint16_t)base[1] << 8)); + t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); + t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | + ((uint16_t)base[4] << 10)); + t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); + t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); + t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | + ((uint16_t)base[8] << 9)); + t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); + t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); + + for (k = 0; k < 8; k++) + __loop__( + invariant(k <= 8) + invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q))) + { + r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); + } + } + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 */ + #if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES) MLKEM_NATIVE_INTERNAL_API void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); - + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_N / 2; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 2)) + __loop__(invariant(i <= MLKEM_N / 2)) { const uint16_t t0 = a->coeffs[2 * i]; const uint16_t t1 = a->coeffs[2 * i + 1]; @@ -290,7 +311,7 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) MLKEM_NATIVE_INTERNAL_API void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) { - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); poly_tobytes_native(r, a); } #endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */ @@ -302,7 +323,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) unsigned i; for (i = 0; i < MLKEM_N / 2; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 2) + invariant(i <= MLKEM_N / 2) invariant(array_bound(r->coeffs, 0, 2 * i, 0, UINT12_LIMIT))) { const uint8_t t0 = a[3 * i + 0]; @@ -313,7 +334,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) } /* Note that the coefficients are not 
canonical */ - POLY_UBOUND(r, 4096); + debug_assert_bound(r, MLKEM_N, 0, UINT12_LIMIT); } #else /* MLKEM_USE_NATIVE_POLY_FROMBYTES */ MLKEM_NATIVE_INTERNAL_API @@ -333,13 +354,13 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q))) { unsigned j; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i < MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i < MLKEM_N / 8 && j <= 8) invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q))) { /* Prevent the compiler from recognizing this as a bit selection */ @@ -347,23 +368,23 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) r->coeffs[8 * i + j] = ct_sel_int16(HALF_Q, 0, msg[i] & mask); } } - POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output"); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + __loop__(invariant(i <= MLKEM_N / 8)) { unsigned j; msg[i] = 0; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)) + invariant(i <= MLKEM_N / 8 && j <= 8)) { uint32_t t = scalar_compress_d1(a->coeffs[8 * i + j]); msg[i] |= t << j; @@ -371,104 +392,17 @@ void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) } } -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, - uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) -{ - ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN 
uint8_t extkey0[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1]; - memcpy(extkey0, seed, MLKEM_SYMBYTES); - memcpy(extkey1, seed, MLKEM_SYMBYTES); - memcpy(extkey2, seed, MLKEM_SYMBYTES); - memcpy(extkey3, seed, MLKEM_SYMBYTES); - extkey0[MLKEM_SYMBYTES] = nonce0; - extkey1[MLKEM_SYMBYTES] = nonce1; - extkey2[MLKEM_SYMBYTES] = nonce2; - extkey3[MLKEM_SYMBYTES] = nonce3; - prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3); - poly_cbd_eta1(r0, buf0); - poly_cbd_eta1(r1, buf1); - poly_cbd_eta1(r2, buf2); - poly_cbd_eta1(r3, buf3); - - POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0"); - POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 1"); - POLY_BOUND_MSG(r2, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 2"); - POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3"); -} - -#if MLKEM_K == 2 || MLKEM_K == 4 -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce) -{ - ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; - ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1]; - - memcpy(extkey, seed, MLKEM_SYMBYTES); - extkey[MLKEM_SYMBYTES] = nonce; - prf_eta2(buf, extkey); - - poly_cbd_eta2(r, buf); - - POLY_BOUND_MSG(r, MLKEM_ETA1 + 1, "poly_getnoise_eta2 output"); -} -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ - -#if MLKEM_K == 2 -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, - uint8_t nonce3) -{ - ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; - ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; - memcpy(extkey[0], seed, MLKEM_SYMBYTES); - memcpy(extkey[1], seed, MLKEM_SYMBYTES); - memcpy(extkey[2], seed, MLKEM_SYMBYTES); - 
memcpy(extkey[3], seed, MLKEM_SYMBYTES); - extkey[0][MLKEM_SYMBYTES] = nonce0; - extkey[1][MLKEM_SYMBYTES] = nonce1; - extkey[2][MLKEM_SYMBYTES] = nonce2; - extkey[3][MLKEM_SYMBYTES] = nonce3; - - prf_eta1(buf1[0], extkey[0]); - prf_eta1(buf1[1], extkey[1]); - prf_eta2(buf2[0], extkey[2]); - prf_eta2(buf2[1], extkey[3]); - - poly_cbd_eta1(r0, buf1[0]); - poly_cbd_eta1(r1, buf1[1]); - poly_cbd_eta2(r2, buf2[0]); - poly_cbd_eta2(r3, buf2[1]); - - POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 0"); - POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 1"); - POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2"); - POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 3"); -} -#endif /* MLKEM_K == 2 */ - MLKEM_NATIVE_INTERNAL_API void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, const poly_mulcache *b_cache) { unsigned i; - POLY_BOUND(b_cache, 4096); + debug_assert_bound(a, MLKEM_N, 0, UINT12_LIMIT); for (i = 0; i < MLKEM_N / 4; i++) __loop__( assigns(i, object_whole(r)) - invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(i <= MLKEM_N / 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i, 2 * MLKEM_Q))) { basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i], @@ -476,6 +410,8 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, basemul_cached(&r->coeffs[4 * i + 2], &a->coeffs[4 * i + 2], &b->coeffs[4 * i + 2], b_cache->coeffs[2 * i + 1]); } + + debug_assert_abs_bound(r, MLKEM_N, 2 * MLKEM_Q); } #if !defined(MLKEM_USE_NATIVE_POLY_TOMONT) @@ -486,20 +422,20 @@ void poly_tomont(poly *r) const int16_t f = (1ULL << 32) % MLKEM_Q; /* 1353 */ for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) - invariant(array_abs_bound(r->coeffs ,0, i, MLKEM_Q))) + invariant(i <= MLKEM_N) + invariant(array_abs_bound(r->coeffs, 0, i, MLKEM_Q))) { r->coeffs[i] = fqmul(r->coeffs[i], f); } - POLY_BOUND(r, MLKEM_Q); + 
debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_TOMONT */ MLKEM_NATIVE_INTERNAL_API void poly_tomont(poly *r) { poly_tomont_native(r); - POLY_BOUND(r, MLKEM_Q); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } #endif /* MLKEM_USE_NATIVE_POLY_TOMONT */ @@ -510,7 +446,7 @@ void poly_reduce(poly *r) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(array_bound(r->coeffs, 0, i, 0, MLKEM_Q))) { /* Barrett reduction, giving signed canonical representative */ @@ -519,14 +455,14 @@ void poly_reduce(poly *r) r->coeffs[i] = scalar_signed_to_unsigned_q(t); } - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_REDUCE */ MLKEM_NATIVE_INTERNAL_API void poly_reduce(poly *r) { poly_reduce_native(r); - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } #endif /* MLKEM_USE_NATIVE_POLY_REDUCE */ @@ -536,7 +472,7 @@ void poly_add(poly *r, const poly *b) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1]))) { @@ -550,7 +486,7 @@ void poly_sub(poly *r, const poly *b) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1]))) { @@ -564,20 +500,36 @@ void poly_mulcache_compute(poly_mulcache *x, const poly *a) { unsigned i; for (i = 0; i < MLKEM_N / 4; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 4)) + __loop__( + invariant(i <= MLKEM_N / 4) + invariant(array_abs_bound(x->coeffs, 0, 2 * i, MLKEM_Q))) { x->coeffs[2 * i + 0] = fqmul(a->coeffs[4 * i + 1], zetas[64 + 
i]); x->coeffs[2 * i + 1] = fqmul(a->coeffs[4 * i + 3], -zetas[64 + i]); } - POLY_BOUND(x, MLKEM_Q); + + /* + * This bound is true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus omitted + * from the spec to not unnecessarily constrain native + * implementations, but checked here nonetheless. + */ + debug_assert_abs_bound(x, MLKEM_N / 2, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ MLKEM_NATIVE_INTERNAL_API void poly_mulcache_compute(poly_mulcache *x, const poly *a) { poly_mulcache_compute_native(x, a); - /* Omitting POLY_BOUND(x, MLKEM_Q) since native implementations may + /* Omitting bounds assertion since native implementations may * decide not to use a mulcache. Note that the C backend implementation * of poly_basemul_montgomery_cached() does still include the check. */ } #endif /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_poly MLKEM_NAMESPACE_K(empty_cu_poly) +int empty_cu_poly; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/poly.h index 1e8c109c6..6a14c785d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/poly.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/poly.h @@ -307,112 +307,164 @@ __contract__( ************************************************************/ static INLINE uint16_t scalar_signed_to_unsigned_q(int16_t c) __contract__( - requires(c >= -(MLKEM_Q - 1) && c <= (MLKEM_Q - 1)) - ensures(return_value >= 0 && return_value <= (MLKEM_Q - 1)) + requires(c > -MLKEM_Q && c < MLKEM_Q) + ensures(return_value >= 0 && return_value < MLKEM_Q) ensures(return_value == (int32_t)c + (((int32_t)c < 0) * MLKEM_Q))) { + debug_assert_abs_bound(&c, 1, MLKEM_Q); + /* Add Q if c is negative, but in constant time */ c = ct_sel_int16(c + MLKEM_Q, c, ct_cmask_neg_i16(c)); - cassert(c >= 0, 
"scalar_signed_to_unsigned_q result lower bound"); - cassert(c < MLKEM_Q, "scalar_signed_to_unsigned_q result upper bound"); - /* and therefore cast to uint16_t is safe. */ + debug_assert_bound(&c, 1, 0, MLKEM_Q); return (uint16_t)c; } -#define poly_compress_du MLKEM_NAMESPACE(poly_compress_du) +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || \ + (MLKEM_K == 2 || MLKEM_K == 3) +#define poly_compress_d4 MLKEM_NAMESPACE(poly_compress_d4) /************************************************* - * Name: poly_compress_du + * Name: poly_compress_d4 * - * Description: Compression (du bits) and subsequent serialization of a - *polynomial + * Description: Compression (4 bits) and subsequent serialization of a + * polynomial * * Arguments: - uint8_t *r: pointer to output byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes) * - const poly *a: pointer to input polynomial * Coefficients must be unsigned canonical, * i.e. in [0,1,..,MLKEM_Q-1]. **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) -__contract__( - requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) - requires(memory_no_alias(a, sizeof(poly))) - requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) -); +void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a); + +#define poly_compress_d10 MLKEM_NAMESPACE(poly_compress_d10) +/************************************************* + * Name: poly_compress_d10 + * + * Description: Compression (10 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a); -#define poly_decompress_du MLKEM_NAMESPACE(poly_decompress_du) +#define poly_decompress_d4 MLKEM_NAMESPACE(poly_decompress_d4) /************************************************* - * Name: poly_decompress_du + * Name: poly_decompress_d4 * - * Description: De-serialization and subsequent decompression (du bits) of a - *polynomial; approximate inverse of poly_compress_du + * Description: De-serialization and subsequent decompression (4 bits) of a + * polynomial; approximate inverse of poly_compress_d4 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *a: pointer to input byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES bytes) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes) * * Upon return, the coefficients of the output polynomial are unsigned-canonical * (non-negative and smaller than MLKEM_Q). * **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) -__contract__( - requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) - requires(memory_no_alias(r, sizeof(poly))) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) -); +void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]); -#define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv) +#define poly_decompress_d10 MLKEM_NAMESPACE(poly_decompress_d10) /************************************************* - * Name: poly_compress_dv + * Name: poly_decompress_d10 + * + * Description: De-serialization and subsequent decompression (10 bits) of a + * polynomial; approximate inverse of poly_compress_d10 * - * Description: Compression (dv bits) and subsequent serialization of a - *polynomial + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t 
*a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d10(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]); +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 \ + || MLKEM_K == 3) */ + +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 +#define poly_compress_d5 MLKEM_NAMESPACE(poly_compress_d5) +/************************************************* + * Name: poly_compress_d5 + * + * Description: Compression (5 bits) and subsequent serialization of a + * polynomial * * Arguments: - uint8_t *r: pointer to output byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES_DV) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes) * - const poly *a: pointer to input polynomial * Coefficients must be unsigned canonical, * i.e. in [0,1,..,MLKEM_Q-1]. 
**************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) -__contract__( - requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) - requires(memory_no_alias(a, sizeof(poly))) - requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - assigns(object_whole(r)) -); +void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a); -#define poly_decompress_dv MLKEM_NAMESPACE(poly_decompress_dv) +#define poly_compress_d11 MLKEM_NAMESPACE(poly_compress_d11) /************************************************* - * Name: poly_decompress_dv + * Name: poly_compress_d11 + * + * Description: Compression (11 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a); + +#define poly_decompress_d5 MLKEM_NAMESPACE(poly_decompress_d5) +/************************************************* + * Name: poly_decompress_d5 * * Description: De-serialization and subsequent decompression (dv bits) of a - *polynomial; approximate inverse of poly_compress + * polynomial; approximate inverse of poly_compress * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *a: pointer to input byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES_DV - *bytes) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes) * * Upon return, the coefficients of the output polynomial are unsigned-canonical * (non-negative and smaller than MLKEM_Q). 
* **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) -__contract__( - requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) - requires(memory_no_alias(r, sizeof(poly))) - assigns(object_whole(r)) - ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) -); +void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]); + +#define poly_decompress_d11 MLKEM_NAMESPACE(poly_decompress_d11) +/************************************************* + * Name: poly_decompress_d11 + * + * Description: De-serialization and subsequent decompression (11 bits) of a + * polynomial; approximate inverse of poly_compress_d11 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d11(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]); +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 \ + */ #define poly_tobytes MLKEM_NAMESPACE(poly_tobytes) /************************************************* @@ -500,144 +552,6 @@ __contract__( assigns(object_whole(msg)) ); -#define poly_getnoise_eta1_4x MLKEM_NAMESPACE(poly_getnoise_eta1_4x) -/************************************************* - * Name: poly_getnoise_eta1_4x - * - * Description: Batch sample four polynomials deterministically from a seed - * and nonces, with output polynomials close to centered binomial distribution - * with parameter MLKEM_ETA1. 
- * - * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce{0,1,2,3}: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, - uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) -/* Depending on MLKEM_K, the pointers passed to this function belong - to the same objects, so we cannot use memory_no_alias for r0-r3. - - NOTE: Somehow it is important to use memory_no_alias() first in the - conjunctions defining each case. -*/ -#if MLKEM_K == 2 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ - (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && - r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#elif MLKEM_K == 4 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case B: r0, r1, r2, r3 consecutive */ - (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 
1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#elif MLKEM_K == 3 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case C: r0, r1, r2 consecutive */ - (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && - r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#endif /* MLKEM_K */ - -#if MLKEM_ETA1 == MLKEM_ETA2 -/* - * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 - * where MLKEM_ETA2 = MLKEM_ETA1 = 2. - * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. - */ -#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x -#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ - -#if MLKEM_K == 2 || MLKEM_K == 4 -#define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2) -/************************************************* - * Name: poly_getnoise_eta2 - * - * Description: Sample a polynomial deterministically from a seed and a nonce, - * with output polynomial close to centered binomial distribution - * with parameter MLKEM_ETA2 - * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - assigns(object_whole(r)) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, 
MLKEM_ETA2 + 1)) -); -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ - -#if MLKEM_K == 2 -#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x) -/************************************************* - * Name: poly_getnoise_eta1122_4x - * - * Description: Batch sample four polynomials deterministically from a seed - * and a nonces, with output polynomials close to centered binomial - * distribution with parameter MLKEM_ETA1 and MLKEM_ETA2 - * - * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce{0,1,2,3}: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, - uint8_t nonce3) -__contract__( - requires( /* r0, r1 consecutive, r2, r3 consecutive */ - (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && - r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) - ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)); -); -#endif /* MLKEM_K == 2 */ - #define poly_basemul_montgomery_cached \ MLKEM_NAMESPACE(poly_basemul_montgomery_cached) /************************************************* @@ -649,8 +563,7 @@ __contract__( * Bounds: * - a is assumed to be coefficient-wise < q in absolute value. * - * The result is coefficient-wise bound by 3/2 q in absolute - * value. + * The result is coefficient-wise bound by 2*q in absolute value. 
* * Arguments: - poly *r: pointer to output polynomial * - const poly *a: pointer to first input polynomial @@ -802,4 +715,4 @@ __contract__( assigns(object_whole(r)) ); -#endif +#endif /* POLY_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/polyvec.c index 7d2016773..50ea1c34a 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/polyvec.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/polyvec.c @@ -4,18 +4,29 @@ */ #include "polyvec.h" #include +#include #include "arith_backend.h" +#include "cbd.h" #include "ntt.h" #include "poly.h" +#include "symmetric.h" -#include "debug/debug.h" +#include "debug.h" + +/* Static namespacing + * This is to facilitate building multiple instances + * of mlkem-native (e.g. with varying security levels) + * within a single compilation unit. */ +#define poly_cbd_eta1 MLKEM_NAMESPACE_K(poly_cbd_eta1) +#define poly_cbd_eta2 MLKEM_NAMESPACE_K(poly_cbd_eta2) +/* End of static namespacing */ MLKEM_NATIVE_INTERNAL_API void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU], const polyvec *a) { unsigned i; - POLYVEC_UBOUND(a, MLKEM_Q); + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_K; i++) { @@ -33,13 +44,15 @@ void polyvec_decompress_du(polyvec *r, poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU); } - POLYVEC_UBOUND(r, MLKEM_Q); + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a) { unsigned i; + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q); + for (i = 0; i < MLKEM_K; i++) { poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]); @@ -54,6 +67,8 @@ void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES]) { poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES); } + + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); } MLKEM_NATIVE_INTERNAL_API @@ -64,6 +79,8 @@ 
void polyvec_ntt(polyvec *r) { poly_ntt(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, NTT_BOUND); } MLKEM_NATIVE_INTERNAL_API @@ -74,6 +91,8 @@ void polyvec_invntt_tomont(polyvec *r) { poly_invntt_tomont(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, INVNTT_BOUND); } #if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) @@ -84,10 +103,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, { unsigned i; poly t; - - POLYVEC_BOUND(a, 4096); - POLYVEC_BOUND(b, NTT_BOUND); - POLYVEC_BOUND(b_cache, MLKEM_Q); + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); poly_basemul_montgomery_cached(r, &a->vec[0], &b->vec[0], &b_cache->vec[0]); for (i = 1; i < MLKEM_K; i++) @@ -95,18 +111,15 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, poly_basemul_montgomery_cached(&t, &a->vec[i], &b->vec[i], &b_cache->vec[i]); poly_add(r, &t); - /* abs bounds: < (i+1) * 3/2 * q */ } /* - * Those bounds are true for the C implementation, but not needed - * in the higher level bounds reasoning. It is thus best to omit - * them from the spec to not unnecessarily constraint native implementations. + * This bound is true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus omitted + * from the spec to not unnecessarily constrain native + * implementations, but checked here nonetheless. 
*/ - cassert(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_K * 2 * MLKEM_Q), - "polyvec_basemul_acc_montgomery_cached output bounds"); - /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */ - POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_K * 2 * MLKEM_Q); } #else /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */ MLKEM_NATIVE_INTERNAL_API @@ -114,9 +127,8 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, const polyvec *b, const polyvec_mulcache *b_cache) { - POLYVEC_BOUND(a, 4096); - POLYVEC_BOUND(b, NTT_BOUND); - /* Omitting POLYVEC_BOUND(b_cache, MLKEM_Q) since native implementations may + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); + /* Omitting bounds assertion for cache since native implementations may * decide not to use a mulcache. Note that the C backend implementation * of poly_basemul_montgomery_cached() does still include the check. */ polyvec_basemul_acc_montgomery_cached_native(r, a, b, b_cache); @@ -149,6 +161,8 @@ void polyvec_reduce(polyvec *r) { poly_reduce(&r->vec[i]); } + + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API @@ -169,4 +183,148 @@ void polyvec_tomont(polyvec *r) { poly_tomont(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, MLKEM_Q); +} + + +/************************************************* + * Name: poly_cbd_eta1 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA1.
+ * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static INLINE void poly_cbd_eta1(poly *r, + const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)) +) +{ +#if MLKEM_ETA1 == 2 + poly_cbd2(r, buf); +#elif MLKEM_ETA1 == 3 + poly_cbd3(r, buf); +#else +#error "Invalid value of MLKEM_ETA1" +#endif +} + +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +{ + ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t extkey0[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1]; + memcpy(extkey0, seed, MLKEM_SYMBYTES); + memcpy(extkey1, seed, MLKEM_SYMBYTES); + memcpy(extkey2, seed, MLKEM_SYMBYTES); + memcpy(extkey3, seed, MLKEM_SYMBYTES); + extkey0[MLKEM_SYMBYTES] = nonce0; + extkey1[MLKEM_SYMBYTES] = nonce1; + extkey2[MLKEM_SYMBYTES] = nonce2; + extkey3[MLKEM_SYMBYTES] = nonce3; + prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3); + poly_cbd_eta1(r0, buf0); + poly_cbd_eta1(r1, buf1); + poly_cbd_eta1(r2, buf2); + poly_cbd_eta1(r3, buf3); + + debug_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA1 + 1); +} + +#if MLKEM_K == 2 || MLKEM_K == 4 
+/************************************************* + * Name: poly_cbd_eta2 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA2. + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static INLINE void poly_cbd_eta2(poly *r, + const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1))) +{ +#if MLKEM_ETA2 == 2 + poly_cbd2(r, buf); +#else +#error "Invalid value of MLKEM_ETA2" +#endif +} + +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +{ + ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1]; + + memcpy(extkey, seed, MLKEM_SYMBYTES); + extkey[MLKEM_SYMBYTES] = nonce; + prf_eta2(buf, extkey); + + poly_cbd_eta2(r, buf); + + debug_assert_abs_bound(r, MLKEM_N, MLKEM_ETA2 + 1); +} +#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ + + +#if MLKEM_K == 2 +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +{ + ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; + memcpy(extkey[0], seed, MLKEM_SYMBYTES); + memcpy(extkey[1], seed, MLKEM_SYMBYTES); + memcpy(extkey[2], seed, MLKEM_SYMBYTES); + memcpy(extkey[3], seed, MLKEM_SYMBYTES); + extkey[0][MLKEM_SYMBYTES] = nonce0; + extkey[1][MLKEM_SYMBYTES] = nonce1; + extkey[2][MLKEM_SYMBYTES] = nonce2; +
extkey[3][MLKEM_SYMBYTES] = nonce3; + + prf_eta1(buf1[0], extkey[0]); + prf_eta1(buf1[1], extkey[1]); + prf_eta2(buf2[0], extkey[2]); + prf_eta2(buf2[1], extkey[3]); + + poly_cbd_eta1(r0, buf1[0]); + poly_cbd_eta1(r1, buf1[1]); + poly_cbd_eta2(r2, buf2[0]); + poly_cbd_eta2(r3, buf2[1]); + + debug_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA2 + 1); + debug_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA2 + 1); } +#endif /* MLKEM_K == 2 */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/polyvec.h index 138724150..8be8579e0 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/polyvec.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/polyvec.h @@ -9,19 +9,144 @@ #include "common.h" #include "poly.h" -#define polyvec MLKEM_NAMESPACE(polyvec) +#define polyvec MLKEM_NAMESPACE_K(polyvec) typedef struct { poly vec[MLKEM_K]; } ALIGN polyvec; -#define polyvec_mulcache MLKEM_NAMESPACE(polyvec_mulcache) +#define polyvec_mulcache MLKEM_NAMESPACE_K(polyvec_mulcache) typedef struct { poly_mulcache vec[MLKEM_K]; } polyvec_mulcache; -#define polyvec_compress_du MLKEM_NAMESPACE(polyvec_compress_du) +#define poly_compress_du MLKEM_NAMESPACE_K(poly_compress_du) +/************************************************* + * Name: poly_compress_du + * + * Description: Compression (du bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +static INLINE void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], + const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU))) +{ +#if MLKEM_DU == 10 + poly_compress_d10(r, a); +#elif MLKEM_DU == 11 + poly_compress_d11(r, a); +#else +#error "Invalid value of MLKEM_DU" +#endif +} + +#define poly_decompress_du MLKEM_NAMESPACE_K(poly_decompress_du) +/************************************************* + * Name: poly_decompress_du + * + * Description: De-serialization and subsequent decompression (du bits) of a + * polynomial; approximate inverse of poly_compress_du + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). 
+ * + **************************************************/ +static INLINE void poly_decompress_du( + poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +{ +#if MLKEM_DU == 10 + poly_decompress_d10(r, a); +#elif MLKEM_DU == 11 + poly_decompress_d11(r, a); +#else +#error "Invalid value of MLKEM_DU" +#endif +} + +#define poly_compress_dv MLKEM_NAMESPACE_K(poly_compress_dv) +/************************************************* + * Name: poly_compress_dv + * + * Description: Compression (dv bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +static INLINE void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], + const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(object_whole(r))) +{ +#if MLKEM_DV == 4 + poly_compress_d4(r, a); +#elif MLKEM_DV == 5 + poly_compress_d5(r, a); +#else +#error "Invalid value of MLKEM_DV" +#endif +} + + +#define poly_decompress_dv MLKEM_NAMESPACE_K(poly_decompress_dv) +/************************************************* + * Name: poly_decompress_dv + * + * Description: De-serialization and subsequent decompression (dv bits) of a + * polynomial; approximate inverse of poly_compress_dv + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). 
+ * + **************************************************/ +static INLINE void poly_decompress_dv( + poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(object_whole(r)) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +{ +#if MLKEM_DV == 4 + poly_decompress_d4(r, a); +#elif MLKEM_DV == 5 + poly_decompress_d5(r, a); +#else +#error "Invalid value of MLKEM_DV" +#endif +} + +#define polyvec_compress_du MLKEM_NAMESPACE_K(polyvec_compress_du) /************************************************* * Name: polyvec_compress_du * @@ -44,7 +169,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_decompress_du MLKEM_NAMESPACE(polyvec_decompress_du) +#define polyvec_decompress_du MLKEM_NAMESPACE_K(polyvec_decompress_du) /************************************************* * Name: polyvec_decompress_du * @@ -67,7 +192,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) ); -#define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes) +#define polyvec_tobytes MLKEM_NAMESPACE_K(polyvec_tobytes) /************************************************* * Name: polyvec_tobytes * @@ -88,7 +213,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_frombytes MLKEM_NAMESPACE(polyvec_frombytes) +#define polyvec_frombytes MLKEM_NAMESPACE_K(polyvec_frombytes) /************************************************* * Name: polyvec_frombytes * @@ -110,7 +235,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, UINT12_LIMIT))) ); -#define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt) +#define polyvec_ntt MLKEM_NAMESPACE_K(polyvec_ntt) /************************************************* * Name: polyvec_ntt * @@ -136,7 +261,7 @@ __contract__( array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, NTT_BOUND))) ); -#define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont) +#define polyvec_invntt_tomont 
MLKEM_NAMESPACE_K(polyvec_invntt_tomont) /************************************************* * Name: polyvec_invntt_tomont * @@ -162,7 +287,7 @@ __contract__( ); #define polyvec_basemul_acc_montgomery \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery) /************************************************* * Name: polyvec_basemul_acc_montgomery * @@ -186,7 +311,7 @@ __contract__( #define polyvec_basemul_acc_montgomery_cached \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached) /************************************************* * Name: polyvec_basemul_acc_montgomery_cached * @@ -194,7 +319,7 @@ __contract__( * using mulcache for second operand. * * Bounds: - * - a is assumed to be coefficient-wise < 4096 in absolute value. + * - Every coefficient of a is assumed to be in [0..4095] * - No bounds guarantees for the coefficients in the result. * * Arguments: - poly *r: pointer to output polynomial @@ -218,7 +343,7 @@ __contract__( assigns(memory_slice(r, sizeof(poly))) ); -#define polyvec_mulcache_compute MLKEM_NAMESPACE(polyvec_mulcache_compute) +#define polyvec_mulcache_compute MLKEM_NAMESPACE_K(polyvec_mulcache_compute) /************************************************************ * Name: polyvec_mulcache_compute * @@ -252,7 +377,7 @@ __contract__( assigns(object_whole(x)) ); -#define polyvec_reduce MLKEM_NAMESPACE(polyvec_reduce) +#define polyvec_reduce MLKEM_NAMESPACE_K(polyvec_reduce) /************************************************* * Name: polyvec_reduce * @@ -278,7 +403,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) ); -#define polyvec_add MLKEM_NAMESPACE(polyvec_add) +#define polyvec_add MLKEM_NAMESPACE_K(polyvec_add) /************************************************* * Name: polyvec_add * @@ -309,7 +434,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_tomont MLKEM_NAMESPACE(polyvec_tomont) +#define 
polyvec_tomont MLKEM_NAMESPACE_K(polyvec_tomont) /************************************************* * Name: polyvec_tomont * @@ -329,4 +454,142 @@ __contract__( array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, MLKEM_Q))) ); +#define poly_getnoise_eta1_4x MLKEM_NAMESPACE_K(poly_getnoise_eta1_4x) +/************************************************* + * Name: poly_getnoise_eta1_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial distribution + * with parameter MLKEM_ETA1. + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +/* Depending on MLKEM_K, the pointers passed to this function belong + to the same objects, so we cannot use memory_no_alias for r0-r3. + + NOTE: Somehow it is important to use memory_no_alias() first in the + conjunctions defining each case. 
+*/ +#if MLKEM_K == 2 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#elif MLKEM_K == 4 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case B: r0, r1, r2, r3 consecutive */ + (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#elif MLKEM_K == 3 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case C: r0, r1, r2 consecutive */ + (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && + r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) 
+ && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#endif /* MLKEM_K */ + +#if MLKEM_ETA1 == MLKEM_ETA2 +/* + * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 + * where MLKEM_ETA2 = MLKEM_ETA1 = 2. + * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. + */ +#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x +#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ + +#if MLKEM_K == 2 || MLKEM_K == 4 +#define poly_getnoise_eta2 MLKEM_NAMESPACE_K(poly_getnoise_eta2) +/************************************************* + * Name: poly_getnoise_eta2 + * + * Description: Sample a polynomial deterministically from a seed and a nonce, + * with output polynomial close to centered binomial distribution + * with parameter MLKEM_ETA2 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce: one-byte input nonce + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r)) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)) +); +#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ + +#if MLKEM_K == 2 +#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE_K(poly_getnoise_eta1122_4x) +/************************************************* + * Name: poly_getnoise_eta1122_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial + * distribution with parameter MLKEM_ETA1 and MLKEM_ETA2 + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + 
**************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +__contract__( + requires( /* r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) + ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)); +); +#endif /* MLKEM_K == 2 */ + #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/reduce.h index 1f502167e..b432a4201 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/reduce.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/reduce.h @@ -8,7 +8,7 @@ #include #include "cbmc.h" #include "common.h" -#include "debug/debug.h" +#include "debug.h" /* Static namespacing * This is to facilitate building multiple instances @@ -109,13 +109,13 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a) **************************************************/ static INLINE int16_t montgomery_reduce(int32_t a) __contract__( - requires(a > -(2 * 4096 * 32768)) - requires(a < (2 * 4096 * 32768)) + requires(a > -(2 * UINT12_LIMIT * 32768)) + requires(a < (2 * UINT12_LIMIT * 32768)) ensures(return_value > -2 * MLKEM_Q && return_value < 2 * MLKEM_Q) ) { int16_t res; - SCALAR_BOUND(a, 2 * UINT12_LIMIT * 32768, "montgomery_reduce input"); + debug_assert_abs_bound(&a, 1, 2 * UINT12_LIMIT * 32768); res = montgomery_reduce_generic(a); /* Bounds: @@ -124,7 +124,7 @@ 
__contract__( * <= UINT12_LIMIT + (MLKEM_Q + 1) / 2 * < 2 * MLKEM_Q */ - SCALAR_BOUND(res, 2 * MLKEM_Q, "montgomery_reduce output"); + debug_assert_abs_bound(&res, 1, 2 * MLKEM_Q); return res; } @@ -150,7 +150,7 @@ __contract__( ) { int16_t res; - SCALAR_BOUND(b, HALF_Q, "fqmul input"); + debug_assert_abs_bound(&b, 1, HALF_Q); res = montgomery_reduce((int32_t)a * (int32_t)b); /* Bounds: @@ -160,7 +160,7 @@ __contract__( * < MLKEM_Q */ - SCALAR_BOUND(res, MLKEM_Q, "fqmul output"); + debug_assert_abs_bound(&res, 1, MLKEM_Q); return res; } @@ -200,7 +200,10 @@ __contract__( * t is in -10 .. +10, so we need 32-bit math to * evaluate t * MLKEM_Q and the subsequent subtraction */ - return (int16_t)(a - t * MLKEM_Q); + int16_t res = (int16_t)(a - t * MLKEM_Q); + + debug_assert_abs_bound(&res, 1, HALF_Q); + return res; } #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/rej_uniform.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/rej_uniform.c index 918986e9b..cbbe4407f 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/rej_uniform.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/rej_uniform.c @@ -2,46 +2,24 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) -#include "rej_uniform.h" #include "arith_backend.h" +#include "debug.h" +#include "fips202.h" +#include "fips202x4.h" +#include "rej_uniform.h" +#include "symmetric.h" /* Static namespacing * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. 
*/ +#define rej_uniform MLKEM_NAMESPACE(rej_uniform) #define rej_uniform_scalar MLKEM_NAMESPACE(rej_uniform_scalar) /* End of static namespacing */ -/************************************************* - * Name: rej_uniform_scalar - * - * Description: Run rejection sampling on uniform random bytes to generate - * uniform random integers mod q - * - * Arguments: - int16_t *r: pointer to output buffer - * - unsigned int target: requested number of 16-bit integers - * (uniform mod q). - * Must be <= 4096. - * - unsigned int offset: number of 16-bit integers that have - * already been sampled. - * Must be <= target. - * - const uint8_t *buf: pointer to input buffer - * (assumed to be uniform random bytes) - * - unsigned int buflen: length of input buffer in bytes - * Must be <= 4096. - * Must be a multiple of 3. - * - * Note: Strictly speaking, only a few values of buflen near UINT_MAX need - * excluding. The limit of 4096 is somewhat arbitary but sufficient for all - * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. - * - * Returns the new offset of sampled 16-bit integers, at most target, - * and at least the initial offset. - * If the new offset is strictly less than len, all of the input buffers - * is guaranteed to have been consumed. If it is equal to len, no information - * is provided on how many bytes of the input buffer have been consumed. 
- **************************************************/ static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target, unsigned int offset, const uint8_t *buf, unsigned int buflen) @@ -58,6 +36,8 @@ __contract__( unsigned int ctr, pos; uint16_t val0, val1; + debug_assert_bound(r, offset, 0, MLKEM_Q); + ctr = offset; pos = 0; /* pos + 3 cannot overflow due to the assumption buflen <= 4096 */ @@ -79,28 +59,183 @@ __contract__( r[ctr++] = val1; } } + + debug_assert_bound(r, ctr, 0, MLKEM_Q); return ctr; } #if !defined(MLKEM_USE_NATIVE_REJ_UNIFORM) -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +/************************************************* + * Name: rej_uniform + * + * Description: Run rejection sampling on uniform random bytes to generate + * uniform random integers mod q + * + * Arguments: - int16_t *r: pointer to output buffer + * - unsigned int target: requested number of 16-bit integers + * (uniform mod q). + * Must be <= 4096. + * - unsigned int offset: number of 16-bit integers that have + * already been sampled. + * Must be <= target. + * - const uint8_t *buf: pointer to input buffer + * (assumed to be uniform random bytes) + * - unsigned int buflen: length of input buffer in bytes + * Must be <= 4096. + * Must be a multiple of 3. + * + * Note: Strictly speaking, only a few values of buflen near UINT_MAX need + * excluding. The limit of 4096 is somewhat arbitrary but sufficient for all + * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. + * + * Returns the new offset of sampled 16-bit integers, at most target, + * and at least the initial offset. + * If the new offset is strictly less than target, all of the input buffer + * is guaranteed to have been consumed. If it is equal to target, no information + * is provided on how many bytes of the input buffer have been consumed. 
+ **************************************************/ + +/* + * NOTE: The signature differs from the Kyber reference implementation + * in that it adds the offset and always expects the base of the target + * buffer. This avoids shifting the buffer base in the caller, which appears + * tricky to reason about. + */ +static unsigned int rej_uniform(int16_t *r, unsigned int target, + unsigned int offset, const uint8_t *buf, + unsigned int buflen) +__contract__( + requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) + requires(memory_no_alias(r, sizeof(int16_t) * target)) + requires(memory_no_alias(buf, buflen)) + requires(offset > 0 ==> array_bound(r, 0, offset, 0, MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * target)) + ensures(offset <= return_value && return_value <= target) + ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, MLKEM_Q)) +) { return rej_uniform_scalar(r, target, offset, buf, buflen); } #else /* MLKEM_USE_NATIVE_REJ_UNIFORM */ - -MLKEM_NATIVE_INTERNAL_API -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +static unsigned int rej_uniform(int16_t *r, unsigned int target, + unsigned int offset, const uint8_t *buf, + unsigned int buflen) { int ret; /* Sample from large buffer with full lane as much as possible. 
*/ ret = rej_uniform_native(r + offset, target - offset, buf, buflen); if (ret != -1) - return offset + (unsigned)ret; + { + unsigned res = offset + (unsigned)ret; + debug_assert_bound(r, res, 0, MLKEM_Q); + return res; + } return rej_uniform_scalar(r, target, offset, buf, buflen); } #endif /* MLKEM_USE_NATIVE_REJ_UNIFORM */ + +#ifndef MLKEM_GEN_MATRIX_NBLOCKS +#define MLKEM_GEN_MATRIX_NBLOCKS \ + ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + XOF_RATE) / XOF_RATE) +#endif + +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform_x4(poly *vec, uint8_t *seed[4]) +{ + /* Temporary buffers for XOF output before rejection sampling */ + uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + + /* Tracks the number of coefficients we have already sampled */ + unsigned int ctr[KECCAK_WAY]; + xof_x4_ctx statex; + unsigned int buflen; + + shake128x4_inc_init(&statex); + + /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */ + xof_x4_absorb(&statex, seed[0], seed[1], seed[2], seed[3], + MLKEM_SYMBYTES + 2); + + /* + * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + * This should generate the matrix entries with high probability. + */ + xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, + &statex); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); + + /* + * So long as not all matrix entries have been generated, squeeze + * one more block a time until we're done. 
+ */ + buflen = XOF_RATE; + while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || + ctr[3] < MLKEM_N) + __loop__( + assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), + object_whole(buf1), object_whole(buf2), object_whole(buf3)) + invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) + invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) + invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, MLKEM_Q)) + invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, MLKEM_Q)) + invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, MLKEM_Q)) + invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, MLKEM_Q))) + { + xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); + } + + xof_x4_release(&statex); +} + +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) +{ + xof_ctx state; + uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + unsigned int ctr, buflen; + + shake128_inc_init(&state); + + xof_absorb(&state, seed, MLKEM_SYMBYTES + 2); + + /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + */ + /* This should generate the matrix entry with high probability. 
*/ + xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; + ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); + + /* Squeeze + sample one more block a time until we're done */ + buflen = XOF_RATE; + while (ctr < MLKEM_N) + __loop__( + assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) + invariant(ctr <= MLKEM_N) + invariant(array_bound(entry->coeffs, 0, ctr, 0, MLKEM_Q))) + { + xof_squeezeblocks(buf, 1, &state); + ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen); + } + + xof_release(&state); +} + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_rej_uniform MLKEM_NAMESPACE_K(empty_cu_rej_uniform) +int empty_cu_rej_uniform; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/rej_uniform.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/rej_uniform.h index 13db836bc..801287259 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/rej_uniform.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/rej_uniform.h @@ -9,54 +9,55 @@ #include #include "cbmc.h" #include "common.h" +#include "poly.h" -#define rej_uniform MLKEM_NAMESPACE(rej_uniform) +#define poly_rej_uniform_x4 MLKEM_NAMESPACE(poly_rej_uniform_x4) /************************************************* - * Name: rej_uniform + * Name: poly_rej_uniform_x4 * - * Description: Run rejection sampling on uniform random bytes to generate - * uniform random integers mod q + * Description: Generate four polynomials using rejection sampling + * on (pseudo-)uniformly random bytes sampled from a seed. * - * Arguments: - int16_t *r: pointer to output buffer - * - unsigned int target: requested number of 16-bit integers - * (uniform mod q). - * Must be <= 4096. - * - unsigned int offset: number of 16-bit integers that have - * already been sampled. - * Must be <= target. 
- * - const uint8_t *buf: pointer to input buffer - * (assumed to be uniform random bytes) - * - unsigned int buflen: length of input buffer in bytes - * Must be <= 4096. - * Must be a multiple of 3. + * Arguments: - poly *vec: Pointer to an array of 4 polynomials + * to be sampled. + * - uint8_t *seed[4]: Pointer to array of four pointers + * pointing to the seed buffers of size + * MLKEM_SYMBYTES + 2 each. * - * Note: Strictly speaking, only a few values of buflen near UINT_MAX need - * excluding. The limit of 4096 is somewhat arbitary but sufficient for all - * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. - * - * Returns the new offset of sampled 16-bit integers, at most target, - * and at least the initial offset. - * If the new offset is strictly less than len, all of the input buffers - * is guaranteed to have been consumed. If it is equal to len, no information - * is provided on how many bytes of the input buffer have been consumed. **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform_x4(poly *vec, uint8_t *seed[4]) +__contract__( + requires(memory_no_alias(vec, sizeof(poly) * 4)) + requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) + requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) + assigns(memory_slice(vec, sizeof(poly) * 4)) + ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, MLKEM_Q))); -/* - * NOTE: The signature differs from the Kyber reference implementation - * in that it adds the offset and always expects the base of the target - * buffer. 
This avoids shifting the buffer base in the caller, which appears - * tricky to reason about. - */ +#define poly_rej_uniform MLKEM_NAMESPACE(poly_rej_uniform) +/************************************************* + * Name: poly_rej_uniform + * + * Description: Generate polynomial using rejection sampling + * on (pseudo-)uniformly random bytes sampled from a seed. + * + * Arguments: - poly *entry: Pointer to polynomial to be sampled. + * - uint8_t *seed: Pointer to seed buffer of size + * MLKEM_SYMBYTES + 2. + * + **************************************************/ MLKEM_NATIVE_INTERNAL_API -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +void poly_rej_uniform(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) __contract__( - requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) - requires(memory_no_alias(r, sizeof(int16_t) * target)) - requires(memory_no_alias(buf, buflen)) - requires(offset > 0 ==> array_bound(r, 0, offset, 0, MLKEM_Q)) - assigns(memory_slice(r, sizeof(int16_t) * target)) - ensures(offset <= return_value && return_value <= target) - ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, MLKEM_Q)) -); -#endif + requires(memory_no_alias(entry, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) + assigns(memory_slice(entry, sizeof(poly))) + ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q))); + +#endif /* REJ_UNIFORM_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/symmetric.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/symmetric.h index 55ebbbd53..3563e5505 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/symmetric.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/symmetric.h @@ -10,6 +10,7 @@ #include "cbmc.h" #include "common.h" #include "fips202.h" +#include "fips202x4.h" /* Macros denoting FIPS-203 specific Hash functions */ diff --git 
a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/verify.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/verify.c index b7078fcc1..9f39dcd22 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/verify.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/verify.c @@ -4,7 +4,8 @@ */ #include "verify.h" -#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) +#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) && \ + !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) /* * Masking value used in constant-time functions from * verify.h to block the compiler's range analysis and @@ -12,9 +13,11 @@ */ volatile uint64_t ct_opt_blocker_u64 = 0; -#else /* MLKEM_USE_ASM_VALUE_BARRIER */ +#else /* MLKEM_USE_ASM_VALUE_BARRIER && \ + !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ -#define empty_cu_verify MLKEM_NAMESPACE(empty_cu_verify) +#define empty_cu_verify MLKEM_NAMESPACE_K(empty_cu_verify) int empty_cu_verify; -#endif /* MLKEM_USE_ASM_VALUE_BARRIER */ +#endif /* MLKEM_USE_ASM_VALUE_BARRIER && \ + !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/verify.h index 8c47155dc..f6ecf5eba 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/verify.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/verify.h @@ -268,7 +268,7 @@ __contract__( for (i = 0; i < len; i++) __loop__( - invariant(i >= 0 && i <= len) + invariant(i <= len) invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k]))))) { r |= a[i] ^ b[i]; @@ -314,4 +314,4 @@ __contract__( } } -#endif +#endif /* VERIFY_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/zetas.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/zetas.c index 1a26e0dd5..4ef887c62 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/zetas.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/zetas.c @@ -8,6 +8,8 @@ * Do not modify it directly. 
*/ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) #include "ntt.h" /* @@ -28,3 +30,10 @@ ALIGN const int16_t zetas[128] = { -1187, -1659, -1185, -1530, -1278, 794, -1510, -854, -870, 478, -108, -308, 996, 991, 958, -1460, 1522, 1628, }; + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_zetas MLKEM_NAMESPACE_K(empty_cu_zetas) +int empty_cu_zetas; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/arith_backend.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/arith_backend.h index 09e30f207..0543b1bd1 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/arith_backend.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/arith_backend.h @@ -16,7 +16,9 @@ * * Keep this _after_ the inclusion of the backend; otherwise, * the sanity checks won't have an effect. */ +#if defined(MLKEM_NATIVE_CHECK_APIS) #include "api.h" #endif +#endif #endif /* MLKEM_NATIVE_ARITH_IMPL_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.c index 433bdc954..1e6b7c5d1 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.c @@ -2,8 +2,11 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ -#include "cbd.h" +#include "common.h" +#ifndef MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED + #include +#include "cbd.h" /* Static namespacing * This is to facilitate building multiple instances @@ -11,8 +14,6 @@ * within a single compilation unit. 
*/ #define load32_littleendian MLKEM_NAMESPACE(load32_littleendian) #define load24_littleendian MLKEM_NAMESPACE(load24_littleendian) -#define cbd2 MLKEM_NAMESPACE(cbd2) -#define cbd3 MLKEM_NAMESPACE(cbd3) /* End of static namespacing */ /************************************************* @@ -35,44 +36,13 @@ static uint32_t load32_littleendian(const uint8_t x[4]) return r; } -#if MLKEM_ETA1 == 3 -/************************************************* - * Name: load24_littleendian - * - * Description: load 3 bytes into a 32-bit integer - * in little-endian order. - * This function is only needed for ML-KEM-512 - * - * Arguments: - const uint8_t *x: pointer to input byte array - * - * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) - **************************************************/ -static uint32_t load24_littleendian(const uint8_t x[3]) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - return r; -} -#endif /* MLKEM_ETA1 == 3 */ - -/************************************************* - * Name: cbd2 - * - * Description: Given an array of uniformly random bytes, compute - * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter eta=2 - * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *buf: pointer to input byte array - **************************************************/ -static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) +MLKEM_NATIVE_INTERNAL_API +void poly_cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) { unsigned i; for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_abs_bound(r->coeffs, 0, 8 * i, 3))) { unsigned j; @@ -82,7 +52,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i <= MLKEM_N / 8 && j <= 8) 
invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 3))) { const int16_t a = (d >> (4 * j + 0)) & 0x3; @@ -92,24 +62,34 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) } } -#if MLKEM_ETA1 == 3 +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == 3 /************************************************* - * Name: cbd3 + * Name: load24_littleendian * - * Description: Given an array of uniformly random bytes, compute - * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter eta=3. + * Description: load 3 bytes into a 32-bit integer + * in little-endian order. * This function is only needed for ML-KEM-512 * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *buf: pointer to input byte array + * Arguments: - const uint8_t *x: pointer to input byte array + * + * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) **************************************************/ -static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) +static uint32_t load24_littleendian(const uint8_t x[3]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + return r; +} + +MLKEM_NATIVE_INTERNAL_API +void poly_cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) { unsigned i; for (i = 0; i < MLKEM_N / 4; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(i <= MLKEM_N / 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i, 4))) { unsigned j; @@ -120,7 +100,7 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) for (j = 0; j < 4; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4) + invariant(i <= MLKEM_N / 4 && j <= 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 4))) { const int16_t a = (d >> (6 * j + 0)) & 0x7; @@ -129,28 +109,12 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) } } } -#endif /* MLKEM_ETA1 == 3 */ +#endif /* 
defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == \ + 3 */ -MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) -{ -#if MLKEM_ETA1 == 2 - cbd2(r, buf); -#elif MLKEM_ETA1 == 3 - cbd3(r, buf); -#else -#error "This implementation requires eta1 in {2,3}" -#endif -} +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ -#if MLKEM_K == 2 || MLKEM_K == 4 -MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) -{ -#if MLKEM_ETA2 == 2 - cbd2(r, buf); -#else -#error "This implementation requires eta2 = 2" -#endif -} -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ +#define empty_cu_cbd MLKEM_NAMESPACE_K(empty_cu_cbd) +int empty_cu_cbd; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.h index 15db89570..54c1f5b90 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.h @@ -9,46 +9,35 @@ #include "common.h" #include "poly.h" -#define poly_cbd_eta1 MLKEM_NAMESPACE(poly_cbd_eta1) +#define poly_cbd2 MLKEM_NAMESPACE(poly_cbd2) /************************************************* - * Name: poly_cbd_eta1 + * Name: poly_cbd2 * * Description: Given an array of uniformly random bytes, compute * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter MLKEM_ETA1. 
+ * a centered binomial distribution with parameter eta=2 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *buf: pointer to input byte array **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)) -); +void poly_cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]); -#if MLKEM_K == 2 || MLKEM_K == 4 -#define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2) +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == 3 +#define poly_cbd3 MLKEM_NAMESPACE(poly_cbd3) /************************************************* - * Name: poly_cbd_eta1 + * Name: poly_cbd3 * * Description: Given an array of uniformly random bytes, compute * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter MLKEM_ETA2. + * a centered binomial distribution with parameter eta=3. 
+ * This function is only needed for ML-KEM-512 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *buf: pointer to input byte array **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)) -); -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ +void poly_cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]); +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD || MLKEM_ETA1 == 3 */ -#endif +#endif /* CBD_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbmc.h index baa0bfa9f..52b95bc3f 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbmc.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbmc.h @@ -13,7 +13,7 @@ #define __contract__(x) #define __loop__(x) -#define cassert(x, y) +#define cassert(x) #else /* CBMC _is_ defined, therefore we're doing proof */ @@ -30,7 +30,7 @@ #define invariant(...) __CPROVER_loop_invariant(__VA_ARGS__) #define decreases(...) __CPROVER_decreases(__VA_ARGS__) /* cassert to avoid confusion with in-built assert */ -#define cassert(...) __CPROVER_assert(__VA_ARGS__) +#define cassert(x) __CPROVER_assert(x, "cbmc assertion failed") #define assume(...) 
__CPROVER_assume(__VA_ARGS__) /*************************************************** @@ -119,13 +119,13 @@ { \ unsigned qvar; \ ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> \ - (((value_lb) <= (array_var[(qvar)])) && \ - ((array_var[(qvar)]) < (value_ub))) \ + (((int)(value_lb) <= ((array_var)[(qvar)])) && \ + (((array_var)[(qvar)]) < (int)(value_ub))) \ } #define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \ array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \ - (qvar_ub), (array_var), (value_lb), (value_ub)) + (qvar_ub), (array_var), (value_lb), (value_ub)) /* clang-format on */ /* Wrapper around array_bound operating on absolute values. @@ -134,6 +134,6 @@ * bound in array_bound is inclusive, we have to raise it by 1. */ #define array_abs_bound(arr, lb, ub, k) \ - array_bound((arr), (lb), (ub), -(k) + 1, (k)) + array_bound((arr), (lb), (ub), -((int)(k)) + 1, (k)) #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/common.h index da886780c..4f326333e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/common.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/common.h @@ -43,23 +43,30 @@ #define MLKEM_NATIVE_MAKE_NAMESPACE_(x1, x2) x1##_##x2 #define MLKEM_NATIVE_MAKE_NAMESPACE(x1, x2) MLKEM_NATIVE_MAKE_NAMESPACE_(x1, x2) -#define FIPS202_NAMESPACE(s) \ - MLKEM_NATIVE_MAKE_NAMESPACE(FIPS202_NAMESPACE_PREFIX, s) - #define MLKEM_NAMESPACE(s) \ MLKEM_NATIVE_MAKE_NAMESPACE(MLKEM_NAMESPACE_PREFIX, s) +#if defined(MLKEM_NAMESPACE_PREFIX_ADD_LEVEL) +#define MLKEM_NATIVE_MAKE_NAMESPACE_K_(x1, x2, x3) x1##x2##_##x3 +#define MLKEM_NATIVE_MAKE_NAMESPACE_K(x1, x2, x3) \ + MLKEM_NATIVE_MAKE_NAMESPACE_K_(x1, x2, x3) +#define MLKEM_NAMESPACE_K(s) \ + MLKEM_NATIVE_MAKE_NAMESPACE_K(MLKEM_NAMESPACE_PREFIX, MLKEM_LVL, s) +#else +#define MLKEM_NAMESPACE_K(s) MLKEM_NAMESPACE(s) +#endif + /* On Apple platforms, we need to emit leading underscore * in front of assembly symbols. 
We thus introducee a separate * namespace wrapper for ASM symbols. */ #if !defined(__APPLE__) #define MLKEM_ASM_NAMESPACE(sym) MLKEM_NAMESPACE(sym) -#define FIPS202_ASM_NAMESPACE(sym) FIPS202_NAMESPACE(sym) +#define MLKEM_ASM_NAMESPACE_K(sym) MLKEM_NAMESPACE_K(sym) #else #define PREFIX_UNDERSCORE_(sym) _##sym #define PREFIX_UNDERSCORE(sym) PREFIX_UNDERSCORE_(sym) #define MLKEM_ASM_NAMESPACE(sym) PREFIX_UNDERSCORE(MLKEM_NAMESPACE(sym)) -#define FIPS202_ASM_NAMESPACE(sym) PREFIX_UNDERSCORE(FIPS202_NAMESPACE(sym)) +#define MLKEM_ASM_NAMESPACE_K(sym) PREFIX_UNDERSCORE(MLKEM_NAMESPACE_K(sym)) #endif #endif /* MLKEM_NATIVE_COMMON_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/config.h index d1441835b..fa89370ce 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/config.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/config.h @@ -40,10 +40,12 @@ /* #define MLKEM_NATIVE_CONFIG_FILE "config.h" */ /****************************************************************************** - * Name: MLKEM_NAMESPACE + * Name: MLKEM_NAMESPACE_PREFIX * - * Description: The prefix to use to namespace global symbols - * from mlkem/. + * Description: The prefix to use to namespace global symbols from mlkem/. + * + * Level-dependent symbols will additionally be prefixed with the + * security level if MLKEM_NAMESPACE_PREFIX_ADD_LEVEL is set. * * This can also be set using CFLAGS. * @@ -53,17 +55,71 @@ #endif /****************************************************************************** - * Name: FIPS202_NAMESPACE + * Name: MLKEM_NAMESPACE_PREFIX_ADD_LEVEL + * + * Description: If set, the level (512, 768, 1024) is added to the namespace + * prefix MLKEM_NAMESPACE_PREFIX for all functions which are + * level-dependent. Level-independent functions will have there + * symbol prefixed by MLKEM_NAMESPACE_PREFIX only. * - * Description: The prefix to use to namespace global symbols - * from mlkem/fips202/. 
+ * This is intended to be used for multi-level builds where + * level-independent code should be shared across levels. * * This can also be set using CFLAGS. * *****************************************************************************/ -#if !defined(FIPS202_NAMESPACE_PREFIX) -#define FIPS202_NAMESPACE_PREFIX FIPS202_DEFAULT_NAMESPACE_PREFIX -#endif +/* #define MLKEM_NAMESPACE_PREFIX_ADD_LEVEL */ + +/****************************************************************************** + * Name: MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED + * + * Description: This is for multi-level builds of mlkem-native only. If you + * need only a single security level build of mlkem-native, + * keep this unset. + * + * If this is set, all MLKEM_K-independent code will be included + * in the build, including code needed only for other security + * levels. + * + * Example: poly_cbd3 is only needed for MLKEM_K == 2. Yet, if + * this option is set for a build with MLKEM_K==3/4, it would + * be included. + * + * To build mlkem-native with support for all security levels, + * build it three times -- once per level -- and set the option + * MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED for exactly one of + * them, and MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED for the + * others. + * + * See examples/multilevel_build for an example. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +/* #define MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED */ + +/****************************************************************************** + * Name: MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED + * + * Description: This is for multi-level builds of mlkem-native only. If you + * need only a single security level build of mlkem-native, + * keep this unset. + * + * If this is set, no MLKEM_K-independent code will be included + * in the build. 
+ * + * To build mlkem-native with support for all security levels, + * build it three times -- once per level -- and set the option + * MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED for exactly one of + * them, and MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED for the + * others. + * + * See examples/multilevel_build for an example. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +/* #define MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ /****************************************************************************** * Name: MLKEM_USE_NATIVE @@ -112,25 +168,13 @@ /* Default namespace * * Don't change this. If you need a different namespace, re-define - * MLKEM_NAMESPACE above instead, and remove the following. - */ - -/* - * The default FIPS202 namespace is - * - * PQCP_MLKEM_NATIVE_FIPS202__ + * MLKEM_NAMESPACE_PREFIX above instead, and remove the following. * - * e.g., PQCP_MLKEM_NATIVE_FIPS202_C_ - */ - -#define FIPS202_DEFAULT_NAMESPACE_PREFIX PQCP_MLKEM_NATIVE_FIPS202 - -/* * The default MLKEM namespace is * - * PQCP_MLKEM_NATIVE_MLKEM__ + * PQCP_MLKEM_NATIVE_MLKEM_ * - * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_ + * e.g., PQCP_MLKEM_NATIVE_MLKEM512_ */ #if MLKEM_K == 2 diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug.c new file mode 100644 index 000000000..4b4857cbc --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug.c @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +/* NOTE: You can remove this file unless you compile with MLKEM_DEBUG. 
*/ + +#include "common.h" + +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) && defined(MLKEM_DEBUG) + + +#include +#include +#include "debug.h" + +#define MLKEM_NATIVE_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] " + +void mlkem_debug_assert(const char *file, int line, const int val) +{ + if (val == 0) + { + fprintf(stderr, + MLKEM_NATIVE_DEBUG_ERROR_HEADER "Assertion failed (value %d)\n", + file, line, val); + exit(1); + } +} + +void mlkem_debug_check_bounds(const char *file, int line, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive) +{ + int err = 0; + unsigned i; + for (i = 0; i < len; i++) + { + int16_t val = ptr[i]; + if (!(val > lower_bound_exclusive && val < upper_bound_exclusive)) + { + fprintf( + stderr, + MLKEM_NATIVE_DEBUG_ERROR_HEADER + "Bounds assertion failed: Index %u, value %d out of bounds (%d,%d)\n", + file, line, i, (int)val, lower_bound_exclusive, + upper_bound_exclusive); + err = 1; + } + } + + if (err == 1) + exit(1); +} + +#else /* !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED && MLKEM_DEBUG */ + +#define empty_cu_debug MLKEM_NAMESPACE_K(empty_cu_debug) +int empty_cu_debug; + +#endif /* !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED && MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug.h new file mode 100644 index 000000000..1103124db --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef MLKEM_DEBUG_H +#define MLKEM_DEBUG_H +#include "common.h" + +#if defined(MLKEM_DEBUG) +#include + +/************************************************* + * Name: mlkem_debug_assert + * + * Description: Check debug assertion + * + * Prints an error message to stderr and calls + * exit(1) if not. 
+ * + * Arguments: - file: filename + * - line: line number + * - val: Value asserted to be non-zero + **************************************************/ +#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert) +void mlkem_debug_assert(const char *file, int line, const int val); + +/************************************************* + * Name: mlkem_debug_check_bounds + * + * Description: Check whether values in an array of int16_t + * are within specified bounds. + * + * Prints an error message to stderr and calls + * exit(1) if not. + * + * Arguments: - file: filename + * - line: line number + * - ptr: Base of array to be checked + * - len: Number of int16_t in ptr + * - lower_bound_exclusive: Exclusive lower bound + * - upper_bound_exclusive: Exclusive upper bound + **************************************************/ +#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds) +void mlkem_debug_check_bounds(const char *file, int line, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive); + +/* Check assertion, calling exit() upon failure + * + * val: Value that's asserted to be non-zero + */ +#define debug_assert(val) mlkem_debug_assert(__FILE__, __LINE__, (val)) + +/* Check bounds in array of int16_t's + * ptr: Base of int16_t array; will be explicitly cast to int16_t*, + * so you may pass a byte-compatible type such as poly or polyvec. 
+ * len: Number of int16_t in array + * value_lb: Inclusive lower value bound + * value_ub: Exclusive upper value bound */ +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + mlkem_debug_check_bounds(__FILE__, __LINE__, (const int16_t *)(ptr), (len), \ + (value_lb)-1, (value_ub)) + +/* Check absolute bounds in array of int16_t's + * ptr: Base of array, expression of type int16_t* + * len: Number of int16_t in array + * value_abs_bd: Exclusive absolute upper bound */ +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + debug_assert_bound((ptr), (len), (-(value_abs_bd) + 1), (value_abs_bd)) + +/* Version of bounds assertions for 2-dimensional arrays */ +#define debug_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \ + debug_assert_bound((ptr), ((len0) * (len1)), (value_lb), (value_ub)) + +#define debug_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \ + debug_assert_abs_bound((ptr), ((len0) * (len1)), (value_abs_bd)) + +/* When running CBMC, convert debug assertions into proof obligations */ +#elif defined(CBMC) + +#include "../cbmc.h" + +#define debug_assert(val) cassert(val) + +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + cassert(array_bound(((int16_t *)(ptr)), 0, (len), (value_lb), (value_ub))) + +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + cassert(array_abs_bound(((int16_t *)(ptr)), 0, (len), (value_abs_bd))) + +/* Because of https://github.com/diffblue/cbmc/issues/8570, we can't + * just use a single flattened array_bound(...) here. 
*/ +#define debug_assert_bound_2d(ptr, M, N, value_lb, value_ub) \ + cassert(forall(kN, 0, (M), \ + array_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \ + (value_lb), (value_ub)))) + +#define debug_assert_abs_bound_2d(ptr, M, N, value_abs_bd) \ + cassert(forall(kN, 0, (M), \ + array_abs_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \ + (value_abs_bd)))) + +#else /* MLKEM_DEBUG */ + +#define debug_assert(val) \ + do \ + { \ + } while (0) +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + do \ + { \ + } while (0) +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + do \ + { \ + } while (0) + +#define debug_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \ + do \ + { \ + } while (0) + +#define debug_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \ + do \ + { \ + } while (0) + + +#endif /* MLKEM_DEBUG */ +#endif /* MLKEM_DEBUG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug/debug.c deleted file mode 100644 index 64294ebe1..000000000 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug/debug.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2024 The mlkem-native project authors - * SPDX-License-Identifier: Apache-2.0 - */ -#include "../common.h" - -#if defined(MLKEM_DEBUG) - -#include -#include "debug.h" - -#define MLKEM_NATIVE_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] " - -void mlkem_debug_assert(const char *file, int line, const char *description, - const int val) -{ - if (val == 0) - { - fprintf(stderr, - MLKEM_NATIVE_DEBUG_ERROR_HEADER "Assertion failed: %s (value %d)\n", - file, line, description, val); - exit(1); - } -} - -void mlkem_debug_check_bounds(const char *file, int line, - const char *description, const int16_t *ptr, - unsigned len, int lower_bound_exclusive, - int upper_bound_exclusive) -{ - int err = 0; - unsigned i; - for (i = 0; i < len; i++) - { - int16_t val = ptr[i]; - if (!(val > lower_bound_exclusive && val < upper_bound_exclusive)) - { 
- fprintf(stderr, - MLKEM_NATIVE_DEBUG_ERROR_HEADER - "%s, index %u, value %d out of bounds (%d,%d)\n", - file, line, description, i, (int)val, lower_bound_exclusive, - upper_bound_exclusive); - err = 1; - } - } - - if (err == 1) - exit(1); -} - -#else /* MLKEM_DEBUG */ - -#define empty_cu_debug MLKEM_NAMESPACE(empty_cu_debug) -int empty_cu_debug; - -#endif /* MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug/debug.h deleted file mode 100644 index 5ce320ea2..000000000 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug/debug.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2024 The mlkem-native project authors - * SPDX-License-Identifier: Apache-2.0 - */ -#ifndef MLKEM_DEBUG_H -#define MLKEM_DEBUG_H - -#include "../common.h" - -#if defined(MLKEM_DEBUG) -#include -#include -#include - -/************************************************* - * Name: mlkem_debug_assert - * - * Description: Check debug assertion - * - * Prints an error message to stderr and calls - * exit(1) if not. - * - * Arguments: - file: filename - * - line: line number - * - description: Textual description of assertion - * - val: Value asserted to be non-zero - **************************************************/ -#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert) -void mlkem_debug_assert(const char *file, int line, const char *description, - const int val); - -/************************************************* - * Name: mlkem_debug_check_bounds - * - * Description: Check whether values in an array of int16_t - * are within specified bounds. - * - * Prints an error message to stderr and calls - * exit(1) if not. 
- * - * Arguments: - file: filename - * - line: line number - * - description: Textual description of check - * - ptr: Base of array to be checked - * - len: Number of int16_t in ptr - * - lower_bound_exclusive: Exclusive lower bound - * - upper_bound_exclusive: Exclusive upper bound - **************************************************/ -#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds) -void mlkem_debug_check_bounds(const char *file, int line, - const char *description, const int16_t *ptr, - unsigned len, int lower_bound_exclusive, - int upper_bound_exclusive); - -/* Check assertion, calling exit() upon failure - * - * val: Value that's asserted to be non-zero - * msg: Message to print on failure - * - * Currently called CASSERT to avoid clash with CBMC assert. - */ -#define CASSERT(val, msg) \ - do \ - { \ - mlkem_debug_assert(__FILE__, __LINE__, (msg), (val)); \ - } while (0) - -/* Check absolute bounds of scalar - * val: Scalar to be checked - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define SCALAR_BOUND(val, abs_bound, msg) \ - CASSERT((val) > -(abs_bound) && (val) < (abs_bound), msg) - -/* Check that all coefficients in array of int16_t's are non-negative - * and below an exclusive upper bound. 
- * - * ptr: Base of array, expression of type int16_t* - * len: Number of int16_t in array - * high_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define UBOUND(ptr, len, high_bound, msg) \ - do \ - { \ - mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ - (len), -1, ((high_bound))); \ - } while (0) - -/* Check absolute bounds in array of int16_t's - * ptr: Base of array, expression of type int16_t* - * len: Number of int16_t in array - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define BOUND(ptr, len, abs_bound, msg) \ - do \ - { \ - mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ - (len), -(abs_bound), (abs_bound)); \ - } while (0) - -/* Check absolute bounds on coefficients in polynomial or mulcache - * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define POLY_BOUND_MSG(ptr, abs_bound, msg) \ - BOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (abs_bound), \ - msg) - -/* Check unsigned bounds on coefficients in polynomial or mulcache - * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
- * msg: Message to print on failure */ -#define POLY_UBOUND_MSG(ptr, ubound, msg) \ - UBOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (ubound), \ - msg) - -/* Check absolute bounds on coefficients in polynomial - * ptr: poly* of poly_mulcache* pointer to polynomial (cache) to check - * abs_bound: Exclusive upper bound on absolute value to check */ -#define POLY_BOUND(ptr, abs_bound) \ - POLY_BOUND_MSG((ptr), (abs_bound), "poly absolute bound for " #ptr) - -/* Check unsigned bounds on coefficients in polynomial - * ptr: poly* of poly_mulcache* pointer to polynomial (cache) to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. - */ -#define POLY_UBOUND(ptr, ubound) \ - POLY_UBOUND_MSG((ptr), (ubound), "poly unsigned bound for " #ptr) - -/* Check absolute bounds on coefficients in vector of polynomials - * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check - * abs_bound: Exclusive upper bound on absolute value to check */ -#define POLYVEC_BOUND(ptr, abs_bound) \ - do \ - { \ - unsigned _debug_polyvec_bound_idx; \ - for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ - _debug_polyvec_bound_idx++) \ - POLY_BOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (abs_bound), \ - "polyvec absolute bound for " #ptr ".vec[i]"); \ - } while (0) - -/* Check unsigned bounds on coefficients in vector of polynomials - * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
- */ -#define POLYVEC_UBOUND(ptr, ubound) \ - do \ - { \ - unsigned _debug_polyvec_bound_idx; \ - for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ - _debug_polyvec_bound_idx++) \ - POLY_UBOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (ubound), \ - "polyvec unsigned bound for " #ptr ".vec[i]"); \ - } while (0) - -#define MLKEM_CONCAT_(left, right) left##right -#define MLKEM_CONCAT(left, right) MLKEM_CONCAT_(left, right) - -/* Following AWS-LC to define a C99-compliant static assert */ -#define MLKEM_STATIC_ASSERT_DEFINE(cond, msg) \ - typedef struct \ - { \ - unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \ - } MLKEM_CONCAT(MLKEM_NAMESPACE(static_assertion_), msg) \ - __attribute__((unused)); - -#define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \ - MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix)) -#define MLKEM_STATIC_ASSERT_ADD_LINE1(cond, line, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE0(cond, MLKEM_CONCAT(line, suffix)) -#define MLKEM_STATIC_ASSERT_ADD_LINE2(cond, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE1(cond, __LINE__, suffix) -#define MLKEM_STATIC_ASSERT_ADD_ERROR(cond, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE2(cond, MLKEM_CONCAT(_error_is_, suffix)) -#define STATIC_ASSERT(cond, error) MLKEM_STATIC_ASSERT_ADD_ERROR(cond, error) - -#else /* MLKEM_DEBUG */ - -#define CASSERT(val, msg) \ - do \ - { \ - } while (0) -#define SCALAR_BOUND(val, abs_bound, msg) \ - do \ - { \ - } while (0) -#define BOUND(ptr, len, abs_bound, msg) \ - do \ - { \ - } while (0) -#define POLY_BOUND(ptr, abs_bound) \ - do \ - { \ - } while (0) -#define POLYVEC_BOUND(ptr, abs_bound) \ - do \ - { \ - } while (0) -#define POLY_BOUND_MSG(ptr, ubound, abs_bound) \ - do \ - { \ - } while (0) -#define UBOUND(ptr, len, high_bound, msg) \ - do \ - { \ - } while (0) -#define POLY_UBOUND(ptr, ubound) \ - do \ - { \ - } while (0) -#define POLYVEC_UBOUND(ptr, ubound) \ - do \ - { \ - } while (0) -#define POLY_UBOUND_MSG(ptr, ubound, msg) \ 
- do \ - { \ - } while (0) -#define STATIC_ASSERT(cond, error) - -#endif /* MLKEM_DEBUG */ - -#endif /* MLKEM_DEBUG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.c index 4d3133e14..0cfcc3e9e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.c @@ -17,7 +17,7 @@ #include "symmetric.h" #include "arith_backend.h" -#include "debug/debug.h" +#include "debug.h" #include "cbmc.h" @@ -25,15 +25,13 @@ * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. */ -#define pack_pk MLKEM_NAMESPACE(pack_pk) -#define unpack_pk MLKEM_NAMESPACE(unpack_pk) -#define pack_sk MLKEM_NAMESPACE(pack_sk) -#define unpack_sk MLKEM_NAMESPACE(unpack_sk) -#define pack_ciphertext MLKEM_NAMESPACE(pack_ciphertext) -#define unpack_ciphertext MLKEM_NAMESPACE(unpack_ciphertext) -#define gen_matrix_entry_x4 MLKEM_NAMESPACE(gen_matrix_entry_x4) -#define gen_matrix_entry MLKEM_NAMESPACE(gen_matrix_entry) -#define matvec_mul MLKEM_NAMESPACE(matvec_mul) +#define pack_pk MLKEM_NAMESPACE_K(pack_pk) +#define unpack_pk MLKEM_NAMESPACE_K(unpack_pk) +#define pack_sk MLKEM_NAMESPACE_K(pack_sk) +#define unpack_sk MLKEM_NAMESPACE_K(unpack_sk) +#define pack_ciphertext MLKEM_NAMESPACE_K(pack_ciphertext) +#define unpack_ciphertext MLKEM_NAMESPACE_K(unpack_ciphertext) +#define matvec_mul MLKEM_NAMESPACE_K(matvec_mul) /* End of static namespacing */ /************************************************* @@ -51,7 +49,7 @@ static void pack_pk(uint8_t r[MLKEM_INDCPA_PUBLICKEYBYTES], polyvec *pk, const uint8_t seed[MLKEM_SYMBYTES]) { - POLYVEC_BOUND(pk, MLKEM_Q); + debug_assert_bound_2d(pk, MLKEM_K, MLKEM_N, 0, MLKEM_Q); polyvec_tobytes(r, pk); memcpy(r + MLKEM_POLYVECBYTES, seed, MLKEM_SYMBYTES); } @@ -77,7 +75,7 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], /* NOTE: If a 
modulus check was conducted on the PK, we know at this * point that the coefficients of `pk` are unsigned canonical. The * specifications and proofs, however, do _not_ assume this, and instead - * work with the easily provable bound by 4096. */ + * work with the easily provable bound by UINT12_LIMIT. */ } /************************************************* @@ -91,7 +89,7 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], **************************************************/ static void pack_sk(uint8_t r[MLKEM_INDCPA_SECRETKEYBYTES], polyvec *sk) { - POLYVEC_BOUND(sk, MLKEM_Q); + debug_assert_bound_2d(sk, MLKEM_K, MLKEM_N, 0, MLKEM_Q); polyvec_tobytes(r, sk); } @@ -145,131 +143,11 @@ static void unpack_ciphertext(polyvec *b, poly *v, poly_decompress_dv(v, c + MLKEM_POLYVECCOMPRESSEDBYTES_DU); } -#ifndef MLKEM_GEN_MATRIX_NBLOCKS -#define MLKEM_GEN_MATRIX_NBLOCKS \ - ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + XOF_RATE) / XOF_RATE) -#endif - -/* - * Generate four A matrix entries from a seed, using rejection - * sampling on the output of a XOF. 
- */ -static void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4]) -__contract__( - requires(memory_no_alias(vec, sizeof(poly) * 4)) - requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) - requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) - assigns(memory_slice(vec, sizeof(poly) * 4)) - ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) -{ - /* Temporary buffers for XOF output before rejection sampling */ - uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - - /* Tracks the number of coefficients we have already sampled */ - unsigned int ctr[KECCAK_WAY]; - xof_x4_ctx statex; - unsigned int buflen; - - shake128x4_inc_init(&statex); - - /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */ - xof_x4_absorb(&statex, seed[0], seed[1], seed[2], seed[3], - MLKEM_SYMBYTES + 2); - - /* - * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. - * This should generate the matrix entries with high probability. - */ - xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, - &statex); - buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; - ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); - ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); - ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); - ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); - - /* - * So long as not all matrix entries have been generated, squeeze - * one more block a time until we're done. 
- */ - buflen = XOF_RATE; - while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || - ctr[3] < MLKEM_N) - __loop__( - assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), - object_whole(buf1), object_whole(buf2), object_whole(buf3)) - invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) - invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) - invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, MLKEM_Q)) - invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, MLKEM_Q)) - invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, MLKEM_Q)) - invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, MLKEM_Q))) - { - xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); - ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); - ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); - ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); - ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); - } - - xof_x4_release(&statex); -} - -/* - * Generate a single A matrix entry from a seed, using rejection - * sampling on the output of a XOF. - */ -static void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) -__contract__( - requires(memory_no_alias(entry, sizeof(poly))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) - assigns(memory_slice(entry, sizeof(poly))) - ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) -{ - xof_ctx state; - uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - unsigned int ctr, buflen; - - shake128_inc_init(&state); - xof_absorb(&state, seed, MLKEM_SYMBYTES + 2); - - /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. - */ - /* This should generate the matrix entry with high probability. 
*/ - xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); - buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; - ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); - - /* Squeeze + sample one more block a time until we're done */ - buflen = XOF_RATE; - while (ctr < MLKEM_N) - __loop__( - assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) - invariant(0 <= ctr && ctr <= MLKEM_N) - invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr, - 0, MLKEM_Q))) - { - xof_squeezeblocks(buf, 1, &state); - ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen); - } - - xof_release(&state); -} - #if !defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER) /* This namespacing is not done at the top to avoid a naming conflict * with native backends, which are currently not yet namespaced. */ #define poly_permute_bitrev_to_custom \ - MLKEM_NAMESPACE(poly_permute_bitrev_to_custom) + MLKEM_NAMESPACE_K(poly_permute_bitrev_to_custom) static INLINE void poly_permute_bitrev_to_custom(poly *data) __contract__( @@ -332,7 +210,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) * This call writes across polyvec boundaries for K=2 and K=3. * This is intentional and safe. */ - gen_matrix_entry_x4(&a[0].vec[0] + i, seedxy); + poly_rej_uniform_x4(&a[0].vec[0] + i, seedxy); } /* For left over polynomial, we use single keccak. */ @@ -353,12 +231,11 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) seed0[MLKEM_SYMBYTES + 1] = x; } - gen_matrix_entry(&a[0].vec[0] + i, seed0); + poly_rej_uniform(&a[0].vec[0] + i, seed0); i++; } - cassert(i == MLKEM_K * MLKEM_K, - "gen_matrix: failed to generate whole matrix"); + debug_assert(i == MLKEM_K * MLKEM_K); /* * The public matrix is generated in NTT domain. 
If the native backend @@ -402,16 +279,12 @@ __contract__( for (i = 0; i < MLKEM_K; i++) __loop__( assigns(i, object_whole(out)) - invariant(i >= 0 && i <= MLKEM_K)) + invariant(i <= MLKEM_K)) { polyvec_basemul_acc_montgomery_cached(&out->vec[i], &a[i], v, vc); } } - - -STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0) - MLKEM_NATIVE_INTERNAL_API void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES], @@ -461,7 +334,6 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], matvec_mul(&pkpv, a, &skpv, &skpv_cache); polyvec_tomont(&pkpv); - /* Arithmetic cannot overflow, see static assertion at the top */ polyvec_add(&pkpv, &e); polyvec_reduce(&pkpv); polyvec_reduce(&skpv); @@ -471,11 +343,6 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], } -/* Check that the arithmetic in indcpa_enc() does not overflow */ -STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0) -STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX, - indcpa_enc_bound_1) - MLKEM_NATIVE_INTERNAL_API void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], const uint8_t m[MLKEM_INDCPA_MSGBYTES], @@ -522,7 +389,6 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], polyvec_invntt_tomont(&b); poly_invntt_tomont(&v); - /* Arithmetic cannot overflow, see static assertion at the top */ polyvec_add(&b, &ep); poly_add(&v, &epp); poly_add(&v, &k); @@ -533,9 +399,6 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], pack_ciphertext(c, &b, &v); } -/* Check that the arithmetic in indcpa_dec() does not overflow */ -STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0) - MLKEM_NATIVE_INTERNAL_API void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], const uint8_t c[MLKEM_INDCPA_BYTES], @@ -551,7 +414,6 @@ void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], polyvec_basemul_acc_montgomery(&sb, &skpv, &b); poly_invntt_tomont(&sb); - /* Arithmetic cannot overflow, see static assertion at the 
top */ poly_sub(&v, &sb); poly_reduce(&v); diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.h index 011f1aa4f..2c4fda3c4 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.h @@ -10,7 +10,7 @@ #include "common.h" #include "polyvec.h" -#define gen_matrix MLKEM_NAMESPACE(gen_matrix) +#define gen_matrix MLKEM_NAMESPACE_K(gen_matrix) /************************************************* * Name: gen_matrix * @@ -34,7 +34,7 @@ __contract__( array_bound(a[x].vec[y].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))); ); -#define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand) +#define indcpa_keypair_derand MLKEM_NAMESPACE_K(indcpa_keypair_derand) /************************************************* * Name: indcpa_keypair_derand * @@ -60,7 +60,7 @@ __contract__( assigns(object_whole(sk)) ); -#define indcpa_enc MLKEM_NAMESPACE(indcpa_enc) +#define indcpa_enc MLKEM_NAMESPACE_K(indcpa_enc) /************************************************* * Name: indcpa_enc * @@ -89,7 +89,7 @@ __contract__( assigns(object_whole(c)) ); -#define indcpa_dec MLKEM_NAMESPACE(indcpa_dec) +#define indcpa_dec MLKEM_NAMESPACE_K(indcpa_dec) /************************************************* * Name: indcpa_dec * diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.c index 5779d3273..88c3843be 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.c @@ -16,8 +16,8 @@ * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. 
*/ -#define check_pk MLKEM_NAMESPACE(check_pk) -#define check_sk MLKEM_NAMESPACE(check_sk) +#define check_pk MLKEM_NAMESPACE_K(check_pk) +#define check_sk MLKEM_NAMESPACE_K(check_sk) /* End of static namespacing */ #if defined(CBMC) diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.h index 074e4771e..93caa796b 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.h @@ -9,6 +9,7 @@ #include "cbmc.h" #include "common.h" +#if defined(MLKEM_NATIVE_CHECK_APIS) /* Include to ensure consistency between internal kem.h * and external mlkem_native.h. */ #include "mlkem_native.h" @@ -25,6 +26,14 @@ #error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h #endif +#else +#define crypto_kem_keypair_derand MLKEM_NAMESPACE_K(keypair_derand) +#define crypto_kem_keypair MLKEM_NAMESPACE_K(keypair) +#define crypto_kem_enc_derand MLKEM_NAMESPACE_K(enc_derand) +#define crypto_kem_enc MLKEM_NAMESPACE_K(enc) +#define crypto_kem_dec MLKEM_NAMESPACE_K(dec) +#endif + /************************************************* * Name: crypto_kem_keypair_derand * diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/mlkem_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/mlkem_native.h index 4aed4efbb..12d1d12e6 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/mlkem_native.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/mlkem_native.h @@ -59,9 +59,17 @@ #error MLKEM_NAMESPACE_PREFIX not set by config file #endif -#define BUILD_INFO_CONCAT_(x, y) x##_##y -#define BUILD_INFO_CONCAT(x, y) BUILD_INFO_CONCAT_(x, y) -#define BUILD_INFO_NAMESPACE(sym) BUILD_INFO_CONCAT(MLKEM_NAMESPACE_PREFIX, sym) +#if defined(MLKEM_NATIVE_NAMESPACE_PREFIX_ADD_LEVEL) +#define BUILD_INFO_CONCAT3_(x, y, z) x##y##_##z +#define BUILD_INFO_CONCAT3(x, y, z) BUILD_INFO_CONCAT3_(x, y, z) +#define BUILD_INFO_NAMESPACE(sym) \ + BUILD_INFO_CONCAT3(MLKEM_NAMESPACE_PREFIX, BUILD_INFO_LVL,
sym) +#else +#define BUILD_INFO_CONCAT2_(x, y) x##_##y +#define BUILD_INFO_CONCAT2(x, y) BUILD_INFO_CONCAT2_(x, y) +#define BUILD_INFO_NAMESPACE(sym) \ + BUILD_INFO_CONCAT2(MLKEM_NAMESPACE_PREFIX, sym) +#endif #endif /* BUILD_INFO_LVL */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.c index 02b45215c..3651c8da9 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.c @@ -2,10 +2,12 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ -#include +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) +#include #include "arith_backend.h" -#include "debug/debug.h" +#include "debug.h" #include "ntt.h" #include "reduce.h" @@ -45,10 +47,10 @@ * 4 -- 6 * 5 -- 7 */ -static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, - int len, int bound) +static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, + unsigned start, unsigned len, int bound) __contract__( - requires(0 <= start && start < MLKEM_N) + requires(start < MLKEM_N) requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N) requires(0 <= bound && bound < INT16_MAX - MLKEM_Q) requires(-HALF_Q < zeta && zeta < HALF_Q) @@ -60,7 +62,7 @@ __contract__( ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound))) { /* `bound` is a ghost variable only needed in the CBMC specification */ - int j; + unsigned j; ((void)bound); for (j = start; j < start + len; j++) __loop__( @@ -93,7 +95,7 @@ __contract__( * official Kyber implementation here, merely adding `layer` as * a ghost variable for the specifications. 
*/ -static void ntt_layer(int16_t r[MLKEM_N], int len, int layer) +static void ntt_layer(int16_t r[MLKEM_N], unsigned len, unsigned layer) __contract__( requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer)) @@ -101,15 +103,15 @@ __contract__( assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q))) { - int start, k; + unsigned start, k; /* `layer` is a ghost variable only needed in the CBMC specification */ ((void)layer); /* Twiddle factors for layer n start at index 2^(layer-1) */ k = MLKEM_N / (2 * len); for (start = 0; start < MLKEM_N; start += 2 * len) __loop__( - invariant(0 <= start && start < MLKEM_N + 2 * len) - invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) + invariant(start < MLKEM_N + 2 * len) + invariant(k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) invariant(array_abs_bound(r, 0, start, layer * MLKEM_Q + MLKEM_Q)) invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q))) { @@ -130,9 +132,9 @@ __contract__( MLKEM_NATIVE_INTERNAL_API void poly_ntt(poly *p) { - int len, layer; + unsigned len, layer; int16_t *r; - POLY_BOUND_MSG(p, MLKEM_Q, "ref ntt input"); + debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q); r = p->coeffs; for (len = 128, layer = 1; len >= 2; len >>= 1, layer++) @@ -144,30 +146,23 @@ void poly_ntt(poly *p) } /* Check the stronger bound */ - POLY_BOUND_MSG(p, NTT_BOUND, "ref ntt output"); + debug_assert_abs_bound(p, MLKEM_N, NTT_BOUND); } #else /* MLKEM_USE_NATIVE_NTT */ -/* Check that bound for native NTT implies contractual bound */ -STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, invntt_bound) - MLKEM_NATIVE_INTERNAL_API void poly_ntt(poly *p) { - POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input"); + debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q); ntt_native(p); - POLY_BOUND_MSG(p, NTT_BOUND_NATIVE, "native ntt output"); + debug_assert_abs_bound(p, MLKEM_N, NTT_BOUND); } #endif /* 
MLKEM_USE_NATIVE_NTT */ #if !defined(MLKEM_USE_NATIVE_INTT) -/* Check that bound for reference invNTT implies contractual bound */ -#define INVNTT_BOUND_REF (3 * MLKEM_Q / 4) -STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound) - /* Compute one layer of inverse NTT */ -static void invntt_layer(int16_t *r, int len, int layer) +static void invntt_layer(int16_t *r, unsigned len, unsigned layer) __contract__( requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7) @@ -176,23 +171,23 @@ __contract__( assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { - int start, k; + unsigned start, k; /* `layer` is a ghost variable used only in the specification */ ((void)layer); k = MLKEM_N / len - 1; for (start = 0; start < MLKEM_N; start += 2 * len) __loop__( invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)) - invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(start <= MLKEM_N && k <= 127) /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */ invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len)) { - int j; + unsigned j; int16_t zeta = zetas[k--]; for (j = start; j < start + len; j++) __loop__( invariant(start <= j && j <= start + len) - invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(start <= MLKEM_N && k <= 127) invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { int16_t t = r[j]; @@ -211,13 +206,13 @@ void poly_invntt_tomont(poly *p) * and NTT twist. This also brings coefficients down to * absolute value < MLKEM_Q. 
*/ - int j, len, layer; + unsigned j, len, layer; const int16_t f = 1441; int16_t *r = p->coeffs; for (j = 0; j < MLKEM_N; j++) __loop__( - invariant(0 <= j && j <= MLKEM_N) + invariant(j <= MLKEM_N) invariant(array_abs_bound(r, 0, j, MLKEM_Q))) { r[j] = fqmul(r[j], f); @@ -226,24 +221,21 @@ void poly_invntt_tomont(poly *p) /* Run the invNTT layers */ for (len = 2, layer = 7; len <= 128; len <<= 1, layer--) __loop__( - invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer))) + invariant(2 <= len && len <= 256 && layer <= 7 && len == (1 << (8 - layer))) invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { invntt_layer(p->coeffs, len, layer); } - POLY_BOUND_MSG(p, INVNTT_BOUND_REF, "ref intt output"); + debug_assert_abs_bound(p, MLKEM_N, INVNTT_BOUND); } #else /* MLKEM_USE_NATIVE_INTT */ -/* Check that bound for native invNTT implies contractual bound */ -STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound) - MLKEM_NATIVE_INTERNAL_API void poly_invntt_tomont(poly *p) { intt_native(p); - POLY_BOUND_MSG(p, INVNTT_BOUND_NATIVE, "native intt output"); + debug_assert_abs_bound(p, MLKEM_N, INVNTT_BOUND); } #endif /* MLKEM_USE_NATIVE_INTT */ @@ -252,8 +244,7 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t b_cached) { int32_t t0, t1; - - BOUND(a, 2, 4096, "basemul input bound"); + debug_assert_bound(a, 2, 0, UINT12_LIMIT); t0 = (int32_t)a[1] * b_cached; t0 += (int32_t)a[0] * b[0]; @@ -264,5 +255,12 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], r[0] = montgomery_reduce(t0); r[1] = montgomery_reduce(t1); - BOUND(r, 2, 2 * MLKEM_Q, "basemul output bound"); + debug_assert_abs_bound(r, 2, 2 * MLKEM_Q); } + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_ntt MLKEM_NAMESPACE_K(empty_cu_ntt) +int empty_cu_ntt; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.h 
b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.h index 5592bb9a2..4e80d3ab3 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.h @@ -4,10 +4,10 @@ */ #ifndef NTT_H #define NTT_H +#include "common.h" #include #include "cbmc.h" -#include "common.h" #include "poly.h" #include "reduce.h" @@ -81,7 +81,7 @@ __contract__( * Upon return, coefficients are bound by * 2*MLKEM_Q in absolute value. * - a: Pointer to first input polynomial - * Must be coefficient-wise < 4096 in absolute value. + * Every coefficient must be in [0..4095] * - b: Pointer to second input polynomial * Can have arbitrary int16_t coefficients * - b_cached: Some precomputed value, typically derived from @@ -99,5 +99,4 @@ __contract__( ensures(array_abs_bound(r, 0, 2, 2 * MLKEM_Q)) ); - -#endif +#endif /* NTT_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/params.h index fa751f977..57ea4c8ba 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/params.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/params.h @@ -25,23 +25,34 @@ #define MLKEM_POLYBYTES 384 #define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES) +#define MLKEM_POLYCOMPRESSEDBYTES_D4 128 +#define MLKEM_POLYCOMPRESSEDBYTES_D5 160 +#define MLKEM_POLYCOMPRESSEDBYTES_D10 320 +#define MLKEM_POLYCOMPRESSEDBYTES_D11 352 + #if MLKEM_K == 2 #define MLKEM_LVL 512 #define MLKEM_ETA1 3 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_DU 10 +#define MLKEM_DV 4 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #elif MLKEM_K == 3 #define MLKEM_LVL 768 #define MLKEM_ETA1 2 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_DU 10 +#define MLKEM_DV 4 +#define 
MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #elif MLKEM_K == 4 #define MLKEM_LVL 1024 #define MLKEM_ETA1 2 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 160 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 352 +#define MLKEM_DU 11 +#define MLKEM_DV 5 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D5 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D11 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.c index 5807879df..7483ebf6d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.c @@ -2,13 +2,15 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) + #include #include - #include "arith_backend.h" #include "cbd.h" #include "cbmc.h" -#include "debug/debug.h" +#include "debug.h" #include "fips202x4.h" #include "ntt.h" #include "poly.h" @@ -16,50 +18,46 @@ #include "symmetric.h" #include "verify.h" +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 || MLKEM_K == 3) MLKEM_NATIVE_INTERNAL_API -void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) +void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a) { - unsigned j; -#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) - for (j = 0; j < MLKEM_N / 8; j++) - __loop__(invariant(j >= 0 && j <= MLKEM_N / 8)) + unsigned i; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (i = 0; i < MLKEM_N / 8; i++) + __loop__(invariant(i <= MLKEM_N / 8)) { - unsigned k; - uint16_t t[8]; - for (k = 0; k < 8; k++) + unsigned j; + uint8_t t[8] = {0}; + for (j = 0; j < 8; j++) 
__loop__( - invariant(k >= 0 && k <= 8) - invariant(forall(r, 0, k, t[r] < (1u << 11)))) + invariant(i <= MLKEM_N / 8 && j <= 8) + invariant(array_bound(t, 0, j, 0, 16))) { - t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); } - /* - * Make all implicit truncation explicit. No data is being - * truncated for the LHS's since each t[i] is 11-bit in size. - */ - r[11 * j + 0] = (t[0] >> 0) & 0xFF; - r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); - r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); - r[11 * j + 3] = (t[2] >> 2) & 0xFF; - r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); - r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); - r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); - r[11 * j + 7] = (t[5] >> 1) & 0xFF; - r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); - r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); - r[11 * j + 10] = (t[7] >> 3); + r[i * 4] = t[0] | (t[1] << 4); + r[i * 4 + 1] = t[2] | (t[3] << 4); + r[i * 4 + 2] = t[4] | (t[5] << 4); + r[i * 4 + 3] = t[6] | (t[7] << 4); } +} -#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a) +{ + unsigned j; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (j = 0; j < MLKEM_N / 4; j++) - __loop__(invariant(j >= 0 && j <= MLKEM_N / 4)) + __loop__(invariant(j <= MLKEM_N / 4)) { unsigned k; uint16_t t[4]; for (k = 0; k < 4; k++) __loop__( - invariant(k >= 0 && k <= 4) + invariant(k <= 4) invariant(forall(r, 0, k, t[r] < (1u << 10)))) { t[k] = scalar_compress_d10(a->coeffs[4 * j + k]); @@ -75,51 +73,35 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) r[5 * j + 3] = (t[2] >> 4) | ((t[3] << 6) & 0xFF); r[5 * j + 4] = (t[3] >> 2); } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" -#endif } - MLKEM_NATIVE_INTERNAL_API -void poly_decompress_du(poly *r, const uint8_t 
a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]) { - unsigned j; -#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) - for (j = 0; j < MLKEM_N / 8; j++) + unsigned i; + for (i = 0; i < MLKEM_N / 2; i++) __loop__( - invariant(0 <= j && j <= MLKEM_N / 8) - invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q))) + invariant(i <= MLKEM_N / 2) + invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q))) { - int k; - uint16_t t[8]; - uint8_t const *base = &a[11 * j]; - t[0] = 0x7FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8)); - t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); - t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | - ((uint16_t)base[4] << 10)); - t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); - t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); - t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | - ((uint16_t)base[8] << 9)); - t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); - t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); - - for (k = 0; k < 8; k++) - __loop__( - invariant(0 <= k && k <= 8) - invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q))) - { - r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); - } + r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); + r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} + +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d10(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]) +{ + unsigned j; for (j = 0; j < MLKEM_N / 4; j++) __loop__( - invariant(0 <= j && j <= MLKEM_N / 4) + invariant(j <= MLKEM_N / 4) invariant(array_bound(r->coeffs, 0, 4 * j, 0, MLKEM_Q))) { - int k; + unsigned k; uint16_t t[4]; uint8_t const *base = &a[5 * j]; @@ -130,51 +112,33 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) for (k = 0; k < 4; 
k++) __loop__( - invariant(0 <= k && k <= 4) + invariant(k <= 4) invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, MLKEM_Q))) { r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]); } } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" -#endif + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 \ + || MLKEM_K == 3) */ +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 MLKEM_NATIVE_INTERNAL_API -void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) +void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); -#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) - for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) - { - unsigned j; - uint8_t t[8] = {0}; - for (j = 0; j < 8; j++) - __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) - invariant(array_bound(t, 0, j, 0, 16))) - { - t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); - } - - r[i * 4] = t[0] | (t[1] << 4); - r[i * 4 + 1] = t[2] | (t[3] << 4); - r[i * 4 + 2] = t[4] | (t[5] << 4); - r[i * 4 + 3] = t[6] | (t[7] << 4); - } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + __loop__(invariant(i <= MLKEM_N / 8)) { unsigned j; uint8_t t[8] = {0}; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i <= MLKEM_N / 8 && j <= 8) invariant(array_bound(t, 0, j, 0, 32))) { t[j] = scalar_compress_d5(a->coeffs[8 * i + j]); @@ -191,33 +155,57 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) r[i * 5 + 3] = 0xFF & ((t[4] >> 4) | (t[5] << 1) | (t[6] << 6)); r[i * 5 + 4] = 0xFF & ((t[6] >> 2) | (t[7] << 3)); } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 160}" 
-#endif } MLKEM_NATIVE_INTERNAL_API -void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a) { - unsigned i; -#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) - for (i = 0; i < MLKEM_N / 2; i++) - __loop__( - invariant(i >= 0 && i <= MLKEM_N / 2) - invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q))) + unsigned j; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (j = 0; j < MLKEM_N / 8; j++) + __loop__(invariant(j <= MLKEM_N / 8)) { - r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); - r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); + unsigned k; + uint16_t t[8]; + for (k = 0; k < 8; k++) + __loop__( + invariant(k <= 8) + invariant(forall(r, 0, k, t[r] < (1u << 11)))) + { + t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + } + + /* + * Make all implicit truncation explicit. No data is being + * truncated for the LHS's since each t[i] is 11-bit in size. 
+ */ + r[11 * j + 0] = (t[0] >> 0) & 0xFF; + r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); + r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); + r[11 * j + 3] = (t[2] >> 2) & 0xFF; + r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); + r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); + r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); + r[11 * j + 7] = (t[5] >> 1) & 0xFF; + r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); + r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); + r[11 * j + 10] = (t[7] >> 3); } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) +} + +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]) +{ + unsigned i; for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q))) { unsigned j; uint8_t t[8]; - const int offset = i * 5; + const unsigned offset = i * 5; /* * Explicitly truncate to avoid warning about * implicit truncation in CBMC and unwind loop for ease @@ -240,29 +228,62 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) /* and copy to the correct slice in r[] */ for (j = 0; j < 8; j++) __loop__( - invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8) + invariant(j <= 8 && i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q))) { r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]); } } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 160}" -#endif - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d11(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]) +{ + unsigned j; + for (j = 0; j < MLKEM_N / 8; j++) + __loop__( + invariant(j <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q))) + { + unsigned k; + uint16_t t[8]; + uint8_t const *base = &a[11 * j]; + t[0] = 0x7FF & ((base[0] >> 0) | 
((uint16_t)base[1] << 8)); + t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); + t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | + ((uint16_t)base[4] << 10)); + t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); + t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); + t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | + ((uint16_t)base[8] << 9)); + t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); + t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); + + for (k = 0; k < 8; k++) + __loop__( + invariant(k <= 8) + invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q))) + { + r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); + } + } + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD) || MLKEM_K == 4 */ + #if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES) MLKEM_NATIVE_INTERNAL_API void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); - + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_N / 2; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 2)) + __loop__(invariant(i <= MLKEM_N / 2)) { const uint16_t t0 = a->coeffs[2 * i]; const uint16_t t1 = a->coeffs[2 * i + 1]; @@ -290,7 +311,7 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) MLKEM_NATIVE_INTERNAL_API void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) { - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); poly_tobytes_native(r, a); } #endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */ @@ -302,7 +323,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) unsigned i; for (i = 0; i < MLKEM_N / 2; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 2) + invariant(i <= MLKEM_N / 2) invariant(array_bound(r->coeffs, 0, 2 * i, 0, UINT12_LIMIT))) { const uint8_t t0 = a[3 * i + 0]; @@ -313,7 +334,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) } /* Note that the coefficients are not 
canonical */ - POLY_UBOUND(r, 4096); + debug_assert_bound(r, MLKEM_N, 0, UINT12_LIMIT); } #else /* MLKEM_USE_NATIVE_POLY_FROMBYTES */ MLKEM_NATIVE_INTERNAL_API @@ -333,13 +354,13 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q))) { unsigned j; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i < MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i < MLKEM_N / 8 && j <= 8) invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q))) { /* Prevent the compiler from recognizing this as a bit selection */ @@ -347,23 +368,23 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) r->coeffs[8 * i + j] = ct_sel_int16(HALF_Q, 0, msg[i] & mask); } } - POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output"); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + __loop__(invariant(i <= MLKEM_N / 8)) { unsigned j; msg[i] = 0; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)) + invariant(i <= MLKEM_N / 8 && j <= 8)) { uint32_t t = scalar_compress_d1(a->coeffs[8 * i + j]); msg[i] |= t << j; @@ -371,104 +392,17 @@ void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) } } -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, - uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) -{ - ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN 
uint8_t extkey0[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1]; - memcpy(extkey0, seed, MLKEM_SYMBYTES); - memcpy(extkey1, seed, MLKEM_SYMBYTES); - memcpy(extkey2, seed, MLKEM_SYMBYTES); - memcpy(extkey3, seed, MLKEM_SYMBYTES); - extkey0[MLKEM_SYMBYTES] = nonce0; - extkey1[MLKEM_SYMBYTES] = nonce1; - extkey2[MLKEM_SYMBYTES] = nonce2; - extkey3[MLKEM_SYMBYTES] = nonce3; - prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3); - poly_cbd_eta1(r0, buf0); - poly_cbd_eta1(r1, buf1); - poly_cbd_eta1(r2, buf2); - poly_cbd_eta1(r3, buf3); - - POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0"); - POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 1"); - POLY_BOUND_MSG(r2, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 2"); - POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3"); -} - -#if MLKEM_K == 2 || MLKEM_K == 4 -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce) -{ - ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; - ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1]; - - memcpy(extkey, seed, MLKEM_SYMBYTES); - extkey[MLKEM_SYMBYTES] = nonce; - prf_eta2(buf, extkey); - - poly_cbd_eta2(r, buf); - - POLY_BOUND_MSG(r, MLKEM_ETA1 + 1, "poly_getnoise_eta2 output"); -} -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ - -#if MLKEM_K == 2 -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, - uint8_t nonce3) -{ - ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; - ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; - memcpy(extkey[0], seed, MLKEM_SYMBYTES); - memcpy(extkey[1], seed, MLKEM_SYMBYTES); - memcpy(extkey[2], seed, MLKEM_SYMBYTES); - 
memcpy(extkey[3], seed, MLKEM_SYMBYTES); - extkey[0][MLKEM_SYMBYTES] = nonce0; - extkey[1][MLKEM_SYMBYTES] = nonce1; - extkey[2][MLKEM_SYMBYTES] = nonce2; - extkey[3][MLKEM_SYMBYTES] = nonce3; - - prf_eta1(buf1[0], extkey[0]); - prf_eta1(buf1[1], extkey[1]); - prf_eta2(buf2[0], extkey[2]); - prf_eta2(buf2[1], extkey[3]); - - poly_cbd_eta1(r0, buf1[0]); - poly_cbd_eta1(r1, buf1[1]); - poly_cbd_eta2(r2, buf2[0]); - poly_cbd_eta2(r3, buf2[1]); - - POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 0"); - POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 1"); - POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2"); - POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 3"); -} -#endif /* MLKEM_K == 2 */ - MLKEM_NATIVE_INTERNAL_API void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, const poly_mulcache *b_cache) { unsigned i; - POLY_BOUND(b_cache, 4096); + debug_assert_bound(a, MLKEM_N, 0, UINT12_LIMIT); for (i = 0; i < MLKEM_N / 4; i++) __loop__( assigns(i, object_whole(r)) - invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(i <= MLKEM_N / 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i, 2 * MLKEM_Q))) { basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i], @@ -476,6 +410,8 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, basemul_cached(&r->coeffs[4 * i + 2], &a->coeffs[4 * i + 2], &b->coeffs[4 * i + 2], b_cache->coeffs[2 * i + 1]); } + + debug_assert_abs_bound(r, MLKEM_N, 2 * MLKEM_Q); } #if !defined(MLKEM_USE_NATIVE_POLY_TOMONT) @@ -486,20 +422,20 @@ void poly_tomont(poly *r) const int16_t f = (1ULL << 32) % MLKEM_Q; /* 1353 */ for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) - invariant(array_abs_bound(r->coeffs ,0, i, MLKEM_Q))) + invariant(i <= MLKEM_N) + invariant(array_abs_bound(r->coeffs, 0, i, MLKEM_Q))) { r->coeffs[i] = fqmul(r->coeffs[i], f); } - POLY_BOUND(r, MLKEM_Q); + 
debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_TOMONT */ MLKEM_NATIVE_INTERNAL_API void poly_tomont(poly *r) { poly_tomont_native(r); - POLY_BOUND(r, MLKEM_Q); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } #endif /* MLKEM_USE_NATIVE_POLY_TOMONT */ @@ -510,7 +446,7 @@ void poly_reduce(poly *r) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(array_bound(r->coeffs, 0, i, 0, MLKEM_Q))) { /* Barrett reduction, giving signed canonical representative */ @@ -519,14 +455,14 @@ void poly_reduce(poly *r) r->coeffs[i] = scalar_signed_to_unsigned_q(t); } - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_REDUCE */ MLKEM_NATIVE_INTERNAL_API void poly_reduce(poly *r) { poly_reduce_native(r); - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } #endif /* MLKEM_USE_NATIVE_POLY_REDUCE */ @@ -536,7 +472,7 @@ void poly_add(poly *r, const poly *b) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1]))) { @@ -550,7 +486,7 @@ void poly_sub(poly *r, const poly *b) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1]))) { @@ -564,20 +500,36 @@ void poly_mulcache_compute(poly_mulcache *x, const poly *a) { unsigned i; for (i = 0; i < MLKEM_N / 4; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 4)) + __loop__( + invariant(i <= MLKEM_N / 4) + invariant(array_abs_bound(x->coeffs, 0, 2 * i, MLKEM_Q))) { x->coeffs[2 * i + 0] = fqmul(a->coeffs[4 * i + 1], zetas[64 + 
i]); x->coeffs[2 * i + 1] = fqmul(a->coeffs[4 * i + 3], -zetas[64 + i]); } - POLY_BOUND(x, MLKEM_Q); + + /* + * This bound is true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus omitted + * from the spec to not unnecessarily constrain native + * implementations, but checked here nonetheless. + */ + debug_assert_abs_bound(x, MLKEM_N / 2, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ MLKEM_NATIVE_INTERNAL_API void poly_mulcache_compute(poly_mulcache *x, const poly *a) { poly_mulcache_compute_native(x, a); - /* Omitting POLY_BOUND(x, MLKEM_Q) since native implementations may + /* Omitting bounds assertion since native implementations may * decide not to use a mulcache. Note that the C backend implementation * of poly_basemul_montgomery_cached() does still include the check. */ } #endif /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_poly MLKEM_NAMESPACE_K(empty_cu_poly) +int empty_cu_poly; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.h index 1e8c109c6..6a14c785d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.h @@ -307,112 +307,164 @@ __contract__( ************************************************************/ static INLINE uint16_t scalar_signed_to_unsigned_q(int16_t c) __contract__( - requires(c >= -(MLKEM_Q - 1) && c <= (MLKEM_Q - 1)) - ensures(return_value >= 0 && return_value <= (MLKEM_Q - 1)) + requires(c > -MLKEM_Q && c < MLKEM_Q) + ensures(return_value >= 0 && return_value < MLKEM_Q) ensures(return_value == (int32_t)c + (((int32_t)c < 0) * MLKEM_Q))) { + debug_assert_abs_bound(&c, 1, MLKEM_Q); + /* Add Q if c is negative, but in constant time */ c = ct_sel_int16(c + MLKEM_Q, c, ct_cmask_neg_i16(c)); - cassert(c >= 0, 
"scalar_signed_to_unsigned_q result lower bound"); - cassert(c < MLKEM_Q, "scalar_signed_to_unsigned_q result upper bound"); - /* and therefore cast to uint16_t is safe. */ + debug_assert_bound(&c, 1, 0, MLKEM_Q); return (uint16_t)c; } -#define poly_compress_du MLKEM_NAMESPACE(poly_compress_du) +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || \ + (MLKEM_K == 2 || MLKEM_K == 3) +#define poly_compress_d4 MLKEM_NAMESPACE(poly_compress_d4) /************************************************* - * Name: poly_compress_du + * Name: poly_compress_d4 * - * Description: Compression (du bits) and subsequent serialization of a - *polynomial + * Description: Compression (4 bits) and subsequent serialization of a + * polynomial * * Arguments: - uint8_t *r: pointer to output byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes) * - const poly *a: pointer to input polynomial * Coefficients must be unsigned canonical, * i.e. in [0,1,..,MLKEM_Q-1]. **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) -__contract__( - requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) - requires(memory_no_alias(a, sizeof(poly))) - requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) -); +void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a); + +#define poly_compress_d10 MLKEM_NAMESPACE(poly_compress_d10) +/************************************************* + * Name: poly_compress_d10 + * + * Description: Compression (10 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a); -#define poly_decompress_du MLKEM_NAMESPACE(poly_decompress_du) +#define poly_decompress_d4 MLKEM_NAMESPACE(poly_decompress_d4) /************************************************* - * Name: poly_decompress_du + * Name: poly_decompress_d4 * - * Description: De-serialization and subsequent decompression (du bits) of a - *polynomial; approximate inverse of poly_compress_du + * Description: De-serialization and subsequent decompression (4 bits) of a + * polynomial; approximate inverse of poly_compress_d4 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *a: pointer to input byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES bytes) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes) * * Upon return, the coefficients of the output polynomial are unsigned-canonical * (non-negative and smaller than MLKEM_Q). * **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) -__contract__( - requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) - requires(memory_no_alias(r, sizeof(poly))) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) -); +void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]); -#define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv) +#define poly_decompress_d10 MLKEM_NAMESPACE(poly_decompress_d10) /************************************************* - * Name: poly_compress_dv + * Name: poly_decompress_d10 + * + * Description: De-serialization and subsequent decompression (10 bits) of a + * polynomial; approximate inverse of poly_compress_d10 * - * Description: Compression (dv bits) and subsequent serialization of a - *polynomial + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t 
*a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d10(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]); +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 \ + || MLKEM_K == 3) */ + +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 +#define poly_compress_d5 MLKEM_NAMESPACE(poly_compress_d5) +/************************************************* + * Name: poly_compress_d5 + * + * Description: Compression (5 bits) and subsequent serialization of a + * polynomial * * Arguments: - uint8_t *r: pointer to output byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES_DV) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes) * - const poly *a: pointer to input polynomial * Coefficients must be unsigned canonical, * i.e. in [0,1,..,MLKEM_Q-1]. 
**************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) -__contract__( - requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) - requires(memory_no_alias(a, sizeof(poly))) - requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - assigns(object_whole(r)) -); +void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a); -#define poly_decompress_dv MLKEM_NAMESPACE(poly_decompress_dv) +#define poly_compress_d11 MLKEM_NAMESPACE(poly_compress_d11) /************************************************* - * Name: poly_decompress_dv + * Name: poly_compress_d11 + * + * Description: Compression (11 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a); + +#define poly_decompress_d5 MLKEM_NAMESPACE(poly_decompress_d5) +/************************************************* + * Name: poly_decompress_d5 * * Description: De-serialization and subsequent decompression (dv bits) of a - *polynomial; approximate inverse of poly_compress + * polynomial; approximate inverse of poly_compress * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *a: pointer to input byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES_DV - *bytes) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes) * * Upon return, the coefficients of the output polynomial are unsigned-canonical * (non-negative and smaller than MLKEM_Q). 
* **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) -__contract__( - requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) - requires(memory_no_alias(r, sizeof(poly))) - assigns(object_whole(r)) - ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) -); +void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]); + +#define poly_decompress_d11 MLKEM_NAMESPACE(poly_decompress_d11) +/************************************************* + * Name: poly_decompress_d11 + * + * Description: De-serialization and subsequent decompression (11 bits) of a + * polynomial; approximate inverse of poly_compress_d11 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d11(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]); +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 \ + */ #define poly_tobytes MLKEM_NAMESPACE(poly_tobytes) /************************************************* @@ -500,144 +552,6 @@ __contract__( assigns(object_whole(msg)) ); -#define poly_getnoise_eta1_4x MLKEM_NAMESPACE(poly_getnoise_eta1_4x) -/************************************************* - * Name: poly_getnoise_eta1_4x - * - * Description: Batch sample four polynomials deterministically from a seed - * and nonces, with output polynomials close to centered binomial distribution - * with parameter MLKEM_ETA1. 
- * - * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce{0,1,2,3}: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, - uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) -/* Depending on MLKEM_K, the pointers passed to this function belong - to the same objects, so we cannot use memory_no_alias for r0-r3. - - NOTE: Somehow it is important to use memory_no_alias() first in the - conjunctions defining each case. -*/ -#if MLKEM_K == 2 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ - (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && - r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#elif MLKEM_K == 4 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case B: r0, r1, r2, r3 consecutive */ - (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 
1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#elif MLKEM_K == 3 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case C: r0, r1, r2 consecutive */ - (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && - r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#endif /* MLKEM_K */ - -#if MLKEM_ETA1 == MLKEM_ETA2 -/* - * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 - * where MLKEM_ETA2 = MLKEM_ETA1 = 2. - * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. - */ -#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x -#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ - -#if MLKEM_K == 2 || MLKEM_K == 4 -#define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2) -/************************************************* - * Name: poly_getnoise_eta2 - * - * Description: Sample a polynomial deterministically from a seed and a nonce, - * with output polynomial close to centered binomial distribution - * with parameter MLKEM_ETA2 - * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - assigns(object_whole(r)) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, 
MLKEM_ETA2 + 1)) -); -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ - -#if MLKEM_K == 2 -#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x) -/************************************************* - * Name: poly_getnoise_eta1122_4x - * - * Description: Batch sample four polynomials deterministically from a seed - * and a nonces, with output polynomials close to centered binomial - * distribution with parameter MLKEM_ETA1 and MLKEM_ETA2 - * - * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce{0,1,2,3}: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, - uint8_t nonce3) -__contract__( - requires( /* r0, r1 consecutive, r2, r3 consecutive */ - (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && - r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) - ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)); -); -#endif /* MLKEM_K == 2 */ - #define poly_basemul_montgomery_cached \ MLKEM_NAMESPACE(poly_basemul_montgomery_cached) /************************************************* @@ -649,8 +563,7 @@ __contract__( * Bounds: * - a is assumed to be coefficient-wise < q in absolute value. * - * The result is coefficient-wise bound by 3/2 q in absolute - * value. + * The result is coefficient-wise bound by 2*q in absolute value. 
* * Arguments: - poly *r: pointer to output polynomial * - const poly *a: pointer to first input polynomial @@ -802,4 +715,4 @@ __contract__( assigns(object_whole(r)) ); -#endif +#endif /* POLY_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.c index 7d2016773..50ea1c34a 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.c @@ -4,18 +4,29 @@ */ #include "polyvec.h" #include +#include #include "arith_backend.h" +#include "cbd.h" #include "ntt.h" #include "poly.h" +#include "symmetric.h" -#include "debug/debug.h" +#include "debug.h" + +/* Static namespacing + * This is to facilitate building multiple instances + * of mlkem-native (e.g. with varying security levels) + * within a single compilation unit. */ +#define poly_cbd_eta1 MLKEM_NAMESPACE_K(poly_cbd_eta1) +#define poly_cbd_eta2 MLKEM_NAMESPACE_K(poly_cbd_eta2) +/* End of static namespacing */ MLKEM_NATIVE_INTERNAL_API void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU], const polyvec *a) { unsigned i; - POLYVEC_UBOUND(a, MLKEM_Q); + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_K; i++) { @@ -33,13 +44,15 @@ void polyvec_decompress_du(polyvec *r, poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU); } - POLYVEC_UBOUND(r, MLKEM_Q); + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a) { unsigned i; + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q); + for (i = 0; i < MLKEM_K; i++) { poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]); @@ -54,6 +67,8 @@ void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES]) { poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES); } + + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); } MLKEM_NATIVE_INTERNAL_API @@ -64,6 +79,8 @@ void 
polyvec_ntt(polyvec *r) { poly_ntt(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, NTT_BOUND); } MLKEM_NATIVE_INTERNAL_API @@ -74,6 +91,8 @@ void polyvec_invntt_tomont(polyvec *r) { poly_invntt_tomont(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, INVNTT_BOUND); } #if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) @@ -84,10 +103,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, { unsigned i; poly t; - - POLYVEC_BOUND(a, 4096); - POLYVEC_BOUND(b, NTT_BOUND); - POLYVEC_BOUND(b_cache, MLKEM_Q); + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); poly_basemul_montgomery_cached(r, &a->vec[0], &b->vec[0], &b_cache->vec[0]); for (i = 1; i < MLKEM_K; i++) @@ -95,18 +111,15 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, poly_basemul_montgomery_cached(&t, &a->vec[i], &b->vec[i], &b_cache->vec[i]); poly_add(r, &t); - /* abs bounds: < (i+1) * 3/2 * q */ } /* - * Those bounds are true for the C implementation, but not needed - * in the higher level bounds reasoning. It is thus best to omit - * them from the spec to not unnecessarily constraint native implementations. + * This bound is true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus omitted + * from the spec to not unnecessarily constrain native + * implementations, but checked here nonetheless. 
*/ - cassert(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_K * 2 * MLKEM_Q), - "polyvec_basemul_acc_montgomery_cached output bounds"); - /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */ - POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_K * 2 * MLKEM_Q); } #else /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */ MLKEM_NATIVE_INTERNAL_API @@ -114,9 +127,8 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, const polyvec *b, const polyvec_mulcache *b_cache) { - POLYVEC_BOUND(a, 4096); - POLYVEC_BOUND(b, NTT_BOUND); - /* Omitting POLYVEC_BOUND(b_cache, MLKEM_Q) since native implementations may + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); + /* Omitting bounds assertion for cache since native implementations may * decide not to use a mulcache. Note that the C backend implementation * of poly_basemul_montgomery_cached() does still include the check. */ polyvec_basemul_acc_montgomery_cached_native(r, a, b, b_cache); @@ -149,6 +161,8 @@ void polyvec_reduce(polyvec *r) { poly_reduce(&r->vec[i]); } + + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API @@ -169,4 +183,148 @@ void polyvec_tomont(polyvec *r) { poly_tomont(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, MLKEM_Q); +} + + +/************************************************* + * Name: poly_cbd_eta1 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA1. 
+ * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static INLINE void poly_cbd_eta1(poly *r, + const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)) +) +{ +#if MLKEM_ETA1 == 2 + poly_cbd2(r, buf); +#elif MLKEM_ETA1 == 3 + poly_cbd3(r, buf); +#else +#error "Invalid value of MLKEM_ETA1" +#endif +} + +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +{ + ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t extkey0[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1]; + memcpy(extkey0, seed, MLKEM_SYMBYTES); + memcpy(extkey1, seed, MLKEM_SYMBYTES); + memcpy(extkey2, seed, MLKEM_SYMBYTES); + memcpy(extkey3, seed, MLKEM_SYMBYTES); + extkey0[MLKEM_SYMBYTES] = nonce0; + extkey1[MLKEM_SYMBYTES] = nonce1; + extkey2[MLKEM_SYMBYTES] = nonce2; + extkey3[MLKEM_SYMBYTES] = nonce3; + prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3); + poly_cbd_eta1(r0, buf0); + poly_cbd_eta1(r1, buf1); + poly_cbd_eta1(r2, buf2); + poly_cbd_eta1(r3, buf3); + + debug_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA1 + 1); +} + +#if MLKEM_K == 2 || MLKEM_K == 4 
+/************************************************* + * Name: poly_cbd_eta2 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA2. + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static INLINE void poly_cbd_eta2(poly *r, + const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1))) +{ +#if MLKEM_ETA2 == 2 + poly_cbd2(r, buf); +#else +#error "Invalid value of MLKEM_ETA2" +#endif +} + +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +{ + ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1]; + + memcpy(extkey, seed, MLKEM_SYMBYTES); + extkey[MLKEM_SYMBYTES] = nonce; + prf_eta2(buf, extkey); + + poly_cbd_eta2(r, buf); + + debug_assert_abs_bound(r, MLKEM_N, MLKEM_ETA1 + 1); +} +#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ + + +#if MLKEM_K == 2 +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +{ + ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; + memcpy(extkey[0], seed, MLKEM_SYMBYTES); + memcpy(extkey[1], seed, MLKEM_SYMBYTES); + memcpy(extkey[2], seed, MLKEM_SYMBYTES); + memcpy(extkey[3], seed, MLKEM_SYMBYTES); + extkey[0][MLKEM_SYMBYTES] = nonce0; + extkey[1][MLKEM_SYMBYTES] = nonce1; + extkey[2][MLKEM_SYMBYTES] = nonce2; + 
extkey[3][MLKEM_SYMBYTES] = nonce3; + + prf_eta1(buf1[0], extkey[0]); + prf_eta1(buf1[1], extkey[1]); + prf_eta2(buf2[0], extkey[2]); + prf_eta2(buf2[1], extkey[3]); + + poly_cbd_eta1(r0, buf1[0]); + poly_cbd_eta1(r1, buf1[1]); + poly_cbd_eta2(r2, buf2[0]); + poly_cbd_eta2(r3, buf2[1]); + + debug_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA2 + 1); + debug_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA2 + 1); } +#endif /* MLKEM_K == 2 */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.h index 138724150..8be8579e0 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.h @@ -9,19 +9,144 @@ #include "common.h" #include "poly.h" -#define polyvec MLKEM_NAMESPACE(polyvec) +#define polyvec MLKEM_NAMESPACE_K(polyvec) typedef struct { poly vec[MLKEM_K]; } ALIGN polyvec; -#define polyvec_mulcache MLKEM_NAMESPACE(polyvec_mulcache) +#define polyvec_mulcache MLKEM_NAMESPACE_K(polyvec_mulcache) typedef struct { poly_mulcache vec[MLKEM_K]; } polyvec_mulcache; -#define polyvec_compress_du MLKEM_NAMESPACE(polyvec_compress_du) +#define poly_compress_du MLKEM_NAMESPACE_K(poly_compress_du) +/************************************************* + * Name: poly_compress_du + * + * Description: Compression (du bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +static INLINE void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], + const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU))) +{ +#if MLKEM_DU == 10 + poly_compress_d10(r, a); +#elif MLKEM_DU == 11 + poly_compress_d11(r, a); +#else +#error "Invalid value of MLKEM_DU" +#endif +} + +#define poly_decompress_du MLKEM_NAMESPACE_K(poly_decompress_du) +/************************************************* + * Name: poly_decompress_du + * + * Description: De-serialization and subsequent decompression (du bits) of a + * polynomial; approximate inverse of poly_compress_du + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). 
+ * + **************************************************/ +static INLINE void poly_decompress_du( + poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +{ +#if MLKEM_DU == 10 + poly_decompress_d10(r, a); +#elif MLKEM_DU == 11 + poly_decompress_d11(r, a); +#else +#error "Invalid value of MLKEM_DU" +#endif +} + +#define poly_compress_dv MLKEM_NAMESPACE_K(poly_compress_dv) +/************************************************* + * Name: poly_compress_dv + * + * Description: Compression (dv bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +static INLINE void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], + const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(object_whole(r))) +{ +#if MLKEM_DV == 4 + poly_compress_d4(r, a); +#elif MLKEM_DV == 5 + poly_compress_d5(r, a); +#else +#error "Invalid value of MLKEM_DV" +#endif +} + + +#define poly_decompress_dv MLKEM_NAMESPACE_K(poly_decompress_dv) +/************************************************* + * Name: poly_decompress_dv + * + * Description: De-serialization and subsequent decompression (dv bits) of a + * polynomial; approximate inverse of poly_compress_dv + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). 
+ * + **************************************************/ +static INLINE void poly_decompress_dv( + poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(object_whole(r)) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +{ +#if MLKEM_DV == 4 + poly_decompress_d4(r, a); +#elif MLKEM_DV == 5 + poly_decompress_d5(r, a); +#else +#error "Invalid value of MLKEM_DV" +#endif +} + +#define polyvec_compress_du MLKEM_NAMESPACE_K(polyvec_compress_du) /************************************************* * Name: polyvec_compress_du * @@ -44,7 +169,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_decompress_du MLKEM_NAMESPACE(polyvec_decompress_du) +#define polyvec_decompress_du MLKEM_NAMESPACE_K(polyvec_decompress_du) /************************************************* * Name: polyvec_decompress_du * @@ -67,7 +192,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) ); -#define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes) +#define polyvec_tobytes MLKEM_NAMESPACE_K(polyvec_tobytes) /************************************************* * Name: polyvec_tobytes * @@ -88,7 +213,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_frombytes MLKEM_NAMESPACE(polyvec_frombytes) +#define polyvec_frombytes MLKEM_NAMESPACE_K(polyvec_frombytes) /************************************************* * Name: polyvec_frombytes * @@ -110,7 +235,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, UINT12_LIMIT))) ); -#define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt) +#define polyvec_ntt MLKEM_NAMESPACE_K(polyvec_ntt) /************************************************* * Name: polyvec_ntt * @@ -136,7 +261,7 @@ __contract__( array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, NTT_BOUND))) ); -#define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont) +#define polyvec_invntt_tomont 
MLKEM_NAMESPACE_K(polyvec_invntt_tomont) /************************************************* * Name: polyvec_invntt_tomont * @@ -162,7 +287,7 @@ __contract__( ); #define polyvec_basemul_acc_montgomery \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery) /************************************************* * Name: polyvec_basemul_acc_montgomery * @@ -186,7 +311,7 @@ __contract__( #define polyvec_basemul_acc_montgomery_cached \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached) /************************************************* * Name: polyvec_basemul_acc_montgomery_cached * @@ -194,7 +319,7 @@ __contract__( * using mulcache for second operand. * * Bounds: - * - a is assumed to be coefficient-wise < 4096 in absolute value. + * - Every coefficient of a is assumed to be in [0..4095] * - No bounds guarantees for the coefficients in the result. * * Arguments: - poly *r: pointer to output polynomial @@ -218,7 +343,7 @@ __contract__( assigns(memory_slice(r, sizeof(poly))) ); -#define polyvec_mulcache_compute MLKEM_NAMESPACE(polyvec_mulcache_compute) +#define polyvec_mulcache_compute MLKEM_NAMESPACE_K(polyvec_mulcache_compute) /************************************************************ * Name: polyvec_mulcache_compute * @@ -252,7 +377,7 @@ __contract__( assigns(object_whole(x)) ); -#define polyvec_reduce MLKEM_NAMESPACE(polyvec_reduce) +#define polyvec_reduce MLKEM_NAMESPACE_K(polyvec_reduce) /************************************************* * Name: polyvec_reduce * @@ -278,7 +403,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) ); -#define polyvec_add MLKEM_NAMESPACE(polyvec_add) +#define polyvec_add MLKEM_NAMESPACE_K(polyvec_add) /************************************************* * Name: polyvec_add * @@ -309,7 +434,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_tomont MLKEM_NAMESPACE(polyvec_tomont) +#define 
polyvec_tomont MLKEM_NAMESPACE_K(polyvec_tomont) /************************************************* * Name: polyvec_tomont * @@ -329,4 +454,142 @@ __contract__( array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, MLKEM_Q))) ); +#define poly_getnoise_eta1_4x MLKEM_NAMESPACE_K(poly_getnoise_eta1_4x) +/************************************************* + * Name: poly_getnoise_eta1_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial distribution + * with parameter MLKEM_ETA1. + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +/* Depending on MLKEM_K, the pointers passed to this function belong + to the same objects, so we cannot use memory_no_alias for r0-r3. + + NOTE: Somehow it is important to use memory_no_alias() first in the + conjunctions defining each case. 
+*/ +#if MLKEM_K == 2 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#elif MLKEM_K == 4 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case B: r0, r1, r2, r3 consecutive */ + (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#elif MLKEM_K == 3 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case C: r0, r1, r2 consecutive */ + (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && + r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) 
+ && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#endif /* MLKEM_K */ + +#if MLKEM_ETA1 == MLKEM_ETA2 +/* + * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 + * where MLKEM_ETA2 = MLKEM_ETA1 = 2. + * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. + */ +#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x +#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ + +#if MLKEM_K == 2 || MLKEM_K == 4 +#define poly_getnoise_eta2 MLKEM_NAMESPACE_K(poly_getnoise_eta2) +/************************************************* + * Name: poly_getnoise_eta2 + * + * Description: Sample a polynomial deterministically from a seed and a nonce, + * with output polynomial close to centered binomial distribution + * with parameter MLKEM_ETA2 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce: one-byte input nonce + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r)) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)) +); +#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ + +#if MLKEM_K == 2 +#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE_K(poly_getnoise_eta1122_4x) +/************************************************* + * Name: poly_getnoise_eta1122_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial + * distribution with parameter MLKEM_ETA1 and MLKEM_ETA2 + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + 
**************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +__contract__( + requires( /* r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) + ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)); +); +#endif /* MLKEM_K == 2 */ + #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/reduce.h index 1f502167e..b432a4201 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/reduce.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/reduce.h @@ -8,7 +8,7 @@ #include #include "cbmc.h" #include "common.h" -#include "debug/debug.h" +#include "debug.h" /* Static namespacing * This is to facilitate building multiple instances @@ -109,13 +109,13 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a) **************************************************/ static INLINE int16_t montgomery_reduce(int32_t a) __contract__( - requires(a > -(2 * 4096 * 32768)) - requires(a < (2 * 4096 * 32768)) + requires(a > -(2 * UINT12_LIMIT * 32768)) + requires(a < (2 * UINT12_LIMIT * 32768)) ensures(return_value > -2 * MLKEM_Q && return_value < 2 * MLKEM_Q) ) { int16_t res; - SCALAR_BOUND(a, 2 * UINT12_LIMIT * 32768, "montgomery_reduce input"); + debug_assert_abs_bound(&a, 1, 2 * UINT12_LIMIT * 32768); res = montgomery_reduce_generic(a); /* Bounds: @@ -124,7 +124,7 @@ __contract__( * <= 
UINT12_LIMIT + (MLKEM_Q + 1) / 2 * < 2 * MLKEM_Q */ - SCALAR_BOUND(res, 2 * MLKEM_Q, "montgomery_reduce output"); + debug_assert_abs_bound(&res, 1, 2 * MLKEM_Q); return res; } @@ -150,7 +150,7 @@ __contract__( ) { int16_t res; - SCALAR_BOUND(b, HALF_Q, "fqmul input"); + debug_assert_abs_bound(&b, 1, HALF_Q); res = montgomery_reduce((int32_t)a * (int32_t)b); /* Bounds: @@ -160,7 +160,7 @@ __contract__( * < MLKEM_Q */ - SCALAR_BOUND(res, MLKEM_Q, "fqmul output"); + debug_assert_abs_bound(&res, 1, MLKEM_Q); return res; } @@ -200,7 +200,10 @@ __contract__( * t is in -10 .. +10, so we need 32-bit math to * evaluate t * MLKEM_Q and the subsequent subtraction */ - return (int16_t)(a - t * MLKEM_Q); + int16_t res = (int16_t)(a - t * MLKEM_Q); + + debug_assert_abs_bound(&res, 1, HALF_Q); + return res; } #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.c index 918986e9b..cbbe4407f 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.c @@ -2,46 +2,24 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) -#include "rej_uniform.h" #include "arith_backend.h" +#include "debug.h" +#include "fips202.h" +#include "fips202x4.h" +#include "rej_uniform.h" +#include "symmetric.h" /* Static namespacing * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. 
*/ +#define rej_uniform MLKEM_NAMESPACE(rej_uniform) #define rej_uniform_scalar MLKEM_NAMESPACE(rej_uniform_scalar) /* End of static namespacing */ -/************************************************* - * Name: rej_uniform_scalar - * - * Description: Run rejection sampling on uniform random bytes to generate - * uniform random integers mod q - * - * Arguments: - int16_t *r: pointer to output buffer - * - unsigned int target: requested number of 16-bit integers - * (uniform mod q). - * Must be <= 4096. - * - unsigned int offset: number of 16-bit integers that have - * already been sampled. - * Must be <= target. - * - const uint8_t *buf: pointer to input buffer - * (assumed to be uniform random bytes) - * - unsigned int buflen: length of input buffer in bytes - * Must be <= 4096. - * Must be a multiple of 3. - * - * Note: Strictly speaking, only a few values of buflen near UINT_MAX need - * excluding. The limit of 4096 is somewhat arbitary but sufficient for all - * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. - * - * Returns the new offset of sampled 16-bit integers, at most target, - * and at least the initial offset. - * If the new offset is strictly less than len, all of the input buffers - * is guaranteed to have been consumed. If it is equal to len, no information - * is provided on how many bytes of the input buffer have been consumed. 
- **************************************************/ static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target, unsigned int offset, const uint8_t *buf, unsigned int buflen) @@ -58,6 +36,8 @@ __contract__( unsigned int ctr, pos; uint16_t val0, val1; + debug_assert_bound(r, offset, 0, MLKEM_Q); + ctr = offset; pos = 0; /* pos + 3 cannot overflow due to the assumption buflen <= 4096 */ @@ -79,28 +59,183 @@ __contract__( r[ctr++] = val1; } } + + debug_assert_bound(r, ctr, 0, MLKEM_Q); return ctr; } #if !defined(MLKEM_USE_NATIVE_REJ_UNIFORM) -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +/************************************************* + * Name: rej_uniform + * + * Description: Run rejection sampling on uniform random bytes to generate + * uniform random integers mod q + * + * Arguments: - int16_t *r: pointer to output buffer + * - unsigned int target: requested number of 16-bit integers + * (uniform mod q). + * Must be <= 4096. + * - unsigned int offset: number of 16-bit integers that have + * already been sampled. + * Must be <= target. + * - const uint8_t *buf: pointer to input buffer + * (assumed to be uniform random bytes) + * - unsigned int buflen: length of input buffer in bytes + * Must be <= 4096. + * Must be a multiple of 3. + * + * Note: Strictly speaking, only a few values of buflen near UINT_MAX need + * excluding. The limit of 4096 is somewhat arbitrary but sufficient for all + * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. + * + * Returns the new offset of sampled 16-bit integers, at most target, + * and at least the initial offset. + * If the new offset is strictly less than target, all of the input buffer + * is guaranteed to have been consumed. If it is equal to target, no information + * is provided on how many bytes of the input buffer have been consumed. 
+ **************************************************/ + +/* + * NOTE: The signature differs from the Kyber reference implementation + * in that it adds the offset and always expects the base of the target + * buffer. This avoids shifting the buffer base in the caller, which appears + * tricky to reason about. + */ +static unsigned int rej_uniform(int16_t *r, unsigned int target, + unsigned int offset, const uint8_t *buf, + unsigned int buflen) +__contract__( + requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) + requires(memory_no_alias(r, sizeof(int16_t) * target)) + requires(memory_no_alias(buf, buflen)) + requires(offset > 0 ==> array_bound(r, 0, offset, 0, MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * target)) + ensures(offset <= return_value && return_value <= target) + ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, MLKEM_Q)) +) { return rej_uniform_scalar(r, target, offset, buf, buflen); } #else /* MLKEM_USE_NATIVE_REJ_UNIFORM */ - -MLKEM_NATIVE_INTERNAL_API -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +static unsigned int rej_uniform(int16_t *r, unsigned int target, + unsigned int offset, const uint8_t *buf, + unsigned int buflen) { int ret; /* Sample from large buffer with full lane as much as possible. 
*/ ret = rej_uniform_native(r + offset, target - offset, buf, buflen); if (ret != -1) - return offset + (unsigned)ret; + { + unsigned res = offset + (unsigned)ret; + debug_assert_bound(r, res, 0, MLKEM_Q); + return res; + } return rej_uniform_scalar(r, target, offset, buf, buflen); } #endif /* MLKEM_USE_NATIVE_REJ_UNIFORM */ + +#ifndef MLKEM_GEN_MATRIX_NBLOCKS +#define MLKEM_GEN_MATRIX_NBLOCKS \ + ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + XOF_RATE) / XOF_RATE) +#endif + +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform_x4(poly *vec, uint8_t *seed[4]) +{ + /* Temporary buffers for XOF output before rejection sampling */ + uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + + /* Tracks the number of coefficients we have already sampled */ + unsigned int ctr[KECCAK_WAY]; + xof_x4_ctx statex; + unsigned int buflen; + + shake128x4_inc_init(&statex); + + /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */ + xof_x4_absorb(&statex, seed[0], seed[1], seed[2], seed[3], + MLKEM_SYMBYTES + 2); + + /* + * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + * This should generate the matrix entries with high probability. + */ + xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, + &statex); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); + + /* + * So long as not all matrix entries have been generated, squeeze + * one more block a time until we're done. 
+ */ + buflen = XOF_RATE; + while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || + ctr[3] < MLKEM_N) + __loop__( + assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), + object_whole(buf1), object_whole(buf2), object_whole(buf3)) + invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) + invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) + invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, MLKEM_Q)) + invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, MLKEM_Q)) + invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, MLKEM_Q)) + invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, MLKEM_Q))) + { + xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); + } + + xof_x4_release(&statex); +} + +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) +{ + xof_ctx state; + uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + unsigned int ctr, buflen; + + shake128_inc_init(&state); + + xof_absorb(&state, seed, MLKEM_SYMBYTES + 2); + + /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + */ + /* This should generate the matrix entry with high probability. 
*/ + xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; + ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); + + /* Squeeze + sample one more block a time until we're done */ + buflen = XOF_RATE; + while (ctr < MLKEM_N) + __loop__( + assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) + invariant(ctr <= MLKEM_N) + invariant(array_bound(entry->coeffs, 0, ctr, 0, MLKEM_Q))) + { + xof_squeezeblocks(buf, 1, &state); + ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen); + } + + xof_release(&state); +} + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_rej_uniform MLKEM_NAMESPACE_K(empty_cu_rej_uniform) +int empty_cu_rej_uniform; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.h index 13db836bc..801287259 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.h @@ -9,54 +9,55 @@ #include #include "cbmc.h" #include "common.h" +#include "poly.h" -#define rej_uniform MLKEM_NAMESPACE(rej_uniform) +#define poly_rej_uniform_x4 MLKEM_NAMESPACE(poly_rej_uniform_x4) /************************************************* - * Name: rej_uniform + * Name: poly_rej_uniform_x4 * - * Description: Run rejection sampling on uniform random bytes to generate - * uniform random integers mod q + * Description: Generate four polynomials using rejection sampling + * on (pseudo-)uniformly random bytes sampled from a seed. * - * Arguments: - int16_t *r: pointer to output buffer - * - unsigned int target: requested number of 16-bit integers - * (uniform mod q). - * Must be <= 4096. - * - unsigned int offset: number of 16-bit integers that have - * already been sampled. - * Must be <= target. 
- * - const uint8_t *buf: pointer to input buffer - * (assumed to be uniform random bytes) - * - unsigned int buflen: length of input buffer in bytes - * Must be <= 4096. - * Must be a multiple of 3. + * Arguments: - poly *vec: Pointer to an array of 4 polynomials + * to be sampled. + * - uint8_t *seed[4]: Pointer to array of four pointers + * pointing to the seed buffers of size + * MLKEM_SYMBYTES + 2 each. * - * Note: Strictly speaking, only a few values of buflen near UINT_MAX need - * excluding. The limit of 4096 is somewhat arbitary but sufficient for all - * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. - * - * Returns the new offset of sampled 16-bit integers, at most target, - * and at least the initial offset. - * If the new offset is strictly less than len, all of the input buffers - * is guaranteed to have been consumed. If it is equal to len, no information - * is provided on how many bytes of the input buffer have been consumed. **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform_x4(poly *vec, uint8_t *seed[4]) +__contract__( + requires(memory_no_alias(vec, sizeof(poly) * 4)) + requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) + requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) + assigns(memory_slice(vec, sizeof(poly) * 4)) + ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, MLKEM_Q))); -/* - * NOTE: The signature differs from the Kyber reference implementation - * in that it adds the offset and always expects the base of the target - * buffer. 
This avoids shifting the buffer base in the caller, which appears - * tricky to reason about. - */ +#define poly_rej_uniform MLKEM_NAMESPACE(poly_rej_uniform) +/************************************************* + * Name: poly_rej_uniform + * + * Description: Generate polynomial using rejection sampling + * on (pseudo-)uniformly random bytes sampled from a seed. + * + * Arguments: - poly *vec: Pointer to polynomial to be sampled. + * - uint8_t *seed: Pointer to seed buffer of size + * MLKEM_SYMBYTES + 2 each. + * + **************************************************/ MLKEM_NATIVE_INTERNAL_API -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +void poly_rej_uniform(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) __contract__( - requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) - requires(memory_no_alias(r, sizeof(int16_t) * target)) - requires(memory_no_alias(buf, buflen)) - requires(offset > 0 ==> array_bound(r, 0, offset, 0, MLKEM_Q)) - assigns(memory_slice(r, sizeof(int16_t) * target)) - ensures(offset <= return_value && return_value <= target) - ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, MLKEM_Q)) -); -#endif + requires(memory_no_alias(entry, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) + assigns(memory_slice(entry, sizeof(poly))) + ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q))); + +#endif /* REJ_UNIFORM_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/symmetric.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/symmetric.h index 55ebbbd53..3563e5505 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/symmetric.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/symmetric.h @@ -10,6 +10,7 @@ #include "cbmc.h" #include "common.h" #include "fips202.h" +#include "fips202x4.h" /* Macros denoting FIPS-203 specific Hash functions */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/verify.c 
b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/verify.c index b7078fcc1..9f39dcd22 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/verify.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/verify.c @@ -4,7 +4,8 @@ */ #include "verify.h" -#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) +#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) && \ + !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) /* * Masking value used in constant-time functions from * verify.h to block the compiler's range analysis and @@ -12,9 +13,11 @@ */ volatile uint64_t ct_opt_blocker_u64 = 0; -#else /* MLKEM_USE_ASM_VALUE_BARRIER */ +#else /* MLKEM_USE_ASM_VALUE_BARRIER && \ + !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ -#define empty_cu_verify MLKEM_NAMESPACE(empty_cu_verify) +#define empty_cu_verify MLKEM_NAMESPACE_K(empty_cu_verify) int empty_cu_verify; -#endif /* MLKEM_USE_ASM_VALUE_BARRIER */ +#endif /* MLKEM_USE_ASM_VALUE_BARRIER && \ + !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/verify.h index 8c47155dc..f6ecf5eba 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/verify.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/verify.h @@ -268,7 +268,7 @@ __contract__( for (i = 0; i < len; i++) __loop__( - invariant(i >= 0 && i <= len) + invariant(i <= len) invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k]))))) { r |= a[i] ^ b[i]; @@ -314,4 +314,4 @@ __contract__( } } -#endif +#endif /* VERIFY_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/zetas.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/zetas.c index 1a26e0dd5..4ef887c62 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/zetas.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/zetas.c @@ -8,6 +8,8 @@ * Do not modify it directly. 
*/ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) #include "ntt.h" /* @@ -28,3 +30,10 @@ ALIGN const int16_t zetas[128] = { -1187, -1659, -1185, -1530, -1278, 794, -1510, -854, -870, 478, -108, -308, 996, 991, 958, -1460, 1522, 1628, }; + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_zetas MLKEM_NAMESPACE_K(empty_cu_zetas) +int empty_cu_zetas; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/arith_backend.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/arith_backend.h index 09e30f207..0543b1bd1 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/arith_backend.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/arith_backend.h @@ -16,7 +16,9 @@ * * Keep this _after_ the inclusion of the backend; otherwise, * the sanity checks won't have an effect. */ +#if defined(MLKEM_NATIVE_CHECK_APIS) #include "api.h" #endif +#endif #endif /* MLKEM_NATIVE_ARITH_IMPL_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbd.c index 433bdc954..1e6b7c5d1 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbd.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbd.c @@ -2,8 +2,11 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ -#include "cbd.h" +#include "common.h" +#ifndef MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED + #include +#include "cbd.h" /* Static namespacing * This is to facilitate building multiple instances @@ -11,8 +14,6 @@ * within a single compilation unit. 
*/ #define load32_littleendian MLKEM_NAMESPACE(load32_littleendian) #define load24_littleendian MLKEM_NAMESPACE(load24_littleendian) -#define cbd2 MLKEM_NAMESPACE(cbd2) -#define cbd3 MLKEM_NAMESPACE(cbd3) /* End of static namespacing */ /************************************************* @@ -35,44 +36,13 @@ static uint32_t load32_littleendian(const uint8_t x[4]) return r; } -#if MLKEM_ETA1 == 3 -/************************************************* - * Name: load24_littleendian - * - * Description: load 3 bytes into a 32-bit integer - * in little-endian order. - * This function is only needed for ML-KEM-512 - * - * Arguments: - const uint8_t *x: pointer to input byte array - * - * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) - **************************************************/ -static uint32_t load24_littleendian(const uint8_t x[3]) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - return r; -} -#endif /* MLKEM_ETA1 == 3 */ - -/************************************************* - * Name: cbd2 - * - * Description: Given an array of uniformly random bytes, compute - * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter eta=2 - * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *buf: pointer to input byte array - **************************************************/ -static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) +MLKEM_NATIVE_INTERNAL_API +void poly_cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) { unsigned i; for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_abs_bound(r->coeffs, 0, 8 * i, 3))) { unsigned j; @@ -82,7 +52,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i <= MLKEM_N / 8 && j <= 8) 
invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 3))) { const int16_t a = (d >> (4 * j + 0)) & 0x3; @@ -92,24 +62,34 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) } } -#if MLKEM_ETA1 == 3 +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == 3 /************************************************* - * Name: cbd3 + * Name: load24_littleendian * - * Description: Given an array of uniformly random bytes, compute - * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter eta=3. + * Description: load 3 bytes into a 32-bit integer + * in little-endian order. * This function is only needed for ML-KEM-512 * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *buf: pointer to input byte array + * Arguments: - const uint8_t *x: pointer to input byte array + * + * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) **************************************************/ -static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) +static uint32_t load24_littleendian(const uint8_t x[3]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + return r; +} + +MLKEM_NATIVE_INTERNAL_API +void poly_cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) { unsigned i; for (i = 0; i < MLKEM_N / 4; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(i <= MLKEM_N / 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i, 4))) { unsigned j; @@ -120,7 +100,7 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) for (j = 0; j < 4; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4) + invariant(i <= MLKEM_N / 4 && j <= 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 4))) { const int16_t a = (d >> (6 * j + 0)) & 0x7; @@ -129,28 +109,12 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) } } } -#endif /* MLKEM_ETA1 == 3 */ +#endif /* 
defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == \ + 3 */ -MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) -{ -#if MLKEM_ETA1 == 2 - cbd2(r, buf); -#elif MLKEM_ETA1 == 3 - cbd3(r, buf); -#else -#error "This implementation requires eta1 in {2,3}" -#endif -} +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ -#if MLKEM_K == 2 || MLKEM_K == 4 -MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) -{ -#if MLKEM_ETA2 == 2 - cbd2(r, buf); -#else -#error "This implementation requires eta2 = 2" -#endif -} -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ +#define empty_cu_cbd MLKEM_NAMESPACE_K(empty_cu_cbd) +int empty_cu_cbd; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbd.h index 15db89570..54c1f5b90 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbd.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbd.h @@ -9,46 +9,35 @@ #include "common.h" #include "poly.h" -#define poly_cbd_eta1 MLKEM_NAMESPACE(poly_cbd_eta1) +#define poly_cbd2 MLKEM_NAMESPACE(poly_cbd2) /************************************************* - * Name: poly_cbd_eta1 + * Name: poly_cbd2 * * Description: Given an array of uniformly random bytes, compute * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter MLKEM_ETA1. 
+ * a centered binomial distribution with parameter eta=2 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *buf: pointer to input byte array **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)) -); +void poly_cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]); -#if MLKEM_K == 2 || MLKEM_K == 4 -#define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2) +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == 3 +#define poly_cbd3 MLKEM_NAMESPACE(poly_cbd3) /************************************************* - * Name: poly_cbd_eta1 + * Name: poly_cbd3 * * Description: Given an array of uniformly random bytes, compute * polynomial with coefficients distributed according to - * a centered binomial distribution with parameter MLKEM_ETA2. + * a centered binomial distribution with parameter eta=3. 
+ * This function is only needed for ML-KEM-512 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *buf: pointer to input byte array **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)) -); -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ +void poly_cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]); +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED || MLKEM_ETA1 == 3 */ -#endif +#endif /* CBD_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbmc.h index baa0bfa9f..52b95bc3f 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbmc.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbmc.h @@ -13,7 +13,7 @@ #define __contract__(x) #define __loop__(x) -#define cassert(x, y) +#define cassert(x) #else /* CBMC _is_ defined, therefore we're doing proof */ @@ -30,7 +30,7 @@ #define invariant(...) __CPROVER_loop_invariant(__VA_ARGS__) #define decreases(...) __CPROVER_decreases(__VA_ARGS__) /* cassert to avoid confusion with in-built assert */ -#define cassert(...) __CPROVER_assert(__VA_ARGS__) +#define cassert(x) __CPROVER_assert(x, "cbmc assertion failed") #define assume(...)
__CPROVER_assume(__VA_ARGS__) /*************************************************** @@ -119,13 +119,13 @@ { \ unsigned qvar; \ ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> \ - (((value_lb) <= (array_var[(qvar)])) && \ - ((array_var[(qvar)]) < (value_ub))) \ + (((int)(value_lb) <= ((array_var)[(qvar)])) && \ + (((array_var)[(qvar)]) < (int)(value_ub))) \ } #define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \ array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \ - (qvar_ub), (array_var), (value_lb), (value_ub)) + (qvar_ub), (array_var), (value_lb), (value_ub)) /* clang-format on */ /* Wrapper around array_bound operating on absolute values. @@ -134,6 +134,6 @@ * bound in array_bound is inclusive, we have to raise it by 1. */ #define array_abs_bound(arr, lb, ub, k) \ - array_bound((arr), (lb), (ub), -(k) + 1, (k)) + array_bound((arr), (lb), (ub), -((int)(k)) + 1, (k)) #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/common.h index da886780c..4f326333e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/common.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/common.h @@ -43,23 +43,30 @@ #define MLKEM_NATIVE_MAKE_NAMESPACE_(x1, x2) x1##_##x2 #define MLKEM_NATIVE_MAKE_NAMESPACE(x1, x2) MLKEM_NATIVE_MAKE_NAMESPACE_(x1, x2) -#define FIPS202_NAMESPACE(s) \ - MLKEM_NATIVE_MAKE_NAMESPACE(FIPS202_NAMESPACE_PREFIX, s) - #define MLKEM_NAMESPACE(s) \ MLKEM_NATIVE_MAKE_NAMESPACE(MLKEM_NAMESPACE_PREFIX, s) +#if defined(MLKEM_NAMESPACE_PREFIX_ADD_LEVEL) +#define MLKEM_NATIVE_MAKE_NAMESPACE_K_(x1, x2, x3) x1##x2##_##x3 +#define MLKEM_NATIVE_MAKE_NAMESPACE_K(x1, x2, x3) \ + MLKEM_NATIVE_MAKE_NAMESPACE_K_(x1, x2, x3) +#define MLKEM_NAMESPACE_K(s) \ + MLKEM_NATIVE_MAKE_NAMESPACE_K(MLKEM_NAMESPACE_PREFIX, MLKEM_LVL, s) +#else +#define MLKEM_NAMESPACE_K(s) MLKEM_NAMESPACE(s) +#endif + /* On Apple platforms, we need to emit leading underscore * in front of assembly symbols. 
We thus introducee a separate * namespace wrapper for ASM symbols. */ #if !defined(__APPLE__) #define MLKEM_ASM_NAMESPACE(sym) MLKEM_NAMESPACE(sym) -#define FIPS202_ASM_NAMESPACE(sym) FIPS202_NAMESPACE(sym) +#define MLKEM_ASM_NAMESPACE_K(sym) MLKEM_NAMESPACE_K(sym) #else #define PREFIX_UNDERSCORE_(sym) _##sym #define PREFIX_UNDERSCORE(sym) PREFIX_UNDERSCORE_(sym) #define MLKEM_ASM_NAMESPACE(sym) PREFIX_UNDERSCORE(MLKEM_NAMESPACE(sym)) -#define FIPS202_ASM_NAMESPACE(sym) PREFIX_UNDERSCORE(FIPS202_NAMESPACE(sym)) +#define MLKEM_ASM_NAMESPACE_K(sym) PREFIX_UNDERSCORE(MLKEM_NAMESPACE_K(sym)) #endif #endif /* MLKEM_NATIVE_COMMON_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/config.h index d1441835b..fa89370ce 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/config.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/config.h @@ -40,10 +40,12 @@ /* #define MLKEM_NATIVE_CONFIG_FILE "config.h" */ /****************************************************************************** - * Name: MLKEM_NAMESPACE + * Name: MLKEM_NAMESPACE_PREFIX * - * Description: The prefix to use to namespace global symbols - * from mlkem/. + * Description: The prefix to use to namespace global symbols from mlkem/. + * + * Level-dependent symbols will additionally be prefixed with the + * security level if MLKEM_NAMESPACE_PREFIX_ADD_LEVEL is set. * * This can also be set using CFLAGS. * @@ -53,17 +55,71 @@ #endif /****************************************************************************** - * Name: FIPS202_NAMESPACE + * Name: MLKEM_NAMESPACE_PREFIX_ADD_LEVEL + * + * Description: If set, the level (512, 768, 1024) is added to the namespace + * prefix MLKEM_NAMESPACE_PREFIX for all functions which are + * level-dependent. Level-independent functions will have their + * symbol prefixed by MLKEM_NAMESPACE_PREFIX only. * - * Description: The prefix to use to namespace global symbols - * from mlkem/fips202/.
+ * This is intended to be used for multi-level builds where + * level-independent code should be shared across levels. * * This can also be set using CFLAGS. * *****************************************************************************/ -#if !defined(FIPS202_NAMESPACE_PREFIX) -#define FIPS202_NAMESPACE_PREFIX FIPS202_DEFAULT_NAMESPACE_PREFIX -#endif +/* #define MLKEM_NAMESPACE_PREFIX_ADD_LEVEL */ + +/****************************************************************************** + * Name: MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED + * + * Description: This is for multi-level builds of mlkem-native only. If you + * need only a single security level build of mlkem-native, + * keep this unset. + * + * If this is set, all MLKEM_K-independent code will be included + * in the build, including code needed only for other security + * levels. + * + * Example: poly_cbd3 is only needed for MLKEM_K == 2. Yet, if + * this option is set for a build with MLKEM_K==3/4, it would + * be included. + * + * To build mlkem-native with support for all security levels, + * build it three times -- once per level -- and set the option + * MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED for exactly one of + * them, and MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED for the + * others. + * + * See examples/multilevel_build for an example. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +/* #define MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED */ + +/****************************************************************************** + * Name: MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED + * + * Description: This is for multi-level builds of mlkem-native only. If you + * need only a single security level build of mlkem-native, + * keep this unset. + * + * If this is set, no MLKEM_K-independent code will be included + * in the build. 
+ * + * To build mlkem-native with support for all security levels, + * build it three times -- once per level -- and set the option + * MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED for exactly one of + * them, and MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED for the + * others. + * + * See examples/multilevel_build for an example. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +/* #define MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ /****************************************************************************** * Name: MLKEM_USE_NATIVE @@ -112,25 +168,13 @@ /* Default namespace * * Don't change this. If you need a different namespace, re-define - * MLKEM_NAMESPACE above instead, and remove the following. - */ - -/* - * The default FIPS202 namespace is - * - * PQCP_MLKEM_NATIVE_FIPS202__ + * MLKEM_NAMESPACE_PREFIX above instead, and remove the following. * - * e.g., PQCP_MLKEM_NATIVE_FIPS202_C_ - */ - -#define FIPS202_DEFAULT_NAMESPACE_PREFIX PQCP_MLKEM_NATIVE_FIPS202 - -/* * The default MLKEM namespace is * - * PQCP_MLKEM_NATIVE_MLKEM__ + * PQCP_MLKEM_NATIVE_MLKEM_ * - * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_ + * e.g., PQCP_MLKEM_NATIVE_MLKEM512_ */ #if MLKEM_K == 2 diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/debug.c new file mode 100644 index 000000000..4b4857cbc --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/debug.c @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +/* NOTE: You can remove this file unless you compile with MLKEM_DEBUG. 
*/ + +#include "common.h" + +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) && defined(MLKEM_DEBUG) + + +#include +#include +#include "debug.h" + +#define MLKEM_NATIVE_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] " + +void mlkem_debug_assert(const char *file, int line, const int val) +{ + if (val == 0) + { + fprintf(stderr, + MLKEM_NATIVE_DEBUG_ERROR_HEADER "Assertion failed (value %d)\n", + file, line, val); + exit(1); + } +} + +void mlkem_debug_check_bounds(const char *file, int line, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive) +{ + int err = 0; + unsigned i; + for (i = 0; i < len; i++) + { + int16_t val = ptr[i]; + if (!(val > lower_bound_exclusive && val < upper_bound_exclusive)) + { + fprintf( + stderr, + MLKEM_NATIVE_DEBUG_ERROR_HEADER + "Bounds assertion failed: Index %u, value %d out of bounds (%d,%d)\n", + file, line, i, (int)val, lower_bound_exclusive, + upper_bound_exclusive); + err = 1; + } + } + + if (err == 1) + exit(1); +} + +#else /* !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED && MLKEM_DEBUG */ + +#define empty_cu_debug MLKEM_NAMESPACE_K(empty_cu_debug) +int empty_cu_debug; + +#endif /* !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED && MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/debug.h new file mode 100644 index 000000000..1103124db --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/debug.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef MLKEM_DEBUG_H +#define MLKEM_DEBUG_H +#include "common.h" + +#if defined(MLKEM_DEBUG) +#include + +/************************************************* + * Name: mlkem_debug_assert + * + * Description: Check debug assertion + * + * Prints an error message to stderr and calls + * exit(1) if not. 
+ * + * Arguments: - file: filename + * - line: line number + * - val: Value asserted to be non-zero + **************************************************/ +#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert) +void mlkem_debug_assert(const char *file, int line, const int val); + +/************************************************* + * Name: mlkem_debug_check_bounds + * + * Description: Check whether values in an array of int16_t + * are within specified bounds. + * + * Prints an error message to stderr and calls + * exit(1) if not. + * + * Arguments: - file: filename + * - line: line number + * - ptr: Base of array to be checked + * - len: Number of int16_t in ptr + * - lower_bound_exclusive: Exclusive lower bound + * - upper_bound_exclusive: Exclusive upper bound + **************************************************/ +#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds) +void mlkem_debug_check_bounds(const char *file, int line, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive); + +/* Check assertion, calling exit() upon failure + * + * val: Value that's asserted to be non-zero + */ +#define debug_assert(val) mlkem_debug_assert(__FILE__, __LINE__, (val)) + +/* Check bounds in array of int16_t's + * ptr: Base of int16_t array; will be explicitly cast to int16_t*, + * so you may pass a byte-compatible type such as poly or polyvec. 
+ * len: Number of int16_t in array + * value_lb: Inclusive lower value bound + * value_ub: Exclusive upper value bound */ +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + mlkem_debug_check_bounds(__FILE__, __LINE__, (const int16_t *)(ptr), (len), \ + (value_lb)-1, (value_ub)) + +/* Check absolute bounds in array of int16_t's + * ptr: Base of array, expression of type int16_t* + * len: Number of int16_t in array + * value_abs_bd: Exclusive absolute upper bound */ +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + debug_assert_bound((ptr), (len), (-(value_abs_bd) + 1), (value_abs_bd)) + +/* Version of bounds assertions for 2-dimensional arrays */ +#define debug_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \ + debug_assert_bound((ptr), ((len0) * (len1)), (value_lb), (value_ub)) + +#define debug_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \ + debug_assert_abs_bound((ptr), ((len0) * (len1)), (value_abs_bd)) + +/* When running CBMC, convert debug assertions into proof obligations */ +#elif defined(CBMC) + +#include "../cbmc.h" + +#define debug_assert(val) cassert(val) + +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + cassert(array_bound(((int16_t *)(ptr)), 0, (len), (value_lb), (value_ub))) + +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + cassert(array_abs_bound(((int16_t *)(ptr)), 0, (len), (value_abs_bd))) + +/* Because of https://github.com/diffblue/cbmc/issues/8570, we can't + * just use a single flattened array_bound(...) here. 
*/ +#define debug_assert_bound_2d(ptr, M, N, value_lb, value_ub) \ + cassert(forall(kN, 0, (M), \ + array_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \ + (value_lb), (value_ub)))) + +#define debug_assert_abs_bound_2d(ptr, M, N, value_abs_bd) \ + cassert(forall(kN, 0, (M), \ + array_abs_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \ + (value_abs_bd)))) + +#else /* MLKEM_DEBUG */ + +#define debug_assert(val) \ + do \ + { \ + } while (0) +#define debug_assert_bound(ptr, len, value_lb, value_ub) \ + do \ + { \ + } while (0) +#define debug_assert_abs_bound(ptr, len, value_abs_bd) \ + do \ + { \ + } while (0) + +#define debug_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \ + do \ + { \ + } while (0) + +#define debug_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \ + do \ + { \ + } while (0) + + +#endif /* MLKEM_DEBUG */ +#endif /* MLKEM_DEBUG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/debug/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/debug/debug.c deleted file mode 100644 index 64294ebe1..000000000 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/debug/debug.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2024 The mlkem-native project authors - * SPDX-License-Identifier: Apache-2.0 - */ -#include "../common.h" - -#if defined(MLKEM_DEBUG) - -#include -#include "debug.h" - -#define MLKEM_NATIVE_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] " - -void mlkem_debug_assert(const char *file, int line, const char *description, - const int val) -{ - if (val == 0) - { - fprintf(stderr, - MLKEM_NATIVE_DEBUG_ERROR_HEADER "Assertion failed: %s (value %d)\n", - file, line, description, val); - exit(1); - } -} - -void mlkem_debug_check_bounds(const char *file, int line, - const char *description, const int16_t *ptr, - unsigned len, int lower_bound_exclusive, - int upper_bound_exclusive) -{ - int err = 0; - unsigned i; - for (i = 0; i < len; i++) - { - int16_t val = ptr[i]; - if (!(val > lower_bound_exclusive && val < 
upper_bound_exclusive)) - { - fprintf(stderr, - MLKEM_NATIVE_DEBUG_ERROR_HEADER - "%s, index %u, value %d out of bounds (%d,%d)\n", - file, line, description, i, (int)val, lower_bound_exclusive, - upper_bound_exclusive); - err = 1; - } - } - - if (err == 1) - exit(1); -} - -#else /* MLKEM_DEBUG */ - -#define empty_cu_debug MLKEM_NAMESPACE(empty_cu_debug) -int empty_cu_debug; - -#endif /* MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/debug/debug.h deleted file mode 100644 index 5ce320ea2..000000000 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/debug/debug.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2024 The mlkem-native project authors - * SPDX-License-Identifier: Apache-2.0 - */ -#ifndef MLKEM_DEBUG_H -#define MLKEM_DEBUG_H - -#include "../common.h" - -#if defined(MLKEM_DEBUG) -#include -#include -#include - -/************************************************* - * Name: mlkem_debug_assert - * - * Description: Check debug assertion - * - * Prints an error message to stderr and calls - * exit(1) if not. - * - * Arguments: - file: filename - * - line: line number - * - description: Textual description of assertion - * - val: Value asserted to be non-zero - **************************************************/ -#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert) -void mlkem_debug_assert(const char *file, int line, const char *description, - const int val); - -/************************************************* - * Name: mlkem_debug_check_bounds - * - * Description: Check whether values in an array of int16_t - * are within specified bounds. - * - * Prints an error message to stderr and calls - * exit(1) if not. 
- * - * Arguments: - file: filename - * - line: line number - * - description: Textual description of check - * - ptr: Base of array to be checked - * - len: Number of int16_t in ptr - * - lower_bound_exclusive: Exclusive lower bound - * - upper_bound_exclusive: Exclusive upper bound - **************************************************/ -#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds) -void mlkem_debug_check_bounds(const char *file, int line, - const char *description, const int16_t *ptr, - unsigned len, int lower_bound_exclusive, - int upper_bound_exclusive); - -/* Check assertion, calling exit() upon failure - * - * val: Value that's asserted to be non-zero - * msg: Message to print on failure - * - * Currently called CASSERT to avoid clash with CBMC assert. - */ -#define CASSERT(val, msg) \ - do \ - { \ - mlkem_debug_assert(__FILE__, __LINE__, (msg), (val)); \ - } while (0) - -/* Check absolute bounds of scalar - * val: Scalar to be checked - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define SCALAR_BOUND(val, abs_bound, msg) \ - CASSERT((val) > -(abs_bound) && (val) < (abs_bound), msg) - -/* Check that all coefficients in array of int16_t's are non-negative - * and below an exclusive upper bound. 
- * - * ptr: Base of array, expression of type int16_t* - * len: Number of int16_t in array - * high_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define UBOUND(ptr, len, high_bound, msg) \ - do \ - { \ - mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ - (len), -1, ((high_bound))); \ - } while (0) - -/* Check absolute bounds in array of int16_t's - * ptr: Base of array, expression of type int16_t* - * len: Number of int16_t in array - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define BOUND(ptr, len, abs_bound, msg) \ - do \ - { \ - mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ - (len), -(abs_bound), (abs_bound)); \ - } while (0) - -/* Check absolute bounds on coefficients in polynomial or mulcache - * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check - * abs_bound: Exclusive upper bound on absolute value to check - * msg: Message to print on failure */ -#define POLY_BOUND_MSG(ptr, abs_bound, msg) \ - BOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (abs_bound), \ - msg) - -/* Check unsigned bounds on coefficients in polynomial or mulcache - * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
- * msg: Message to print on failure */ -#define POLY_UBOUND_MSG(ptr, ubound, msg) \ - UBOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (ubound), \ - msg) - -/* Check absolute bounds on coefficients in polynomial - * ptr: poly* of poly_mulcache* pointer to polynomial (cache) to check - * abs_bound: Exclusive upper bound on absolute value to check */ -#define POLY_BOUND(ptr, abs_bound) \ - POLY_BOUND_MSG((ptr), (abs_bound), "poly absolute bound for " #ptr) - -/* Check unsigned bounds on coefficients in polynomial - * ptr: poly* of poly_mulcache* pointer to polynomial (cache) to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. - */ -#define POLY_UBOUND(ptr, ubound) \ - POLY_UBOUND_MSG((ptr), (ubound), "poly unsigned bound for " #ptr) - -/* Check absolute bounds on coefficients in vector of polynomials - * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check - * abs_bound: Exclusive upper bound on absolute value to check */ -#define POLYVEC_BOUND(ptr, abs_bound) \ - do \ - { \ - unsigned _debug_polyvec_bound_idx; \ - for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ - _debug_polyvec_bound_idx++) \ - POLY_BOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (abs_bound), \ - "polyvec absolute bound for " #ptr ".vec[i]"); \ - } while (0) - -/* Check unsigned bounds on coefficients in vector of polynomials - * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check - * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
- */ -#define POLYVEC_UBOUND(ptr, ubound) \ - do \ - { \ - unsigned _debug_polyvec_bound_idx; \ - for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ - _debug_polyvec_bound_idx++) \ - POLY_UBOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (ubound), \ - "polyvec unsigned bound for " #ptr ".vec[i]"); \ - } while (0) - -#define MLKEM_CONCAT_(left, right) left##right -#define MLKEM_CONCAT(left, right) MLKEM_CONCAT_(left, right) - -/* Following AWS-LC to define a C99-compliant static assert */ -#define MLKEM_STATIC_ASSERT_DEFINE(cond, msg) \ - typedef struct \ - { \ - unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \ - } MLKEM_CONCAT(MLKEM_NAMESPACE(static_assertion_), msg) \ - __attribute__((unused)); - -#define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \ - MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix)) -#define MLKEM_STATIC_ASSERT_ADD_LINE1(cond, line, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE0(cond, MLKEM_CONCAT(line, suffix)) -#define MLKEM_STATIC_ASSERT_ADD_LINE2(cond, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE1(cond, __LINE__, suffix) -#define MLKEM_STATIC_ASSERT_ADD_ERROR(cond, suffix) \ - MLKEM_STATIC_ASSERT_ADD_LINE2(cond, MLKEM_CONCAT(_error_is_, suffix)) -#define STATIC_ASSERT(cond, error) MLKEM_STATIC_ASSERT_ADD_ERROR(cond, error) - -#else /* MLKEM_DEBUG */ - -#define CASSERT(val, msg) \ - do \ - { \ - } while (0) -#define SCALAR_BOUND(val, abs_bound, msg) \ - do \ - { \ - } while (0) -#define BOUND(ptr, len, abs_bound, msg) \ - do \ - { \ - } while (0) -#define POLY_BOUND(ptr, abs_bound) \ - do \ - { \ - } while (0) -#define POLYVEC_BOUND(ptr, abs_bound) \ - do \ - { \ - } while (0) -#define POLY_BOUND_MSG(ptr, ubound, abs_bound) \ - do \ - { \ - } while (0) -#define UBOUND(ptr, len, high_bound, msg) \ - do \ - { \ - } while (0) -#define POLY_UBOUND(ptr, ubound) \ - do \ - { \ - } while (0) -#define POLYVEC_UBOUND(ptr, ubound) \ - do \ - { \ - } while (0) -#define POLY_UBOUND_MSG(ptr, ubound, msg) \ 
- do \ - { \ - } while (0) -#define STATIC_ASSERT(cond, error) - -#endif /* MLKEM_DEBUG */ - -#endif /* MLKEM_DEBUG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/indcpa.c index 4d3133e14..0cfcc3e9e 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/indcpa.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/indcpa.c @@ -17,7 +17,7 @@ #include "symmetric.h" #include "arith_backend.h" -#include "debug/debug.h" +#include "debug.h" #include "cbmc.h" @@ -25,15 +25,13 @@ * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. */ -#define pack_pk MLKEM_NAMESPACE(pack_pk) -#define unpack_pk MLKEM_NAMESPACE(unpack_pk) -#define pack_sk MLKEM_NAMESPACE(pack_sk) -#define unpack_sk MLKEM_NAMESPACE(unpack_sk) -#define pack_ciphertext MLKEM_NAMESPACE(pack_ciphertext) -#define unpack_ciphertext MLKEM_NAMESPACE(unpack_ciphertext) -#define gen_matrix_entry_x4 MLKEM_NAMESPACE(gen_matrix_entry_x4) -#define gen_matrix_entry MLKEM_NAMESPACE(gen_matrix_entry) -#define matvec_mul MLKEM_NAMESPACE(matvec_mul) +#define pack_pk MLKEM_NAMESPACE_K(pack_pk) +#define unpack_pk MLKEM_NAMESPACE_K(unpack_pk) +#define pack_sk MLKEM_NAMESPACE_K(pack_sk) +#define unpack_sk MLKEM_NAMESPACE_K(unpack_sk) +#define pack_ciphertext MLKEM_NAMESPACE_K(pack_ciphertext) +#define unpack_ciphertext MLKEM_NAMESPACE_K(unpack_ciphertext) +#define matvec_mul MLKEM_NAMESPACE_K(matvec_mul) /* End of static namespacing */ /************************************************* @@ -51,7 +49,7 @@ static void pack_pk(uint8_t r[MLKEM_INDCPA_PUBLICKEYBYTES], polyvec *pk, const uint8_t seed[MLKEM_SYMBYTES]) { - POLYVEC_BOUND(pk, MLKEM_Q); + debug_assert_bound_2d(pk, MLKEM_K, MLKEM_N, 0, MLKEM_Q); polyvec_tobytes(r, pk); memcpy(r + MLKEM_POLYVECBYTES, seed, MLKEM_SYMBYTES); } @@ -77,7 +75,7 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], /* NOTE: 
If a modulus check was conducted on the PK, we know at this * point that the coefficients of `pk` are unsigned canonical. The * specifications and proofs, however, do _not_ assume this, and instead - * work with the easily provable bound by 4096. */ + * work with the easily provable bound by UINT12_LIMIT. */ } /************************************************* @@ -91,7 +89,7 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], **************************************************/ static void pack_sk(uint8_t r[MLKEM_INDCPA_SECRETKEYBYTES], polyvec *sk) { - POLYVEC_BOUND(sk, MLKEM_Q); + debug_assert_bound_2d(sk, MLKEM_K, MLKEM_N, 0, MLKEM_Q); polyvec_tobytes(r, sk); } @@ -145,131 +143,11 @@ static void unpack_ciphertext(polyvec *b, poly *v, poly_decompress_dv(v, c + MLKEM_POLYVECCOMPRESSEDBYTES_DU); } -#ifndef MLKEM_GEN_MATRIX_NBLOCKS -#define MLKEM_GEN_MATRIX_NBLOCKS \ - ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + XOF_RATE) / XOF_RATE) -#endif - -/* - * Generate four A matrix entries from a seed, using rejection - * sampling on the output of a XOF. 
- */ -static void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4]) -__contract__( - requires(memory_no_alias(vec, sizeof(poly) * 4)) - requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) - requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) - requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) - assigns(memory_slice(vec, sizeof(poly) * 4)) - ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) -{ - /* Temporary buffers for XOF output before rejection sampling */ - uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - - /* Tracks the number of coefficients we have already sampled */ - unsigned int ctr[KECCAK_WAY]; - xof_x4_ctx statex; - unsigned int buflen; - - shake128x4_inc_init(&statex); - - /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */ - xof_x4_absorb(&statex, seed[0], seed[1], seed[2], seed[3], - MLKEM_SYMBYTES + 2); - - /* - * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. - * This should generate the matrix entries with high probability. - */ - xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, - &statex); - buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; - ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); - ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); - ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); - ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); - - /* - * So long as not all matrix entries have been generated, squeeze - * one more block a time until we're done. 
- */ - buflen = XOF_RATE; - while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || - ctr[3] < MLKEM_N) - __loop__( - assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), - object_whole(buf1), object_whole(buf2), object_whole(buf3)) - invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) - invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) - invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, MLKEM_Q)) - invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, MLKEM_Q)) - invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, MLKEM_Q)) - invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, MLKEM_Q))) - { - xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); - ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); - ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); - ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); - ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); - } - - xof_x4_release(&statex); -} - -/* - * Generate a single A matrix entry from a seed, using rejection - * sampling on the output of a XOF. - */ -static void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) -__contract__( - requires(memory_no_alias(entry, sizeof(poly))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) - assigns(memory_slice(entry, sizeof(poly))) - ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) -{ - xof_ctx state; - uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; - unsigned int ctr, buflen; - - shake128_inc_init(&state); - xof_absorb(&state, seed, MLKEM_SYMBYTES + 2); - - /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. - */ - /* This should generate the matrix entry with high probability. 
*/ - xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); - buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; - ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); - - /* Squeeze + sample one more block a time until we're done */ - buflen = XOF_RATE; - while (ctr < MLKEM_N) - __loop__( - assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) - invariant(0 <= ctr && ctr <= MLKEM_N) - invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr, - 0, MLKEM_Q))) - { - xof_squeezeblocks(buf, 1, &state); - ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen); - } - - xof_release(&state); -} - #if !defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER) /* This namespacing is not done at the top to avoid a naming conflict * with native backends, which are currently not yet namespaced. */ #define poly_permute_bitrev_to_custom \ - MLKEM_NAMESPACE(poly_permute_bitrev_to_custom) + MLKEM_NAMESPACE_K(poly_permute_bitrev_to_custom) static INLINE void poly_permute_bitrev_to_custom(poly *data) __contract__( @@ -332,7 +210,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) * This call writes across polyvec boundaries for K=2 and K=3. * This is intentional and safe. */ - gen_matrix_entry_x4(&a[0].vec[0] + i, seedxy); + poly_rej_uniform_x4(&a[0].vec[0] + i, seedxy); } /* For left over polynomial, we use single keccak. */ @@ -353,12 +231,11 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) seed0[MLKEM_SYMBYTES + 1] = x; } - gen_matrix_entry(&a[0].vec[0] + i, seed0); + poly_rej_uniform(&a[0].vec[0] + i, seed0); i++; } - cassert(i == MLKEM_K * MLKEM_K, - "gen_matrix: failed to generate whole matrix"); + debug_assert(i == MLKEM_K * MLKEM_K); /* * The public matrix is generated in NTT domain. 
If the native backend @@ -402,16 +279,12 @@ __contract__( for (i = 0; i < MLKEM_K; i++) __loop__( assigns(i, object_whole(out)) - invariant(i >= 0 && i <= MLKEM_K)) + invariant(i <= MLKEM_K)) { polyvec_basemul_acc_montgomery_cached(&out->vec[i], &a[i], v, vc); } } - - -STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0) - MLKEM_NATIVE_INTERNAL_API void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES], @@ -461,7 +334,6 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], matvec_mul(&pkpv, a, &skpv, &skpv_cache); polyvec_tomont(&pkpv); - /* Arithmetic cannot overflow, see static assertion at the top */ polyvec_add(&pkpv, &e); polyvec_reduce(&pkpv); polyvec_reduce(&skpv); @@ -471,11 +343,6 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], } -/* Check that the arithmetic in indcpa_enc() does not overflow */ -STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0) -STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX, - indcpa_enc_bound_1) - MLKEM_NATIVE_INTERNAL_API void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], const uint8_t m[MLKEM_INDCPA_MSGBYTES], @@ -522,7 +389,6 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], polyvec_invntt_tomont(&b); poly_invntt_tomont(&v); - /* Arithmetic cannot overflow, see static assertion at the top */ polyvec_add(&b, &ep); poly_add(&v, &epp); poly_add(&v, &k); @@ -533,9 +399,6 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], pack_ciphertext(c, &b, &v); } -/* Check that the arithmetic in indcpa_dec() does not overflow */ -STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0) - MLKEM_NATIVE_INTERNAL_API void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], const uint8_t c[MLKEM_INDCPA_BYTES], @@ -551,7 +414,6 @@ void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], polyvec_basemul_acc_montgomery(&sb, &skpv, &b); poly_invntt_tomont(&sb); - /* Arithmetic cannot overflow, see static assertion at the 
top */ poly_sub(&v, &sb); poly_reduce(&v); diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/indcpa.h index 011f1aa4f..2c4fda3c4 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/indcpa.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/indcpa.h @@ -10,7 +10,7 @@ #include "common.h" #include "polyvec.h" -#define gen_matrix MLKEM_NAMESPACE(gen_matrix) +#define gen_matrix MLKEM_NAMESPACE_K(gen_matrix) /************************************************* * Name: gen_matrix * @@ -34,7 +34,7 @@ __contract__( array_bound(a[x].vec[y].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))); ); -#define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand) +#define indcpa_keypair_derand MLKEM_NAMESPACE_K(indcpa_keypair_derand) /************************************************* * Name: indcpa_keypair_derand * @@ -60,7 +60,7 @@ __contract__( assigns(object_whole(sk)) ); -#define indcpa_enc MLKEM_NAMESPACE(indcpa_enc) +#define indcpa_enc MLKEM_NAMESPACE_K(indcpa_enc) /************************************************* * Name: indcpa_enc * @@ -89,7 +89,7 @@ __contract__( assigns(object_whole(c)) ); -#define indcpa_dec MLKEM_NAMESPACE(indcpa_dec) +#define indcpa_dec MLKEM_NAMESPACE_K(indcpa_dec) /************************************************* * Name: indcpa_dec * diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/kem.c index 5779d3273..88c3843be 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/kem.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/kem.c @@ -16,8 +16,8 @@ * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. 
*/ -#define check_pk MLKEM_NAMESPACE(check_pk) -#define check_sk MLKEM_NAMESPACE(check_sk) +#define check_pk MLKEM_NAMESPACE_K(check_pk) +#define check_sk MLKEM_NAMESPACE_K(check_sk) /* End of static namespacing */ #if defined(CBMC) diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/kem.h index 074e4771e..93caa796b 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/kem.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/kem.h @@ -9,6 +9,7 @@ #include "cbmc.h" #include "common.h" +#if defined(MLKEM_NATIVE_CHECK_APIS) /* Include to ensure consistency between internal kem.h * and external mlkem_native.h. */ #include "mlkem_native.h" @@ -25,6 +26,14 @@ #error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h #endif +#else +#define crypto_kem_keypair_derand MLKEM_NAMESPACE_K(keypair_derand) +#define crypto_kem_keypair MLKEM_NAMESPACE_K(keypair) +#define crypto_kem_enc_derand MLKEM_NAMESPACE_K(enc_derand) +#define crypto_kem_enc MLKEM_NAMESPACE_K(enc) +#define crypto_kem_dec MLKEM_NAMESPACE_K(dec) +#endif + /************************************************* * Name: crypto_kem_keypair_derand * diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/mlkem_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/mlkem_native.h index 4aed4efbb..12d1d12e6 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/mlkem_native.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/mlkem_native.h @@ -59,9 +59,17 @@ #error MLKEM_NAMESPACE_PREFIX not set by config file #endif -#define BUILD_INFO_CONCAT_(x, y) x##_##y -#define BUILD_INFO_CONCAT(x, y) BUILD_INFO_CONCAT_(x, y) -#define BUILD_INFO_NAMESPACE(sym) BUILD_INFO_CONCAT(MLKEM_NAMESPACE_PREFIX, sym) +#if defined(MLKEM_NATIVE_NAMESPACE_PREFIX_ADD_LEVEL) +#define BUILD_INFO_CONCAT3_(x, y, z) x##y##_##z +#define BUILD_INFO_CONCAT3(x, y, z) BUILD_INFO_CONCAT3_(x, y, z) +#define BUILD_INFO_NAMESPACE(sym) \ +
BUILD_INFO_CONCAT3(MLKEM_NAMESPACE_PREFIX, BUILD_INFO_LVL, sym) +#else +#define BUILD_INFO_CONCAT2_(x, y) x##_##y +#define BUILD_INFO_CONCAT2(x, y) BUILD_INFO_CONCAT2_(x, y) +#define BUILD_INFO_NAMESPACE(sym) \ + BUILD_INFO_CONCAT2(MLKEM_NAMESPACE_PREFIX, sym) +#endif #endif /* BUILD_INFO_LVL */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/ntt.c index 02b45215c..3651c8da9 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/ntt.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/ntt.c @@ -2,10 +2,12 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ -#include +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) +#include #include "arith_backend.h" -#include "debug/debug.h" +#include "debug.h" #include "ntt.h" #include "reduce.h" @@ -45,10 +47,10 @@ * 4 -- 6 * 5 -- 7 */ -static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, - int len, int bound) +static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, + unsigned start, unsigned len, int bound) __contract__( - requires(0 <= start && start < MLKEM_N) + requires(start < MLKEM_N) requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N) requires(0 <= bound && bound < INT16_MAX - MLKEM_Q) requires(-HALF_Q < zeta && zeta < HALF_Q) @@ -60,7 +62,7 @@ __contract__( ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound))) { /* `bound` is a ghost variable only needed in the CBMC specification */ - int j; + unsigned j; ((void)bound); for (j = start; j < start + len; j++) __loop__( @@ -93,7 +95,7 @@ __contract__( * official Kyber implementation here, merely adding `layer` as * a ghost variable for the specifications. 
*/ -static void ntt_layer(int16_t r[MLKEM_N], int len, int layer) +static void ntt_layer(int16_t r[MLKEM_N], unsigned len, unsigned layer) __contract__( requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer)) @@ -101,15 +103,15 @@ __contract__( assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q))) { - int start, k; + unsigned start, k; /* `layer` is a ghost variable only needed in the CBMC specification */ ((void)layer); /* Twiddle factors for layer n start at index 2^(layer-1) */ k = MLKEM_N / (2 * len); for (start = 0; start < MLKEM_N; start += 2 * len) __loop__( - invariant(0 <= start && start < MLKEM_N + 2 * len) - invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) + invariant(start < MLKEM_N + 2 * len) + invariant(k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) invariant(array_abs_bound(r, 0, start, layer * MLKEM_Q + MLKEM_Q)) invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q))) { @@ -130,9 +132,9 @@ __contract__( MLKEM_NATIVE_INTERNAL_API void poly_ntt(poly *p) { - int len, layer; + unsigned len, layer; int16_t *r; - POLY_BOUND_MSG(p, MLKEM_Q, "ref ntt input"); + debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q); r = p->coeffs; for (len = 128, layer = 1; len >= 2; len >>= 1, layer++) @@ -144,30 +146,23 @@ void poly_ntt(poly *p) } /* Check the stronger bound */ - POLY_BOUND_MSG(p, NTT_BOUND, "ref ntt output"); + debug_assert_abs_bound(p, MLKEM_N, NTT_BOUND); } #else /* MLKEM_USE_NATIVE_NTT */ -/* Check that bound for native NTT implies contractual bound */ -STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, invntt_bound) - MLKEM_NATIVE_INTERNAL_API void poly_ntt(poly *p) { - POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input"); + debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q); ntt_native(p); - POLY_BOUND_MSG(p, NTT_BOUND_NATIVE, "native ntt output"); + debug_assert_abs_bound(p, MLKEM_N, NTT_BOUND); } #endif /* 
MLKEM_USE_NATIVE_NTT */ #if !defined(MLKEM_USE_NATIVE_INTT) -/* Check that bound for reference invNTT implies contractual bound */ -#define INVNTT_BOUND_REF (3 * MLKEM_Q / 4) -STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound) - /* Compute one layer of inverse NTT */ -static void invntt_layer(int16_t *r, int len, int layer) +static void invntt_layer(int16_t *r, unsigned len, unsigned layer) __contract__( requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7) @@ -176,23 +171,23 @@ __contract__( assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { - int start, k; + unsigned start, k; /* `layer` is a ghost variable used only in the specification */ ((void)layer); k = MLKEM_N / len - 1; for (start = 0; start < MLKEM_N; start += 2 * len) __loop__( invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)) - invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(start <= MLKEM_N && k <= 127) /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */ invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len)) { - int j; + unsigned j; int16_t zeta = zetas[k--]; for (j = start; j < start + len; j++) __loop__( invariant(start <= j && j <= start + len) - invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(start <= MLKEM_N && k <= 127) invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { int16_t t = r[j]; @@ -211,13 +206,13 @@ void poly_invntt_tomont(poly *p) * and NTT twist. This also brings coefficients down to * absolute value < MLKEM_Q. 
*/ - int j, len, layer; + unsigned j, len, layer; const int16_t f = 1441; int16_t *r = p->coeffs; for (j = 0; j < MLKEM_N; j++) __loop__( - invariant(0 <= j && j <= MLKEM_N) + invariant(j <= MLKEM_N) invariant(array_abs_bound(r, 0, j, MLKEM_Q))) { r[j] = fqmul(r[j], f); @@ -226,24 +221,21 @@ void poly_invntt_tomont(poly *p) /* Run the invNTT layers */ for (len = 2, layer = 7; len <= 128; len <<= 1, layer--) __loop__( - invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer))) + invariant(2 <= len && len <= 256 && layer <= 7 && len == (1 << (8 - layer))) invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) { invntt_layer(p->coeffs, len, layer); } - POLY_BOUND_MSG(p, INVNTT_BOUND_REF, "ref intt output"); + debug_assert_abs_bound(p, MLKEM_N, INVNTT_BOUND); } #else /* MLKEM_USE_NATIVE_INTT */ -/* Check that bound for native invNTT implies contractual bound */ -STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound) - MLKEM_NATIVE_INTERNAL_API void poly_invntt_tomont(poly *p) { intt_native(p); - POLY_BOUND_MSG(p, INVNTT_BOUND_NATIVE, "native intt output"); + debug_assert_abs_bound(p, MLKEM_N, INVNTT_BOUND); } #endif /* MLKEM_USE_NATIVE_INTT */ @@ -252,8 +244,7 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t b_cached) { int32_t t0, t1; - - BOUND(a, 2, 4096, "basemul input bound"); + debug_assert_bound(a, 2, 0, UINT12_LIMIT); t0 = (int32_t)a[1] * b_cached; t0 += (int32_t)a[0] * b[0]; @@ -264,5 +255,12 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], r[0] = montgomery_reduce(t0); r[1] = montgomery_reduce(t1); - BOUND(r, 2, 2 * MLKEM_Q, "basemul output bound"); + debug_assert_abs_bound(r, 2, 2 * MLKEM_Q); } + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_ntt MLKEM_NAMESPACE_K(empty_cu_ntt) +int empty_cu_ntt; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/ntt.h 
b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/ntt.h index 5592bb9a2..4e80d3ab3 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/ntt.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/ntt.h @@ -4,10 +4,10 @@ */ #ifndef NTT_H #define NTT_H +#include "common.h" #include #include "cbmc.h" -#include "common.h" #include "poly.h" #include "reduce.h" @@ -81,7 +81,7 @@ __contract__( * Upon return, coefficients are bound by * 2*MLKEM_Q in absolute value. * - a: Pointer to first input polynomial - * Must be coefficient-wise < 4096 in absolute value. + * Every coefficient must be in [0..4095] * - b: Pointer to second input polynomial * Can have arbitrary int16_t coefficients * - b_cached: Some precomputed value, typically derived from @@ -99,5 +99,4 @@ __contract__( ensures(array_abs_bound(r, 0, 2, 2 * MLKEM_Q)) ); - -#endif +#endif /* NTT_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/params.h index fa751f977..57ea4c8ba 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/params.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/params.h @@ -25,23 +25,34 @@ #define MLKEM_POLYBYTES 384 #define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES) +#define MLKEM_POLYCOMPRESSEDBYTES_D4 128 +#define MLKEM_POLYCOMPRESSEDBYTES_D5 160 +#define MLKEM_POLYCOMPRESSEDBYTES_D10 320 +#define MLKEM_POLYCOMPRESSEDBYTES_D11 352 + #if MLKEM_K == 2 #define MLKEM_LVL 512 #define MLKEM_ETA1 3 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_DU 10 +#define MLKEM_DV 4 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #elif MLKEM_K == 3 #define MLKEM_LVL 768 #define MLKEM_ETA1 2 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_DU 10 +#define 
MLKEM_DV 4 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #elif MLKEM_K == 4 #define MLKEM_LVL 1024 #define MLKEM_ETA1 2 -#define MLKEM_POLYCOMPRESSEDBYTES_DV 160 -#define MLKEM_POLYCOMPRESSEDBYTES_DU 352 +#define MLKEM_DU 11 +#define MLKEM_DV 5 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D5 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D11 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/poly.c index 5807879df..7483ebf6d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/poly.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/poly.c @@ -2,13 +2,15 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) + #include #include - #include "arith_backend.h" #include "cbd.h" #include "cbmc.h" -#include "debug/debug.h" +#include "debug.h" #include "fips202x4.h" #include "ntt.h" #include "poly.h" @@ -16,50 +18,46 @@ #include "symmetric.h" #include "verify.h" +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 || MLKEM_K == 3) MLKEM_NATIVE_INTERNAL_API -void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) +void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a) { - unsigned j; -#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) - for (j = 0; j < MLKEM_N / 8; j++) - __loop__(invariant(j >= 0 && j <= MLKEM_N / 8)) + unsigned i; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (i = 0; i < MLKEM_N / 8; i++) + __loop__(invariant(i <= MLKEM_N / 8)) { - unsigned k; - uint16_t t[8]; - for (k = 0; k < 8; k++) + unsigned j; + uint8_t t[8] = 
{0}; + for (j = 0; j < 8; j++) __loop__( - invariant(k >= 0 && k <= 8) - invariant(forall(r, 0, k, t[r] < (1u << 11)))) + invariant(i <= MLKEM_N / 8 && j <= 8) + invariant(array_bound(t, 0, j, 0, 16))) { - t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); } - /* - * Make all implicit truncation explicit. No data is being - * truncated for the LHS's since each t[i] is 11-bit in size. - */ - r[11 * j + 0] = (t[0] >> 0) & 0xFF; - r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); - r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); - r[11 * j + 3] = (t[2] >> 2) & 0xFF; - r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); - r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); - r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); - r[11 * j + 7] = (t[5] >> 1) & 0xFF; - r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); - r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); - r[11 * j + 10] = (t[7] >> 3); + r[i * 4] = t[0] | (t[1] << 4); + r[i * 4 + 1] = t[2] | (t[3] << 4); + r[i * 4 + 2] = t[4] | (t[5] << 4); + r[i * 4 + 3] = t[6] | (t[7] << 4); } +} -#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a) +{ + unsigned j; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (j = 0; j < MLKEM_N / 4; j++) - __loop__(invariant(j >= 0 && j <= MLKEM_N / 4)) + __loop__(invariant(j <= MLKEM_N / 4)) { unsigned k; uint16_t t[4]; for (k = 0; k < 4; k++) __loop__( - invariant(k >= 0 && k <= 4) + invariant(k <= 4) invariant(forall(r, 0, k, t[r] < (1u << 10)))) { t[k] = scalar_compress_d10(a->coeffs[4 * j + k]); @@ -75,51 +73,35 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) r[5 * j + 3] = (t[2] >> 4) | ((t[3] << 6) & 0xFF); r[5 * j + 4] = (t[3] >> 2); } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" -#endif } - MLKEM_NATIVE_INTERNAL_API -void poly_decompress_du(poly *r, const 
uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]) { - unsigned j; -#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) - for (j = 0; j < MLKEM_N / 8; j++) + unsigned i; + for (i = 0; i < MLKEM_N / 2; i++) __loop__( - invariant(0 <= j && j <= MLKEM_N / 8) - invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q))) + invariant(i <= MLKEM_N / 2) + invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q))) { - int k; - uint16_t t[8]; - uint8_t const *base = &a[11 * j]; - t[0] = 0x7FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8)); - t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); - t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | - ((uint16_t)base[4] << 10)); - t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); - t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); - t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | - ((uint16_t)base[8] << 9)); - t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); - t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); - - for (k = 0; k < 8; k++) - __loop__( - invariant(0 <= k && k <= 8) - invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q))) - { - r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); - } + r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); + r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} + +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d10(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]) +{ + unsigned j; for (j = 0; j < MLKEM_N / 4; j++) __loop__( - invariant(0 <= j && j <= MLKEM_N / 4) + invariant(j <= MLKEM_N / 4) invariant(array_bound(r->coeffs, 0, 4 * j, 0, MLKEM_Q))) { - int k; + unsigned k; uint16_t t[4]; uint8_t const *base = &a[5 * j]; @@ -130,51 +112,33 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) for (k = 0; 
k < 4; k++) __loop__( - invariant(0 <= k && k <= 4) + invariant(k <= 4) invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, MLKEM_Q))) { r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]); } } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" -#endif + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 \ + || MLKEM_K == 3) */ +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 MLKEM_NATIVE_INTERNAL_API -void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) +void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); -#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) - for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) - { - unsigned j; - uint8_t t[8] = {0}; - for (j = 0; j < 8; j++) - __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) - invariant(array_bound(t, 0, j, 0, 16))) - { - t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); - } - - r[i * 4] = t[0] | (t[1] << 4); - r[i * 4 + 1] = t[2] | (t[3] << 4); - r[i * 4 + 2] = t[4] | (t[5] << 4); - r[i * 4 + 3] = t[6] | (t[7] << 4); - } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + __loop__(invariant(i <= MLKEM_N / 8)) { unsigned j; uint8_t t[8] = {0}; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i <= MLKEM_N / 8 && j <= 8) invariant(array_bound(t, 0, j, 0, 32))) { t[j] = scalar_compress_d5(a->coeffs[8 * i + j]); @@ -191,33 +155,57 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) r[i * 5 + 3] = 0xFF & ((t[4] >> 4) | (t[5] << 1) | (t[6] << 6)); r[i * 5 + 4] = 0xFF & ((t[6] >> 2) | (t[7] << 3)); } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 
160}" -#endif } MLKEM_NATIVE_INTERNAL_API -void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a) { - unsigned i; -#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) - for (i = 0; i < MLKEM_N / 2; i++) - __loop__( - invariant(i >= 0 && i <= MLKEM_N / 2) - invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q))) + unsigned j; + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (j = 0; j < MLKEM_N / 8; j++) + __loop__(invariant(j <= MLKEM_N / 8)) { - r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); - r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); + unsigned k; + uint16_t t[8]; + for (k = 0; k < 8; k++) + __loop__( + invariant(k <= 8) + invariant(forall(r, 0, k, t[r] < (1u << 11)))) + { + t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + } + + /* + * Make all implicit truncation explicit. No data is being + * truncated for the LHS's since each t[i] is 11-bit in size. 
+ */ + r[11 * j + 0] = (t[0] >> 0) & 0xFF; + r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); + r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); + r[11 * j + 3] = (t[2] >> 2) & 0xFF; + r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); + r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); + r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); + r[11 * j + 7] = (t[5] >> 1) & 0xFF; + r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); + r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); + r[11 * j + 10] = (t[7] >> 3); } -#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) +} + +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]) +{ + unsigned i; for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q))) { unsigned j; uint8_t t[8]; - const int offset = i * 5; + const unsigned offset = i * 5; /* * Explicitly truncate to avoid warning about * implicit truncation in CBMC and unwind loop for ease @@ -240,29 +228,62 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) /* and copy to the correct slice in r[] */ for (j = 0; j < 8; j++) __loop__( - invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8) + invariant(j <= 8 && i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q))) { r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]); } } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 160}" -#endif - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d11(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]) +{ + unsigned j; + for (j = 0; j < MLKEM_N / 8; j++) + __loop__( + invariant(j <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q))) + { + unsigned k; + uint16_t t[8]; + uint8_t const *base = &a[11 * j]; + t[0] = 0x7FF & ((base[0] >> 0) | 
((uint16_t)base[1] << 8)); + t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); + t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | + ((uint16_t)base[4] << 10)); + t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); + t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); + t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | + ((uint16_t)base[8] << 9)); + t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); + t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); + + for (k = 0; k < 8; k++) + __loop__( + invariant(k <= 8) + invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q))) + { + r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); + } + } + + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD) || MLKEM_K == 4 */ + #if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES) MLKEM_NATIVE_INTERNAL_API void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); - + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_N / 2; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 2)) + __loop__(invariant(i <= MLKEM_N / 2)) { const uint16_t t0 = a->coeffs[2 * i]; const uint16_t t1 = a->coeffs[2 * i + 1]; @@ -290,7 +311,7 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) MLKEM_NATIVE_INTERNAL_API void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) { - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); poly_tobytes_native(r, a); } #endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */ @@ -302,7 +323,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) unsigned i; for (i = 0; i < MLKEM_N / 2; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 2) + invariant(i <= MLKEM_N / 2) invariant(array_bound(r->coeffs, 0, 2 * i, 0, UINT12_LIMIT))) { const uint8_t t0 = a[3 * i + 0]; @@ -313,7 +334,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) } /* Note that the coefficients are not 
canonical */ - POLY_UBOUND(r, 4096); + debug_assert_bound(r, MLKEM_N, 0, UINT12_LIMIT); } #else /* MLKEM_USE_NATIVE_POLY_FROMBYTES */ MLKEM_NATIVE_INTERNAL_API @@ -333,13 +354,13 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) for (i = 0; i < MLKEM_N / 8; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(i <= MLKEM_N / 8) invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q))) { unsigned j; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i < MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(i < MLKEM_N / 8 && j <= 8) invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q))) { /* Prevent the compiler from recognizing this as a bit selection */ @@ -347,23 +368,23 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) r->coeffs[8 * i + j] = ct_sel_int16(HALF_Q, 0, msg[i] & mask); } } - POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output"); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) { unsigned i; - POLY_UBOUND(a, MLKEM_Q); + debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_N / 8; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + __loop__(invariant(i <= MLKEM_N / 8)) { unsigned j; msg[i] = 0; for (j = 0; j < 8; j++) __loop__( - invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)) + invariant(i <= MLKEM_N / 8 && j <= 8)) { uint32_t t = scalar_compress_d1(a->coeffs[8 * i + j]); msg[i] |= t << j; @@ -371,104 +392,17 @@ void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) } } -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, - uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) -{ - ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN 
uint8_t extkey0[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1]; - ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1]; - memcpy(extkey0, seed, MLKEM_SYMBYTES); - memcpy(extkey1, seed, MLKEM_SYMBYTES); - memcpy(extkey2, seed, MLKEM_SYMBYTES); - memcpy(extkey3, seed, MLKEM_SYMBYTES); - extkey0[MLKEM_SYMBYTES] = nonce0; - extkey1[MLKEM_SYMBYTES] = nonce1; - extkey2[MLKEM_SYMBYTES] = nonce2; - extkey3[MLKEM_SYMBYTES] = nonce3; - prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3); - poly_cbd_eta1(r0, buf0); - poly_cbd_eta1(r1, buf1); - poly_cbd_eta1(r2, buf2); - poly_cbd_eta1(r3, buf3); - - POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0"); - POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 1"); - POLY_BOUND_MSG(r2, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 2"); - POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3"); -} - -#if MLKEM_K == 2 || MLKEM_K == 4 -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce) -{ - ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; - ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1]; - - memcpy(extkey, seed, MLKEM_SYMBYTES); - extkey[MLKEM_SYMBYTES] = nonce; - prf_eta2(buf, extkey); - - poly_cbd_eta2(r, buf); - - POLY_BOUND_MSG(r, MLKEM_ETA1 + 1, "poly_getnoise_eta2 output"); -} -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ - -#if MLKEM_K == 2 -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, - uint8_t nonce3) -{ - ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; - ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; - ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; - memcpy(extkey[0], seed, MLKEM_SYMBYTES); - memcpy(extkey[1], seed, MLKEM_SYMBYTES); - memcpy(extkey[2], seed, MLKEM_SYMBYTES); - 
memcpy(extkey[3], seed, MLKEM_SYMBYTES); - extkey[0][MLKEM_SYMBYTES] = nonce0; - extkey[1][MLKEM_SYMBYTES] = nonce1; - extkey[2][MLKEM_SYMBYTES] = nonce2; - extkey[3][MLKEM_SYMBYTES] = nonce3; - - prf_eta1(buf1[0], extkey[0]); - prf_eta1(buf1[1], extkey[1]); - prf_eta2(buf2[0], extkey[2]); - prf_eta2(buf2[1], extkey[3]); - - poly_cbd_eta1(r0, buf1[0]); - poly_cbd_eta1(r1, buf1[1]); - poly_cbd_eta2(r2, buf2[0]); - poly_cbd_eta2(r3, buf2[1]); - - POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 0"); - POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 1"); - POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2"); - POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 3"); -} -#endif /* MLKEM_K == 2 */ - MLKEM_NATIVE_INTERNAL_API void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, const poly_mulcache *b_cache) { unsigned i; - POLY_BOUND(b_cache, 4096); + debug_assert_bound(a, MLKEM_N, 0, UINT12_LIMIT); for (i = 0; i < MLKEM_N / 4; i++) __loop__( assigns(i, object_whole(r)) - invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(i <= MLKEM_N / 4) invariant(array_abs_bound(r->coeffs, 0, 4 * i, 2 * MLKEM_Q))) { basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i], @@ -476,6 +410,8 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, basemul_cached(&r->coeffs[4 * i + 2], &a->coeffs[4 * i + 2], &b->coeffs[4 * i + 2], b_cache->coeffs[2 * i + 1]); } + + debug_assert_abs_bound(r, MLKEM_N, 2 * MLKEM_Q); } #if !defined(MLKEM_USE_NATIVE_POLY_TOMONT) @@ -486,20 +422,20 @@ void poly_tomont(poly *r) const int16_t f = (1ULL << 32) % MLKEM_Q; /* 1353 */ for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) - invariant(array_abs_bound(r->coeffs ,0, i, MLKEM_Q))) + invariant(i <= MLKEM_N) + invariant(array_abs_bound(r->coeffs, 0, i, MLKEM_Q))) { r->coeffs[i] = fqmul(r->coeffs[i], f); } - POLY_BOUND(r, MLKEM_Q); + 
debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_TOMONT */ MLKEM_NATIVE_INTERNAL_API void poly_tomont(poly *r) { poly_tomont_native(r); - POLY_BOUND(r, MLKEM_Q); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q); } #endif /* MLKEM_USE_NATIVE_POLY_TOMONT */ @@ -510,7 +446,7 @@ void poly_reduce(poly *r) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(array_bound(r->coeffs, 0, i, 0, MLKEM_Q))) { /* Barrett reduction, giving signed canonical representative */ @@ -519,14 +455,14 @@ void poly_reduce(poly *r) r->coeffs[i] = scalar_signed_to_unsigned_q(t); } - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_REDUCE */ MLKEM_NATIVE_INTERNAL_API void poly_reduce(poly *r) { poly_reduce_native(r); - POLY_UBOUND(r, MLKEM_Q); + debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q); } #endif /* MLKEM_USE_NATIVE_POLY_REDUCE */ @@ -536,7 +472,7 @@ void poly_add(poly *r, const poly *b) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1]))) { @@ -550,7 +486,7 @@ void poly_sub(poly *r, const poly *b) unsigned i; for (i = 0; i < MLKEM_N; i++) __loop__( - invariant(i >= 0 && i <= MLKEM_N) + invariant(i <= MLKEM_N) invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1]))) { @@ -564,20 +500,36 @@ void poly_mulcache_compute(poly_mulcache *x, const poly *a) { unsigned i; for (i = 0; i < MLKEM_N / 4; i++) - __loop__(invariant(i >= 0 && i <= MLKEM_N / 4)) + __loop__( + invariant(i <= MLKEM_N / 4) + invariant(array_abs_bound(x->coeffs, 0, 2 * i, MLKEM_Q))) { x->coeffs[2 * i + 0] = fqmul(a->coeffs[4 * i + 1], zetas[64 + 
i]); x->coeffs[2 * i + 1] = fqmul(a->coeffs[4 * i + 3], -zetas[64 + i]); } - POLY_BOUND(x, MLKEM_Q); + + /* + * This bound is true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus omitted + * them from the spec to not unnecessarily constrain native + * implementations, but checked here nonetheless. + */ + debug_assert_abs_bound(x, MLKEM_N / 2, MLKEM_Q); } #else /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ MLKEM_NATIVE_INTERNAL_API void poly_mulcache_compute(poly_mulcache *x, const poly *a) { poly_mulcache_compute_native(x, a); - /* Omitting POLY_BOUND(x, MLKEM_Q) since native implementations may + /* Omitting bounds assertion since native implementations may * decide not to use a mulcache. Note that the C backend implementation * of poly_basemul_montgomery_cached() does still include the check. */ } #endif /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_poly MLKEM_NAMESPACE_K(empty_cu_poly) +int empty_cu_poly; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/poly.h index 1e8c109c6..6a14c785d 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/poly.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/poly.h @@ -307,112 +307,164 @@ __contract__( ************************************************************/ static INLINE uint16_t scalar_signed_to_unsigned_q(int16_t c) __contract__( - requires(c >= -(MLKEM_Q - 1) && c <= (MLKEM_Q - 1)) - ensures(return_value >= 0 && return_value <= (MLKEM_Q - 1)) + requires(c > -MLKEM_Q && c < MLKEM_Q) + ensures(return_value >= 0 && return_value < MLKEM_Q) ensures(return_value == (int32_t)c + (((int32_t)c < 0) * MLKEM_Q))) { + debug_assert_abs_bound(&c, 1, MLKEM_Q); + /* Add Q if c is negative, but in constant time */ c = ct_sel_int16(c + MLKEM_Q, c, ct_cmask_neg_i16(c)); - cassert(c >= 0, 
"scalar_signed_to_unsigned_q result lower bound"); - cassert(c < MLKEM_Q, "scalar_signed_to_unsigned_q result upper bound"); - /* and therefore cast to uint16_t is safe. */ + debug_assert_bound(&c, 1, 0, MLKEM_Q); return (uint16_t)c; } -#define poly_compress_du MLKEM_NAMESPACE(poly_compress_du) +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || \ + (MLKEM_K == 2 || MLKEM_K == 3) +#define poly_compress_d4 MLKEM_NAMESPACE(poly_compress_d4) /************************************************* - * Name: poly_compress_du + * Name: poly_compress_d4 * - * Description: Compression (du bits) and subsequent serialization of a - *polynomial + * Description: Compression (4 bits) and subsequent serialization of a + * polynomial * * Arguments: - uint8_t *r: pointer to output byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes) * - const poly *a: pointer to input polynomial * Coefficients must be unsigned canonical, * i.e. in [0,1,..,MLKEM_Q-1]. **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) -__contract__( - requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) - requires(memory_no_alias(a, sizeof(poly))) - requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) -); +void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a); + +#define poly_compress_d10 MLKEM_NAMESPACE(poly_compress_d10) +/************************************************* + * Name: poly_compress_d10 + * + * Description: Compression (10 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a); -#define poly_decompress_du MLKEM_NAMESPACE(poly_decompress_du) +#define poly_decompress_d4 MLKEM_NAMESPACE(poly_decompress_d4) /************************************************* - * Name: poly_decompress_du + * Name: poly_decompress_d4 * - * Description: De-serialization and subsequent decompression (du bits) of a - *polynomial; approximate inverse of poly_compress_du + * Description: De-serialization and subsequent decompression (dv bits) of a + * polynomial; approximate inverse of poly_compress * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *a: pointer to input byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES bytes) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes) * * Upon return, the coefficients of the output polynomial are unsigned-canonical * (non-negative and smaller than MLKEM_Q). * **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) -__contract__( - requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) - requires(memory_no_alias(r, sizeof(poly))) - assigns(memory_slice(r, sizeof(poly))) - ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) -); +void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]); -#define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv) +#define poly_decompress_d10 MLKEM_NAMESPACE(poly_decompress_d10) /************************************************* - * Name: poly_compress_dv + * Name: poly_decompress_d10 + * + * Description: De-serialization and subsequent decompression (10 bits) of a + * polynomial; approximate inverse of poly_compress_d10 * - * Description: Compression (dv bits) and subsequent serialization of a - *polynomial + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t 
*a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d10(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]); +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 \ + || MLKEM_K == 3) */ + +#if defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 +#define poly_compress_d5 MLKEM_NAMESPACE(poly_compress_d5) +/************************************************* + * Name: poly_compress_d5 + * + * Description: Compression (5 bits) and subsequent serialization of a + * polynomial * * Arguments: - uint8_t *r: pointer to output byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES_DV) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes) * - const poly *a: pointer to input polynomial * Coefficients must be unsigned canonical, * i.e. in [0,1,..,MLKEM_Q-1]. 
**************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) -__contract__( - requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) - requires(memory_no_alias(a, sizeof(poly))) - requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) - assigns(object_whole(r)) -); +void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a); -#define poly_decompress_dv MLKEM_NAMESPACE(poly_decompress_dv) +#define poly_compress_d11 MLKEM_NAMESPACE(poly_compress_d11) /************************************************* - * Name: poly_decompress_dv + * Name: poly_compress_d11 + * + * Description: Compression (11 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a); + +#define poly_decompress_d5 MLKEM_NAMESPACE(poly_decompress_d5) +/************************************************* + * Name: poly_decompress_d5 * * Description: De-serialization and subsequent decompression (dv bits) of a - *polynomial; approximate inverse of poly_compress + * polynomial; approximate inverse of poly_compress * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *a: pointer to input byte array - * (of length MLKEM_POLYCOMPRESSEDBYTES_DV - *bytes) + * (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes) * * Upon return, the coefficients of the output polynomial are unsigned-canonical * (non-negative and smaller than MLKEM_Q). 
* **************************************************/ MLKEM_NATIVE_INTERNAL_API -void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) -__contract__( - requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) - requires(memory_no_alias(r, sizeof(poly))) - assigns(object_whole(r)) - ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) -); +void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]); + +#define poly_decompress_d11 MLKEM_NAMESPACE(poly_decompress_d11) +/************************************************* + * Name: poly_decompress_d11 + * + * Description: De-serialization and subsequent decompression (11 bits) of a + * polynomial; approximate inverse of poly_compress_d11 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_decompress_d11(poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]); +#endif /* defined(MLKEM_NATIVE_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 \ + */ #define poly_tobytes MLKEM_NAMESPACE(poly_tobytes) /************************************************* @@ -500,144 +552,6 @@ __contract__( assigns(object_whole(msg)) ); -#define poly_getnoise_eta1_4x MLKEM_NAMESPACE(poly_getnoise_eta1_4x) -/************************************************* - * Name: poly_getnoise_eta1_4x - * - * Description: Batch sample four polynomials deterministically from a seed - * and nonces, with output polynomials close to centered binomial distribution - * with parameter MLKEM_ETA1. 
- * - * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce{0,1,2,3}: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, - uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) -/* Depending on MLKEM_K, the pointers passed to this function belong - to the same objects, so we cannot use memory_no_alias for r0-r3. - - NOTE: Somehow it is important to use memory_no_alias() first in the - conjunctions defining each case. -*/ -#if MLKEM_K == 2 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ - (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && - r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#elif MLKEM_K == 4 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case B: r0, r1, r2, r3 consecutive */ - (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 
1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#elif MLKEM_K == 3 -__contract__( - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - requires( /* Case C: r0, r1, r2 consecutive */ - (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && - r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) - assigns(memory_slice(r0, sizeof(poly))) - assigns(memory_slice(r1, sizeof(poly))) - assigns(memory_slice(r2, sizeof(poly))) - assigns(memory_slice(r3, sizeof(poly))) - ensures( - array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); -); -#endif /* MLKEM_K */ - -#if MLKEM_ETA1 == MLKEM_ETA2 -/* - * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 - * where MLKEM_ETA2 = MLKEM_ETA1 = 2. - * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. - */ -#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x -#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ - -#if MLKEM_K == 2 || MLKEM_K == 4 -#define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2) -/************************************************* - * Name: poly_getnoise_eta2 - * - * Description: Sample a polynomial deterministically from a seed and a nonce, - * with output polynomial close to centered binomial distribution - * with parameter MLKEM_ETA2 - * - * Arguments: - poly *r: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce) -__contract__( - requires(memory_no_alias(r, sizeof(poly))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - assigns(object_whole(r)) - ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, 
MLKEM_ETA2 + 1)) -); -#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ - -#if MLKEM_K == 2 -#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x) -/************************************************* - * Name: poly_getnoise_eta1122_4x - * - * Description: Batch sample four polynomials deterministically from a seed - * and a nonces, with output polynomials close to centered binomial - * distribution with parameter MLKEM_ETA1 and MLKEM_ETA2 - * - * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial - * - const uint8_t *seed: pointer to input seed - * (of length MLKEM_SYMBYTES bytes) - * - uint8_t nonce{0,1,2,3}: one-byte input nonce - **************************************************/ -MLKEM_NATIVE_INTERNAL_API -void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, - const uint8_t seed[MLKEM_SYMBYTES], - uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, - uint8_t nonce3) -__contract__( - requires( /* r0, r1 consecutive, r2, r3 consecutive */ - (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && - r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) - requires(memory_no_alias(seed, MLKEM_SYMBYTES)) - assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) - ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) - && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1) - && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)); -); -#endif /* MLKEM_K == 2 */ - #define poly_basemul_montgomery_cached \ MLKEM_NAMESPACE(poly_basemul_montgomery_cached) /************************************************* @@ -649,8 +563,7 @@ __contract__( * Bounds: * - a is assumed to be coefficient-wise < q in absolute value. * - * The result is coefficient-wise bound by 3/2 q in absolute - * value. + * The result is coefficient-wise bound by 2*q in absolute value. 
* * Arguments: - poly *r: pointer to output polynomial * - const poly *a: pointer to first input polynomial @@ -802,4 +715,4 @@ __contract__( assigns(object_whole(r)) ); -#endif +#endif /* POLY_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/polyvec.c index 7d2016773..50ea1c34a 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/polyvec.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/polyvec.c @@ -4,18 +4,29 @@ */ #include "polyvec.h" #include +#include #include "arith_backend.h" +#include "cbd.h" #include "ntt.h" #include "poly.h" +#include "symmetric.h" -#include "debug/debug.h" +#include "debug.h" + +/* Static namespacing + * This is to facilitate building multiple instances + * of mlkem-native (e.g. with varying security levels) + * within a single compilation unit. */ +#define poly_cbd_eta1 MLKEM_NAMESPACE_K(poly_cbd_eta1) +#define poly_cbd_eta2 MLKEM_NAMESPACE_K(poly_cbd_eta2) +/* End of static namespacing */ MLKEM_NATIVE_INTERNAL_API void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU], const polyvec *a) { unsigned i; - POLYVEC_UBOUND(a, MLKEM_Q); + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q); for (i = 0; i < MLKEM_K; i++) { @@ -33,13 +44,15 @@ void polyvec_decompress_du(polyvec *r, poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU); } - POLYVEC_UBOUND(r, MLKEM_Q); + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a) { unsigned i; + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q); + for (i = 0; i < MLKEM_K; i++) { poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]); @@ -54,6 +67,8 @@ void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES]) { poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES); } + + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); } MLKEM_NATIVE_INTERNAL_API @@ -64,6 +79,8 @@ void 
polyvec_ntt(polyvec *r) { poly_ntt(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, NTT_BOUND); } MLKEM_NATIVE_INTERNAL_API @@ -74,6 +91,8 @@ void polyvec_invntt_tomont(polyvec *r) { poly_invntt_tomont(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, INVNTT_BOUND); } #if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) @@ -84,10 +103,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, { unsigned i; poly t; - - POLYVEC_BOUND(a, 4096); - POLYVEC_BOUND(b, NTT_BOUND); - POLYVEC_BOUND(b_cache, MLKEM_Q); + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); poly_basemul_montgomery_cached(r, &a->vec[0], &b->vec[0], &b_cache->vec[0]); for (i = 1; i < MLKEM_K; i++) @@ -95,18 +111,15 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, poly_basemul_montgomery_cached(&t, &a->vec[i], &b->vec[i], &b_cache->vec[i]); poly_add(r, &t); - /* abs bounds: < (i+1) * 3/2 * q */ } /* - * Those bounds are true for the C implementation, but not needed - * in the higher level bounds reasoning. It is thus best to omit - * them from the spec to not unnecessarily constraint native implementations. + * This bound is true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus omitted + * them from the spec to not unnecessarily constrain native + * implementations, but checked here nonetheless. 
*/ - cassert(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_K * 2 * MLKEM_Q), - "polyvec_basemul_acc_montgomery_cached output bounds"); - /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */ - POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q); + debug_assert_abs_bound(r, MLKEM_N, MLKEM_K * 2 * MLKEM_Q); } #else /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */ MLKEM_NATIVE_INTERNAL_API @@ -114,9 +127,8 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, const polyvec *b, const polyvec_mulcache *b_cache) { - POLYVEC_BOUND(a, 4096); - POLYVEC_BOUND(b, NTT_BOUND); - /* Omitting POLYVEC_BOUND(b_cache, MLKEM_Q) since native implementations may + debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, UINT12_LIMIT); + /* Omitting bounds assertion for cache since native implementations may * decide not to use a mulcache. Note that the C backend implementation * of poly_basemul_montgomery_cached() does still include the check. */ polyvec_basemul_acc_montgomery_cached_native(r, a, b, b_cache); @@ -149,6 +161,8 @@ void polyvec_reduce(polyvec *r) { poly_reduce(&r->vec[i]); } + + debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q); } MLKEM_NATIVE_INTERNAL_API @@ -169,4 +183,148 @@ void polyvec_tomont(polyvec *r) { poly_tomont(&r->vec[i]); } + + debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, MLKEM_Q); +} + + +/************************************************* + * Name: poly_cbd_eta1 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA1.
+ * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static INLINE void poly_cbd_eta1(poly *r, + const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)) +) +{ +#if MLKEM_ETA1 == 2 + poly_cbd2(r, buf); +#elif MLKEM_ETA1 == 3 + poly_cbd3(r, buf); +#else +#error "Invalid value of MLKEM_ETA1" +#endif +} + +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +{ + ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t extkey0[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1]; + ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1]; + memcpy(extkey0, seed, MLKEM_SYMBYTES); + memcpy(extkey1, seed, MLKEM_SYMBYTES); + memcpy(extkey2, seed, MLKEM_SYMBYTES); + memcpy(extkey3, seed, MLKEM_SYMBYTES); + extkey0[MLKEM_SYMBYTES] = nonce0; + extkey1[MLKEM_SYMBYTES] = nonce1; + extkey2[MLKEM_SYMBYTES] = nonce2; + extkey3[MLKEM_SYMBYTES] = nonce3; + prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3); + poly_cbd_eta1(r0, buf0); + poly_cbd_eta1(r1, buf1); + poly_cbd_eta1(r2, buf2); + poly_cbd_eta1(r3, buf3); + + debug_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA1 + 1); +} + +#if MLKEM_K == 2 || MLKEM_K == 4 
+/************************************************* + * Name: poly_cbd_eta2 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA2. + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static INLINE void poly_cbd_eta2(poly *r, + const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1))) +{ +#if MLKEM_ETA2 == 2 + poly_cbd2(r, buf); +#else +#error "Invalid value of MLKEM_ETA2" +#endif +} + +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +{ + ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1]; + + memcpy(extkey, seed, MLKEM_SYMBYTES); + extkey[MLKEM_SYMBYTES] = nonce; + prf_eta2(buf, extkey); + + poly_cbd_eta2(r, buf); + + debug_assert_abs_bound(r, MLKEM_N, MLKEM_ETA2 + 1); +} +#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ + + +#if MLKEM_K == 2 +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +{ + ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; + memcpy(extkey[0], seed, MLKEM_SYMBYTES); + memcpy(extkey[1], seed, MLKEM_SYMBYTES); + memcpy(extkey[2], seed, MLKEM_SYMBYTES); + memcpy(extkey[3], seed, MLKEM_SYMBYTES); + extkey[0][MLKEM_SYMBYTES] = nonce0; + extkey[1][MLKEM_SYMBYTES] = nonce1; + extkey[2][MLKEM_SYMBYTES] = nonce2; +
extkey[3][MLKEM_SYMBYTES] = nonce3; + + prf_eta1(buf1[0], extkey[0]); + prf_eta1(buf1[1], extkey[1]); + prf_eta2(buf2[0], extkey[2]); + prf_eta2(buf2[1], extkey[3]); + + poly_cbd_eta1(r0, buf1[0]); + poly_cbd_eta1(r1, buf1[1]); + poly_cbd_eta2(r2, buf2[0]); + poly_cbd_eta2(r3, buf2[1]); + + debug_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1); + debug_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA2 + 1); + debug_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA2 + 1); } +#endif /* MLKEM_K == 2 */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/polyvec.h index 138724150..8be8579e0 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/polyvec.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/polyvec.h @@ -9,19 +9,144 @@ #include "common.h" #include "poly.h" -#define polyvec MLKEM_NAMESPACE(polyvec) +#define polyvec MLKEM_NAMESPACE_K(polyvec) typedef struct { poly vec[MLKEM_K]; } ALIGN polyvec; -#define polyvec_mulcache MLKEM_NAMESPACE(polyvec_mulcache) +#define polyvec_mulcache MLKEM_NAMESPACE_K(polyvec_mulcache) typedef struct { poly_mulcache vec[MLKEM_K]; } polyvec_mulcache; -#define polyvec_compress_du MLKEM_NAMESPACE(polyvec_compress_du) +#define poly_compress_du MLKEM_NAMESPACE_K(poly_compress_du) +/************************************************* + * Name: poly_compress_du + * + * Description: Compression (du bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +static INLINE void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], + const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU))) +{ +#if MLKEM_DU == 10 + poly_compress_d10(r, a); +#elif MLKEM_DU == 11 + poly_compress_d11(r, a); +#else +#error "Invalid value of MLKEM_DU" +#endif +} + +#define poly_decompress_du MLKEM_NAMESPACE_K(poly_decompress_du) +/************************************************* + * Name: poly_decompress_du + * + * Description: De-serialization and subsequent decompression (du bits) of a + * polynomial; approximate inverse of poly_compress_du + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). 
+ * + **************************************************/ +static INLINE void poly_decompress_du( + poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +{ +#if MLKEM_DU == 10 + poly_decompress_d10(r, a); +#elif MLKEM_DU == 11 + poly_decompress_d11(r, a); +#else +#error "Invalid value of MLKEM_DU" +#endif +} + +#define poly_compress_dv MLKEM_NAMESPACE_K(poly_compress_dv) +/************************************************* + * Name: poly_compress_dv + * + * Description: Compression (dv bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +static INLINE void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], + const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(object_whole(r))) +{ +#if MLKEM_DV == 4 + poly_compress_d4(r, a); +#elif MLKEM_DV == 5 + poly_compress_d5(r, a); +#else +#error "Invalid value of MLKEM_DV" +#endif +} + + +#define poly_decompress_dv MLKEM_NAMESPACE_K(poly_decompress_dv) +/************************************************* + * Name: poly_decompress_dv + * + * Description: De-serialization and subsequent decompression (dv bits) of a + * polynomial; approximate inverse of poly_compress_dv + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q).
+ * + **************************************************/ +static INLINE void poly_decompress_dv( + poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(object_whole(r)) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +{ +#if MLKEM_DV == 4 + poly_decompress_d4(r, a); +#elif MLKEM_DV == 5 + poly_decompress_d5(r, a); +#else +#error "Invalid value of MLKEM_DV" +#endif +} + +#define polyvec_compress_du MLKEM_NAMESPACE_K(polyvec_compress_du) /************************************************* * Name: polyvec_compress_du * @@ -44,7 +169,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_decompress_du MLKEM_NAMESPACE(polyvec_decompress_du) +#define polyvec_decompress_du MLKEM_NAMESPACE_K(polyvec_decompress_du) /************************************************* * Name: polyvec_decompress_du * @@ -67,7 +192,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) ); -#define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes) +#define polyvec_tobytes MLKEM_NAMESPACE_K(polyvec_tobytes) /************************************************* * Name: polyvec_tobytes * @@ -88,7 +213,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_frombytes MLKEM_NAMESPACE(polyvec_frombytes) +#define polyvec_frombytes MLKEM_NAMESPACE_K(polyvec_frombytes) /************************************************* * Name: polyvec_frombytes * @@ -110,7 +235,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, UINT12_LIMIT))) ); -#define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt) +#define polyvec_ntt MLKEM_NAMESPACE_K(polyvec_ntt) /************************************************* * Name: polyvec_ntt * @@ -136,7 +261,7 @@ __contract__( array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, NTT_BOUND))) ); -#define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont) +#define polyvec_invntt_tomont 
MLKEM_NAMESPACE_K(polyvec_invntt_tomont) /************************************************* * Name: polyvec_invntt_tomont * @@ -162,7 +287,7 @@ __contract__( ); #define polyvec_basemul_acc_montgomery \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery) /************************************************* * Name: polyvec_basemul_acc_montgomery * @@ -186,7 +311,7 @@ __contract__( #define polyvec_basemul_acc_montgomery_cached \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached) /************************************************* * Name: polyvec_basemul_acc_montgomery_cached * @@ -194,7 +319,7 @@ __contract__( * using mulcache for second operand. * * Bounds: - * - a is assumed to be coefficient-wise < 4096 in absolute value. + * - Every coefficient of a is assumed to be in [0..4095] * - No bounds guarantees for the coefficients in the result. * * Arguments: - poly *r: pointer to output polynomial @@ -218,7 +343,7 @@ __contract__( assigns(memory_slice(r, sizeof(poly))) ); -#define polyvec_mulcache_compute MLKEM_NAMESPACE(polyvec_mulcache_compute) +#define polyvec_mulcache_compute MLKEM_NAMESPACE_K(polyvec_mulcache_compute) /************************************************************ * Name: polyvec_mulcache_compute * @@ -252,7 +377,7 @@ __contract__( assigns(object_whole(x)) ); -#define polyvec_reduce MLKEM_NAMESPACE(polyvec_reduce) +#define polyvec_reduce MLKEM_NAMESPACE_K(polyvec_reduce) /************************************************* * Name: polyvec_reduce * @@ -278,7 +403,7 @@ __contract__( array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) ); -#define polyvec_add MLKEM_NAMESPACE(polyvec_add) +#define polyvec_add MLKEM_NAMESPACE_K(polyvec_add) /************************************************* * Name: polyvec_add * @@ -309,7 +434,7 @@ __contract__( assigns(object_whole(r)) ); -#define polyvec_tomont MLKEM_NAMESPACE(polyvec_tomont) +#define 
polyvec_tomont MLKEM_NAMESPACE_K(polyvec_tomont) /************************************************* * Name: polyvec_tomont * @@ -329,4 +454,142 @@ __contract__( array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, MLKEM_Q))) ); +#define poly_getnoise_eta1_4x MLKEM_NAMESPACE_K(poly_getnoise_eta1_4x) +/************************************************* + * Name: poly_getnoise_eta1_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial distribution + * with parameter MLKEM_ETA1. + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +/* Depending on MLKEM_K, the pointers passed to this function belong + to the same objects, so we cannot use memory_no_alias for r0-r3. + + NOTE: Somehow it is important to use memory_no_alias() first in the + conjunctions defining each case. 
+*/ +#if MLKEM_K == 2 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#elif MLKEM_K == 4 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case B: r0, r1, r2, r3 consecutive */ + (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#elif MLKEM_K == 3 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case C: r0, r1, r2 consecutive */ + (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && + r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) 
+ && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)); +); +#endif /* MLKEM_K */ + +#if MLKEM_ETA1 == MLKEM_ETA2 +/* + * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 + * where MLKEM_ETA2 = MLKEM_ETA1 = 2. + * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. + */ +#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x +#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ + +#if MLKEM_K == 2 || MLKEM_K == 4 +#define poly_getnoise_eta2 MLKEM_NAMESPACE_K(poly_getnoise_eta2) +/************************************************* + * Name: poly_getnoise_eta2 + * + * Description: Sample a polynomial deterministically from a seed and a nonce, + * with output polynomial close to centered binomial distribution + * with parameter MLKEM_ETA2 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce: one-byte input nonce + **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r)) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)) +); +#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ + +#if MLKEM_K == 2 +#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE_K(poly_getnoise_eta1122_4x) +/************************************************* + * Name: poly_getnoise_eta1122_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial + * distribution with parameter MLKEM_ETA1 and MLKEM_ETA2 + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce +
**************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +__contract__( + requires( /* r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) + ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)); +); +#endif /* MLKEM_K == 2 */ + #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/reduce.h index 1f502167e..b432a4201 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/reduce.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/reduce.h @@ -8,7 +8,7 @@ #include #include "cbmc.h" #include "common.h" -#include "debug/debug.h" +#include "debug.h" /* Static namespacing * This is to facilitate building multiple instances @@ -109,13 +109,13 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a) **************************************************/ static INLINE int16_t montgomery_reduce(int32_t a) __contract__( - requires(a > -(2 * 4096 * 32768)) - requires(a < (2 * 4096 * 32768)) + requires(a > -(2 * UINT12_LIMIT * 32768)) + requires(a < (2 * UINT12_LIMIT * 32768)) ensures(return_value > -2 * MLKEM_Q && return_value < 2 * MLKEM_Q) ) { int16_t res; - SCALAR_BOUND(a, 2 * UINT12_LIMIT * 32768, "montgomery_reduce input"); + debug_assert_abs_bound(&a, 1, 2 * UINT12_LIMIT * 32768); res = montgomery_reduce_generic(a); /* Bounds: @@ -124,7 +124,7 @@ 
__contract__( * <= UINT12_LIMIT + (MLKEM_Q + 1) / 2 * < 2 * MLKEM_Q */ - SCALAR_BOUND(res, 2 * MLKEM_Q, "montgomery_reduce output"); + debug_assert_abs_bound(&res, 1, 2 * MLKEM_Q); return res; } @@ -150,7 +150,7 @@ __contract__( ) { int16_t res; - SCALAR_BOUND(b, HALF_Q, "fqmul input"); + debug_assert_abs_bound(&b, 1, HALF_Q); res = montgomery_reduce((int32_t)a * (int32_t)b); /* Bounds: @@ -160,7 +160,7 @@ __contract__( * < MLKEM_Q */ - SCALAR_BOUND(res, MLKEM_Q, "fqmul output"); + debug_assert_abs_bound(&res, 1, MLKEM_Q); return res; } @@ -200,7 +200,10 @@ __contract__( * t is in -10 .. +10, so we need 32-bit math to * evaluate t * MLKEM_Q and the subsequent subtraction */ - return (int16_t)(a - t * MLKEM_Q); + int16_t res = (int16_t)(a - t * MLKEM_Q); + + debug_assert_abs_bound(&res, 1, HALF_Q); + return res; } #endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/rej_uniform.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/rej_uniform.c index 918986e9b..cbbe4407f 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/rej_uniform.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/rej_uniform.c @@ -2,46 +2,24 @@ * Copyright (c) 2024 The mlkem-native project authors * SPDX-License-Identifier: Apache-2.0 */ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) -#include "rej_uniform.h" #include "arith_backend.h" +#include "debug.h" +#include "fips202.h" +#include "fips202x4.h" +#include "rej_uniform.h" +#include "symmetric.h" /* Static namespacing * This is to facilitate building multiple instances * of mlkem-native (e.g. with varying security levels) * within a single compilation unit. 
*/ +#define rej_uniform MLKEM_NAMESPACE(rej_uniform) #define rej_uniform_scalar MLKEM_NAMESPACE(rej_uniform_scalar) /* End of static namespacing */ -/************************************************* - * Name: rej_uniform_scalar - * - * Description: Run rejection sampling on uniform random bytes to generate - * uniform random integers mod q - * - * Arguments: - int16_t *r: pointer to output buffer - * - unsigned int target: requested number of 16-bit integers - * (uniform mod q). - * Must be <= 4096. - * - unsigned int offset: number of 16-bit integers that have - * already been sampled. - * Must be <= target. - * - const uint8_t *buf: pointer to input buffer - * (assumed to be uniform random bytes) - * - unsigned int buflen: length of input buffer in bytes - * Must be <= 4096. - * Must be a multiple of 3. - * - * Note: Strictly speaking, only a few values of buflen near UINT_MAX need - * excluding. The limit of 4096 is somewhat arbitary but sufficient for all - * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. - * - * Returns the new offset of sampled 16-bit integers, at most target, - * and at least the initial offset. - * If the new offset is strictly less than len, all of the input buffers - * is guaranteed to have been consumed. If it is equal to len, no information - * is provided on how many bytes of the input buffer have been consumed. 
- **************************************************/ static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target, unsigned int offset, const uint8_t *buf, unsigned int buflen) @@ -58,6 +36,8 @@ __contract__( unsigned int ctr, pos; uint16_t val0, val1; + debug_assert_bound(r, offset, 0, MLKEM_Q); + ctr = offset; pos = 0; /* pos + 3 cannot overflow due to the assumption buflen <= 4096 */ @@ -79,28 +59,183 @@ __contract__( r[ctr++] = val1; } } + + debug_assert_bound(r, ctr, 0, MLKEM_Q); return ctr; } #if !defined(MLKEM_USE_NATIVE_REJ_UNIFORM) -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +/************************************************* + * Name: rej_uniform + * + * Description: Run rejection sampling on uniform random bytes to generate + * uniform random integers mod q + * + * Arguments: - int16_t *r: pointer to output buffer + * - unsigned int target: requested number of 16-bit integers + * (uniform mod q). + * Must be <= 4096. + * - unsigned int offset: number of 16-bit integers that have + * already been sampled. + * Must be <= target. + * - const uint8_t *buf: pointer to input buffer + * (assumed to be uniform random bytes) + * - unsigned int buflen: length of input buffer in bytes + * Must be <= 4096. + * Must be a multiple of 3. + * + * Note: Strictly speaking, only a few values of buflen near UINT_MAX need + * excluding. The limit of 4096 is somewhat arbitrary but sufficient for all + * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. + * + * Returns the new offset of sampled 16-bit integers, at most target, + * and at least the initial offset. + * If the new offset is strictly less than target, all of the input buffer + * is guaranteed to have been consumed. If it is equal to target, no information + * is provided on how many bytes of the input buffer have been consumed.
+ **************************************************/ + +/* + * NOTE: The signature differs from the Kyber reference implementation + * in that it adds the offset and always expects the base of the target + * buffer. This avoids shifting the buffer base in the caller, which appears + * tricky to reason about. + */ +static unsigned int rej_uniform(int16_t *r, unsigned int target, + unsigned int offset, const uint8_t *buf, + unsigned int buflen) +__contract__( + requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) + requires(memory_no_alias(r, sizeof(int16_t) * target)) + requires(memory_no_alias(buf, buflen)) + requires(offset > 0 ==> array_bound(r, 0, offset, 0, MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * target)) + ensures(offset <= return_value && return_value <= target) + ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, MLKEM_Q)) +) { return rej_uniform_scalar(r, target, offset, buf, buflen); } #else /* MLKEM_USE_NATIVE_REJ_UNIFORM */ - -MLKEM_NATIVE_INTERNAL_API -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +static unsigned int rej_uniform(int16_t *r, unsigned int target, + unsigned int offset, const uint8_t *buf, + unsigned int buflen) { int ret; /* Sample from large buffer with full lane as much as possible. 
*/ ret = rej_uniform_native(r + offset, target - offset, buf, buflen); if (ret != -1) - return offset + (unsigned)ret; + { + unsigned res = offset + (unsigned)ret; + debug_assert_bound(r, res, 0, MLKEM_Q); + return res; + } return rej_uniform_scalar(r, target, offset, buf, buflen); } #endif /* MLKEM_USE_NATIVE_REJ_UNIFORM */ + +#ifndef MLKEM_GEN_MATRIX_NBLOCKS +#define MLKEM_GEN_MATRIX_NBLOCKS \ + ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + XOF_RATE) / XOF_RATE) +#endif + +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform_x4(poly *vec, uint8_t *seed[4]) +{ + /* Temporary buffers for XOF output before rejection sampling */ + uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + + /* Tracks the number of coefficients we have already sampled */ + unsigned int ctr[KECCAK_WAY]; + xof_x4_ctx statex; + unsigned int buflen; + + shake128x4_inc_init(&statex); + + /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */ + xof_x4_absorb(&statex, seed[0], seed[1], seed[2], seed[3], + MLKEM_SYMBYTES + 2); + + /* + * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + * This should generate the matrix entries with high probability. + */ + xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, + &statex); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); + + /* + * So long as not all matrix entries have been generated, squeeze + * one more block a time until we're done. 
+ */ + buflen = XOF_RATE; + while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || + ctr[3] < MLKEM_N) + __loop__( + assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), + object_whole(buf1), object_whole(buf2), object_whole(buf3)) + invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) + invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) + invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, MLKEM_Q)) + invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, MLKEM_Q)) + invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, MLKEM_Q)) + invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, MLKEM_Q))) + { + xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); + } + + xof_x4_release(&statex); +} + +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) +{ + xof_ctx state; + uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE]; + unsigned int ctr, buflen; + + shake128_inc_init(&state); + + xof_absorb(&state, seed, MLKEM_SYMBYTES + 2); + + /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + */ + /* This should generate the matrix entry with high probability. 
*/ + xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE; + ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); + + /* Squeeze + sample one more block a time until we're done */ + buflen = XOF_RATE; + while (ctr < MLKEM_N) + __loop__( + assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) + invariant(ctr <= MLKEM_N) + invariant(array_bound(entry->coeffs, 0, ctr, 0, MLKEM_Q))) + { + xof_squeezeblocks(buf, 1, &state); + ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen); + } + + xof_release(&state); +} + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_rej_uniform MLKEM_NAMESPACE_K(empty_cu_rej_uniform) +int empty_cu_rej_uniform; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/rej_uniform.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/rej_uniform.h index 13db836bc..801287259 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/rej_uniform.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/rej_uniform.h @@ -9,54 +9,55 @@ #include #include "cbmc.h" #include "common.h" +#include "poly.h" -#define rej_uniform MLKEM_NAMESPACE(rej_uniform) +#define poly_rej_uniform_x4 MLKEM_NAMESPACE(poly_rej_uniform_x4) /************************************************* - * Name: rej_uniform + * Name: poly_rej_uniform_x4 * - * Description: Run rejection sampling on uniform random bytes to generate - * uniform random integers mod q + * Description: Generate four polynomials using rejection sampling + * on (pseudo-)uniformly random bytes sampled from a seed. * - * Arguments: - int16_t *r: pointer to output buffer - * - unsigned int target: requested number of 16-bit integers - * (uniform mod q). - * Must be <= 4096. - * - unsigned int offset: number of 16-bit integers that have - * already been sampled. - * Must be <= target. 
- * - const uint8_t *buf: pointer to input buffer - * (assumed to be uniform random bytes) - * - unsigned int buflen: length of input buffer in bytes - * Must be <= 4096. - * Must be a multiple of 3. + * Arguments: - poly *vec: Pointer to an array of 4 polynomials + * to be sampled. + * - uint8_t *seed[4]: Pointer to array of four pointers + * pointing to the seed buffers of size + * MLKEM_SYMBYTES + 2 each. * - * Note: Strictly speaking, only a few values of buflen near UINT_MAX need - * excluding. The limit of 4096 is somewhat arbitary but sufficient for all - * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. - * - * Returns the new offset of sampled 16-bit integers, at most target, - * and at least the initial offset. - * If the new offset is strictly less than len, all of the input buffers - * is guaranteed to have been consumed. If it is equal to len, no information - * is provided on how many bytes of the input buffer have been consumed. **************************************************/ +MLKEM_NATIVE_INTERNAL_API +void poly_rej_uniform_x4(poly *vec, uint8_t *seed[4]) +__contract__( + requires(memory_no_alias(vec, sizeof(poly) * 4)) + requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) + requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) + assigns(memory_slice(vec, sizeof(poly) * 4)) + ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, MLKEM_Q))); -/* - * NOTE: The signature differs from the Kyber reference implementation - * in that it adds the offset and always expects the base of the target - * buffer. 
This avoids shifting the buffer base in the caller, which appears - * tricky to reason about. - */ +#define poly_rej_uniform MLKEM_NAMESPACE(poly_rej_uniform) +/************************************************* + * Name: poly_rej_uniform + * + * Description: Generate polynomial using rejection sampling + * on (pseudo-)uniformly random bytes sampled from a seed. + * + * Arguments: - poly *entry: Pointer to polynomial to be sampled. + * - uint8_t *seed: Pointer to seed buffer of size + * MLKEM_SYMBYTES + 2. + * + **************************************************/ MLKEM_NATIVE_INTERNAL_API -unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, - const uint8_t *buf, unsigned int buflen) +void poly_rej_uniform(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) __contract__( - requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) - requires(memory_no_alias(r, sizeof(int16_t) * target)) - requires(memory_no_alias(buf, buflen)) - requires(offset > 0 ==> array_bound(r, 0, offset, 0, MLKEM_Q)) - assigns(memory_slice(r, sizeof(int16_t) * target)) - ensures(offset <= return_value && return_value <= target) - ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, MLKEM_Q)) -); -#endif + requires(memory_no_alias(entry, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) + assigns(memory_slice(entry, sizeof(poly))) + ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q))); + +#endif /* REJ_UNIFORM_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/symmetric.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/symmetric.h index 55ebbbd53..3563e5505 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/symmetric.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/symmetric.h @@ -10,6 +10,7 @@ #include "cbmc.h" #include "common.h" #include "fips202.h" +#include "fips202x4.h" /* Macros denoting FIPS-203 specific Hash functions */ diff --git
a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/verify.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/verify.c index b7078fcc1..9f39dcd22 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/verify.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/verify.c @@ -4,7 +4,8 @@ */ #include "verify.h" -#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) +#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) && \ + !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) /* * Masking value used in constant-time functions from * verify.h to block the compiler's range analysis and @@ -12,9 +13,11 @@ */ volatile uint64_t ct_opt_blocker_u64 = 0; -#else /* MLKEM_USE_ASM_VALUE_BARRIER */ +#else /* MLKEM_USE_ASM_VALUE_BARRIER && \ + !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ -#define empty_cu_verify MLKEM_NAMESPACE(empty_cu_verify) +#define empty_cu_verify MLKEM_NAMESPACE_K(empty_cu_verify) int empty_cu_verify; -#endif /* MLKEM_USE_ASM_VALUE_BARRIER */ +#endif /* MLKEM_USE_ASM_VALUE_BARRIER && \ + !MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/verify.h index 8c47155dc..f6ecf5eba 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/verify.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/verify.h @@ -268,7 +268,7 @@ __contract__( for (i = 0; i < len; i++) __loop__( - invariant(i >= 0 && i <= len) + invariant(i <= len) invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k]))))) { r |= a[i] ^ b[i]; @@ -314,4 +314,4 @@ __contract__( } } -#endif +#endif /* VERIFY_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/arith_native_x86_64.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/arith_native_x86_64.h index ce13e7911..25e00a930 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/arith_native_x86_64.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/arith_native_x86_64.h @@ -42,7 +42,7 @@ void basemul_avx2(__m256i 
*r, const __m256i *a, const __m256i *b, const __m256i *qdata); #define polyvec_basemul_acc_montgomery_cached_avx2 \ - MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_avx2) + MLKEM_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached_avx2) void polyvec_basemul_acc_montgomery_cached_avx2( poly *r, const polyvec *a, const polyvec *b, const polyvec_mulcache *b_cache); diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/default_impl.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/default_impl.h index 66de8c85f..029111c17 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/default_impl.h +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/default_impl.h @@ -28,9 +28,6 @@ #define MLKEM_USE_NATIVE_POLY_TOBYTES #define MLKEM_USE_NATIVE_POLY_FROMBYTES -#define INVNTT_BOUND_NATIVE (8 * MLKEM_Q) -#define NTT_BOUND_NATIVE (8 * MLKEM_Q) - static INLINE void poly_permute_bitrev_to_custom(poly *data) { nttunpack_avx2((__m256i *)(data->coeffs), qdata.vec); diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/zetas.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/zetas.c index 1a26e0dd5..4ef887c62 100644 --- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/zetas.c +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/zetas.c @@ -8,6 +8,8 @@ * Do not modify it directly. */ +#include "common.h" +#if !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) #include "ntt.h" /* @@ -28,3 +30,10 @@ ALIGN const int16_t zetas[128] = { -1187, -1659, -1185, -1530, -1278, 794, -1510, -854, -870, 478, -108, -308, 996, 991, 958, -1460, 1522, 1628, }; + +#else /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */ + +#define empty_cu_zetas MLKEM_NAMESPACE_K(empty_cu_zetas) +int empty_cu_zetas; + +#endif /* MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED */