From 7cb8df579329b70cd4ede09d6d228636b8e31e89 Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Wed, 13 Dec 2023 09:45:59 -0800 Subject: [PATCH] bn: Move x86-64 argument-based dispatching of bn_mul_mont to C. Take a step towards moving the OPENSSL_ia32cap_P usage out of x86_64-mont.pl. The MULX+ADX dispatching within |bn_sqr8x_mont| is deferred to a future change. Bug: 673 Change-Id: I8768bb33d2c289fd7ccf8743b51721e55ab74f35 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/65527 Reviewed-by: Bob Beck Reviewed-by: David Benjamin Commit-Queue: David Benjamin --- crypto/fipsmodule/bn/asm/x86_64-mont.pl | 55 ++++++++----------------- crypto/fipsmodule/bn/bn_test.cc | 23 +++++++++++ crypto/fipsmodule/bn/internal.h | 23 +++++++++++ crypto/fipsmodule/bn/montgomery.c | 17 ++++++++ 4 files changed, 80 insertions(+), 38 deletions(-) diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont.pl b/crypto/fipsmodule/bn/asm/x86_64-mont.pl index be4c69b55a..875a5a5e66 100755 --- a/crypto/fipsmodule/bn/asm/x86_64-mont.pl +++ b/crypto/fipsmodule/bn/asm/x86_64-mont.pl @@ -65,7 +65,7 @@ # output, so this isn't useful anyway. 
$addx = 1; -# int bn_mul_mont( +# int bn_mul_mont_nohw( $rp="%rdi"; # BN_ULONG *rp, $ap="%rsi"; # const BN_ULONG *ap, $bp="%rdx"; # const BN_ULONG *bp, @@ -87,33 +87,15 @@ .extern OPENSSL_ia32cap_P -.globl bn_mul_mont -.type bn_mul_mont,\@function,6 +.globl bn_mul_mont_nohw +.type bn_mul_mont_nohw,\@function,6 .align 16 -bn_mul_mont: +bn_mul_mont_nohw: .cfi_startproc _CET_ENDBR mov ${num}d,${num}d mov %rsp,%rax .cfi_def_cfa_register %rax - test \$3,${num}d - jnz .Lmul_enter - cmp \$8,${num}d - jb .Lmul_enter -___ -$code.=<<___ if ($addx); - leaq OPENSSL_ia32cap_P(%rip),%r11 - mov 8(%r11),%r11d -___ -$code.=<<___; - cmp $ap,$bp - jne .Lmul4x_enter - test \$7,${num}d - jz .Lsqr8x_enter - jmp .Lmul4x_enter - -.align 16 -.Lmul_enter: push %rbx .cfi_push %rbx push %rbp @@ -348,27 +330,21 @@ .Lmul_epilogue: ret .cfi_endproc -.size bn_mul_mont,.-bn_mul_mont +.size bn_mul_mont_nohw,.-bn_mul_mont_nohw ___ {{{ my @A=("%r10","%r11"); my @N=("%r13","%rdi"); $code.=<<___; +.globl bn_mul4x_mont .type bn_mul4x_mont,\@function,6 .align 16 bn_mul4x_mont: .cfi_startproc + _CET_ENDBR mov ${num}d,${num}d mov %rsp,%rax .cfi_def_cfa_register %rax -.Lmul4x_enter: -___ -$code.=<<___ if ($addx); - and \$0x80100,%r11d - cmp \$0x80100,%r11d - je .Lmulx4x_enter -___ -$code.=<<___; push %rbx .cfi_push %rbx push %rbp @@ -806,7 +782,7 @@ }}} {{{ ###################################################################### -# void bn_sqr8x_mont( +# int bn_sqr8x_mont( my $rptr="%rdi"; # const BN_ULONG *rptr, my $aptr="%rsi"; # const BN_ULONG *aptr, my $bptr="%rdx"; # not used @@ -825,13 +801,15 @@ $code.=<<___; .extern bn_sqr8x_internal # see x86_64-mont5 module +.globl bn_sqr8x_mont .type bn_sqr8x_mont,\@function,6 .align 32 bn_sqr8x_mont: .cfi_startproc + _CET_ENDBR + mov ${num}d,${num}d mov %rsp,%rax .cfi_def_cfa_register %rax -.Lsqr8x_enter: push %rbx .cfi_push %rbx push %rbp @@ -1024,13 +1002,14 @@ my $bp="%rdx"; # original value $code.=<<___; +.globl bn_mulx4x_mont .type bn_mulx4x_mont,\@function,6 
.align 32 bn_mulx4x_mont: .cfi_startproc + _CET_ENDBR mov %rsp,%rax .cfi_def_cfa_register %rax -.Lmulx4x_enter: push %rbx .cfi_push %rbx push %rbp @@ -1535,9 +1514,9 @@ .section .pdata .align 4 - .rva .LSEH_begin_bn_mul_mont - .rva .LSEH_end_bn_mul_mont - .rva .LSEH_info_bn_mul_mont + .rva .LSEH_begin_bn_mul_mont_nohw + .rva .LSEH_end_bn_mul_mont_nohw + .rva .LSEH_info_bn_mul_mont_nohw .rva .LSEH_begin_bn_mul4x_mont .rva .LSEH_end_bn_mul4x_mont @@ -1555,7 +1534,7 @@ $code.=<<___; .section .xdata .align 8 -.LSEH_info_bn_mul_mont: +.LSEH_info_bn_mul_mont_nohw: .byte 9,0,0,0 .rva mul_handler .rva .Lmul_body,.Lmul_epilogue # HandlerData[] diff --git a/crypto/fipsmodule/bn/bn_test.cc b/crypto/fipsmodule/bn/bn_test.cc index 08c4719d25..13042ea65e 100644 --- a/crypto/fipsmodule/bn/bn_test.cc +++ b/crypto/fipsmodule/bn/bn_test.cc @@ -2881,10 +2881,33 @@ TEST_F(BNTest, BNMulMontABI) { a[0] = 1; b[0] = 42; +#if defined(OPENSSL_X86_64) + if (bn_mulx4x_mont_capable(words)) { + CHECK_ABI(bn_mulx4x_mont, r.data(), a.data(), b.data(), mont->N.d, + mont->n0, words); + CHECK_ABI(bn_mulx4x_mont, r.data(), a.data(), a.data(), mont->N.d, + mont->n0, words); + } + if (bn_mul4x_mont_capable(words)) { + CHECK_ABI(bn_mul4x_mont, r.data(), a.data(), b.data(), mont->N.d, + mont->n0, words); + CHECK_ABI(bn_mul4x_mont, r.data(), a.data(), a.data(), mont->N.d, + mont->n0, words); + } + CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), b.data(), mont->N.d, + mont->n0, words); + CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), a.data(), mont->N.d, + mont->n0, words); + if (bn_sqr8x_mont_capable(words)) { + CHECK_ABI(bn_sqr8x_mont, r.data(), a.data(), a.data(), mont->N.d, + mont->n0, words); + } +#else CHECK_ABI(bn_mul_mont, r.data(), a.data(), b.data(), mont->N.d, mont->n0, words); CHECK_ABI(bn_mul_mont, r.data(), a.data(), a.data(), mont->N.d, mont->n0, words); +#endif } } #endif // OPENSSL_BN_ASM_MONT && SUPPORTS_ABI_TEST diff --git a/crypto/fipsmodule/bn/internal.h 
b/crypto/fipsmodule/bn/internal.h index d556488bf4..4de201f5af 100644 --- a/crypto/fipsmodule/bn/internal.h +++ b/crypto/fipsmodule/bn/internal.h @@ -400,6 +400,29 @@ int bn_rand_secret_range(BIGNUM *r, int *out_is_uniform, BN_ULONG min_inclusive, // inputs. int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, size_t num); + +#if defined(OPENSSL_X86_64) +int bn_mul_mont_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0, size_t num); +OPENSSL_INLINE int bn_mul4x_mont_capable(size_t num) { // Nonzero iff |num| is at least 8 and a multiple of 4. + return (num >= 8) && ((num & 3) == 0); +} +int bn_mul4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0, size_t num); +OPENSSL_INLINE int bn_mulx4x_mont_capable(size_t num) { + // MULX is in BMI2. + return bn_mul4x_mont_capable(num) && CRYPTO_is_BMI2_capable() && + CRYPTO_is_ADX_capable(); +} +int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0, size_t num); +OPENSSL_INLINE int bn_sqr8x_mont_capable(size_t num) { // Nonzero iff |num| is at least 8 and a multiple of 8. + return (num >= 8) && ((num & 7) == 0); +} +int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *unused_bp, + const BN_ULONG *np, const BN_ULONG *n0, size_t num); +#endif // defined(OPENSSL_X86_64) + #endif #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c index f219d42278..86b64c67a6 100644 --- a/crypto/fipsmodule/bn/montgomery.c +++ b/crypto/fipsmodule/bn/montgomery.c @@ -504,3 +504,20 @@ void bn_mod_mul_montgomery_small(BN_ULONG *r, const BN_ULONG *a, } OPENSSL_cleanse(tmp, 2 * num * sizeof(BN_ULONG)); } + +#if defined(OPENSSL_BN_ASM_MONT) && defined(OPENSSL_X86_64) +int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0, size_t num) +{ // Dispatch order: bn_sqr8x_mont when |ap| and |bp| are the same pointer (it ignores |bp|), then bn_mulx4x_mont when BMI2 and ADX are available, then bn_mul4x_mont, otherwise bn_mul_mont_nohw. + if (ap == bp && bn_sqr8x_mont_capable(num)) { + return 
bn_sqr8x_mont(rp, ap, bp, np, n0, num); + } + if (bn_mulx4x_mont_capable(num)) { + return bn_mulx4x_mont(rp, ap, bp, np, n0, num); + } + if (bn_mul4x_mont_capable(num)) { + return bn_mul4x_mont(rp, ap, bp, np, n0, num); + } + return bn_mul_mont_nohw(rp, ap, bp, np, n0, num); +} +#endif // defined(OPENSSL_BN_ASM_MONT) && defined(OPENSSL_X86_64)