Skip to content

Commit

Permalink
bn: Move x86-64 argument-based dispatching of bn_mul_mont to C.
Browse files Browse the repository at this point in the history
Take a step towards moving the OPENSSL_ia32cap_P usage out of
x86_64-mont.pl. The MULX+ADX dispatching within |bn_sqr8x_mont| is
deferred to a future change.

Bug: 673
Change-Id: I8768bb33d2c289fd7ccf8743b51721e55ab74f35
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/65527
Reviewed-by: Bob Beck <[email protected]>
Reviewed-by: David Benjamin <[email protected]>
Commit-Queue: David Benjamin <[email protected]>
  • Loading branch information
briansmith authored and Boringssl LUCI CQ committed Jan 26, 2024
1 parent 48dce6d commit 7cb8df5
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 38 deletions.
55 changes: 17 additions & 38 deletions crypto/fipsmodule/bn/asm/x86_64-mont.pl
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
# output, so this isn't useful anyway.
$addx = 1;

# int bn_mul_mont(
# int bn_mul_mont_nohw(
$rp="%rdi"; # BN_ULONG *rp,
$ap="%rsi"; # const BN_ULONG *ap,
$bp="%rdx"; # const BN_ULONG *bp,
Expand All @@ -87,33 +87,15 @@
.extern OPENSSL_ia32cap_P
.globl bn_mul_mont
.type bn_mul_mont,\@function,6
.globl bn_mul_mont_nohw
.type bn_mul_mont_nohw,\@function,6
.align 16
bn_mul_mont:
bn_mul_mont_nohw:
.cfi_startproc
_CET_ENDBR
mov ${num}d,${num}d
mov %rsp,%rax
.cfi_def_cfa_register %rax
test \$3,${num}d
jnz .Lmul_enter
cmp \$8,${num}d
jb .Lmul_enter
___
$code.=<<___ if ($addx);
leaq OPENSSL_ia32cap_P(%rip),%r11
mov 8(%r11),%r11d
___
$code.=<<___;
cmp $ap,$bp
jne .Lmul4x_enter
test \$7,${num}d
jz .Lsqr8x_enter
jmp .Lmul4x_enter
.align 16
.Lmul_enter:
push %rbx
.cfi_push %rbx
push %rbp
Expand Down Expand Up @@ -348,27 +330,21 @@
.Lmul_epilogue:
ret
.cfi_endproc
.size bn_mul_mont,.-bn_mul_mont
.size bn_mul_mont_nohw,.-bn_mul_mont_nohw
___
{{{
my @A=("%r10","%r11");
my @N=("%r13","%rdi");
$code.=<<___;
.globl bn_mul4x_mont
.type bn_mul4x_mont,\@function,6
.align 16
bn_mul4x_mont:
.cfi_startproc
_CET_ENDBR
mov ${num}d,${num}d
mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lmul4x_enter:
___
$code.=<<___ if ($addx);
and \$0x80100,%r11d
cmp \$0x80100,%r11d
je .Lmulx4x_enter
___
$code.=<<___;
push %rbx
.cfi_push %rbx
push %rbp
Expand Down Expand Up @@ -806,7 +782,7 @@
}}}
{{{
######################################################################
# void bn_sqr8x_mont(
# int bn_sqr8x_mont(
my $rptr="%rdi"; # const BN_ULONG *rptr,
my $aptr="%rsi"; # const BN_ULONG *aptr,
my $bptr="%rdx"; # not used
Expand All @@ -825,13 +801,15 @@
$code.=<<___;
.extern bn_sqr8x_internal # see x86_64-mont5 module
.globl bn_sqr8x_mont
.type bn_sqr8x_mont,\@function,6
.align 32
bn_sqr8x_mont:
.cfi_startproc
_CET_ENDBR
mov ${num}d,${num}d
mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lsqr8x_enter:
push %rbx
.cfi_push %rbx
push %rbp
Expand Down Expand Up @@ -1024,13 +1002,14 @@
my $bp="%rdx"; # original value

$code.=<<___;
.globl bn_mulx4x_mont
.type bn_mulx4x_mont,\@function,6
.align 32
bn_mulx4x_mont:
.cfi_startproc
_CET_ENDBR
mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lmulx4x_enter:
push %rbx
.cfi_push %rbx
push %rbp
Expand Down Expand Up @@ -1535,9 +1514,9 @@
.section .pdata
.align 4
.rva .LSEH_begin_bn_mul_mont
.rva .LSEH_end_bn_mul_mont
.rva .LSEH_info_bn_mul_mont
.rva .LSEH_begin_bn_mul_mont_nohw
.rva .LSEH_end_bn_mul_mont_nohw
.rva .LSEH_info_bn_mul_mont_nohw
.rva .LSEH_begin_bn_mul4x_mont
.rva .LSEH_end_bn_mul4x_mont
Expand All @@ -1555,7 +1534,7 @@
$code.=<<___;
.section .xdata
.align 8
.LSEH_info_bn_mul_mont:
.LSEH_info_bn_mul_mont_nohw:
.byte 9,0,0,0
.rva mul_handler
.rva .Lmul_body,.Lmul_epilogue # HandlerData[]
Expand Down
23 changes: 23 additions & 0 deletions crypto/fipsmodule/bn/bn_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2881,10 +2881,33 @@ TEST_F(BNTest, BNMulMontABI) {
a[0] = 1;
b[0] = 42;

#if defined(OPENSSL_X86_64)
if (bn_mulx4x_mont_capable(words)) {
CHECK_ABI(bn_mulx4x_mont, r.data(), a.data(), b.data(), mont->N.d,
mont->n0, words);
CHECK_ABI(bn_mulx4x_mont, r.data(), a.data(), a.data(), mont->N.d,
mont->n0, words);
}
if (bn_mul4x_mont_capable(words)) {
CHECK_ABI(bn_mul4x_mont, r.data(), a.data(), b.data(), mont->N.d,
mont->n0, words);
CHECK_ABI(bn_mul4x_mont, r.data(), a.data(), a.data(), mont->N.d,
mont->n0, words);
}
CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), b.data(), mont->N.d,
mont->n0, words);
CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), a.data(), mont->N.d,
mont->n0, words);
if (bn_sqr8x_mont_capable(words)) {
CHECK_ABI(bn_sqr8x_mont, r.data(), a.data(), a.data(), mont->N.d,
mont->n0, words);
}
#else
CHECK_ABI(bn_mul_mont, r.data(), a.data(), b.data(), mont->N.d, mont->n0,
words);
CHECK_ABI(bn_mul_mont, r.data(), a.data(), a.data(), mont->N.d, mont->n0,
words);
#endif
}
}
#endif // OPENSSL_BN_ASM_MONT && SUPPORTS_ABI_TEST
Expand Down
23 changes: 23 additions & 0 deletions crypto/fipsmodule/bn/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,29 @@ int bn_rand_secret_range(BIGNUM *r, int *out_is_uniform, BN_ULONG min_inclusive,
// inputs.
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);

#if defined(OPENSSL_X86_64)
// bn_mul_mont_nohw takes the same arguments as |bn_mul_mont|. It is the
// assembly routine exported by x86_64-mont.pl that |bn_mul_mont| falls back to
// when none of the |bn_sqr8x_mont|, |bn_mulx4x_mont| or |bn_mul4x_mont|
// conditions are met.
int bn_mul_mont_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
// bn_mul4x_mont_capable returns one if |num| is at least eight and a multiple
// of four, and zero otherwise. Callers check this before using
// |bn_mul4x_mont|.
OPENSSL_INLINE int bn_mul4x_mont_capable(size_t num) {
  if (num < 8) {
    return 0;
  }
  return (num % 4) == 0;
}
// bn_mul4x_mont takes the same arguments as |bn_mul_mont|. Callers in this
// library only invoke it when |bn_mul4x_mont_capable(num)| returns one.
int bn_mul4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
// bn_mulx4x_mont_capable returns one if |bn_mul4x_mont_capable(num)| holds and
// both |CRYPTO_is_BMI2_capable| and |CRYPTO_is_ADX_capable| return non-zero.
// It returns zero otherwise. Callers check this before using
// |bn_mulx4x_mont|.
OPENSSL_INLINE int bn_mulx4x_mont_capable(size_t num) {
  if (!bn_mul4x_mont_capable(num)) {
    return 0;
  }
  // MULX is in BMI2.
  if (!CRYPTO_is_BMI2_capable()) {
    return 0;
  }
  return CRYPTO_is_ADX_capable() ? 1 : 0;
}
// bn_mulx4x_mont takes the same arguments as |bn_mul_mont|. Callers in this
// library only invoke it when |bn_mulx4x_mont_capable(num)| returns one, i.e.
// when the size condition of |bn_mul4x_mont_capable| holds and the BMI2 and
// ADX capability checks pass.
int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
// bn_sqr8x_mont_capable returns one if |num| is at least eight and a multiple
// of eight, and zero otherwise. Callers check this before using
// |bn_sqr8x_mont|.
OPENSSL_INLINE int bn_sqr8x_mont_capable(size_t num) {
  const int is_large_enough = num >= 8;
  const int is_multiple_of_eight = (num % 8) == 0;
  return is_large_enough && is_multiple_of_eight;
}
// bn_sqr8x_mont has the same parameter list as |bn_mul_mont|, except that the
// third argument, |unused_bp|, is marked as not used by the assembly source.
// |bn_mul_mont| only dispatches here when |ap == bp| and
// |bn_sqr8x_mont_capable(num)| returns one.
int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *unused_bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
#endif // defined(OPENSSL_X86_64)

#endif

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
Expand Down
17 changes: 17 additions & 0 deletions crypto/fipsmodule/bn/montgomery.c
Original file line number Diff line number Diff line change
Expand Up @@ -504,3 +504,20 @@ void bn_mod_mul_montgomery_small(BN_ULONG *r, const BN_ULONG *a,
}
OPENSSL_cleanse(tmp, 2 * num * sizeof(BN_ULONG));
}

#if defined(OPENSSL_BN_ASM_MONT) && defined(OPENSSL_X86_64)
// bn_mul_mont picks one of the x86-64 assembly implementations based on its
// arguments and forwards the call unchanged. The order of preference is:
//
//   1. |bn_sqr8x_mont|, when both inputs are the same buffer and |num|
//      satisfies |bn_sqr8x_mont_capable|.
//   2. |bn_mulx4x_mont|, when |bn_mulx4x_mont_capable(num)| holds.
//   3. |bn_mul4x_mont|, when |bn_mul4x_mont_capable(num)| holds.
//   4. |bn_mul_mont_nohw| otherwise.
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                const BN_ULONG *np, const BN_ULONG *n0, size_t num) {
  // The squaring routine ignores its third argument, so it is only usable
  // when the two operands alias.
  const int is_squaring = (ap == bp);
  if (is_squaring && bn_sqr8x_mont_capable(num)) {
    return bn_sqr8x_mont(rp, ap, bp, np, n0, num);
  }

  if (bn_mulx4x_mont_capable(num)) {
    return bn_mulx4x_mont(rp, ap, bp, np, n0, num);
  }

  if (!bn_mul4x_mont_capable(num)) {
    return bn_mul_mont_nohw(rp, ap, bp, np, n0, num);
  }
  return bn_mul4x_mont(rp, ap, bp, np, n0, num);
}
#endif

0 comments on commit 7cb8df5

Please sign in to comment.