Skip to content

Commit

Permalink
arithmetic: Move AArch64 length-based dispatching to Rust.
Browse files Browse the repository at this point in the history
  • Loading branch information
briansmith committed Jan 30, 2025
1 parent bee592c commit 6cc0569
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 27 deletions.
39 changes: 15 additions & 24 deletions crypto/fipsmodule/bn/asm/armv8-mont.pl
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
$lo1,$hi1,$nj,$m1,$nlo,$nhi,
$ovf, $i,$j,$tp,$tj) = map("x$_",6..17,19..24);

# int bn_mul_mont(
# void bn_mul_mont_nohw(
$rp="x0"; # BN_ULONG *rp,
$ap="x1"; # const BN_ULONG *ap,
$bp="x2"; # const BN_ULONG *bp,
Expand All @@ -68,16 +68,11 @@

.text

.globl bn_mul_mont
.type bn_mul_mont,%function
.globl bn_mul_mont_nohw
.type bn_mul_mont_nohw,%function
.align 5
bn_mul_mont:
bn_mul_mont_nohw:
AARCH64_SIGN_LINK_REGISTER
tst $num,#7
b.eq __bn_sqr8x_mont
tst $num,#3
b.eq __bn_mul4x_mont
.Lmul_mont:
stp x29,x30,[sp,#-64]!
add x29,sp,#0
stp x19,x20,[sp,#16]
Expand Down Expand Up @@ -272,7 +267,7 @@
ldr x29,[sp],#64
AARCH64_VALIDATE_LINK_REGISTER
ret
.size bn_mul_mont,.-bn_mul_mont
.size bn_mul_mont_nohw,.-bn_mul_mont_nohw
___
{
########################################################################
Expand All @@ -285,14 +280,11 @@
my ($tp,$ap_end,$na0)=($bp,$np,$carry);

$code.=<<___;
.type __bn_sqr8x_mont,%function
.globl bn_sqr8x_mont
.type bn_sqr8x_mont,%function
.align 5
__bn_sqr8x_mont:
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
// only from bn_mul_mont which has already signed the return address.
cmp $ap,$bp
b.ne __bn_mul4x_mont
.Lsqr8x_mont:
bn_sqr8x_mont:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-128]!
add x29,sp,#0
stp x19,x20,[sp,#16]
Expand Down Expand Up @@ -1049,7 +1041,7 @@
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret
.size __bn_sqr8x_mont,.-__bn_sqr8x_mont
.size bn_sqr8x_mont,.-bn_sqr8x_mont
___
}

Expand All @@ -1068,12 +1060,11 @@
my ($carry,$topmost) = ($rp,"x30");

$code.=<<___;
.type __bn_mul4x_mont,%function
.globl bn_mul4x_mont
.type bn_mul4x_mont,%function
.align 5
__bn_mul4x_mont:
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
// return address.
bn_mul4x_mont:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-128]!
add x29,sp,#0
stp x19,x20,[sp,#16]
Expand Down Expand Up @@ -1510,7 +1501,7 @@
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret
.size __bn_mul4x_mont,.-__bn_mul4x_mont
.size bn_mul4x_mont,.-bn_mul4x_mont
___
}
$code.=<<___;
Expand Down
10 changes: 10 additions & 0 deletions crypto/fipsmodule/bn/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,16 @@ static inline void bn_mul_mont_small(
const BN_ULONG *np, const BN_ULONG *n0, size_t num) {
bn_mul_mont_nohw(rp, ap, bp, np, n0, num);
}
#elif defined(OPENSSL_AARCH64)
void bn_mul_mont_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
// Montgomery multiplication for small moduli: rp = ap * bp * R^-1 mod np.
// `rp`, `ap`, and `bp` may alias. On AArch64 there is no size-specialized
// small-multiplier, so this simply delegates to the generic non-hardware
// (scalar) implementation.
static inline void bn_mul_mont_small(
BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num) {
// No point in optimizing for P-256 because P-256 doesn't call into
// this on AArch64.
bn_mul_mont_nohw(rp, ap, bp, np, n0, num);
}
#elif defined(OPENSSL_ARM)
void bn_mul8x_mont_neon(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
Expand Down
1 change: 1 addition & 0 deletions src/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use crate::{error::LenMismatchError, limb::LIMB_BITS};

#[macro_use]
mod ffi;
mod aarch64_mont;
mod x86_64_mont;

mod constant;
Expand Down
59 changes: 59 additions & 0 deletions src/arithmetic/aarch64_mont.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright 2025 Brian Smith.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

#![cfg(all(target_arch = "aarch64", target_endian = "little"))]

use super::{inout::AliasingSlices3 as _, n0::N0, LimbSliceError, MAX_LIMBS};
use crate::{
c,
limb::Limb,
polyfill::slice::{AsChunks, AsChunksMut},
};
use core::num::NonZeroUsize;

#[inline]
pub(super) fn sqr_mont5(
    mut in_out: AsChunksMut<Limb, 8>,
    n: AsChunks<Limb, 8>,
    n0: &N0,
) -> Result<(), LimbSliceError> {
    prefixed_extern! {
        // `r` and/or 'a' may alias.
        // XXX: BoringSSL (kinda, implicitly) declares this to return `int`.
        // `num` must be a non-zero multiple of 8.
        fn bn_sqr8x_mont(
            rp: *mut Limb,
            ap: *const Limb,
            ap_again: *const Limb,
            np: *const Limb,
            n0: &N0,
            num: c::NonZero_size_t);
    }

    // Drop the chunk structure; the assembly works on flat limb arrays.
    let modulus = n.as_flattened();
    let limbs = in_out.as_flattened_mut();

    // The assembly requires a non-zero length.
    let len = modulus.len();
    let num_limbs = match NonZeroUsize::new(len) {
        Some(num_limbs) => num_limbs,
        None => return Err(LimbSliceError::too_short(len)),
    };

    // Avoid stack overflow from the alloca inside.
    if num_limbs.get() > MAX_LIMBS {
        return Err(LimbSliceError::too_long(num_limbs.get()));
    }

    let squared = limbs.with_non_dangling_non_null_pointers_rab(num_limbs, |r, a, a_again| {
        // Non-dangling because num_limbs > 0.
        let np = modulus.as_ptr();
        unsafe { bn_sqr8x_mont(r, a, a_again, np, n0, num_limbs) };
    });
    squared.map_err(LimbSliceError::len_mismatch)
}
23 changes: 20 additions & 3 deletions src/arithmetic/montgomery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,17 @@ pub(super) fn limbs_mul_mont(
cfg_if! {
if #[cfg(all(target_arch = "aarch64", target_endian = "little"))] {
let _: cpu::Features = cpu;
bn_mul_mont_ffi!(in_out, n, n0, (), unsafe {
(MIN_LIMBS, MOD_FALLBACK, ()) => bn_mul_mont
})
const MIN_4X: usize = 4;
const MOD_4X: usize = 4;
if n.len() >= MIN_4X && n.len() % MOD_4X == 0 {
bn_mul_mont_ffi!(in_out, n, n0, (), unsafe {
(MIN_4X, MOD_4X, ()) => bn_mul4x_mont
})
} else {
bn_mul_mont_ffi!(in_out, n, n0, (), unsafe {
(MIN_LIMBS, MOD_FALLBACK, ()) => bn_mul_mont_nohw
})
}
} else if #[cfg(all(target_arch = "arm", target_endian = "little"))] {
const MIN_8X: usize = 8;
const MOD_8X: usize = 8;
Expand Down Expand Up @@ -297,6 +305,15 @@ pub(super) fn limbs_square_mont(
n0: &N0,
cpu: cpu::Features,
) -> Result<(), LimbSliceError> {
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
{
use super::aarch64_mont;
use crate::polyfill::slice;
if let ((r, []), (n, [])) = (slice::as_chunks_mut(r), slice::as_chunks(n)) {
return aarch64_mont::sqr_mont5(r, n, n0);
}
}

#[cfg(target_arch = "x86_64")]
{
use super::x86_64_mont;
Expand Down

0 comments on commit 6cc0569

Please sign in to comment.