From b3cda5cf4549b131b09e6fc67775717dcfeeae13 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Tue, 19 Dec 2023 14:10:11 -0500 Subject: [PATCH] chacha: Move 32-bit Arm CPU dispatch from assembly to C This also removes handling of the empty input, to match what was done for aarch64. (The C code ensures the function is never called in this case.) Bug: 673 Change-Id: I7e868a9eb0b022c22c3f4ba2c8782ae1464c5a52 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/64967 Auto-Submit: David Benjamin Reviewed-by: Bob Beck Commit-Queue: Bob Beck --- crypto/chacha/asm/chacha-armv4.pl | 43 ++++++------------------------- crypto/chacha/internal.h | 5 ++-- 2 files changed, 11 insertions(+), 37 deletions(-) diff --git a/crypto/chacha/asm/chacha-armv4.pl b/crypto/chacha/asm/chacha-armv4.pl index 24fbb840c2..fd92fdb4fb 100755 --- a/crypto/chacha/asm/chacha-armv4.pl +++ b/crypto/chacha/asm/chacha-armv4.pl @@ -196,39 +196,14 @@ sub ROUND { .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 @ endian-neutral .Lone: .long 1,0,0,0 -#if __ARM_MAX_ARCH__>=7 -.LOPENSSL_armcap: -.word OPENSSL_armcap_P-.Lsigma -#else -.word -1 -#endif -.globl ChaCha20_ctr32 -.type ChaCha20_ctr32,%function +.globl ChaCha20_ctr32_nohw +.type ChaCha20_ctr32_nohw,%function .align 5 -ChaCha20_ctr32: -.LChaCha20_ctr32: +ChaCha20_ctr32_nohw: ldr r12,[sp,#0] @ pull pointer to counter and nonce stmdb sp!,{r0-r2,r4-r11,lr} adr r14,.Lsigma - cmp r2,#0 @ len==0? -#ifdef __thumb2__ - itt eq -#endif - addeq sp,sp,#4*3 - beq .Lno_data -#if __ARM_MAX_ARCH__>=7 - cmp r2,#192 @ test len - bls .Lshort - ldr r4,[r14,#32] - ldr r4,[r14,r4] -# ifdef __APPLE__ - ldr r4,[r4] -# endif - tst r4,#ARMV7_NEON - bne .LChaCha20_neon -.Lshort: -#endif ldmia r12,{r4-r7} @ load counter and nonce sub sp,sp,#4*(16) @ off-load area stmdb sp!,{r4-r7} @ copy counter and nonce @@ -621,9 +596,8 @@ sub ROUND { .Ldone: add sp,sp,#4*(32+3) -.Lno_data: ldmia sp!,{r4-r11,pc} -.size ChaCha20_ctr32,.-ChaCha20_ctr32 +.size ChaCha20_ctr32_nohw,.-ChaCha20_ctr32_nohw ___ {{{ @@ -665,12 +639,12 @@ sub NEONROUND { .arch armv7-a .fpu neon -.type ChaCha20_neon,%function +.globl ChaCha20_ctr32_neon +.type ChaCha20_ctr32_neon,%function .align 5 -ChaCha20_neon: +ChaCha20_ctr32_neon: ldr r12,[sp,#0] @ pull pointer to counter and nonce stmdb sp!,{r0-r2,r4-r11,lr} -.LChaCha20_neon: adr r14,.Lsigma vstmdb sp!,{d8-d15} @ ABI spec says so stmdb sp!,{r0-r3} @@ -1145,8 +1119,7 @@ sub NEONROUND { vldmia sp,{d8-d15} add sp,sp,#4*(16+3) ldmia sp!,{r4-r11,pc} -.size ChaCha20_neon,.-ChaCha20_neon -.comm OPENSSL_armcap_P,4,4 +.size ChaCha20_ctr32_neon,.-ChaCha20_ctr32_neon #endif ___ }}} diff --git a/crypto/chacha/internal.h b/crypto/chacha/internal.h index a8ae3cb84b..d31a0442ba 100644 --- a/crypto/chacha/internal.h +++ b/crypto/chacha/internal.h @@ -30,11 +30,12 @@ void CRYPTO_hchacha20(uint8_t out[32], const uint8_t key[32], const uint8_t nonce[16]); #if !defined(OPENSSL_NO_ASM) && \ - (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM)) + (defined(OPENSSL_X86) || defined(OPENSSL_X86_64)) #define CHACHA20_ASM -#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) +#elif !defined(OPENSSL_NO_ASM) && \ + (defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)) #define CHACHA20_ASM_NOHW