Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A patch for Visual Studio on Windows #3

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions benchmark/areion-benchmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@

#define NUMBER_OF_LOOPS 12500000

/*
 * ALLOCA(x): reserve x bytes of scratch space in the calling function's
 * stack frame. The memory is released automatically when that function
 * returns and must never be handed to free(). Memory obtained inside a loop
 * is not reclaimed per iteration, so keep the total request small.
 *
 * MSVC: use _alloca(), not _malloca(). _malloca() may satisfy the request
 * from the heap (large sizes, and always in debug builds) and then requires
 * a matching _freea(); callers of this macro never free, so that would leak.
 */
#ifdef _MSC_VER
#include <malloc.h>
#define ALLOCA(x) _alloca(x)
#else
#define ALLOCA(x) __builtin_alloca(x)
#endif

static void fill(uint8_t *dst, size_t len)
{
for (size_t i = 0; i < len; i++) {
Expand Down Expand Up @@ -83,20 +90,20 @@ static void benchmark_aead()
{
{
for (int len = 32; len < 4096; len *= 2) {
uint8_t in[len];
uint8_t out[len];
uint8_t *in = (uint8_t*)ALLOCA(len);
uint8_t *out = (uint8_t*)ALLOCA(len);
uint8_t tag[16];
uint8_t h[16];
uint8_t n[16];
uint8_t k[16];
fill(in, sizeof in);
fill(in, len);
fill(h, sizeof h);
fill(n, sizeof n);
fill(k, sizeof k);

ticks t0 = getticks();
for (int i = 0; i < NUMBER_OF_LOOPS; i++) {
encrypt_areion_256_opp(out, tag, h, sizeof h, in, sizeof in, n, k);
encrypt_areion_256_opp(out, tag, h, sizeof h, in, len, n, k);
}
ticks t1 = getticks();
double total_cycle = elapsed(t1, t0);
Expand Down
60 changes: 24 additions & 36 deletions ref/areion.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,68 +179,56 @@ void inverse_areion_512(__m128i dst[4], const __m128i src[4])

/*
 * Byte-array front end for permute_areion_256().
 *
 * Reads the 32 bytes at src as two 128-bit lanes, passes them through
 * permute_areion_256(), and writes the two resulting lanes to the 32 bytes
 * at dst.
 *
 * The loadu/storeu intrinsics are used, so neither buffer has to be 16-byte
 * aligned. The pointers are cast to plain __m128i* rather than going through
 * the __m128i_u typedef, which is a GCC/Clang extension that MSVC's headers
 * do not provide.
 */
void permute_areion_256u8(uint8_t dst[32], const uint8_t src[32])
{
    __m128i x[2] = {
        _mm_loadu_si128((const __m128i *)&src[0]),
        _mm_loadu_si128((const __m128i *)&src[16])
    };
    __m128i y[2];

    permute_areion_256(y, x);

    _mm_storeu_si128((__m128i *)&dst[0], y[0]);
    _mm_storeu_si128((__m128i *)&dst[16], y[1]);
}

/*
 * Byte-array front end for inverse_areion_256().
 *
 * Reads the 32 bytes at src as two 128-bit lanes, passes them through
 * inverse_areion_256(), and writes the two resulting lanes to the 32 bytes
 * at dst.
 *
 * The loadu/storeu intrinsics are used, so neither buffer has to be 16-byte
 * aligned. The pointers are cast to plain __m128i* rather than going through
 * the __m128i_u typedef, which is a GCC/Clang extension that MSVC's headers
 * do not provide.
 */
void inverse_areion_256u8(uint8_t dst[32], const uint8_t src[32])
{
    __m128i x[2] = {
        _mm_loadu_si128((const __m128i *)&src[0]),
        _mm_loadu_si128((const __m128i *)&src[16])
    };
    __m128i y[2];

    inverse_areion_256(y, x);

    _mm_storeu_si128((__m128i *)&dst[0], y[0]);
    _mm_storeu_si128((__m128i *)&dst[16], y[1]);
}

/*
 * Byte-array front end for permute_areion_512().
 *
 * Reads the 64 bytes at src as four 128-bit lanes, passes them through
 * permute_areion_512(), and writes the four resulting lanes to the 64 bytes
 * at dst.
 *
 * The loadu/storeu intrinsics are used, so neither buffer has to be 16-byte
 * aligned. The pointers are cast to plain __m128i* rather than going through
 * the __m128i_u typedef, which is a GCC/Clang extension that MSVC's headers
 * do not provide.
 */
void permute_areion_512u8(uint8_t dst[64], const uint8_t src[64])
{
    __m128i x[4] = {
        _mm_loadu_si128((const __m128i *)&src[0]),
        _mm_loadu_si128((const __m128i *)&src[16]),
        _mm_loadu_si128((const __m128i *)&src[32]),
        _mm_loadu_si128((const __m128i *)&src[48])
    };
    __m128i y[4];

    permute_areion_512(y, x);

    _mm_storeu_si128((__m128i *)&dst[0], y[0]);
    _mm_storeu_si128((__m128i *)&dst[16], y[1]);
    _mm_storeu_si128((__m128i *)&dst[32], y[2]);
    _mm_storeu_si128((__m128i *)&dst[48], y[3]);
}

/*
 * Byte-array front end for inverse_areion_512().
 *
 * Reads the 64 bytes at src as four 128-bit lanes, passes them through
 * inverse_areion_512(), and writes the four resulting lanes to the 64 bytes
 * at dst.
 *
 * The loadu/storeu intrinsics are used, so neither buffer has to be 16-byte
 * aligned. The pointers are cast to plain __m128i* rather than going through
 * the __m128i_u typedef, which is a GCC/Clang extension that MSVC's headers
 * do not provide.
 */
void inverse_areion_512u8(uint8_t dst[64], const uint8_t src[64])
{
    __m128i x[4] = {
        _mm_loadu_si128((const __m128i *)&src[0]),
        _mm_loadu_si128((const __m128i *)&src[16]),
        _mm_loadu_si128((const __m128i *)&src[32]),
        _mm_loadu_si128((const __m128i *)&src[48])
    };
    __m128i y[4];

    inverse_areion_512(y, x);

    _mm_storeu_si128((__m128i *)&dst[0], y[0]);
    _mm_storeu_si128((__m128i *)&dst[16], y[1]);
    _mm_storeu_si128((__m128i *)&dst[32], y[2]);
    _mm_storeu_si128((__m128i *)&dst[48], y[3]);
}