Skip to content

Commit

Permalink
base: add WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2
Browse files Browse the repository at this point in the history
Updates #148
  • Loading branch information
nigeltao committed Jul 8, 2024
1 parent 97b727a commit affa1e0
Show file tree
Hide file tree
Showing 7 changed files with 109 additions and 104 deletions.
6 changes: 6 additions & 0 deletions internal/cgen/base/fundamental-public.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__ARM_CRC32) || \
defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__ARM_NEON) || \
defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) || \
defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) || \
defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) || \
defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_FAMILY) || \
defined(WUFFS_PRIVATE_IMPL__HPD__DECIMAL_POINT__RANGE) || \
defined(WUFFS_PRIVATE_IMPL__HPD__DIGITS_PRECISION) || \
Expand Down Expand Up @@ -107,6 +109,8 @@
#define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_FAMILY
#if defined(__x86_64__)
#define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64
#define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2
#define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3
#endif // defined(__x86_64__)
#endif // !defined(__native_client__)
#endif // defined(__i386__) || defined(__x86_64__)
Expand All @@ -131,6 +135,8 @@
#define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_FAMILY
#if defined(_M_X64)
#define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64
#define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2
#define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3
#endif // defined(_M_X64)

#else // defined(__AVX__) || defined(__clang__)
Expand Down
34 changes: 17 additions & 17 deletions internal/cgen/base/pixconv-submodule-regular.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

// ---------------- Pixel Swizzler

#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2)
WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2")
static uint64_t //
wuffs_private_impl__swizzle_bgrw__bgr__x86_sse42(uint8_t* dst_ptr,
Expand Down Expand Up @@ -45,7 +45,7 @@ wuffs_private_impl__swizzle_xxxx__y__x86_sse42(uint8_t* dst_ptr,
size_t dst_palette_len,
const uint8_t* src_ptr,
size_t src_len);
#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2)

// --------

Expand Down Expand Up @@ -881,7 +881,7 @@ wuffs_private_impl__swizzle_swap_rgb_bgr(uint8_t* dst_ptr,
}

// ‼ WUFFS MULTI-FILE SECTION +x86_sse42
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2)
WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2")
static uint64_t //
wuffs_private_impl__swizzle_swap_rgbx_bgrx__x86_sse42(uint8_t* dst_ptr,
Expand Down Expand Up @@ -925,7 +925,7 @@ wuffs_private_impl__swizzle_swap_rgbx_bgrx__x86_sse42(uint8_t* dst_ptr,
}
return len;
}
#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2)
// ‼ WUFFS MULTI-FILE SECTION -x86_sse42

static uint64_t //
Expand Down Expand Up @@ -3593,7 +3593,7 @@ wuffs_private_impl__swizzle_bgrw__bgrx(uint8_t* dst_ptr,
}

// ‼ WUFFS MULTI-FILE SECTION +x86_sse42
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2)
WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2")
static uint64_t //
wuffs_private_impl__swizzle_bgrw__bgr__x86_sse42(uint8_t* dst_ptr,
Expand Down Expand Up @@ -3699,7 +3699,7 @@ wuffs_private_impl__swizzle_bgrw__rgb__x86_sse42(uint8_t* dst_ptr,

return len;
}
#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2)
// ‼ WUFFS MULTI-FILE SECTION -x86_sse42

static uint64_t //
Expand Down Expand Up @@ -4457,7 +4457,7 @@ wuffs_private_impl__swizzle_xxxx__index_binary_alpha__src_over(
}

// ‼ WUFFS MULTI-FILE SECTION +x86_sse42
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2)
WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2")
static uint64_t //
wuffs_private_impl__swizzle_xxxx__y__x86_sse42(uint8_t* dst_ptr,
Expand Down Expand Up @@ -4504,7 +4504,7 @@ wuffs_private_impl__swizzle_xxxx__y__x86_sse42(uint8_t* dst_ptr,

return len;
}
#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2)
// ‼ WUFFS MULTI-FILE SECTION -x86_sse42

static uint64_t //
Expand Down Expand Up @@ -4906,7 +4906,7 @@ wuffs_private_impl__pixel_swizzler__prepare__y(
case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
case WUFFS_BASE__PIXEL_FORMAT__RGBX:
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2)
if (wuffs_base__cpu_arch__have_x86_sse42()) {
return wuffs_private_impl__swizzle_xxxx__y__x86_sse42;
}
Expand Down Expand Up @@ -5377,7 +5377,7 @@ wuffs_private_impl__pixel_swizzler__prepare__bgr(
case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
case WUFFS_BASE__PIXEL_FORMAT__BGRX:
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2)
if (wuffs_base__cpu_arch__have_x86_sse42()) {
return wuffs_private_impl__swizzle_bgrw__bgr__x86_sse42;
}
Expand All @@ -5395,7 +5395,7 @@ wuffs_private_impl__pixel_swizzler__prepare__bgr(
case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
case WUFFS_BASE__PIXEL_FORMAT__RGBX:
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2)
if (wuffs_base__cpu_arch__have_x86_sse42()) {
return wuffs_private_impl__swizzle_bgrw__rgb__x86_sse42;
}
Expand Down Expand Up @@ -5484,7 +5484,7 @@ wuffs_private_impl__pixel_swizzler__prepare__bgra_nonpremul(
case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
switch (blend) {
case WUFFS_BASE__PIXEL_BLEND__SRC:
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2)
if (wuffs_base__cpu_arch__have_x86_sse42()) {
return wuffs_private_impl__swizzle_swap_rgbx_bgrx__x86_sse42;
}
Expand Down Expand Up @@ -5679,7 +5679,7 @@ wuffs_private_impl__pixel_swizzler__prepare__bgra_premul(
case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
switch (blend) {
case WUFFS_BASE__PIXEL_BLEND__SRC:
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2)
if (wuffs_base__cpu_arch__have_x86_sse42()) {
return wuffs_private_impl__swizzle_swap_rgbx_bgrx__x86_sse42;
}
Expand Down Expand Up @@ -5751,7 +5751,7 @@ wuffs_private_impl__pixel_swizzler__prepare__rgb(
case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY:
case WUFFS_BASE__PIXEL_FORMAT__BGRX:
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2)
if (wuffs_base__cpu_arch__have_x86_sse42()) {
return wuffs_private_impl__swizzle_bgrw__rgb__x86_sse42;
}
Expand All @@ -5768,7 +5768,7 @@ wuffs_private_impl__pixel_swizzler__prepare__rgb(
case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY:
case WUFFS_BASE__PIXEL_FORMAT__RGBX:
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2)
if (wuffs_base__cpu_arch__have_x86_sse42()) {
return wuffs_private_impl__swizzle_bgrw__bgr__x86_sse42;
}
Expand Down Expand Up @@ -5807,7 +5807,7 @@ wuffs_private_impl__pixel_swizzler__prepare__rgba_nonpremul(
case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
switch (blend) {
case WUFFS_BASE__PIXEL_BLEND__SRC:
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2)
if (wuffs_base__cpu_arch__have_x86_sse42()) {
return wuffs_private_impl__swizzle_swap_rgbx_bgrx__x86_sse42;
}
Expand Down Expand Up @@ -5923,7 +5923,7 @@ wuffs_private_impl__pixel_swizzler__prepare__rgba_premul(
case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
switch (blend) {
case WUFFS_BASE__PIXEL_BLEND__SRC:
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2)
if (wuffs_base__cpu_arch__have_x86_sse42()) {
return wuffs_private_impl__swizzle_swap_rgbx_bgrx__x86_sse42;
}
Expand Down
4 changes: 2 additions & 2 deletions internal/cgen/base/pixconv-submodule-x86-avx2.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
// --------

// ‼ WUFFS MULTI-FILE SECTION +x86_avx2
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3)
WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2,avx2")
static void //
wuffs_private_impl__swizzle_ycc__convert_3_bgrx_x86_avx2(
Expand Down Expand Up @@ -636,5 +636,5 @@ wuffs_private_impl__swizzle_ycc__upsample_inv_h2v2_triangle_x86_avx2(
return dst_ptr;
}
#endif
#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3)
// ‼ WUFFS MULTI-FILE SECTION -x86_avx2
10 changes: 5 additions & 5 deletions internal/cgen/base/pixconv-submodule-ycck.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

// --------

#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3)
WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2,avx2")
static void //
wuffs_private_impl__swizzle_ycc__convert_3_bgrx_x86_avx2(
Expand Down Expand Up @@ -47,7 +47,7 @@ wuffs_private_impl__swizzle_ycc__upsample_inv_h2v2_triangle_x86_avx2(
bool first_column,
bool last_column);
#endif
#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3)

// --------

Expand Down Expand Up @@ -1277,7 +1277,7 @@ wuffs_base__pixel_swizzler__swizzle_ycck(
case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL:
case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL:
case WUFFS_BASE__PIXEL_FORMAT__BGRX:
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3)
if (wuffs_base__cpu_arch__have_x86_avx2()) {
conv3func = &wuffs_private_impl__swizzle_ycc__convert_3_bgrx_x86_avx2;
break;
Expand All @@ -1288,7 +1288,7 @@ wuffs_base__pixel_swizzler__swizzle_ycck(
case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL:
case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL:
case WUFFS_BASE__PIXEL_FORMAT__RGBX:
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3)
if (wuffs_base__cpu_arch__have_x86_avx2()) {
conv3func = &wuffs_private_impl__swizzle_ycc__convert_3_rgbx_x86_avx2;
break;
Expand Down Expand Up @@ -1338,7 +1338,7 @@ wuffs_base__pixel_swizzler__swizzle_ycck(
upfuncs[1][0] = wuffs_private_impl__swizzle_ycc__upsample_inv_h2v1_triangle;
upfuncs[1][1] = wuffs_private_impl__swizzle_ycc__upsample_inv_h2v2_triangle;

#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3)
#if defined(__GNUC__) && !defined(__clang__)
// Don't use our AVX2 implementation for GCC (but do use it for clang). For
// some unknown reason, GCC performs noticeably better on the non-SIMD
Expand Down
13 changes: 3 additions & 10 deletions internal/cgen/statement.go
Original file line number Diff line number Diff line change
Expand Up @@ -275,27 +275,20 @@ func cpuArchCNames(asserts []*a.Node) (caMacro string, caName string, caAttribut
caMacro, caName, caAttribute = "ARM_NEON", "arm_neon", ""
case t.IDX86SSE42:
caMacro, caName, caAttribute =
"X86_64", // See the "X86_FAMILY" comment, below.
"X86_64_V2",
"x86_sse42",
"WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET(\"pclmul,popcnt,sse4.2\")"
case t.IDX86AVX2:
caMacro, caName, caAttribute =
"X86_64",
"X86_64_V3",
"x86_avx2",
"WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET(\"pclmul,popcnt,sse4.2,avx2\")"
case t.IDX86BMI2:
caMacro, caName, caAttribute =
"X86_64", // See the "X86_FAMILY" comment, below.
"X86_64_V3",
"x86_bmi2",
"WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET(\"bmi2\")"
}

// "X86_FAMILY" (which covers both 32-bit and 64-bit x86) is
// technically correct, instead of "X86_64". But some intrinsics
// don't compile in 32-bit mode. It's not worth the hassle to
// support 32-bit x86 SIMD, so we gate on "X86_64" instead.
//
// https://github.com/google/wuffs/issues/145
}
}
return caMacro, caName, caAttribute, nil
Expand Down
Loading

0 comments on commit affa1e0

Please sign in to comment.