diff --git a/internal/cgen/base/fundamental-public.h b/internal/cgen/base/fundamental-public.h index 5878ec3e..469a7f73 100644 --- a/internal/cgen/base/fundamental-public.h +++ b/internal/cgen/base/fundamental-public.h @@ -44,6 +44,8 @@ #if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__ARM_CRC32) || \ defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__ARM_NEON) || \ defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) || \ + defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) || \ + defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) || \ defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_FAMILY) || \ defined(WUFFS_PRIVATE_IMPL__HPD__DECIMAL_POINT__RANGE) || \ defined(WUFFS_PRIVATE_IMPL__HPD__DIGITS_PRECISION) || \ @@ -107,6 +109,8 @@ #define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_FAMILY #if defined(__x86_64__) #define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64 +#define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2 +#define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3 #endif // defined(__x86_64__) #endif // !defined(__native_client__) #endif // defined(__i386__) || defined(__x86_64__) @@ -131,6 +135,8 @@ #define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_FAMILY #if defined(_M_X64) #define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64 +#define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2 +#define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3 #endif // defined(_M_X64) #else // defined(__AVX__) || defined(__clang__) diff --git a/internal/cgen/base/pixconv-submodule-regular.c b/internal/cgen/base/pixconv-submodule-regular.c index e659d5ea..702b4c3e 100644 --- a/internal/cgen/base/pixconv-submodule-regular.c +++ b/internal/cgen/base/pixconv-submodule-regular.c @@ -10,7 +10,7 @@ // ---------------- Pixel Swizzler -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2") static uint64_t // wuffs_private_impl__swizzle_bgrw__bgr__x86_sse42(uint8_t* dst_ptr, @@ -45,7 +45,7 @@ wuffs_private_impl__swizzle_xxxx__y__x86_sse42(uint8_t* dst_ptr, size_t dst_palette_len, const uint8_t* src_ptr, size_t src_len); -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) // -------- @@ -881,7 +881,7 @@ wuffs_private_impl__swizzle_swap_rgb_bgr(uint8_t* dst_ptr, } // ‼ WUFFS MULTI-FILE SECTION +x86_sse42 -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2") static uint64_t // wuffs_private_impl__swizzle_swap_rgbx_bgrx__x86_sse42(uint8_t* dst_ptr, @@ -925,7 +925,7 @@ wuffs_private_impl__swizzle_swap_rgbx_bgrx__x86_sse42(uint8_t* dst_ptr, } return len; } -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) // ‼ WUFFS MULTI-FILE SECTION -x86_sse42 static uint64_t // @@ -3593,7 +3593,7 @@ wuffs_private_impl__swizzle_bgrw__bgrx(uint8_t* dst_ptr, } // ‼ WUFFS MULTI-FILE SECTION +x86_sse42 -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2") static uint64_t // wuffs_private_impl__swizzle_bgrw__bgr__x86_sse42(uint8_t* dst_ptr, @@ -3699,7 +3699,7 @@ wuffs_private_impl__swizzle_bgrw__rgb__x86_sse42(uint8_t* dst_ptr, return len; } -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) // ‼ WUFFS MULTI-FILE SECTION -x86_sse42 static uint64_t // @@ -4457,7 +4457,7 @@ wuffs_private_impl__swizzle_xxxx__index_binary_alpha__src_over( } // ‼ WUFFS MULTI-FILE SECTION +x86_sse42 -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2") static uint64_t // wuffs_private_impl__swizzle_xxxx__y__x86_sse42(uint8_t* dst_ptr, @@ -4504,7 +4504,7 @@ wuffs_private_impl__swizzle_xxxx__y__x86_sse42(uint8_t* dst_ptr, return len; } -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) // ‼ WUFFS MULTI-FILE SECTION -x86_sse42 static uint64_t // @@ -4906,7 +4906,7 @@ wuffs_private_impl__pixel_swizzler__prepare__y( case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL: case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY: case WUFFS_BASE__PIXEL_FORMAT__RGBX: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) if (wuffs_base__cpu_arch__have_x86_sse42()) { return wuffs_private_impl__swizzle_xxxx__y__x86_sse42; } @@ -5377,7 +5377,7 @@ wuffs_private_impl__pixel_swizzler__prepare__bgr( case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL: case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY: case WUFFS_BASE__PIXEL_FORMAT__BGRX: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) if (wuffs_base__cpu_arch__have_x86_sse42()) { return wuffs_private_impl__swizzle_bgrw__bgr__x86_sse42; } @@ -5395,7 +5395,7 @@ wuffs_private_impl__pixel_swizzler__prepare__bgr( case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL: case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY: case WUFFS_BASE__PIXEL_FORMAT__RGBX: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) if (wuffs_base__cpu_arch__have_x86_sse42()) { return wuffs_private_impl__swizzle_bgrw__rgb__x86_sse42; } @@ -5484,7 +5484,7 @@ wuffs_private_impl__pixel_swizzler__prepare__bgra_nonpremul( case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL: switch (blend) { case WUFFS_BASE__PIXEL_BLEND__SRC: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) if (wuffs_base__cpu_arch__have_x86_sse42()) { return wuffs_private_impl__swizzle_swap_rgbx_bgrx__x86_sse42; } @@ -5679,7 +5679,7 @@ wuffs_private_impl__pixel_swizzler__prepare__bgra_premul( case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL: switch (blend) { case WUFFS_BASE__PIXEL_BLEND__SRC: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) if (wuffs_base__cpu_arch__have_x86_sse42()) { return wuffs_private_impl__swizzle_swap_rgbx_bgrx__x86_sse42; } @@ -5751,7 +5751,7 @@ wuffs_private_impl__pixel_swizzler__prepare__rgb( case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL: case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY: case WUFFS_BASE__PIXEL_FORMAT__BGRX: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) if (wuffs_base__cpu_arch__have_x86_sse42()) { return wuffs_private_impl__swizzle_bgrw__rgb__x86_sse42; } @@ -5768,7 +5768,7 @@ wuffs_private_impl__pixel_swizzler__prepare__rgb( case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL: case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY: case WUFFS_BASE__PIXEL_FORMAT__RGBX: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) if (wuffs_base__cpu_arch__have_x86_sse42()) { return wuffs_private_impl__swizzle_bgrw__bgr__x86_sse42; } @@ -5807,7 +5807,7 @@ wuffs_private_impl__pixel_swizzler__prepare__rgba_nonpremul( case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL: switch (blend) { case WUFFS_BASE__PIXEL_BLEND__SRC: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) if (wuffs_base__cpu_arch__have_x86_sse42()) { return wuffs_private_impl__swizzle_swap_rgbx_bgrx__x86_sse42; } @@ -5923,7 +5923,7 @@ wuffs_private_impl__pixel_swizzler__prepare__rgba_premul( case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL: switch (blend) { case WUFFS_BASE__PIXEL_BLEND__SRC: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) if (wuffs_base__cpu_arch__have_x86_sse42()) { return wuffs_private_impl__swizzle_swap_rgbx_bgrx__x86_sse42; } diff --git a/internal/cgen/base/pixconv-submodule-x86-avx2.c b/internal/cgen/base/pixconv-submodule-x86-avx2.c index 403013af..bba8dadc 100644 --- a/internal/cgen/base/pixconv-submodule-x86-avx2.c +++ b/internal/cgen/base/pixconv-submodule-x86-avx2.c @@ -11,7 +11,7 @@ // -------- // ‼ WUFFS MULTI-FILE SECTION +x86_avx2 -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2,avx2") static void // wuffs_private_impl__swizzle_ycc__convert_3_bgrx_x86_avx2( @@ -636,5 +636,5 @@ wuffs_private_impl__swizzle_ycc__upsample_inv_h2v2_triangle_x86_avx2( return dst_ptr; } #endif -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) // ‼ WUFFS MULTI-FILE SECTION -x86_avx2 diff --git a/internal/cgen/base/pixconv-submodule-ycck.c b/internal/cgen/base/pixconv-submodule-ycck.c index 4be1d4c5..1f1ea4b5 100644 --- a/internal/cgen/base/pixconv-submodule-ycck.c +++ b/internal/cgen/base/pixconv-submodule-ycck.c @@ -10,7 +10,7 @@ // -------- -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2,avx2") static void // wuffs_private_impl__swizzle_ycc__convert_3_bgrx_x86_avx2( @@ -47,7 +47,7 @@ wuffs_private_impl__swizzle_ycc__upsample_inv_h2v2_triangle_x86_avx2( bool first_column, bool last_column); #endif -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) // -------- @@ -1277,7 +1277,7 @@ wuffs_base__pixel_swizzler__swizzle_ycck( case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL: case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL: case WUFFS_BASE__PIXEL_FORMAT__BGRX: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) if (wuffs_base__cpu_arch__have_x86_avx2()) { conv3func = &wuffs_private_impl__swizzle_ycc__convert_3_bgrx_x86_avx2; break; @@ -1288,7 +1288,7 @@ wuffs_base__pixel_swizzler__swizzle_ycck( case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL: case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL: case WUFFS_BASE__PIXEL_FORMAT__RGBX: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) if (wuffs_base__cpu_arch__have_x86_avx2()) { conv3func = &wuffs_private_impl__swizzle_ycc__convert_3_rgbx_x86_avx2; break; @@ -1338,7 +1338,7 @@ wuffs_base__pixel_swizzler__swizzle_ycck( upfuncs[1][0] = wuffs_private_impl__swizzle_ycc__upsample_inv_h2v1_triangle; upfuncs[1][1] = wuffs_private_impl__swizzle_ycc__upsample_inv_h2v2_triangle; -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) #if defined(__GNUC__) && !defined(__clang__) // Don't use our AVX2 implementation for GCC (but do use it for clang). For // some unknown reason, GCC performs noticably better on the non-SIMD diff --git a/internal/cgen/statement.go b/internal/cgen/statement.go index 6e57b662..32a9941a 100644 --- a/internal/cgen/statement.go +++ b/internal/cgen/statement.go @@ -275,27 +275,20 @@ func cpuArchCNames(asserts []*a.Node) (caMacro string, caName string, caAttribut caMacro, caName, caAttribute = "ARM_NEON", "arm_neon", "" case t.IDX86SSE42: caMacro, caName, caAttribute = - "X86_64", // See the "X86_FAMILY" comment, below. + "X86_64_V2", "x86_sse42", "WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET(\"pclmul,popcnt,sse4.2\")" case t.IDX86AVX2: caMacro, caName, caAttribute = - "X86_64", + "X86_64_V3", "x86_avx2", "WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET(\"pclmul,popcnt,sse4.2,avx2\")" case t.IDX86BMI2: caMacro, caName, caAttribute = - "X86_64", // See the "X86_FAMILY" comment, below. + "X86_64_V3", "x86_bmi2", "WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET(\"bmi2\")" } - - // "X86_FAMILY" (which covers both 32-bit and 64-bit x86) is - // technically correct, instead of "X86_64". But some intrinsics - // don't compile in 32-bit mode. It's not worth the hassle to - // support 32-bit x86 SIMD, so we gate on "X86_64" instead. - // - // https://github.com/google/wuffs/issues/145 } } return caMacro, caName, caAttribute, nil diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c index c6f1dce5..f4e55f01 100644 --- a/release/c/wuffs-unsupported-snapshot.c +++ b/release/c/wuffs-unsupported-snapshot.c @@ -100,6 +100,8 @@ extern "C" { #if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__ARM_CRC32) || \ defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__ARM_NEON) || \ defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) || \ + defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) || \ + defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) || \ defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_FAMILY) || \ defined(WUFFS_PRIVATE_IMPL__HPD__DECIMAL_POINT__RANGE) || \ defined(WUFFS_PRIVATE_IMPL__HPD__DIGITS_PRECISION) || \ @@ -163,6 +165,8 @@ extern "C" { #define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_FAMILY #if defined(__x86_64__) #define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64 +#define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2 +#define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3 #endif // defined(__x86_64__) #endif // !defined(__native_client__) #endif // defined(__i386__) || defined(__x86_64__) @@ -187,6 +191,8 @@ extern "C" { #define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_FAMILY #if defined(_M_X64) #define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64 +#define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2 +#define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3 #endif // defined(_M_X64) #else // defined(__AVX__) || defined(__clang__) @@ -21658,7 +21664,7 @@ wuffs_base__magic_number_guess_fourcc(wuffs_base__slice_u8 prefix_data, // ---------------- Pixel Swizzler -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2") static uint64_t // wuffs_private_impl__swizzle_bgrw__bgr__x86_sse42(uint8_t* dst_ptr, @@ -21693,7 +21699,7 @@ wuffs_private_impl__swizzle_xxxx__y__x86_sse42(uint8_t* dst_ptr, size_t dst_palette_len, const uint8_t* src_ptr, size_t src_len); -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) // -------- @@ -22529,7 +22535,7 @@ wuffs_private_impl__swizzle_swap_rgb_bgr(uint8_t* dst_ptr, } // ‼ WUFFS MULTI-FILE SECTION +x86_sse42 -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2") static uint64_t // wuffs_private_impl__swizzle_swap_rgbx_bgrx__x86_sse42(uint8_t* dst_ptr, @@ -22573,7 +22579,7 @@ wuffs_private_impl__swizzle_swap_rgbx_bgrx__x86_sse42(uint8_t* dst_ptr, } return len; } -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) // ‼ WUFFS MULTI-FILE SECTION -x86_sse42 static uint64_t // @@ -25241,7 +25247,7 @@ wuffs_private_impl__swizzle_bgrw__bgrx(uint8_t* dst_ptr, } // ‼ WUFFS MULTI-FILE SECTION +x86_sse42 -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2") static uint64_t // wuffs_private_impl__swizzle_bgrw__bgr__x86_sse42(uint8_t* dst_ptr, @@ -25347,7 +25353,7 @@ wuffs_private_impl__swizzle_bgrw__rgb__x86_sse42(uint8_t* dst_ptr, return len; } -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) // ‼ WUFFS MULTI-FILE SECTION -x86_sse42 static uint64_t // @@ -26105,7 +26111,7 @@ wuffs_private_impl__swizzle_xxxx__index_binary_alpha__src_over( } // ‼ WUFFS MULTI-FILE SECTION +x86_sse42 -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2") static uint64_t // wuffs_private_impl__swizzle_xxxx__y__x86_sse42(uint8_t* dst_ptr, @@ -26152,7 +26158,7 @@ wuffs_private_impl__swizzle_xxxx__y__x86_sse42(uint8_t* dst_ptr, return len; } -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) // ‼ WUFFS MULTI-FILE SECTION -x86_sse42 static uint64_t // @@ -26554,7 +26560,7 @@ wuffs_private_impl__pixel_swizzler__prepare__y( case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL: case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY: case WUFFS_BASE__PIXEL_FORMAT__RGBX: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) if (wuffs_base__cpu_arch__have_x86_sse42()) { return wuffs_private_impl__swizzle_xxxx__y__x86_sse42; } @@ -27025,7 +27031,7 @@ wuffs_private_impl__pixel_swizzler__prepare__bgr( case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL: case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY: case WUFFS_BASE__PIXEL_FORMAT__BGRX: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) if (wuffs_base__cpu_arch__have_x86_sse42()) { return wuffs_private_impl__swizzle_bgrw__bgr__x86_sse42; } @@ -27043,7 +27049,7 @@ wuffs_private_impl__pixel_swizzler__prepare__bgr( case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL: case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY: case WUFFS_BASE__PIXEL_FORMAT__RGBX: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) if (wuffs_base__cpu_arch__have_x86_sse42()) { return wuffs_private_impl__swizzle_bgrw__rgb__x86_sse42; } @@ -27132,7 +27138,7 @@ wuffs_private_impl__pixel_swizzler__prepare__bgra_nonpremul( case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL: switch (blend) { case WUFFS_BASE__PIXEL_BLEND__SRC: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) if (wuffs_base__cpu_arch__have_x86_sse42()) { return wuffs_private_impl__swizzle_swap_rgbx_bgrx__x86_sse42; } @@ -27327,7 +27333,7 @@ wuffs_private_impl__pixel_swizzler__prepare__bgra_premul( case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL: switch (blend) { case WUFFS_BASE__PIXEL_BLEND__SRC: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) if (wuffs_base__cpu_arch__have_x86_sse42()) { return wuffs_private_impl__swizzle_swap_rgbx_bgrx__x86_sse42; } @@ -27399,7 +27405,7 @@ wuffs_private_impl__pixel_swizzler__prepare__rgb( case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL: case WUFFS_BASE__PIXEL_FORMAT__BGRA_BINARY: case WUFFS_BASE__PIXEL_FORMAT__BGRX: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) if (wuffs_base__cpu_arch__have_x86_sse42()) { return wuffs_private_impl__swizzle_bgrw__rgb__x86_sse42; } @@ -27416,7 +27422,7 @@ wuffs_private_impl__pixel_swizzler__prepare__rgb( case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL: case WUFFS_BASE__PIXEL_FORMAT__RGBA_BINARY: case WUFFS_BASE__PIXEL_FORMAT__RGBX: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) if (wuffs_base__cpu_arch__have_x86_sse42()) { return wuffs_private_impl__swizzle_bgrw__bgr__x86_sse42; } @@ -27455,7 +27461,7 @@ wuffs_private_impl__pixel_swizzler__prepare__rgba_nonpremul( case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL: switch (blend) { case WUFFS_BASE__PIXEL_BLEND__SRC: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) if (wuffs_base__cpu_arch__have_x86_sse42()) { return wuffs_private_impl__swizzle_swap_rgbx_bgrx__x86_sse42; } @@ -27571,7 +27577,7 @@ wuffs_private_impl__pixel_swizzler__prepare__rgba_premul( case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL: switch (blend) { case WUFFS_BASE__PIXEL_BLEND__SRC: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) if (wuffs_base__cpu_arch__have_x86_sse42()) { return wuffs_private_impl__swizzle_swap_rgbx_bgrx__x86_sse42; } @@ -27859,7 +27865,7 @@ wuffs_base__pixel_swizzler__swizzle_interleaved_transparent_black( // -------- -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2,avx2") static void // wuffs_private_impl__swizzle_ycc__convert_3_bgrx_x86_avx2( @@ -27896,7 +27902,7 @@ wuffs_private_impl__swizzle_ycc__upsample_inv_h2v2_triangle_x86_avx2( bool first_column, bool last_column); #endif -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) // -------- @@ -29126,7 +29132,7 @@ wuffs_base__pixel_swizzler__swizzle_ycck( case WUFFS_BASE__PIXEL_FORMAT__BGRA_NONPREMUL: case WUFFS_BASE__PIXEL_FORMAT__BGRA_PREMUL: case WUFFS_BASE__PIXEL_FORMAT__BGRX: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) if (wuffs_base__cpu_arch__have_x86_avx2()) { conv3func = &wuffs_private_impl__swizzle_ycc__convert_3_bgrx_x86_avx2; break; @@ -29137,7 +29143,7 @@ wuffs_base__pixel_swizzler__swizzle_ycck( case WUFFS_BASE__PIXEL_FORMAT__RGBA_NONPREMUL: case WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL: case WUFFS_BASE__PIXEL_FORMAT__RGBX: -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) if (wuffs_base__cpu_arch__have_x86_avx2()) { conv3func = &wuffs_private_impl__swizzle_ycc__convert_3_rgbx_x86_avx2; break; @@ -29187,7 +29193,7 @@ wuffs_base__pixel_swizzler__swizzle_ycck( upfuncs[1][0] = wuffs_private_impl__swizzle_ycc__upsample_inv_h2v1_triangle; upfuncs[1][1] = wuffs_private_impl__swizzle_ycc__upsample_inv_h2v2_triangle; -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) #if defined(__GNUC__) && !defined(__clang__) // Don't use our AVX2 implementation for GCC (but do use it for clang). For // some unknown reason, GCC performs noticably better on the non-SIMD @@ -29239,7 +29245,7 @@ wuffs_base__pixel_swizzler__swizzle_ycck( // -------- // ‼ WUFFS MULTI-FILE SECTION +x86_avx2 -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2,avx2") static void // wuffs_private_impl__swizzle_ycc__convert_3_bgrx_x86_avx2( @@ -29864,7 +29870,7 @@ wuffs_private_impl__swizzle_ycc__upsample_inv_h2v2_triangle_x86_avx2( return dst_ptr; } #endif -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) // ‼ WUFFS MULTI-FILE SECTION -x86_avx2 #endif // !defined(WUFFS_CONFIG__MODULES) || @@ -30126,13 +30132,13 @@ wuffs_adler32__hasher__up_arm_neon( wuffs_base__slice_u8 a_x); #endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__ARM_NEON) -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__GENERATED_C_CODE static wuffs_base__empty_struct wuffs_adler32__hasher__up_x86_sse42( wuffs_adler32__hasher* self, wuffs_base__slice_u8 a_x); -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) // ---------------- VTables @@ -30283,7 +30289,7 @@ wuffs_adler32__hasher__update( #if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__ARM_NEON) wuffs_base__cpu_arch__have_arm_neon() ? &wuffs_adler32__hasher__up_arm_neon : #endif -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) wuffs_base__cpu_arch__have_x86_sse42() ? &wuffs_adler32__hasher__up_x86_sse42 : #endif self->private_impl.choosy_up); @@ -30513,7 +30519,7 @@ wuffs_adler32__hasher__up_arm_neon( // ‼ WUFFS MULTI-FILE SECTION +x86_sse42 // -------- func adler32.hasher.up_x86_sse42 -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2") WUFFS_BASE__GENERATED_C_CODE static wuffs_base__empty_struct @@ -30600,7 +30606,7 @@ wuffs_adler32__hasher__up_x86_sse42( self->private_impl.f_state = (((v_s2 & 65535u) << 16u) | (v_s1 & 65535u)); return wuffs_base__make_empty_struct(); } -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) // ‼ WUFFS MULTI-FILE SECTION -x86_sse42 #endif // !defined(WUFFS_CONFIG__MODULES) || defined(WUFFS_CONFIG__MODULE__ADLER32) @@ -36366,13 +36372,13 @@ wuffs_crc32__ieee_hasher__up_arm_crc32( wuffs_base__slice_u8 a_x); #endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__ARM_CRC32) -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__GENERATED_C_CODE static wuffs_base__empty_struct wuffs_crc32__ieee_hasher__up_x86_sse42( wuffs_crc32__ieee_hasher* self, wuffs_base__slice_u8 a_x); -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) // ---------------- VTables @@ -36521,7 +36527,7 @@ wuffs_crc32__ieee_hasher__update( #if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__ARM_CRC32) wuffs_base__cpu_arch__have_arm_crc32() ? &wuffs_crc32__ieee_hasher__up_arm_crc32 : #endif -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) wuffs_base__cpu_arch__have_x86_sse42() ? &wuffs_crc32__ieee_hasher__up_x86_sse42 : #endif self->private_impl.choosy_up); @@ -36749,7 +36755,7 @@ wuffs_crc32__ieee_hasher__up_arm_crc32( // ‼ WUFFS MULTI-FILE SECTION +x86_sse42 // -------- func crc32.ieee_hasher.up_x86_sse42 -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2") WUFFS_BASE__GENERATED_C_CODE static wuffs_base__empty_struct @@ -36875,7 +36881,7 @@ wuffs_crc32__ieee_hasher__up_x86_sse42( self->private_impl.f_state = (4294967295u ^ v_s); return wuffs_base__make_empty_struct(); } -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) // ‼ WUFFS MULTI-FILE SECTION -x86_sse42 #endif // !defined(WUFFS_CONFIG__MODULES) || defined(WUFFS_CONFIG__MODULE__CRC32) @@ -37201,13 +37207,13 @@ wuffs_crc64__ecma_hasher__up__choosy_default( wuffs_crc64__ecma_hasher* self, wuffs_base__slice_u8 a_x); -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__GENERATED_C_CODE static wuffs_base__empty_struct wuffs_crc64__ecma_hasher__up_x86_sse42( wuffs_crc64__ecma_hasher* self, wuffs_base__slice_u8 a_x); -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) // ---------------- VTables @@ -37353,7 +37359,7 @@ wuffs_crc64__ecma_hasher__update( if (self->private_impl.f_state == 0u) { self->private_impl.choosy_up = ( -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) wuffs_base__cpu_arch__have_x86_sse42() ? &wuffs_crc64__ecma_hasher__up_x86_sse42 : #endif self->private_impl.choosy_up); @@ -37455,7 +37461,7 @@ wuffs_crc64__ecma_hasher__checksum_u64( // ‼ WUFFS MULTI-FILE SECTION +x86_sse42 // -------- func crc64.ecma_hasher.up_x86_sse42 -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2") WUFFS_BASE__GENERATED_C_CODE static wuffs_base__empty_struct @@ -37583,7 +37589,7 @@ wuffs_crc64__ecma_hasher__up_x86_sse42( self->private_impl.f_state = (18446744073709551615u ^ v_s); return wuffs_base__make_empty_struct(); } -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) // ‼ WUFFS MULTI-FILE SECTION -x86_sse42 #endif // !defined(WUFFS_CONFIG__MODULES) || defined(WUFFS_CONFIG__MODULE__CRC64) @@ -37722,14 +37728,14 @@ wuffs_deflate__decoder__init_huff( uint32_t a_n_codes1, uint32_t a_base_symbol); -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) WUFFS_BASE__GENERATED_C_CODE static wuffs_base__status wuffs_deflate__decoder__decode_huffman_bmi2( wuffs_deflate__decoder* self, wuffs_base__io_buffer* a_dst, wuffs_base__io_buffer* a_src); -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) WUFFS_BASE__GENERATED_C_CODE static wuffs_base__status @@ -38063,7 +38069,7 @@ wuffs_deflate__decoder__do_transform_io( WUFFS_BASE__COROUTINE_SUSPENSION_POINT_0; self->private_impl.choosy_decode_huffman_fast64 = ( -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) wuffs_base__cpu_arch__have_x86_bmi2() ? &wuffs_deflate__decoder__decode_huffman_bmi2 : #endif self->private_impl.choosy_decode_huffman_fast64); @@ -38952,7 +38958,7 @@ wuffs_deflate__decoder__init_huff( // ‼ WUFFS MULTI-FILE SECTION +x86_bmi2 // -------- func deflate.decoder.decode_huffman_bmi2 -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("bmi2") WUFFS_BASE__GENERATED_C_CODE static wuffs_base__status @@ -39160,7 +39166,7 @@ wuffs_deflate__decoder__decode_huffman_bmi2( return status; } -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) // ‼ WUFFS MULTI-FILE SECTION -x86_bmi2 // -------- func deflate.decoder.decode_huffman_fast32 @@ -43910,7 +43916,7 @@ wuffs_jpeg__decoder__decode_idct__choosy_default( uint64_t a_dst_stride, uint32_t a_q); -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) WUFFS_BASE__GENERATED_C_CODE static wuffs_base__empty_struct wuffs_jpeg__decoder__decode_idct_x86_avx2( @@ -43918,7 +43924,7 @@ wuffs_jpeg__decoder__decode_idct_x86_avx2( wuffs_base__slice_u8 a_dst_buffer, uint64_t a_dst_stride, uint32_t a_q); -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) WUFFS_BASE__GENERATED_C_CODE static wuffs_base__status @@ -45385,7 +45391,7 @@ wuffs_jpeg__decoder__decode_idct__choosy_default( // ‼ WUFFS MULTI-FILE SECTION +x86_avx2 // -------- func jpeg.decoder.decode_idct_x86_avx2 -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2,avx2") WUFFS_BASE__GENERATED_C_CODE static wuffs_base__empty_struct @@ -45769,7 +45775,7 @@ wuffs_jpeg__decoder__decode_idct_x86_avx2( wuffs_base__poke_u64le__no_bounds_check(a_dst_buffer.ptr, v_final7); return wuffs_base__make_empty_struct(); } -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) // ‼ WUFFS MULTI-FILE SECTION -x86_avx2 // -------- func jpeg.decoder.get_quirk @@ -46118,7 +46124,7 @@ wuffs_jpeg__decoder__do_decode_image_config( self->private_impl.f_payload_length = 0u; } self->private_impl.choosy_decode_idct = ( -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) wuffs_base__cpu_arch__have_x86_avx2() ? &wuffs_jpeg__decoder__decode_idct_x86_avx2 : #endif self->private_impl.choosy_decode_idct); @@ -60038,40 +60044,40 @@ wuffs_png__decoder__filter_4_distance_4_fallback( wuffs_base__slice_u8 a_curr, wuffs_base__slice_u8 a_prev); -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__GENERATED_C_CODE static wuffs_base__empty_struct wuffs_png__decoder__filter_1_distance_4_x86_sse42( wuffs_png__decoder* self, wuffs_base__slice_u8 a_curr); -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__GENERATED_C_CODE static wuffs_base__empty_struct wuffs_png__decoder__filter_3_distance_4_x86_sse42( wuffs_png__decoder* self, wuffs_base__slice_u8 a_curr, wuffs_base__slice_u8 a_prev); -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__GENERATED_C_CODE static wuffs_base__empty_struct wuffs_png__decoder__filter_4_distance_3_x86_sse42( wuffs_png__decoder* self, wuffs_base__slice_u8 a_curr, wuffs_base__slice_u8 a_prev); -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__GENERATED_C_CODE static wuffs_base__empty_struct wuffs_png__decoder__filter_4_distance_4_x86_sse42( wuffs_png__decoder* self, wuffs_base__slice_u8 a_curr, wuffs_base__slice_u8 a_prev); -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__GENERATED_C_CODE static wuffs_base__status @@ -61390,7 +61396,7 @@ wuffs_png__decoder__filter_4_distance_4_fallback( // ‼ WUFFS MULTI-FILE SECTION +x86_sse42 // -------- func png.decoder.filter_1_distance_4_x86_sse42 -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2") WUFFS_BASE__GENERATED_C_CODE static wuffs_base__empty_struct @@ -61431,13 +61437,13 @@ wuffs_png__decoder__filter_1_distance_4_x86_sse42( } return wuffs_base__make_empty_struct(); } -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) // ‼ WUFFS MULTI-FILE SECTION -x86_sse42 // ‼ WUFFS MULTI-FILE SECTION +x86_sse42 // -------- func png.decoder.filter_3_distance_4_x86_sse42 -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2") WUFFS_BASE__GENERATED_C_CODE static wuffs_base__empty_struct @@ -61537,13 +61543,13 @@ wuffs_png__decoder__filter_3_distance_4_x86_sse42( } return wuffs_base__make_empty_struct(); } -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) // ‼ WUFFS MULTI-FILE SECTION -x86_sse42 // ‼ WUFFS MULTI-FILE SECTION +x86_sse42 // -------- func png.decoder.filter_4_distance_3_x86_sse42 -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2") WUFFS_BASE__GENERATED_C_CODE static wuffs_base__empty_struct @@ -61664,13 +61670,13 @@ wuffs_png__decoder__filter_4_distance_3_x86_sse42( } return wuffs_base__make_empty_struct(); } -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) // ‼ WUFFS MULTI-FILE SECTION -x86_sse42 // ‼ WUFFS MULTI-FILE SECTION +x86_sse42 // -------- func png.decoder.filter_4_distance_4_x86_sse42 -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) WUFFS_BASE__MAYBE_ATTRIBUTE_TARGET("pclmul,popcnt,sse4.2") WUFFS_BASE__GENERATED_C_CODE static wuffs_base__empty_struct @@ -61769,7 +61775,7 @@ wuffs_png__decoder__filter_4_distance_4_x86_sse42( } return wuffs_base__make_empty_struct(); } -#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) // ‼ WUFFS MULTI-FILE SECTION -x86_sse42 // -------- func png.decoder.get_quirk @@ -62516,7 +62522,7 @@ wuffs_png__decoder__choose_filter_implementations( #if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__ARM_NEON) wuffs_base__cpu_arch__have_arm_neon() ? &wuffs_png__decoder__filter_4_distance_3_arm_neon : #endif -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) wuffs_base__cpu_arch__have_x86_sse42() ? &wuffs_png__decoder__filter_4_distance_3_x86_sse42 : #endif &wuffs_png__decoder__filter_4_distance_3_fallback); @@ -62525,7 +62531,7 @@ wuffs_png__decoder__choose_filter_implementations( #if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__ARM_NEON) wuffs_base__cpu_arch__have_arm_neon() ? &wuffs_png__decoder__filter_1_distance_4_arm_neon : #endif -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) wuffs_base__cpu_arch__have_x86_sse42() ? &wuffs_png__decoder__filter_1_distance_4_x86_sse42 : #endif &wuffs_png__decoder__filter_1_distance_4_fallback); @@ -62533,7 +62539,7 @@ wuffs_png__decoder__choose_filter_implementations( #if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__ARM_NEON) wuffs_base__cpu_arch__have_arm_neon() ? &wuffs_png__decoder__filter_3_distance_4_arm_neon : #endif -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) wuffs_base__cpu_arch__have_x86_sse42() ? &wuffs_png__decoder__filter_3_distance_4_x86_sse42 : #endif &wuffs_png__decoder__filter_3_distance_4_fallback); @@ -62541,7 +62547,7 @@ wuffs_png__decoder__choose_filter_implementations( #if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__ARM_NEON) wuffs_base__cpu_arch__have_arm_neon() ? &wuffs_png__decoder__filter_4_distance_4_arm_neon : #endif -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2) wuffs_base__cpu_arch__have_x86_sse42() ? &wuffs_png__decoder__filter_4_distance_4_x86_sse42 : #endif &wuffs_png__decoder__filter_4_distance_4_fallback); diff --git a/test/c/std/jpeg.c b/test/c/std/jpeg.c index 0ba9509e..3ccccb66 100644 --- a/test/c/std/jpeg.c +++ b/test/c/std/jpeg.c @@ -590,7 +590,7 @@ test_wuffs_jpeg_decode_idct() { if (f == 0) { func_name = "choosy_default"; func = &wuffs_jpeg__decoder__decode_idct__choosy_default; -#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64) +#if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3) } else if (wuffs_base__cpu_arch__have_x86_avx2()) { func_name = "x86_avx2"; func = &wuffs_jpeg__decoder__decode_idct_x86_avx2;