From 3f302cabb81c9a016ab8af0489e5eab6c9d63876 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 30 Jul 2018 18:53:05 +0300 Subject: [PATCH] core(test): intrinsic tests for all dispatched CPU optimizations - tests for both SIMD128 / SIMD256 - different dispatched + baseline(SIMD128) intrinsics --- cmake/OpenCVCompilerOptimizations.cmake | 33 +- cmake/OpenCVModule.cmake | 6 + modules/core/CMakeLists.txt | 4 + .../core/include/opencv2/core/hal/intrin.hpp | 37 +- modules/core/test/test_intrin.avx2.cpp | 5 - modules/core/test/test_intrin.cpp | 151 ++++---- modules/core/test/test_intrin.fp16.cpp | 19 - modules/core/test/test_intrin.simd.hpp | 296 --------------- modules/core/test/test_intrin128.simd.hpp | 22 ++ modules/core/test/test_intrin256.simd.hpp | 23 ++ modules/core/test/test_intrin_utils.hpp | 356 +++++++++++++++++- modules/core/test/test_precomp.hpp | 1 - 12 files changed, 527 insertions(+), 426 deletions(-) delete mode 100644 modules/core/test/test_intrin.avx2.cpp delete mode 100644 modules/core/test/test_intrin.fp16.cpp delete mode 100644 modules/core/test/test_intrin.simd.hpp create mode 100644 modules/core/test/test_intrin128.simd.hpp create mode 100644 modules/core/test/test_intrin256.simd.hpp diff --git a/cmake/OpenCVCompilerOptimizations.cmake b/cmake/OpenCVCompilerOptimizations.cmake index 8beabefe41c4..377eb98a6528 100644 --- a/cmake/OpenCVCompilerOptimizations.cmake +++ b/cmake/OpenCVCompilerOptimizations.cmake @@ -761,24 +761,24 @@ macro(ocv_compiler_optimization_fill_cpu_config) endif() endmacro() -macro(ocv_add_dispatched_file filename) +macro(__ocv_add_dispatched_file filename target_src_var src_directory dst_directory precomp_hpp optimizations_var) if(NOT OPENCV_INITIAL_PASS) set(__codestr " -#include \"${CMAKE_CURRENT_LIST_DIR}/src/precomp.hpp\" -#include \"${CMAKE_CURRENT_LIST_DIR}/src/${filename}.simd.hpp\" +#include \"${src_directory}/${precomp_hpp}\" +#include \"${src_directory}/${filename}.simd.hpp\" ") - set(__declarations_str "#define CV_CPU_SIMD_FILENAME \"${CMAKE_CURRENT_LIST_DIR}/src/${filename}.simd.hpp\"") + set(__declarations_str "#define CV_CPU_SIMD_FILENAME \"${src_directory}/${filename}.simd.hpp\"") set(__dispatch_modes "BASELINE") - set(__optimizations "${ARGN}") + set(__optimizations "${${optimizations_var}}") if(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS) set(__optimizations "") endif() foreach(OPT ${__optimizations}) string(TOLOWER "${OPT}" OPT_LOWER) - set(__file "${CMAKE_CURRENT_BINARY_DIR}/${filename}.${OPT_LOWER}.cpp") + set(__file "${CMAKE_CURRENT_BINARY_DIR}/${dst_directory}${filename}.${OPT_LOWER}.cpp") if(EXISTS "${__file}") file(READ "${__file}" __content) else() @@ -791,7 +791,11 @@ macro(ocv_add_dispatched_file filename) endif() if(";${CPU_DISPATCH};" MATCHES "${OPT}" OR __CPU_DISPATCH_INCLUDE_ALL) - list(APPEND OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED "${__file}") + if(EXISTS "${src_directory}/${filename}.${OPT_LOWER}.cpp") + message(STATUS "Using overrided ${OPT} source: ${src_directory}/${filename}.${OPT_LOWER}.cpp") + else() + list(APPEND ${target_src_var} "${__file}") + endif() endif() set(__declarations_str "${__declarations_str} @@ -803,9 +807,11 @@ macro(ocv_add_dispatched_file filename) set(__declarations_str "${__declarations_str} #define CV_CPU_DISPATCH_MODES_ALL ${__dispatch_modes} + +#undef CV_CPU_SIMD_FILENAME ") - set(__file "${CMAKE_CURRENT_BINARY_DIR}/${filename}.simd_declarations.hpp") + set(__file "${CMAKE_CURRENT_BINARY_DIR}/${dst_directory}${filename}.simd_declarations.hpp") if(EXISTS "${__file}") file(READ "${__file}" __content) endif() @@ -817,6 +823,17 @@ macro(ocv_add_dispatched_file filename) endif() endmacro() +macro(ocv_add_dispatched_file filename) + set(__optimizations "${ARGN}") + if(" ${ARGV1}" STREQUAL " TEST") + list(REMOVE_AT __optimizations 0) + __ocv_add_dispatched_file("${filename}" "OPENCV_MODULE_${the_module}_TEST_SOURCES_DISPATCHED" "${CMAKE_CURRENT_LIST_DIR}/test" "test/" "test_precomp.hpp" __optimizations) + else() + __ocv_add_dispatched_file("${filename}" "OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED" "${CMAKE_CURRENT_LIST_DIR}/src" "" "precomp.hpp" __optimizations) + endif() +endmacro() + + # Workaround to support code which always require all code paths macro(ocv_add_dispatched_file_force_all) set(__CPU_DISPATCH_INCLUDE_ALL 1) diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index 00d15dc6d95e..54f100d3cf7b 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -1202,6 +1202,9 @@ function(ocv_add_accuracy_tests) set(OPENCV_TEST_${the_module}_SOURCES ${test_srcs} ${test_hdrs}) endif() + if(OPENCV_MODULE_${the_module}_TEST_SOURCES_DISPATCHED) + list(APPEND OPENCV_TEST_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_TEST_SOURCES_DISPATCHED}) + endif() ocv_compiler_optimization_process_sources(OPENCV_TEST_${the_module}_SOURCES OPENCV_TEST_${the_module}_DEPS ${the_target}) if(NOT BUILD_opencv_world) @@ -1211,6 +1214,9 @@ function(ocv_add_accuracy_tests) source_group("Src" FILES "${${the_target}_pch}") ocv_add_executable(${the_target} ${OPENCV_TEST_${the_module}_SOURCES} ${${the_target}_pch}) ocv_target_include_modules(${the_target} ${test_deps} "${test_path}") + if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/test") + ocv_target_include_directories(${the_target} "${CMAKE_CURRENT_BINARY_DIR}/test") + endif() ocv_target_link_libraries(${the_target} LINK_PRIVATE ${test_deps} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS} ${OPENCV_TEST_${the_module}_DEPS}) add_dependencies(opencv_tests ${the_target}) diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index 1997c906bcde..455afaf59386 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -3,6 +3,10 @@ set(the_description "The Core Functionality") ocv_add_dispatched_file(mathfuncs_core SSE2 AVX AVX2) ocv_add_dispatched_file(stat SSE4_2 AVX2) +# dispatching for accuracy tests +ocv_add_dispatched_file_force_all(test_intrin128 TEST SSE2 SSE3 SSSE3 SSE4_1 SSE4_2 AVX FP16 AVX2) +ocv_add_dispatched_file_force_all(test_intrin256 TEST AVX2) + ocv_add_module(core OPTIONAL opencv_cudev WRAP java python js) diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp index 263659d3023c..31504a6291ee 100644 --- a/modules/core/include/opencv2/core/hal/intrin.hpp +++ b/modules/core/include/opencv2/core/hal/intrin.hpp @@ -204,20 +204,6 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN #define CV_SIMD512_64F 0 #endif -#if CV_SIMD512 - #define CV_SIMD 1 - #define CV_SIMD_64F CV_SIMD512_64F - #define CV_SIMD_WIDTH 64 -#elif CV_SIMD256 - #define CV_SIMD 1 - #define CV_SIMD_64F CV_SIMD256_64F - #define CV_SIMD_WIDTH 32 -#else - #define CV_SIMD CV_SIMD128 - #define CV_SIMD_64F CV_SIMD128_64F - #define CV_SIMD_WIDTH 16 -#endif - //================================================================================================== #define CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \ @@ -309,7 +295,15 @@ template struct V_RegTraits #endif #endif -#if CV_SIMD256 +#if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512) + #define CV_SIMD 1 + #define CV_SIMD_64F CV_SIMD512_64F + #define CV_SIMD_WIDTH 64 + // TODO typedef v_uint8 / v_int32 / etc types here +#elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256) + #define CV_SIMD 1 + #define CV_SIMD_64F CV_SIMD256_64F + #define CV_SIMD_WIDTH 32 typedef v_uint8x32 v_uint8; typedef v_int8x32 v_int8; typedef v_uint16x16 v_uint16; @@ -329,7 +323,10 @@ template struct V_RegTraits CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v256) CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v256, load) inline void vx_cleanup() { v256_cleanup(); } -#elif CV_SIMD128 || CV_SIMD128_CPP +#elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128) + #define CV_SIMD CV_SIMD128 + #define CV_SIMD_64F CV_SIMD128_64F + #define CV_SIMD_WIDTH 16 typedef v_uint8x16 v_uint8; typedef v_int8x16 v_int8; typedef v_uint16x8 v_uint16; @@ -380,6 +377,14 @@ inline unsigned int trailingZeros32(unsigned int value) { CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END #endif +#ifndef CV_SIMD_64F +#define CV_SIMD_64F 0 +#endif + +#ifndef CV_SIMD +#define CV_SIMD 0 +#endif + } // cv:: //! @endcond diff --git a/modules/core/test/test_intrin.avx2.cpp b/modules/core/test/test_intrin.avx2.cpp deleted file mode 100644 index 9ebfcdf542b6..000000000000 --- a/modules/core/test/test_intrin.avx2.cpp +++ /dev/null @@ -1,5 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. -#include "test_precomp.hpp" -#include "test_intrin.simd.hpp" \ No newline at end of file diff --git a/modules/core/test/test_intrin.cpp b/modules/core/test/test_intrin.cpp index 6610e332de7f..602877382d3a 100644 --- a/modules/core/test/test_intrin.cpp +++ b/modules/core/test/test_intrin.cpp @@ -2,101 +2,100 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. #include "test_precomp.hpp" -#include "test_intrin.simd.hpp" -#define CV_CPU_SIMD_FILENAME "test_intrin.simd.hpp" -#define CV_CPU_DISPATCH_MODE FP16 -#include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp" +#include "test_intrin128.simd.hpp" +#include "test_intrin128.simd_declarations.hpp" -#define CV_CPU_DISPATCH_MODE AVX2 -#include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp" +#undef CV_CPU_DISPATCH_MODES_ALL -namespace opencv_test { namespace hal { -using namespace CV_CPU_OPTIMIZATION_NAMESPACE; - -TEST(hal_intrin, uint8x16) -{ test_hal_intrin_uint8(); } - -TEST(hal_intrin, int8x16) -{ test_hal_intrin_int8(); } - -TEST(hal_intrin, uint16x8) -{ test_hal_intrin_uint16(); } - -TEST(hal_intrin, int16x8) -{ test_hal_intrin_int16(); } - -TEST(hal_intrin, int32x4) -{ test_hal_intrin_int32(); } - -TEST(hal_intrin, uint32x4) -{ test_hal_intrin_uint32(); } - -TEST(hal_intrin, uint64x2) -{ test_hal_intrin_uint64(); } - -TEST(hal_intrin, int64x2) -{ test_hal_intrin_int64(); } +#include "opencv2/core/cv_cpu_dispatch.h" +#include "test_intrin256.simd.hpp" +#include "test_intrin256.simd_declarations.hpp" -TEST(hal_intrin, float32x4) -{ test_hal_intrin_float32(); } -TEST(hal_intrin, float64x2) -{ test_hal_intrin_float64(); } +namespace opencv_test { namespace hal { -TEST(hal_intrin, float16x8) +#define CV_CPU_CALL_BASELINE_(fn, args) CV_CPU_CALL_BASELINE(fn, args) + +#define DISPATCH_SIMD128(fn, cpu_opt) do { \ + CV_CPU_CALL_ ## cpu_opt ## _(fn, ()); \ + throw SkipTestException("SIMD128 (" #cpu_opt ") is not available or disabled"); \ +} while(0) + +#define DISPATCH_SIMD256(fn, cpu_opt) do { \ + CV_CPU_CALL_ ## cpu_opt ## _(fn, ()); \ + throw SkipTestException("SIMD256 (" #cpu_opt ") is not available or disabled"); \ +} while(0) + +#define DEFINE_SIMD_TESTS(simd_size, cpu_opt) \ +TEST(hal_intrin ## simd_size, uint8x16_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_uint8, cpu_opt); } \ +TEST(hal_intrin ## simd_size, int8x16_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_int8, cpu_opt); } \ +TEST(hal_intrin ## simd_size, uint16x8_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_uint16, cpu_opt); } \ +TEST(hal_intrin ## simd_size, int16x8_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_int16, cpu_opt); } \ +TEST(hal_intrin ## simd_size, int32x4_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_int32, cpu_opt); } \ +TEST(hal_intrin ## simd_size, uint32x4_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_uint32, cpu_opt); } \ +TEST(hal_intrin ## simd_size, uint64x2_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_uint64, cpu_opt); } \ +TEST(hal_intrin ## simd_size, int64x2_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_int64, cpu_opt); } \ +TEST(hal_intrin ## simd_size, float32x4_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_float32, cpu_opt); } \ +TEST(hal_intrin ## simd_size, float64x2_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_float64, cpu_opt); } \ + +namespace intrin128 { + +DEFINE_SIMD_TESTS(128, BASELINE) + +#if defined CV_CPU_DISPATCH_COMPILE_SSE2 || defined CV_CPU_BASELINE_COMPILE_SSE2 +DEFINE_SIMD_TESTS(128, SSE2) +#endif +#if defined CV_CPU_DISPATCH_COMPILE_SSE3 || defined CV_CPU_BASELINE_COMPILE_SSE3 +DEFINE_SIMD_TESTS(128, SSE3) +#endif +#if defined CV_CPU_DISPATCH_COMPILE_SSSE3 || defined CV_CPU_BASELINE_COMPILE_SSSE3 +DEFINE_SIMD_TESTS(128, SSSE3) +#endif +#if defined CV_CPU_DISPATCH_COMPILE_SSE4_1 || defined CV_CPU_BASELINE_COMPILE_SSE4_1 +DEFINE_SIMD_TESTS(128, SSE4_1) +#endif +#if defined CV_CPU_DISPATCH_COMPILE_SSE4_2 || defined CV_CPU_BASELINE_COMPILE_SSE4_2 +DEFINE_SIMD_TESTS(128, SSE4_2) +#endif +#if defined CV_CPU_DISPATCH_COMPILE_AVX || defined CV_CPU_BASELINE_COMPILE_AVX +DEFINE_SIMD_TESTS(128, AVX) +#endif +#if defined CV_CPU_DISPATCH_COMPILE_AVX2 || defined CV_CPU_BASELINE_COMPILE_AVX2 +DEFINE_SIMD_TESTS(128, AVX2) +#endif + +TEST(hal_intrin128, float16x8_FP16) { CV_CPU_CALL_FP16_(test_hal_intrin_float16, ()); throw SkipTestException("Unsupported hardware: FP16 is not available"); } -#define DISPATCH_SIMD_MODES AVX2 -#define DISPATCH_SIMD_NAME "SIMD256" -#define DISPATCH_SIMD(fun) \ - do { \ - CV_CPU_DISPATCH(fun, (), DISPATCH_SIMD_MODES); \ - throw SkipTestException( \ - "Unsupported hardware: " \ - DISPATCH_SIMD_NAME \ - " is not available" \ - ); \ - } while(0) - -TEST(hal_intrin256, uint8x32) -{ DISPATCH_SIMD(test_hal_intrin_uint8); } - -TEST(hal_intrin256, int8x32) -{ DISPATCH_SIMD(test_hal_intrin_int8); } - -TEST(hal_intrin256, uint16x16) -{ DISPATCH_SIMD(test_hal_intrin_uint16); } +} // namespace intrin128 -TEST(hal_intrin256, int16x16) -{ DISPATCH_SIMD(test_hal_intrin_int16); } -TEST(hal_intrin256, uint32x8) -{ DISPATCH_SIMD(test_hal_intrin_uint32); } +namespace intrin256 { -TEST(hal_intrin256, int32x8) -{ DISPATCH_SIMD(test_hal_intrin_int32); } -TEST(hal_intrin256, uint64x4) -{ DISPATCH_SIMD(test_hal_intrin_uint64); } +// Not available due missing C++ backend for SIMD256 +//DEFINE_SIMD_TESTS(256, BASELINE) -TEST(hal_intrin256, int64x4) -{ DISPATCH_SIMD(test_hal_intrin_int64); } +//#if defined CV_CPU_DISPATCH_COMPILE_AVX +//DEFINE_SIMD_TESTS(256, AVX) +//#endif -TEST(hal_intrin256, float32x8) -{ DISPATCH_SIMD(test_hal_intrin_float32); } +#if defined CV_CPU_DISPATCH_COMPILE_AVX2 || defined CV_CPU_BASELINE_COMPILE_AVX2 +DEFINE_SIMD_TESTS(256, AVX2) +#endif -TEST(hal_intrin256, float64x4) -{ DISPATCH_SIMD(test_hal_intrin_float64); } - -TEST(hal_intrin256, float16x16) +TEST(hal_intrin256, float16x16_FP16) { - if (!CV_CPU_HAS_SUPPORT_FP16) - throw SkipTestException("Unsupported hardware: FP16 is not available"); - DISPATCH_SIMD(test_hal_intrin_float16); + //CV_CPU_CALL_FP16_(test_hal_intrin_float16, ()); + CV_CPU_CALL_AVX2_(test_hal_intrin_float16, ()); + throw SkipTestException("Unsupported hardware: FP16 is not available"); } + +} // namespace intrin256 + }} // namespace \ No newline at end of file diff --git a/modules/core/test/test_intrin.fp16.cpp b/modules/core/test/test_intrin.fp16.cpp deleted file mode 100644 index 9f6416bcf8df..000000000000 --- a/modules/core/test/test_intrin.fp16.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. -#include "test_precomp.hpp" -#include "test_intrin_utils.hpp" - -namespace opencv_test { namespace hal { -CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN - -void test_hal_intrin_float16() -{ - TheTest() - .test_loadstore_fp16() - .test_float_cvt_fp16() - ; -} - -CV_CPU_OPTIMIZATION_NAMESPACE_END -}} // namespace diff --git a/modules/core/test/test_intrin.simd.hpp b/modules/core/test/test_intrin.simd.hpp deleted file mode 100644 index 4e0d3a073fb6..000000000000 --- a/modules/core/test/test_intrin.simd.hpp +++ /dev/null @@ -1,296 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. -#include "test_precomp.hpp" -#include "test_intrin_utils.hpp" - -namespace opencv_test { namespace hal { -CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN - -void test_hal_intrin_uint8(); -void test_hal_intrin_int8(); -void test_hal_intrin_uint16(); -void test_hal_intrin_int16(); -void test_hal_intrin_uint32(); -void test_hal_intrin_int32(); -void test_hal_intrin_uint64(); -void test_hal_intrin_int64(); -void test_hal_intrin_float32(); -void test_hal_intrin_float64(); - -#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY - -//============= 8-bit integer ===================================================================== - -void test_hal_intrin_uint8() -{ - TheTest() - .test_loadstore() - .test_interleave() - .test_expand() - .test_expand_q() - .test_addsub() - .test_addsub_wrap() - .test_cmp() - .test_logic() - .test_min_max() - .test_absdiff() - .test_mask() - .test_popcount() - .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>() - .test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>() - .test_unpack() - .test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>() - .test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>() - ; - -#if CV_SIMD256 - TheTest() - .test_pack<9>().test_pack<10>().test_pack<13>().test_pack<15>() - .test_pack_u<9>().test_pack_u<10>().test_pack_u<13>().test_pack_u<15>() - .test_extract<16>().test_extract<17>().test_extract<23>().test_extract<31>() - .test_rotate<16>().test_rotate<17>().test_rotate<23>().test_rotate<31>() - ; -#endif -} - -void test_hal_intrin_int8() -{ - TheTest() - .test_loadstore() - .test_interleave() - .test_expand() - .test_expand_q() - .test_addsub() - .test_addsub_wrap() - .test_cmp() - .test_logic() - .test_min_max() - .test_absdiff() - .test_abs() - .test_mask() - .test_popcount() - .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>() - .test_unpack() - .test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>() - .test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>() - ; -} - -//============= 16-bit integer ===================================================================== - -void test_hal_intrin_uint16() -{ - TheTest() - .test_loadstore() - .test_interleave() - .test_expand() - .test_addsub() - .test_addsub_wrap() - .test_mul() - .test_mul_expand() - .test_cmp() - .test_shift<1>() - .test_shift<8>() - .test_logic() - .test_min_max() - .test_absdiff() - .test_reduce() - .test_mask() - .test_popcount() - .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>() - .test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>() - .test_unpack() - .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>() - .test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>() - ; -} - -void test_hal_intrin_int16() -{ - TheTest() - .test_loadstore() - .test_interleave() - .test_expand() - .test_addsub() - .test_addsub_wrap() - .test_mul() - .test_mul_expand() - .test_cmp() - .test_shift<1>() - .test_shift<8>() - .test_dot_prod() - .test_logic() - .test_min_max() - .test_absdiff() - .test_abs() - .test_reduce() - .test_mask() - .test_popcount() - .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>() - .test_unpack() - .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>() - .test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>() - ; -} - -//============= 32-bit integer ===================================================================== - -void test_hal_intrin_uint32() -{ - TheTest() - .test_loadstore() - .test_interleave() - .test_expand() - .test_addsub() - .test_mul() - .test_mul_expand() - .test_cmp() - .test_shift<1>() - .test_shift<8>() - .test_logic() - .test_min_max() - .test_absdiff() - .test_reduce() - .test_mask() - .test_popcount() - .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>() - .test_unpack() - .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() - .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() - .test_transpose() - ; -} - -void test_hal_intrin_int32() -{ - TheTest() - .test_loadstore() - .test_interleave() - .test_expand() - .test_addsub() - .test_mul() - .test_abs() - .test_cmp() - .test_popcount() - .test_shift<1>().test_shift<8>() - .test_logic() - .test_min_max() - .test_absdiff() - .test_reduce() - .test_mask() - .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>() - .test_unpack() - .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() - .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() - .test_float_cvt32() - .test_float_cvt64() - .test_transpose() - ; -} - -//============= 64-bit integer ===================================================================== - -void test_hal_intrin_uint64() -{ - TheTest() - .test_loadstore() - .test_addsub() - .test_shift<1>().test_shift<8>() - .test_logic() - .test_extract<0>().test_extract<1>() - .test_rotate<0>().test_rotate<1>() - ; -} - -void test_hal_intrin_int64() -{ - TheTest() - .test_loadstore() - .test_addsub() - .test_shift<1>().test_shift<8>() - .test_logic() - .test_extract<0>().test_extract<1>() - .test_rotate<0>().test_rotate<1>() - ; -} - -//============= Floating point ===================================================================== -void test_hal_intrin_float32() -{ - TheTest() - .test_loadstore() - .test_interleave() - .test_interleave_2channel() - .test_addsub() - .test_mul() - .test_div() - .test_cmp() - .test_sqrt_abs() - .test_min_max() - .test_float_absdiff() - .test_reduce() - .test_mask() - .test_unpack() - .test_float_math() - .test_float_cvt64() - .test_matmul() - .test_transpose() - .test_reduce_sum4() - .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() - .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() - ; - -#if CV_SIMD256 - TheTest() - .test_extract<4>().test_extract<5>().test_extract<6>().test_extract<7>() - .test_rotate<4>().test_rotate<5>().test_rotate<6>().test_rotate<7>() - ; -#endif -} - -void test_hal_intrin_float64() -{ -#if CV_SIMD_64F - TheTest() - .test_loadstore() - .test_addsub() - .test_mul() - .test_div() - .test_cmp() - .test_sqrt_abs() - .test_min_max() - .test_float_absdiff() - .test_mask() - .test_unpack() - .test_float_math() - .test_float_cvt32() - .test_extract<0>().test_extract<1>() - .test_rotate<0>().test_rotate<1>() - ; - -#if CV_SIMD256 - TheTest() - .test_extract<2>().test_extract<3>() - .test_rotate<2>().test_rotate<3>() - ; -#endif //CV_SIMD256 - -#endif -} - -#if CV_FP16 && CV_SIMD_WIDTH > 16 -void test_hal_intrin_float16() -{ - TheTest() - .test_loadstore_fp16() - .test_float_cvt_fp16() - ; -} -#endif - -#endif //CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY - -CV_CPU_OPTIMIZATION_NAMESPACE_END - -}} //namespace \ No newline at end of file diff --git a/modules/core/test/test_intrin128.simd.hpp b/modules/core/test/test_intrin128.simd.hpp new file mode 100644 index 000000000000..1d9bee2d331d --- /dev/null +++ b/modules/core/test/test_intrin128.simd.hpp @@ -0,0 +1,22 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +#define CV__SIMD_FORCE_WIDTH 128 +#include "opencv2/core/hal/intrin.hpp" +#undef CV__SIMD_FORCE_WIDTH + +#if CV_SIMD_WIDTH != 16 +#error "Invalid build configuration" +#endif + +#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +namespace opencv_test { namespace hal { namespace intrin128 { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN + +#include "test_intrin_utils.hpp" + +CV_CPU_OPTIMIZATION_NAMESPACE_END +}}} //namespace diff --git a/modules/core/test/test_intrin256.simd.hpp b/modules/core/test/test_intrin256.simd.hpp new file mode 100644 index 000000000000..a5e2cd522175 --- /dev/null +++ b/modules/core/test/test_intrin256.simd.hpp @@ -0,0 +1,23 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#if !defined CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY && \ + !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS // TODO? C++ fallback implementation for SIMD256 + +#define CV__SIMD_FORCE_WIDTH 256 +#include "opencv2/core/hal/intrin.hpp" +#undef CV__SIMD_FORCE_WIDTH + +#if CV_SIMD_WIDTH != 32 +#error "Invalid build configuration" +#endif + +#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +namespace opencv_test { namespace hal { namespace intrin256 { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN + +#include "test_intrin_utils.hpp" + +CV_CPU_OPTIMIZATION_NAMESPACE_END +}}} //namespace diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index 5f3175bc6cbc..cc9de4fc7517 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -1,10 +1,22 @@ // This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. -#include "opencv2/core/hal/intrin.hpp" -namespace opencv_test { namespace hal { -CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN +// This file is not standalone. +// It is included with these active namespaces: +//namespace opencv_test { namespace hal { namespace intrinXXX { +//CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN + +void test_hal_intrin_uint8(); +void test_hal_intrin_int8(); +void test_hal_intrin_uint16(); +void test_hal_intrin_int16(); +void test_hal_intrin_uint32(); +void test_hal_intrin_int32(); +void test_hal_intrin_uint64(); +void test_hal_intrin_int64(); +void test_hal_intrin_float32(); +void test_hal_intrin_float64(); void test_hal_intrin_float16(); @@ -258,6 +270,7 @@ template struct TheTest v_store(out.u.d, r_low); for (int i = 0; i < R::nlanes/2; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ((LaneType)data.u[i], (LaneType)out.u[i]); } @@ -266,6 +279,7 @@ template struct TheTest v_store(out.u.d, r_low_align8byte); for (int i = 0; i < R::nlanes/2; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ((LaneType)data.u[i + R::nlanes/2], (LaneType)out.u[i]); } @@ -296,6 +310,7 @@ template struct TheTest resV.fill((LaneType)8); for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ((LaneType)0, resZ[i]); EXPECT_EQ((LaneType)8, resV[i]); } @@ -342,6 +357,7 @@ template struct TheTest for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(data1, Data(a)); EXPECT_EQ(data2, Data(b)); EXPECT_EQ(data3, Data(c)); @@ -374,6 +390,7 @@ template struct TheTest for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(data1, Data(a)); EXPECT_EQ(data2, Data(b)); } @@ -397,6 +414,7 @@ template struct TheTest const int n = Rx2::nlanes; for (int i = 0; i < n; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(dataA[i], resB[i]); EXPECT_EQ(dataA[i], resC[i]); EXPECT_EQ(dataA[i + n], resD[i]); @@ -412,7 +430,10 @@ template struct TheTest Data out = vx_load_expand_q(data.d); const int n = Rx4::nlanes; for (int i = 0; i < n; ++i) + { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(data[i], out[i]); + } return *this; } @@ -426,6 +447,7 @@ template struct TheTest Data resC = a + b, resD = a - b; for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(saturate_cast(dataA[i] + dataB[i]), resC[i]); EXPECT_EQ(saturate_cast(dataA[i] - dataB[i]), resD[i]); } @@ -443,6 +465,7 @@ template struct TheTest resD = v_sub_wrap(a, b); for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ((LaneType)(dataA[i] + dataB[i]), resC[i]); EXPECT_EQ((LaneType)(dataA[i] - dataB[i]), resD[i]); } @@ -458,6 +481,7 @@ template struct TheTest Data resC = a * b; for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(dataA[i] * dataB[i], resC[i]); } @@ -473,6 +497,7 @@ template struct TheTest Data resC = a / b; for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(dataA[i] / dataB[i], resC[i]); } @@ -492,6 +517,7 @@ template struct TheTest const int n = R::nlanes / 2; for (int i = 0; i < n; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ((typename Rx2::lane_type)dataA[i] * dataB[i], resC[i]); EXPECT_EQ((typename Rx2::lane_type)dataA[i + n] * dataB[i + n], resD[i]); } @@ -511,6 +537,7 @@ template struct TheTest for (int i = 0; i < Ru::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ((u_type)std::abs(dataA[i] - dataB[i]), resC[i]); } @@ -529,6 +556,7 @@ template struct TheTest for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(static_cast(dataA[i] << s), resB[i]); EXPECT_EQ(static_cast(dataA[i] << s), resC[i]); EXPECT_EQ(static_cast(dataA[i] >> s), resD[i]); @@ -553,6 +581,7 @@ template struct TheTest for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0); EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0); EXPECT_EQ(dataA[i] > dataB[i], resE[i] != 0); @@ -583,6 +612,7 @@ template struct TheTest const int n = R::nlanes / 2; for (int i = 0; i < n; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(dataA[i*2] * dataB[i*2] + dataA[i*2 + 1] * dataB[i*2 + 1], resD[i]); EXPECT_EQ(dataA[i*2] * dataB[i*2] + dataA[i*2 + 1] * dataB[i*2 + 1] + dataC[i], resE[i]); } @@ -597,6 +627,7 @@ template struct TheTest Data resC = a & b, resD = a | b, resE = a ^ b, resF = ~a; for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(dataA[i] & dataB[i], resC[i]); EXPECT_EQ(dataA[i] | dataB[i], resD[i]); EXPECT_EQ(dataA[i] ^ dataB[i], resE[i]); @@ -615,6 +646,7 @@ template struct TheTest Data resB = v_sqrt(a), resC = v_invsqrt(a), resE = v_abs(d); for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_COMPARE_EQ((float)std::sqrt(dataA[i]), (float)resB[i]); EXPECT_COMPARE_EQ(1/(float)std::sqrt(dataA[i]), (float)resC[i]); EXPECT_COMPARE_EQ((float)abs(dataA[i]), (float)resE[i]); @@ -632,6 +664,7 @@ template struct TheTest Data resC = v_min(a, b), resD = v_max(a, b); for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(std::min(dataA[i], dataB[i]), resC[i]); EXPECT_EQ(std::max(dataA[i], dataB[i]), resD[i]); } @@ -672,6 +705,7 @@ template struct TheTest const u_type mask = std::numeric_limits::is_signed ? (u_type)(1 << (sizeof(u_type)*8 - 1)) : 0; for (int i = 0; i < Ru::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); u_type uA = dataA[i] ^ mask; u_type uB = dataB[i] ^ mask; EXPECT_EQ(uA > uB ? uA - uB : uB - uA, resC[i]); @@ -691,6 +725,7 @@ template struct TheTest Data resC = v_absdiff(a, b); for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(dataA[i] > dataB[i] ? dataA[i] - dataB[i] : dataB[i] - dataA[i], resC[i]); } return *this; @@ -744,6 +779,7 @@ template struct TheTest Data resF = f; for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); int_type m2 = dataB.as_int(i); EXPECT_EQ((dataD.as_int(i) & m2) | (dataE.as_int(i) & ~m2), resF.as_int(i)); } @@ -776,6 +812,7 @@ template struct TheTest const w_type add = (w_type)1 << (s - 1); for (int i = 0; i < n; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(pack_saturate_cast(dataA[i]), resC[i]); EXPECT_EQ(pack_saturate_cast(dataB[i]), resC[i + n]); EXPECT_EQ(pack_saturate_cast((dataA[i] + add) >> s), resD[i]); @@ -816,6 +853,7 @@ template struct TheTest const w_type add = (w_type)1 << (s - 1); for (int i = 0; i < n; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(pack_saturate_cast(dataA[i]), resC[i]); EXPECT_EQ(pack_saturate_cast(dataB[i]), resC[i + n]); EXPECT_EQ(pack_saturate_cast((dataA[i] + add) >> s), resD[i]); @@ -845,6 +883,7 @@ template struct TheTest const int n = R::nlanes/2; for (int i = 0; i < n; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(dataA[i], resC[i*2]); EXPECT_EQ(dataB[i], resC[i*2+1]); EXPECT_EQ(dataA[i+n], resD[i*2]); @@ -876,6 +915,7 @@ template struct TheTest for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); if (i + s >= R::nlanes) EXPECT_EQ(dataB[i - R::nlanes + s], resC[i]); else @@ -901,6 +941,7 @@ template struct TheTest for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); if (i + s >= R::nlanes) { EXPECT_EQ((LaneType)0, resC[i]); @@ -940,6 +981,7 @@ template struct TheTest for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(cvRound(data1[i]), resB[i]); EXPECT_EQ((typename Ri::lane_type)data1[i], resC[i]); EXPECT_EQ(cvFloor(data1[i]), resD[i]); @@ -964,6 +1006,7 @@ template struct TheTest int n = std::min(Rt::nlanes, R::nlanes); for (int i = 0; i < n; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ((typename Rt::lane_type)dataA[i], resB[i]); } return *this; @@ -983,10 +1026,12 @@ template struct TheTest int n = std::min(Rt::nlanes, R::nlanes); for (int i = 0; i < n; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ((typename Rt::lane_type)dataA[i], resB[i]); } for (int i = 0; i < n; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ((typename Rt::lane_type)dataA[i+n], resC[i]); } #endif @@ -1006,6 +1051,7 @@ template struct TheTest { for (int j = i; j < i + 4; ++j) { + SCOPED_TRACE(cv::format("i=%d j=%d", i, j)); LaneType val = dataV[i] * dataA[j] + dataV[i + 1] * dataB[j] + dataV[i + 2] * dataC[j] @@ -1019,6 +1065,7 @@ template struct TheTest { for (int j = i; j < i + 4; ++j) { + SCOPED_TRACE(cv::format("i=%d j=%d", i, j)); LaneType val = dataV[i] * dataA[j] + dataV[i + 1] * dataB[j] + dataV[i + 2] * dataC[j] @@ -1045,6 +1092,7 @@ template struct TheTest { for (int j = 0; j < 4; ++j) { + SCOPED_TRACE(cv::format("i=%d j=%d", i, j)); EXPECT_EQ(dataA[i + j], res[j][i]); EXPECT_EQ(dataB[i + j], res[j][i + 1]); EXPECT_EQ(dataC[i + j], res[j][i + 2]); @@ -1066,6 +1114,7 @@ template struct TheTest for (int i = 0; i < R::nlanes; i += 4) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_COMPARE_EQ(dataA.sum(i, 4), res[i]); EXPECT_COMPARE_EQ(dataB.sum(i, 4), res[i + 1]); EXPECT_COMPARE_EQ(dataC.sum(i, 4), res[i + 2]); @@ -1121,7 +1170,304 @@ template struct TheTest }; + +#if 1 +#define DUMP_ENTRY(type) printf("SIMD%d: %s\n", 8*(int)sizeof(v_uint8), CV__TRACE_FUNCTION); +#endif + +//============= 8-bit integer ===================================================================== + +void test_hal_intrin_uint8() +{ + DUMP_ENTRY(v_uint8); + TheTest() + .test_loadstore() + .test_interleave() + .test_expand() + .test_expand_q() + .test_addsub() + .test_addsub_wrap() + .test_cmp() + .test_logic() + .test_min_max() + .test_absdiff() + .test_mask() + .test_popcount() + .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>() + .test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>() + .test_unpack() + .test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>() + .test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>() + ; + +#if CV_SIMD_WIDTH == 32 + TheTest() + .test_pack<9>().test_pack<10>().test_pack<13>().test_pack<15>() + .test_pack_u<9>().test_pack_u<10>().test_pack_u<13>().test_pack_u<15>() + .test_extract<16>().test_extract<17>().test_extract<23>().test_extract<31>() + .test_rotate<16>().test_rotate<17>().test_rotate<23>().test_rotate<31>() + ; #endif +} + +void test_hal_intrin_int8() +{ + DUMP_ENTRY(v_int8); + TheTest() + .test_loadstore() + .test_interleave() + .test_expand() + .test_expand_q() + .test_addsub() + .test_addsub_wrap() + .test_cmp() + .test_logic() + .test_min_max() + .test_absdiff() + .test_abs() + .test_mask() + .test_popcount() + .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>() + .test_unpack() + .test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>() + .test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>() + ; +} + +//============= 16-bit integer ===================================================================== + +void test_hal_intrin_uint16() +{ + DUMP_ENTRY(v_uint16); + TheTest() + .test_loadstore() + .test_interleave() + .test_expand() + .test_addsub() + .test_addsub_wrap() + .test_mul() + .test_mul_expand() + .test_cmp() + .test_shift<1>() + .test_shift<8>() + .test_logic() + .test_min_max() + .test_absdiff() + .test_reduce() + .test_mask() + .test_popcount() + .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>() + .test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>() + .test_unpack() + .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>() + .test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>() + ; +} + +void test_hal_intrin_int16() +{ + DUMP_ENTRY(v_int16); + TheTest() + .test_loadstore() + .test_interleave() + .test_expand() + .test_addsub() + .test_addsub_wrap() + .test_mul() + .test_mul_expand() + .test_cmp() + .test_shift<1>() + .test_shift<8>() + .test_dot_prod() + .test_logic() + .test_min_max() + .test_absdiff() + .test_abs() + .test_reduce() + .test_mask() + .test_popcount() + .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>() + .test_unpack() + .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>() + .test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>() + ; +} + +//============= 32-bit integer ===================================================================== + +void test_hal_intrin_uint32() +{ + DUMP_ENTRY(v_uint32); + TheTest() + .test_loadstore() + .test_interleave() + .test_expand() + .test_addsub() + .test_mul() + .test_mul_expand() + .test_cmp() + .test_shift<1>() + .test_shift<8>() + .test_logic() + .test_min_max() + .test_absdiff() + .test_reduce() + .test_mask() + .test_popcount() + .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>() + .test_unpack() + .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() + .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() + .test_transpose() + ; +} + +void test_hal_intrin_int32() +{ + DUMP_ENTRY(v_int32); + TheTest() + .test_loadstore() + .test_interleave() + .test_expand() + .test_addsub() + .test_mul() + .test_abs() + .test_cmp() + .test_popcount() + .test_shift<1>().test_shift<8>() + .test_logic() + .test_min_max() + .test_absdiff() + .test_reduce() + .test_mask() + .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>() + .test_unpack() + .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() + .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() + .test_float_cvt32() + .test_float_cvt64() + .test_transpose() + ; +} + +//============= 64-bit integer ===================================================================== + +void test_hal_intrin_uint64() +{ + DUMP_ENTRY(v_uint64); + TheTest() + .test_loadstore() + .test_addsub() + .test_shift<1>().test_shift<8>() + .test_logic() + .test_extract<0>().test_extract<1>() + .test_rotate<0>().test_rotate<1>() + ; +} + +void test_hal_intrin_int64() +{ + DUMP_ENTRY(v_int64); + TheTest() + .test_loadstore() + .test_addsub() + .test_shift<1>().test_shift<8>() + .test_logic() + .test_extract<0>().test_extract<1>() + .test_rotate<0>().test_rotate<1>() + ; +} + +//============= Floating point ===================================================================== +void test_hal_intrin_float32() +{ + DUMP_ENTRY(v_float32); + TheTest() + .test_loadstore() + .test_interleave() + .test_interleave_2channel() + .test_addsub() + .test_mul() + .test_div() + .test_cmp() + .test_sqrt_abs() + .test_min_max() + .test_float_absdiff() + .test_reduce() + .test_mask() + .test_unpack() + .test_float_math() + .test_float_cvt64() + .test_matmul() + .test_transpose() + .test_reduce_sum4() + .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() + .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() + ; + +#if CV_SIMD_WIDTH == 32 + TheTest() + .test_extract<4>().test_extract<5>().test_extract<6>().test_extract<7>() + .test_rotate<4>().test_rotate<5>().test_rotate<6>().test_rotate<7>() + ; +#endif +} + +void test_hal_intrin_float64() +{ + DUMP_ENTRY(v_float64); +#if CV_SIMD_64F + TheTest() + .test_loadstore() + .test_addsub() + .test_mul() + .test_div() + .test_cmp() + .test_sqrt_abs() + .test_min_max() + .test_float_absdiff() + .test_mask() + .test_unpack() + .test_float_math() + .test_float_cvt32() + .test_extract<0>().test_extract<1>() + .test_rotate<0>().test_rotate<1>() + ; + +#if CV_SIMD_WIDTH == 32 + TheTest() + .test_extract<2>().test_extract<3>() + .test_rotate<2>().test_rotate<3>() + ; +#endif //CV_SIMD256 + +#endif +} + +#if CV_FP16 +void test_hal_intrin_float16() +{ + DUMP_ENTRY(v_float16); +#if CV_SIMD_WIDTH > 16 + TheTest() + .test_loadstore_fp16() + .test_float_cvt_fp16() + ; +#endif +} +#endif + +/*#if defined(CV_CPU_DISPATCH_MODE_FP16) && CV_CPU_DISPATCH_MODE == FP16 +void test_hal_intrin_float16() +{ + TheTest() + .test_loadstore_fp16() + .test_float_cvt_fp16() + ; +} +#endif*/ + +#endif //CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY -CV_CPU_OPTIMIZATION_NAMESPACE_END -}} // namespace +//CV_CPU_OPTIMIZATION_NAMESPACE_END +//}}} // namespace diff --git a/modules/core/test/test_precomp.hpp b/modules/core/test/test_precomp.hpp index 9787586156f5..a82f5cc12c9d 100644 --- a/modules/core/test/test_precomp.hpp +++ b/modules/core/test/test_precomp.hpp @@ -11,6 +11,5 @@ #include "opencv2/core/cvdef.h" #include "opencv2/core/private.hpp" #include "opencv2/core/hal/hal.hpp" -#include "opencv2/core/hal/intrin.hpp" #endif