diff --git a/gen/archs.xml b/gen/archs.xml
index 164c7bb4..18c44016 100644
--- a/gen/archs.xml
+++ b/gen/archs.xml
@@ -85,6 +85,7 @@ at the top, as a last resort.
-funsafe-math-optimizations
-funsafe-math-optimizations
+
16
@@ -101,6 +102,7 @@ at the top, as a last resort.
-funsafe-math-optimizations
-funsafe-math-optimizations
+
16
diff --git a/kernels/volk/volk_32f_index_max_32u.h b/kernels/volk/volk_32f_index_max_32u.h
index 86dad0d1..e2c4e3ba 100644
--- a/kernels/volk/volk_32f_index_max_32u.h
+++ b/kernels/volk/volk_32f_index_max_32u.h
@@ -299,7 +299,11 @@ volk_32f_index_max_32u_neon(uint32_t* target, const float* src0, uint32_t num_po
if (maxValuesBuffer[number] > max) {
index = maxIndexesBuffer[number];
max = maxValuesBuffer[number];
+#ifdef _MSC_VER
+ } else if (maxValues.n128_f32[number] == max) {
+#else
} else if (maxValues[number] == max) {
+#endif
if (index > maxIndexesBuffer[number])
index = maxIndexesBuffer[number];
}
diff --git a/kernels/volk/volk_32f_index_min_32u.h b/kernels/volk/volk_32f_index_min_32u.h
index 0c8bf8c0..2ef44e99 100644
--- a/kernels/volk/volk_32f_index_min_32u.h
+++ b/kernels/volk/volk_32f_index_min_32u.h
@@ -284,7 +284,11 @@ volk_32f_index_min_32u_neon(uint32_t* target, const float* source, uint32_t num_
if (minValuesBuffer[number] < min) {
index = minIndexesBuffer[number];
min = minValuesBuffer[number];
+#ifdef _MSC_VER
+ } else if (minValues.n128_f32[number] == min) {
+#else
} else if (minValues[number] == min) {
+#endif
if (index > minIndexesBuffer[number])
index = minIndexesBuffer[number];
}
diff --git a/kernels/volk/volk_32fc_accumulator_s32fc.h b/kernels/volk/volk_32fc_accumulator_s32fc.h
index d7267ea6..0bbad7eb 100644
--- a/kernels/volk/volk_32fc_accumulator_s32fc.h
+++ b/kernels/volk/volk_32fc_accumulator_s32fc.h
@@ -229,10 +229,10 @@ static inline void volk_32fc_accumulator_s32fc_neon(lv_32fc_t* result,
lv_32fc_t returnValue = lv_cmake(0.f, 0.f);
unsigned int eighthPoints = num_points / 8;
float32x4_t in_vec;
- float32x4_t out_vec0 = { 0.f, 0.f, 0.f, 0.f };
- float32x4_t out_vec1 = { 0.f, 0.f, 0.f, 0.f };
- float32x4_t out_vec2 = { 0.f, 0.f, 0.f, 0.f };
- float32x4_t out_vec3 = { 0.f, 0.f, 0.f, 0.f };
+ float32x4_t out_vec0 = { 0.f };
+ float32x4_t out_vec1 = { 0.f };
+ float32x4_t out_vec2 = { 0.f };
+ float32x4_t out_vec3 = { 0.f };
__VOLK_ATTR_ALIGNED(32) float tempBuffer[4];
for (; number < eighthPoints; number++) {
diff --git a/kernels/volk/volk_32fc_convert_16ic.h b/kernels/volk/volk_32fc_convert_16ic.h
index a38cce64..38bc07a2 100644
--- a/kernels/volk/volk_32fc_convert_16ic.h
+++ b/kernels/volk/volk_32fc_convert_16ic.h
@@ -236,7 +236,7 @@ static inline void volk_32fc_convert_16ic_neonv8(lv_16sc_t* outputVector,
const float32x4_t max_val = vmovq_n_f32(max_val_f);
float32x4_t ret1, ret2, a, b;
- int32x4_t toint_a = { 0, 0, 0, 0 }, toint_b = { 0, 0, 0, 0 };
+ int32x4_t toint_a = { 0 }, toint_b = { 0 };
int16x4_t intInputVal1, intInputVal2;
int16x8_t res;
diff --git a/kernels/volk/volk_32u_byteswap.h b/kernels/volk/volk_32u_byteswap.h
index a6ec86f8..51339414 100644
--- a/kernels/volk/volk_32u_byteswap.h
+++ b/kernels/volk/volk_32u_byteswap.h
@@ -201,7 +201,10 @@ static inline void volk_32u_byteswap_neonv8(uint32_t* intsToSwap, unsigned int n
uint32_t* inputPtr = (uint32_t*)intsToSwap;
const unsigned int n8points = num_points / 8;
uint8x16_t input;
- uint8x16_t idx = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+
+ uint8x16_t idx;
+ const uint8_t idx_data[] = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+ idx = vld1q_u8(idx_data);
unsigned int number = 0;
for (number = 0; number < n8points; ++number) {
diff --git a/kernels/volk/volk_32u_reverse_32u.h b/kernels/volk/volk_32u_reverse_32u.h
index 62150ac6..02a4686c 100644
--- a/kernels/volk/volk_32u_reverse_32u.h
+++ b/kernels/volk/volk_32u_reverse_32u.h
@@ -262,7 +262,9 @@ volk_32u_reverse_32u_neonv8(uint32_t* out, const uint32_t* in, unsigned int num_
const uint32_t* in_ptr = in;
uint32_t* out_ptr = out;
- const uint8x16_t idx = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+ uint8x16_t idx;
+ const uint8_t idx_data[] = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+ idx = vld1q_u8(idx_data);
const unsigned int quarterPoints = num_points / 4;
unsigned int number = 0;
@@ -290,8 +292,15 @@ volk_32u_reverse_32u_neonv8(uint32_t* out, const uint32_t* in, unsigned int num_
#ifdef LV_HAVE_NEON
#include
-
-#if defined(__aarch64__)
+#ifdef _MSC_VER
+#define DO_RBIT \
+ *out_ptr = _byteswap_ulong(*in_ptr); \
+ *out_ptr = ((*out_ptr & 0x55555555) << 1) | ((*out_ptr & 0xAAAAAAAA) >> 1); \
+ *out_ptr = ((*out_ptr & 0x33333333) << 2) | ((*out_ptr & 0xCCCCCCCC) >> 2); \
+ *out_ptr = ((*out_ptr & 0x0F0F0F0F) << 4) | ((*out_ptr & 0xF0F0F0F0) >> 4); \
+ in_ptr++; \
+ out_ptr++;
+#elif defined(__aarch64__)
#define DO_RBIT \
__VOLK_ASM("rbit %w[result], %w[value]" \
: [result] "=r"(*out_ptr) \
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index 2c160b2f..67350c4e 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -221,20 +221,24 @@ check_c_source_compiles(
if(neon_compile_result)
set(CMAKE_REQUIRED_INCLUDES ${PROJECT_SOURCE_DIR}/include)
+ if(MSVC)
+ if(CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM")
+ overrule_arch(neonv8 "Compiler doesn't support neonv8")
+ endif()
+ else(MSVC)
+ check_c_source_compiles(
+ "#include \n int main(){__VOLK_ASM(\"sub v1.4s,v1.4s,v1.4s\");}"
+ have_neonv8_result)
+ if(NOT have_neonv8_result)
+ overrule_arch(neonv8 "Compiler doesn't support neonv8")
+ endif()
+ endif(MSVC)
check_c_source_compiles(
"#include \n int main(){__VOLK_ASM(\"vrev32.8 q0, q0\");}"
have_neonv7_result)
- check_c_source_compiles(
- "#include \n int main(){__VOLK_ASM(\"sub v1.4s,v1.4s,v1.4s\");}"
- have_neonv8_result)
-
if(NOT have_neonv7_result)
overrule_arch(neonv7 "Compiler doesn't support neonv7")
endif()
-
- if(NOT have_neonv8_result)
- overrule_arch(neonv8 "Compiler doesn't support neonv8")
- endif()
else(neon_compile_result)
overrule_arch(neon "Compiler doesn't support NEON")
overrule_arch(neonv7 "Compiler doesn't support NEON")