From 44e706aab23183b593b2b65f235a23962e1220cf Mon Sep 17 00:00:00 2001
From: Johannes Demel <demel@uni-bremen.de>
Date: Sun, 1 Aug 2021 22:16:21 +0200
Subject: [PATCH 01/17] api: Start to fix complex types

This is a first try at moving to a sane, defined complex API. Before we
were basically relying on undefined behavior. This bites us with
PowerPC.

The current changes work for C but C++ code does not compile yet.

Signed-off-by: Johannes Demel <demel@uni-bremen.de>
---
 include/volk/volk_complex.h | 43 +++++--------------------------------
 1 file changed, 5 insertions(+), 38 deletions(-)
diff --git a/include/volk/volk_complex.h b/include/volk/volk_complex.h
index 4d0efc4ba..ecdcb44a6 100644
--- a/include/volk/volk_complex.h
+++ b/include/volk/volk_complex.h
@@ -26,43 +26,10 @@
  * - lv_conj - take the conjugate of the complex number
  */
 
-#ifdef __cplusplus
-
-#include <stdint.h>
-#include <complex>
-
-typedef std::complex<int8_t> lv_8sc_t;
-typedef std::complex<int16_t> lv_16sc_t;
-typedef std::complex<int32_t> lv_32sc_t;
-typedef std::complex<int64_t> lv_64sc_t;
-typedef std::complex<float> lv_32fc_t;
-typedef std::complex<double> lv_64fc_t;
-
-template <typename T>
-inline std::complex<T> lv_cmake(const T& r, const T& i)
-{
-    return std::complex<T>(r, i);
-}
-
-template <typename T>
-inline typename T::value_type lv_creal(const T& x)
-{
-    return x.real();
-}
-
-template <typename T>
-inline typename T::value_type lv_cimag(const T& x)
-{
-    return x.imag();
-}
-
-template <typename T>
-inline T lv_conj(const T& x)
-{
-    return std::conj(x);
-}
-
-#else /* __cplusplus */
+
+#include <volk/volk_common.h>
+
+__VOLK_DECL_BEGIN
 
 #include <complex.h>
 #include <tgmath.h>
@@ -101,6 +68,6 @@ typedef double complex lv_64fc_t;
 
 #endif /* __GNUC__ */
 
-#endif /* __cplusplus */
+__VOLK_DECL_END
 
 #endif /* INCLUDE_VOLK_COMPLEX_H */

From 56358a4136721481cc013ac3f3dd82e382df56e2 Mon Sep 17 00:00:00 2001
From: Johannes Demel <demel@uni-bremen.de>
Date: Mon, 9 Aug 2021 14:09:04 +0200
Subject: [PATCH 02/17] c++: Make things compile

Signed-off-by: Johannes Demel <demel@uni-bremen.de>
---
 include/volk/volk_complex.h | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/include/volk/volk_complex.h b/include/volk/volk_complex.h
index ecdcb44a6..601625ae6 100644
--- a/include/volk/volk_complex.h
+++ b/include/volk/volk_complex.h
@@ -26,22 +26,30 @@
  * - lv_conj - take the conjugate of the complex number
  */
 
-
 #include <volk/volk_common.h>
 
 __VOLK_DECL_BEGIN
 
 #include <complex.h>
-#include <tgmath.h>
 
-typedef char complex lv_8sc_t;
-typedef short complex lv_16sc_t;
-typedef long complex lv_32sc_t;
-typedef long long complex lv_64sc_t;
-typedef float complex lv_32fc_t;
-typedef double complex lv_64fc_t;
+// Obviously, we would love `typedef float complex lv_32fc_t` to work.
+// However, this clashes with C++ definitions.
+// error: expected initializer before ‘lv_32fc_t’
+//    --> typedef float complex lv_32fc_t;
+// https://stackoverflow.com/a/10540302
+
+typedef char _Complex lv_8sc_t;
+typedef short _Complex lv_16sc_t;
+typedef long _Complex lv_32sc_t;
+typedef long long _Complex lv_64sc_t;
+typedef float _Complex lv_32fc_t;
+typedef double _Complex lv_64fc_t;
 
 #define lv_cmake(r, i) ((r) + _Complex_I * (i))
+// We want `_Imaginary_I` to ensure the correct sign.
+// https://en.cppreference.com/w/c/numeric/complex/Imaginary_I
+// It does not compile. Complex numbers are a terribly implemented afterthought.
+// #define lv_cmake(r, i) ((r) + _Imaginary_I * (i))
 
 // When GNUC is available, use the complex extensions.
 // The extensions always return the correct value type.
@@ -60,6 +68,7 @@ typedef double complex lv_64fc_t;
 // with type-generic versions.
 #else /* __GNUC__ */
 
+
 #define lv_creal(x) (creal(x))
 
 #define lv_cimag(x) (cimag(x))

From 40c109f94cb616a1b8980ff6061f056c6563636a Mon Sep 17 00:00:00 2001
From: Johannes Demel <demel@uni-bremen.de>
Date: Mon, 9 Aug 2021 14:10:44 +0200
Subject: [PATCH 03/17] wip: Add example files

Signed-off-by: Johannes Demel <demel@uni-bremen.de>
---
 compile.sh |  3 +++
 main.c     | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 main.cc    | 34 ++++++++++++++++++++++++++++++++
 3 files changed, 95 insertions(+)
 create mode 100755 compile.sh
 create mode 100644 main.c
 create mode 100644 main.cc

diff --git a/compile.sh b/compile.sh
new file mode 100755
index 000000000..f9f19bca8
--- /dev/null
+++ b/compile.sh
@@ -0,0 +1,3 @@
+gcc -std=c17 -I/home/johannes/src/volk/include -x c main.c -o mainvolkgnuc -lm
+clang -std=c17 -I/home/johannes/src/volk/include -x c main.c -o mainvolkclangc -lm
+g++ -std=c++17 -I/home/johannes/src/volk/include -x c++ main.cc -o mainvolkcpp -lm -lfmt
\ No newline at end of file
diff --git a/main.c b/main.c
new file mode 100644
index 000000000..dd7e9cdf1
--- /dev/null
+++ b/main.c
@@ -0,0 +1,58 @@
+
+#include <stdio.h>
+#include <volk/volk_complex.h>
+
+int main(int argc, char* argv[])
+{
+    lv_32fc_t fc_cpl[4];
+    printf("float=%lu, complex float=%lu, complex float array[4]=%lu\n",
+           sizeof(float),
+           sizeof(lv_32fc_t),
+           sizeof(fc_cpl));
+
+    for (int i = 0; i < 4; i++) {
+        fc_cpl[i] = (i + 3) + I * (i + 8);
+
+        fc_cpl[i] = lv_cmake(i + 3, i + 8);
+    }
+    for (int i = 0; i < 4; i++) {
+        lv_32fc_t val = fc_cpl[i];
+        lv_32fc_t cval = conj(val);
+        lv_32fc_t gval = ~val;
+        lv_32fc_t mult = val * val;
+        printf("val      = %+.1f%+.1fj\n", creal(val), cimag(val));
+        printf("conj(val)= %+.1f%+.1fj\n", creal(cval), cimag(cval));
+        printf("gcc: ~val= %+.1f%+.1fj\n", creal(gval), cimag(gval));
+        printf("val*val  = %+.1f%+.1fj\n", creal(mult), cimag(mult));
+    }
+
+    lv_8sc_t sc_cpl[4];
+    printf("\n\nchar=%lu, complex char=%lu, complex char array[4]=%lu\n",
+           sizeof(char),
+           sizeof(lv_8sc_t),
+           sizeof(sc_cpl));
+
+    for (int i = 0; i < 4; i++) {
+        // lv_8sc_t value = (i + 3) + I * (i + 8);
+        // printf("value=%+hhi%+hhij\n", creal(value), cimag(value));
+        // sc_cpl[i] = (i + 3) + I * (i + 8);
+        sc_cpl[i] = lv_cmake(i + 3, i + 8);
+        // printf("%i + j %i\n", creal(sc_cpl[i]), cimag(sc_cpl[i]));
+    }
+    for (int i = 0; i < 4; i++) {
+        lv_8sc_t val = sc_cpl[i];
+        lv_8sc_t cval = conj(val);
+        // lv_8sc_t cval = lv_cmake(creal(val), -cimag(val));
+        lv_8sc_t gval = ~val;
+        lv_8sc_t mult = val * val;
+        printf("val      = %+hhi%+hhij\n", __real__ val, __imag__ val);
+        printf("conj(val)= %+hhi%+hhij\n", __real__ cval, __imag__ cval);
+        printf("gcc: ~val= %+hhi%+hhij\n", __real__ gval, __imag__ gval);
+        printf("val*val  = %+hhi%+hhij\n", __real__ mult, __imag__ mult);
+    }
+
+    //     char* values = (char*) sc_cpl;
+    //   for (int i = 0; i < 8; i++) {
+    //     printf("%hhi\n", values[i]);
+    //   }
+}
\ No newline at end of file
diff --git a/main.cc b/main.cc
new file mode 100644
index 000000000..a72489861
--- /dev/null
+++ b/main.cc
@@ -0,0 +1,34 @@
+#include <fmt/core.h>
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include <complex>
+
+
+#include <volk/volk_complex.h>
+
+
+int main(int argc, char* argv[])
+{
+    lv_32fc_t fc_cpl[4];
+    fmt::print("float={}, complex float={}, complex float array[4]={}\n",
+               sizeof(float),
+               sizeof(lv_32fc_t),
+               sizeof(fc_cpl));
+
+
+    std::vector<lv_32fc_t> vec(4);
+    for (int i = 0; i < 4; i++) {
+        auto foo = std::complex<float>( (i + 3), (i + 8) );
+        fmt::print("std::complex: ({:+.1f}{:+.1f}j)\n", std::real(foo), std::imag(foo));
+        lv_32fc_t bar = lv_32fc_t{5, 6};
+        vec.at(i) = bar;
+        
+    }
+
+    for(auto &val : vec){
+        float r = __real__ val;
+        float i = __imag__ val;
+        fmt::print("sizeof(val)={}, {:+.1f}{:+.1f}j\n", sizeof(val), r, i);
+    }
+}
\ No newline at end of file

From 59b58a36dc2666b5080f7e8bea1b603d3fcfc3db Mon Sep 17 00:00:00 2001
From: Johannes Demel <demel@uni-bremen.de>
Date: Sat, 28 Aug 2021 17:03:21 +0200
Subject: [PATCH 04/17] complex: Add example interface for C++

Signed-off-by: Johannes Demel <demel@uni-bremen.de>
---
 compile.sh                  |  6 ++--
 include/volk/volk_complex.h | 12 ++++++--
 main.c                      | 42 +++++++++++++++++++++++++-
 main.cc                     | 60 +++++++++++++++++++++++++++++++++----
 4 files changed, 107 insertions(+), 13 deletions(-)

diff --git a/compile.sh b/compile.sh
index f9f19bca8..0af7ef7c1 100755
--- a/compile.sh
+++ b/compile.sh
@@ -1,3 +1,3 @@
-gcc -std=c17 -I/home/johannes/src/volk/include -x c main.c -o mainvolkgnuc -lm
-clang -std=c17 -I/home/johannes/src/volk/include -x c main.c -o mainvolkclangc -lm
-g++ -std=c++17 -I/home/johannes/src/volk/include -x c++ main.cc -o mainvolkcpp -lm -lfmt
\ No newline at end of file
+gcc -std=c17 -I/home/johannes/src/volk/include -I/home/johannes/src/volk/build/include -L/home/johannes/src/volk/build/lib -x c main.c -o mainvolkgnuc -lm -lvolk
+clang -std=c17 -I/home/johannes/src/volk/include -I/home/johannes/src/volk/build/include -L/home/johannes/src/volk/build/lib -x c main.c -o mainvolkclangc -lm -lvolk
+g++ -std=c++17 -I/home/johannes/src/volk/include -I/home/johannes/src/volk/build/include -L/home/johannes/src/volk/build/lib -x c++ main.cc -o mainvolkcpp -lm -lfmt -lvolk
\ No newline at end of file
diff --git a/include/volk/volk_complex.h b/include/volk/volk_complex.h
index 601625ae6..b872216ad 100644
--- a/include/volk/volk_complex.h
+++ b/include/volk/volk_complex.h
@@ -26,9 +26,11 @@
  * - lv_conj - take the conjugate of the complex number
  */
 
-#include <volk/volk_common.h>
+// #include <volk/volk_common.h>
 
-__VOLK_DECL_BEGIN
+#if defined(__cplusplus)
+extern "C" {
+#endif
 
 #include <complex.h>
 
@@ -77,6 +79,10 @@ typedef double _Complex lv_64fc_t;
 
 #endif /* __GNUC__ */
 
-__VOLK_DECL_END
+// __VOLK_DECL_END
+
+#if defined(__cplusplus)
+}
+#endif
 
 #endif /* INCLUDE_VOLK_COMPLEX_H */
diff --git a/main.c b/main.c
index dd7e9cdf1..0d93eb85b 100644
--- a/main.c
+++ b/main.c
@@ -1,9 +1,49 @@
 
 #include <stdio.h>
-#include <volk/volk_complex.h>
+#include <math.h>
+#include <volk/volk.h>
+
+void function_test(int num_points)
+{
+    unsigned int alignment = volk_get_alignment();
+    lv_32fc_t* in0 = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t) * num_points, alignment);
+    lv_32fc_t* in1 = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t) * num_points, alignment);
+    lv_32fc_t* out = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t) * num_points, alignment);
+
+    for (unsigned int ii = 0; ii < num_points; ++ii) {
+        // Generate two tones
+        float real_1 = cosf(0.3f * (float)ii);
+        float imag_1 = sinf(0.3f * (float)ii);
+        in0[ii] = lv_cmake(real_1, imag_1);
+        float real_2 = cosf(0.1f * (float)ii);
+        float imag_2 = sinf(0.1f * (float)ii);
+        in1[ii] = lv_cmake(real_2, imag_2);
+    }
+
+    volk_32fc_x2_multiply_32fc(out, in0, in1, num_points);
+
+    for (unsigned int ii = 0; ii < num_points; ++ii) {
+        lv_32fc_t v0 = in0[ii];
+        lv_32fc_t v1 = in1[ii];
+        lv_32fc_t o = out[ii];
+        printf("in0=(%+.1f%+.1fj), in1=(%+.1f%+.1fj), out=(%+.1f%+.1fj)\n",
+               creal(v0),
+               cimag(v0),
+               creal(v1),
+               cimag(v1),
+               creal(o),
+               cimag(o));
+    }
+
+    volk_free(in0);
+    volk_free(in1);
+    volk_free(out);
+}
 
 int main(int argc, char* argv[])
 {
+    function_test(32);
+
     lv_32fc_t fc_cpl[4];
     printf("float=%lu, complex float=%lu, complex float array[4]=%lu\n",
            sizeof(float),
diff --git a/main.cc b/main.cc
index a72489861..83117827b 100644
--- a/main.cc
+++ b/main.cc
@@ -1,15 +1,64 @@
 #include <fmt/core.h>
+#include <cmath>
+#include <complex>
 #include <cstdlib>
 #include <iostream>
 #include <vector>
-#include <complex>
+
+typedef std::complex<float> cmplxf;
+
+#include <volk/volk.h>
+#include <volk/volk_alloc.hh>
 
 
-#include <volk/volk_complex.h>
+void cppmultiply(volk::vector<cmplxf>& result,
+                 volk::vector<cmplxf>& input0,
+                 volk::vector<cmplxf>& input1)
+{
+    volk_32fc_x2_multiply_32fc(reinterpret_cast<lv_32fc_t*>(result.data()),
+                               reinterpret_cast<lv_32fc_t*>(input0.data()),
+                               reinterpret_cast<lv_32fc_t*>(input1.data()),
+                               input0.size());
+}
+
+void function_test(int num_points)
+{
+    volk::vector<cmplxf> in0(num_points);
+    volk::vector<cmplxf> in1(num_points);
+    volk::vector<cmplxf> out(num_points);
+
+    for (unsigned int ii = 0; ii < num_points; ++ii) {
+        // Generate two tones
+        float real_1 = std::cos(0.3f * (float)ii);
+        float imag_1 = std::sin(0.3f * (float)ii);
+        in0[ii] = cmplxf(real_1, imag_1);
+        float real_2 = std::cos(0.1f * (float)ii);
+        float imag_2 = std::sin(0.1f * (float)ii);
+        in1[ii] = cmplxf(real_2, imag_2);
+    }
+
+    cppmultiply(out, in0, in1);
+
+    for (int ii = 0; ii < num_points; ++ii) {
+        cmplxf v0 = in0[ii];
+        cmplxf v1 = in1[ii];
+        cmplxf o = out[ii];
+
+        fmt::print(
+            "in0=({:+.1f}{:+.1f}j), in1=({:+.1f}{:+.1f}j), out=({:+.1f}{:+.1f}j)\n",
+            std::real(v0),
+            std::imag(v0),
+            std::real(v1),
+            std::imag(v1),
+            std::real(o),
+            std::imag(o));
+    }
+}
 
 
 int main(int argc, char* argv[])
 {
+    function_test(32);
     lv_32fc_t fc_cpl[4];
     fmt::print("float={}, complex float={}, complex float array[4]={}\n",
                sizeof(float),
@@ -19,14 +68,13 @@ int main(int argc, char* argv[])
 
     std::vector<lv_32fc_t> vec(4);
     for (int i = 0; i < 4; i++) {
-        auto foo = std::complex<float>( (i + 3), (i + 8) );
+        auto foo = std::complex<float>((i + 3), (i + 8));
         fmt::print("std::complex: ({:+.1f}{:+.1f}j)\n", std::real(foo), std::imag(foo));
-        lv_32fc_t bar = lv_32fc_t{5, 6};
+        lv_32fc_t bar = lv_32fc_t{ 5, 6 };
         vec.at(i) = bar;
-        
     }
 
-    for(auto &val : vec){
+    for (auto& val : vec) {
         float r = __real__ val;
         float i = __imag__ val;
         fmt::print("sizeof(val)={}, {:+.1f}{:+.1f}j\n", sizeof(val), r, i);

From 165e103b5bd5cb516f89a497ceafae7d35f2c042 Mon Sep 17 00:00:00 2001
From: Johannes Demel <demel@uni-bremen.de>
Date: Sun, 5 Sep 2021 14:29:51 +0200
Subject: [PATCH 05/17] qa: Update QA code to new interface

With this commit, our QA code should work again. However, it still
relies on C types in the C++ domain. We probably want to add a C++
wrapper around C VOLK.

The idea:
- add `volk.hh`
- Include C++ magic in here.
- Find idiomatic, modern C++ interface definition.

Signed-off-by: Johannes Demel <demel@uni-bremen.de>
---
 lib/kernel_tests.h | 3 ++-
 lib/qa_utils.cc    | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/lib/kernel_tests.h b/lib/kernel_tests.h
index dc3484127..3ad197fca 100644
--- a/lib/kernel_tests.h
+++ b/lib/kernel_tests.h
@@ -42,7 +42,8 @@ std::vector<volk_test_case_t> init_test_list(volk_test_params_t test_params)
     test_params_power.set_scalar(2.5);
 
     volk_test_params_t test_params_rotator(test_params);
-    test_params_rotator.set_scalar(std::polar(1.0f, 0.1f));
+    auto rotator_value = std::polar(1.0f, 0.1f);
+    test_params_rotator.set_scalar(lv_32fc_t{rotator_value.real(), rotator_value.imag()});
     test_params_rotator.set_tol(1e-3);
 
     std::vector<volk_test_case_t> test_cases;
diff --git a/lib/qa_utils.cc b/lib/qa_utils.cc
index 378d544d1..1f0a47162 100644
--- a/lib/qa_utils.cc
+++ b/lib/qa_utils.cc
@@ -636,7 +636,7 @@ bool run_volk_tests(volk_func_desc_t desc,
                 } else {
                     run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func),
                                         test_data[i],
-                                        scalar.real(),
+                                        __real__ scalar,
                                         vlen,
                                         iter,
                                         arch_list[i]);
@@ -659,7 +659,7 @@ bool run_volk_tests(volk_func_desc_t desc,
                 } else {
                     run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func),
                                         test_data[i],
-                                        scalar.real(),
+                                        __real__ scalar,
                                         vlen,
                                         iter,
                                         arch_list[i]);
@@ -682,7 +682,7 @@ bool run_volk_tests(volk_func_desc_t desc,
                 } else {
                     run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func),
                                         test_data[i],
-                                        scalar.real(),
+                                        __real__ scalar,
                                         vlen,
                                         iter,
                                         arch_list[i]);

From 18e2ebe027777bf409b6e42c8943fc60abdd3dd0 Mon Sep 17 00:00:00 2001
From: Johannes Demel <demel@uni-bremen.de>
Date: Sun, 5 Sep 2021 14:42:24 +0200
Subject: [PATCH 06/17] ci: Fix formatting in kernel_tests.h

Signed-off-by: Johannes Demel <demel@uni-bremen.de>
---
 lib/kernel_tests.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/kernel_tests.h b/lib/kernel_tests.h
index 3ad197fca..dbac3084d 100644
--- a/lib/kernel_tests.h
+++ b/lib/kernel_tests.h
@@ -43,7 +43,8 @@ std::vector<volk_test_case_t> init_test_list(volk_test_params_t test_params)
 
     volk_test_params_t test_params_rotator(test_params);
     auto rotator_value = std::polar(1.0f, 0.1f);
-    test_params_rotator.set_scalar(lv_32fc_t{rotator_value.real(), rotator_value.imag()});
+    test_params_rotator.set_scalar(
+        lv_32fc_t{ rotator_value.real(), rotator_value.imag() });
     test_params_rotator.set_tol(1e-3);
 
     std::vector<volk_test_case_t> test_cases;

From 20566d88acf97d2648f05a3677d467b305086c1f Mon Sep 17 00:00:00 2001
From: Johannes Demel <demel@uni-bremen.de>
Date: Sun, 5 Sep 2021 14:43:46 +0200
Subject: [PATCH 07/17] ci: Fix formatting in main.c (temp file)

Signed-off-by: Johannes Demel <demel@uni-bremen.de>
---
 main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.c b/main.c
index 0d93eb85b..8bebd8af8 100644
--- a/main.c
+++ b/main.c
@@ -1,6 +1,6 @@
 
-#include <stdio.h>
 #include <math.h>
+#include <stdio.h>
 #include <volk/volk.h>
 
 void function_test(int num_points)

From 54f34b2949586adb73c91946ef13e0be6eb42d59 Mon Sep 17 00:00:00 2001
From: Johannes Demel <demel@uni-bremen.de>
Date: Sun, 5 Sep 2021 14:51:58 +0200
Subject: [PATCH 08/17] ci: Add ppc64le and s390x tests to TravisCI

We removed these tests because of our broken interface. Now, we aim to
fix this issue and thus, expect it to work again.

Signed-off-by: Johannes Demel <demel@uni-bremen.de>
---
 .travis.yml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index a08d2351c..aac635553 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -56,6 +56,16 @@ matrix:
       env: MATRIX_EVAL="CC=clang && CXX=clang++"
       addons: {apt: {packages: [*common_packages, ]}}
 
+    - name: Linux s390x GCC 7
+      arch: s390x
+      env: MATRIX_EVAL="CC=gcc-7 && CXX=g++-7"
+      addons: {apt: {packages: [*common_packages, ]}}
+
+    - name: Linux ppc64le GCC 7
+      arch: ppc64le
+      env: MATRIX_EVAL="CC=gcc-7 && CXX=g++-7"
+      addons: {apt: {packages: [*common_packages, ]}}
+      
 script:
   - eval "${MATRIX_EVAL}"
   - lscpu

From 32b4552932fec8644c21260dfac0da561f4be72d Mon Sep 17 00:00:00 2001
From: Johannes Demel <demel@uni-bremen.de>
Date: Sun, 5 Sep 2021 16:03:14 +0200
Subject: [PATCH 09/17] api: Develop C++ wrapper API

The VOLK C API should be in place.
We stick with the current API, except that we always require it to be C.
No more C and C++ mix and match.

This allows us to be more open about the C++ API. Here, we write
wrappers around our C functions.

Signed-off-by: Johannes Demel <demel@uni-bremen.de>
---
 main.cc | 166 ++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 142 insertions(+), 24 deletions(-)

diff --git a/main.cc b/main.cc
index 83117827b..10c9389f5 100644
--- a/main.cc
+++ b/main.cc
@@ -1,48 +1,136 @@
 #include <fmt/core.h>
+#include <algorithm>
 #include <cmath>
 #include <complex>
+#include <cstdint>
 #include <cstdlib>
 #include <iostream>
 #include <vector>
 
-typedef std::complex<float> cmplxf;
+/*
+ * These type definitions are in line with our C definitions.
+ *
+ * Alternativele, we could go with the NumPy scheme:
+ * np.complex64 aka std::complex<float>
+ * np.complex128 aka std::complex<double>
+ * The underlying types are probably defined like Ctypes.
+ * This is about the idea.
+ */
+typedef std::complex<int8_t> ic8;
+typedef std::complex<int16_t> ic16;
+typedef std::complex<int32_t> ic32;
+typedef std::complex<int64_t> ic64;
+typedef std::complex<float> fc32;
+typedef std::complex<double> fc64;
 
 #include <volk/volk.h>
 #include <volk/volk_alloc.hh>
 
+/* C++ Interface requirements
+ *
+ * 1. Make C++ STL types usable `std::vector`, `std::complex`.
+ * 2. Make aligned vectors aka `volk::vector` usable.
+ * 3. Allow call-by-pointer for GR buffer interface usage etc.
+ *
+ * These requirements result in at least 3 functions.
+ * We might want to think about fancy new C++ features e.g. concepts to consolidate these.
+ */
 
-void cppmultiply(volk::vector<cmplxf>& result,
-                 volk::vector<cmplxf>& input0,
-                 volk::vector<cmplxf>& input1)
+namespace volk {
+
+/*
+ * Start of wrapper for volk_32fc_s32fc_multiply_32fc
+ */
+void cppscalarmultiply_pointers(fc32* result,
+                                const fc32* input0,
+                                const fc32 scalar,
+                                const unsigned int num_points)
+{
+    volk_32fc_s32fc_multiply_32fc(reinterpret_cast<lv_32fc_t*>(result),
+                                  reinterpret_cast<const lv_32fc_t*>(input0),
+                                  lv_32fc_t{ scalar.real(), scalar.imag() },
+                                  num_points);
+}
+
+void cppscalarmultiply_stl_vector(std::vector<fc32>& result,
+                                  const std::vector<fc32>& input0,
+                                  const fc32 scalar)
+{
+    unsigned int num_points = std::min({ result.size(), input0.size() });
+    cppscalarmultiply_pointers(result.data(), input0.data(), scalar, num_points);
+}
+
+void cppscalarmultiply_aligned_vector(volk::vector<fc32>& result,
+                                      const volk::vector<fc32>& input0,
+                                      const fc32 scalar)
+{
+    unsigned int num_points = std::min({ result.size(), input0.size() });
+    cppscalarmultiply_pointers(result.data(), input0.data(), scalar, num_points);
+}
+
+/*
+ * Start of wrapper for volk_32fc_x2_multiply_32fc
+ */
+void cppmultiply_pointers(fc32* result,
+                          const fc32* input0,
+                          const fc32* input1,
+                          const unsigned int num_points)
+{
+    volk_32fc_x2_multiply_32fc(reinterpret_cast<lv_32fc_t*>(result),
+                               reinterpret_cast<const lv_32fc_t*>(input0),
+                               reinterpret_cast<const lv_32fc_t*>(input1),
+                               num_points);
+}
+
+void cppmultiply_stl_vector(std::vector<fc32>& result,
+                            const std::vector<fc32>& input0,
+                            const std::vector<fc32>& input1)
+{
+    unsigned int num_points = std::min({ result.size(), input0.size(), input1.size() });
+    cppmultiply_pointers(result.data(), input0.data(), input1.data(), num_points);
+}
+
+void cppmultiply_aligned_vector(volk::vector<fc32>& result,
+                                const volk::vector<fc32>& input0,
+                                const volk::vector<fc32>& input1)
 {
-    volk_32fc_x2_multiply_32fc(reinterpret_cast<lv_32fc_t*>(result.data()),
-                               reinterpret_cast<lv_32fc_t*>(input0.data()),
-                               reinterpret_cast<lv_32fc_t*>(input1.data()),
-                               input0.size());
+    unsigned int num_points = std::min({ result.size(), input0.size(), input1.size() });
+    cppmultiply_pointers(result.data(), input0.data(), input1.data(), num_points);
 }
 
-void function_test(int num_points)
+} // namespace volk
+
+
+std::vector<fc32> fill_vector(int num_points, float step_value)
 {
-    volk::vector<cmplxf> in0(num_points);
-    volk::vector<cmplxf> in1(num_points);
-    volk::vector<cmplxf> out(num_points);
+    std::vector<fc32> vec(num_points);
 
     for (unsigned int ii = 0; ii < num_points; ++ii) {
-        // Generate two tones
-        float real_1 = std::cos(0.3f * (float)ii);
-        float imag_1 = std::sin(0.3f * (float)ii);
-        in0[ii] = cmplxf(real_1, imag_1);
-        float real_2 = std::cos(0.1f * (float)ii);
-        float imag_2 = std::sin(0.1f * (float)ii);
-        in1[ii] = cmplxf(real_2, imag_2);
+        float real_1 = std::cos(step_value * (float)ii);
+        float imag_1 = std::sin(step_value * (float)ii);
+        vec[ii] = fc32(real_1, imag_1);
     }
+    return vec;
+}
+
+void function_test_vectors(int num_points)
+{
+    std::vector<fc32> uin0(fill_vector(num_points, 0.3f));
+    volk::vector<fc32> in0(uin0.begin(), uin0.end());
+    std::vector<fc32> uin1(fill_vector(num_points, 0.1f));
+    volk::vector<fc32> in1(uin1.begin(), uin1.end());
+    std::vector<fc32> uout(num_points);
+    volk::vector<fc32> out(num_points);
+
+    volk::cppmultiply_aligned_vector(out, in0, in1);
 
-    cppmultiply(out, in0, in1);
+    volk::cppmultiply_stl_vector(uout, uin0, uin1);
+    volk::cppmultiply_pointers(uout.data(), in0.data(), in1.data(), num_points);
 
     for (int ii = 0; ii < num_points; ++ii) {
-        cmplxf v0 = in0[ii];
-        cmplxf v1 = in1[ii];
-        cmplxf o = out[ii];
+        fc32 v0 = in0[ii];
+        fc32 v1 = in1[ii];
+        fc32 o = out[ii];
 
         fmt::print(
             "in0=({:+.1f}{:+.1f}j), in1=({:+.1f}{:+.1f}j), out=({:+.1f}{:+.1f}j)\n",
@@ -55,10 +143,40 @@ void function_test(int num_points)
     }
 }
 
+void function_test_with_scalar(int num_points)
+{
+    std::vector<fc32> uin0(fill_vector(num_points, 0.3f));
+    volk::vector<fc32> in0(uin0.begin(), uin0.end());
+    fc32 scalar{ 0.5f, 4.3f };
+    std::vector<fc32> uout(num_points);
+    volk::vector<fc32> out(num_points);
+
+    volk::cppscalarmultiply_aligned_vector(out, in0, scalar);
+
+    volk::cppscalarmultiply_stl_vector(uout, uin0, scalar);
+    volk::cppscalarmultiply_pointers(uout.data(), in0.data(), scalar, num_points);
+
+    fmt::print("scalar=({:+.1f}{:+.1f}j)\n", std::real(scalar), std::imag(scalar));
+    for (int ii = 0; ii < num_points; ++ii) {
+        fc32 v0 = in0[ii];
+        fc32 o = out[ii];
+
+        fmt::print("in0=({:+.1f}{:+.1f}j), out=({:+.1f}{:+.1f}j)\n",
+                   std::real(v0),
+                   std::imag(v0),
+                   std::real(o),
+                   std::imag(o));
+    }
+}
 
 int main(int argc, char* argv[])
 {
-    function_test(32);
+    fmt::print("Vector function test\n");
+    function_test_vectors(16);
+
+    fmt::print("Scalar function test\n");
+    function_test_with_scalar(16);
+
     lv_32fc_t fc_cpl[4];
     fmt::print("float={}, complex float={}, complex float array[4]={}\n",
                sizeof(float),

From ae7e1039d23e5d7ef697dde86fd97f8ca6f97659 Mon Sep 17 00:00:00 2001
From: Johannes Demel <demel@uni-bremen.de>
Date: Sun, 26 Sep 2021 14:55:21 +0200
Subject: [PATCH 10/17] interface: Declare extern C lib

Now we declare extern C for all compilers and hope this works.

Signed-off-by: Johannes Demel <demel@uni-bremen.de>
---
 include/volk/volk_common.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/volk/volk_common.h b/include/volk/volk_common.h
index 70b94cbdd..dfb520407 100644
--- a/include/volk/volk_common.h
+++ b/include/volk/volk_common.h
@@ -85,8 +85,9 @@
 ////////////////////////////////////////////////////////////////////////
 // C-linkage declaration macros
 // FIXME: due to the usage of complex.h, require gcc for c-linkage
+// Hope and p*** that extern "C" works for all relevant compilers nowadays.
 ////////////////////////////////////////////////////////////////////////
-#if defined(__cplusplus) && (__GNUC__)
+#if defined(__cplusplus)
 #define __VOLK_DECL_BEGIN extern "C" {
 #define __VOLK_DECL_END }
 #else

From f74b580402c649ff6875561f7f895d36f2eaddb4 Mon Sep 17 00:00:00 2001
From: Johannes Demel <demel@uni-bremen.de>
Date: Sun, 26 Sep 2021 15:10:04 +0200
Subject: [PATCH 11/17] interface: Declare more extern C

Make sure everything that is supposed to have C-linkage does have
C-linkage.

Signed-off-by: Johannes Demel <demel@uni-bremen.de>
---
 tmpl/volk_typedefs.tmpl.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tmpl/volk_typedefs.tmpl.h b/tmpl/volk_typedefs.tmpl.h
index 2600c642c..7307bf4b3 100644
--- a/tmpl/volk_typedefs.tmpl.h
+++ b/tmpl/volk_typedefs.tmpl.h
@@ -10,6 +10,10 @@
 #ifndef INCLUDED_VOLK_TYPEDEFS
 #define INCLUDED_VOLK_TYPEDEFS
 
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
 #include <inttypes.h>
 #include <volk/volk_complex.h>
 
@@ -17,4 +21,8 @@
 typedef void (*${kern.pname})(${kern.arglist_types});
 %endfor
 
+#if defined(__cplusplus)
+}
+#endif
+
 #endif /*INCLUDED_VOLK_TYPEDEFS*/

From fa675d507bf26a0b6889392837825d5bd0816340 Mon Sep 17 00:00:00 2001
From: Johannes Demel <demel@uni-bremen.de>
Date: Sun, 26 Sep 2021 16:26:39 +0200
Subject: [PATCH 12/17] interface: Add review feedback

Use `__VOLK_DECL_BEGIN` etc. again. It seems to work now.

Signed-off-by: Johannes Demel <demel@uni-bremen.de>
---
 include/volk/volk_common.h  |  2 +-
 include/volk/volk_complex.h | 11 +++--------
 lib/volk_rank_archs.h       | 10 ++++------
 tmpl/volk_typedefs.tmpl.h   |  6 ------
 4 files changed, 8 insertions(+), 21 deletions(-)

diff --git a/include/volk/volk_common.h b/include/volk/volk_common.h
index dfb520407..d7bde1c5d 100644
--- a/include/volk/volk_common.h
+++ b/include/volk/volk_common.h
@@ -85,7 +85,7 @@
 ////////////////////////////////////////////////////////////////////////
 // C-linkage declaration macros
 // FIXME: due to the usage of complex.h, require gcc for c-linkage
-// Hope and p*** that extern "C" works for all relevant compilers nowadays.
+// Hope that extern "C" works for all relevant compilers nowadays.
 ////////////////////////////////////////////////////////////////////////
 #if defined(__cplusplus)
 #define __VOLK_DECL_BEGIN extern "C" {
diff --git a/include/volk/volk_complex.h b/include/volk/volk_complex.h
index b872216ad..d65d3a3d6 100644
--- a/include/volk/volk_complex.h
+++ b/include/volk/volk_complex.h
@@ -26,11 +26,9 @@
  * - lv_conj - take the conjugate of the complex number
  */
 
-// #include <volk/volk_common.h>
+#include <volk/volk_common.h>
 
-#if defined(__cplusplus)
-extern "C" {
-#endif
+__VOLK_DECL_BEGIN
 
 #include <complex.h>
 
@@ -79,10 +77,7 @@ typedef double _Complex lv_64fc_t;
 
 #endif /* __GNUC__ */
 
-// __VOLK_DECL_END
+__VOLK_DECL_END
 
-#if defined(__cplusplus)
-}
-#endif
 
 #endif /* INCLUDE_VOLK_COMPLEX_H */
diff --git a/lib/volk_rank_archs.h b/lib/volk_rank_archs.h
index 0a6c2e117..e8ae1a3df 100644
--- a/lib/volk_rank_archs.h
+++ b/lib/volk_rank_archs.h
@@ -12,10 +12,9 @@
 
 #include <stdbool.h>
 #include <stdlib.h>
+#include <volk/volk_common.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
+__VOLK_DECL_BEGIN
 
 int volk_get_index(const char* impl_names[], // list of implementations by name
                    const size_t n_impls,     // number of implementations available
@@ -30,7 +29,6 @@ int volk_rank_archs(const char* kern_name,    // name of the kernel to rank
                     const bool align          // if false, filter aligned implementations
 );
 
-#ifdef __cplusplus
-}
-#endif
+__VOLK_DECL_END
+
 #endif /*INCLUDED_VOLK_RANK_ARCHS_H*/
diff --git a/tmpl/volk_typedefs.tmpl.h b/tmpl/volk_typedefs.tmpl.h
index 7307bf4b3..d60e54aa0 100644
--- a/tmpl/volk_typedefs.tmpl.h
+++ b/tmpl/volk_typedefs.tmpl.h
@@ -10,9 +10,6 @@
 #ifndef INCLUDED_VOLK_TYPEDEFS
 #define INCLUDED_VOLK_TYPEDEFS
 
-#if defined(__cplusplus)
-extern "C" {
-#endif
 
 #include <inttypes.h>
 #include <volk/volk_complex.h>
@@ -21,8 +18,5 @@ extern "C" {
 typedef void (*${kern.pname})(${kern.arglist_types});
 %endfor
 
-#if defined(__cplusplus)
-}
-#endif
 
 #endif /*INCLUDED_VOLK_TYPEDEFS*/

From 1fdf913c51ac5f8b81a4ba84eeeb181448de6c51 Mon Sep 17 00:00:00 2001
From: Johannes Demel <demel@uni-bremen.de>
Date: Sun, 26 Sep 2021 16:27:38 +0200
Subject: [PATCH 13/17] interface: Add feedback for temp files

`main.c`, `main.cc` and `compile.sh` are temporary files that are
supposed to be removed, once the new interface is stabilized. In order
to remove distractions, these files should follow best practices.

Signed-off-by: Johannes Demel <demel@uni-bremen.de>
---
 main.c  | 2 +-
 main.cc | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/main.c b/main.c
index 8bebd8af8..de3e69a5b 100644
--- a/main.c
+++ b/main.c
@@ -95,4 +95,4 @@ int main(int argc, char* argv[])
     //   for (int i = 0; i < 8; i++) {
     //     printf("%hhi\n", values[i]);
     //   }
-}
\ No newline at end of file
+}
diff --git a/main.cc b/main.cc
index 10c9389f5..59898093d 100644
--- a/main.cc
+++ b/main.cc
@@ -197,4 +197,4 @@ int main(int argc, char* argv[])
         float i = __imag__ val;
         fmt::print("sizeof(val)={}, {:+.1f}{:+.1f}j\n", sizeof(val), r, i);
     }
-}
\ No newline at end of file
+}

From 36292bc5362fd4fbff899b074cb1ebc2c9a638ad Mon Sep 17 00:00:00 2001
From: Johannes Demel <demel@uni-bremen.de>
Date: Sun, 26 Sep 2021 16:36:05 +0200
Subject: [PATCH 14/17] interface: Incorporate feedback for MacOS

Michael's comments to fix MacOS builds are included now. It compiles on
my system too. Hopefully CI etc. passes too.

Signed-off-by: Johannes Demel <demel@uni-bremen.de>
---
 include/volk/volk_complex.h | 4 +---
 tmpl/volk_typedefs.tmpl.h   | 4 ++++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/include/volk/volk_complex.h b/include/volk/volk_complex.h
index d65d3a3d6..d34038eaf 100644
--- a/include/volk/volk_complex.h
+++ b/include/volk/volk_complex.h
@@ -26,12 +26,11 @@
  * - lv_conj - take the conjugate of the complex number
  */
 
+#include <complex.h>
 #include <volk/volk_common.h>
 
 __VOLK_DECL_BEGIN
 
-#include <complex.h>
-
 // Obviously, we would love `typedef float complex lv_32fc_t` to work.
 // However, this clashes with C++ definitions.
 // error: expected initializer before ‘lv_32fc_t’
@@ -79,5 +78,4 @@ typedef double _Complex lv_64fc_t;
 
 __VOLK_DECL_END
 
-
 #endif /* INCLUDE_VOLK_COMPLEX_H */
diff --git a/tmpl/volk_typedefs.tmpl.h b/tmpl/volk_typedefs.tmpl.h
index d60e54aa0..a1dad61a7 100644
--- a/tmpl/volk_typedefs.tmpl.h
+++ b/tmpl/volk_typedefs.tmpl.h
@@ -13,10 +13,14 @@
 
 #include <inttypes.h>
 #include <volk/volk_complex.h>
+#include <volk/volk_common.h>
+
+__VOLK_DECL_BEGIN
 
 %for kern in kernels:
 typedef void (*${kern.pname})(${kern.arglist_types});
 %endfor
 
+__VOLK_DECL_END
 
 #endif /*INCLUDED_VOLK_TYPEDEFS*/

From f74d94b52bcc1303e256361c561a4b55cc048269 Mon Sep 17 00:00:00 2001
From: Johannes Demel <demel@uni-bremen.de>
Date: Sat, 2 Oct 2021 13:10:48 +0200
Subject: [PATCH 15/17] interface: Start MSVC fixes

MSVC does not support C complex numbers (`_Complex`).
It takes a different approach and provides struct types such as
`_Fcomplex` and `_Dcomplex` instead. Thus, we try to fix it with some
additional burden. Let's poke the CI.

Signed-off-by: Johannes Demel <demel@uni-bremen.de>
---
 include/volk/volk_complex.h | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/include/volk/volk_complex.h b/include/volk/volk_complex.h
index d34038eaf..10fa590bd 100644
--- a/include/volk/volk_complex.h
+++ b/include/volk/volk_complex.h
@@ -30,7 +30,7 @@
 #include <volk/volk_common.h>
 
 __VOLK_DECL_BEGIN
-
+#ifndef __STDC_NO_COMPLEX__
 // Obviously, we would love `typedef float complex lv_32fc_t` to work.
 // However, this clashes with C++ definitions.
 // error: expected initializer before ‘lv_32fc_t’
@@ -44,6 +44,37 @@ typedef long long _Complex lv_64sc_t;
 typedef float _Complex lv_32fc_t;
 typedef double _Complex lv_64fc_t;
 
+#else
+// MSVC requires different treatment.
+// https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros?view=msvc-160
+// https://docs.microsoft.com/en-us/cpp/c-runtime-library/complex-math-support?view=msvc-160
+// Refer to `complex.h` in
+// https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk/
+
+typedef _Fcomplex lv_32fc_t;
+typedef _Dcomplex lv_64fc_t;
+
+// typedef char _Complex lv_8sc_t;
+typedef struct lv_8sc_t {
+    char _Val[2];
+} lv_8sc_t;
+
+// typedef short _Complex lv_16sc_t;
+typedef struct lv_16sc_t {
+    short _Val[2];
+} lv_16sc_t;
+
+// typedef long _Complex lv_32sc_t;
+typedef struct lv_32sc_t {
+    long _Val[2];
+} lv_32sc_t;
+
+// typedef long long _Complex lv_64sc_t;
+typedef struct lv_64sc_t {
+    long long _Val[2];
+} lv_64sc_t;
+#endif
+
 #define lv_cmake(r, i) ((r) + _Complex_I * (i))
 // We want `_Imaginary_I` to ensure the correct sign.
 // https://en.cppreference.com/w/c/numeric/complex/Imaginary_I

From 5111d6b0b3eb7d7a6ad9c3f061a852ae876179fc Mon Sep 17 00:00:00 2001
From: Johannes Demel <demel@uni-bremen.de>
Date: Sat, 2 Oct 2021 13:18:44 +0200
Subject: [PATCH 16/17] msvc: Check for _MSC_VER

MSVC requires special treatment. We check for it now.

Signed-off-by: Johannes Demel <demel@uni-bremen.de>
---
 include/volk/volk_complex.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/volk/volk_complex.h b/include/volk/volk_complex.h
index 10fa590bd..6ff4f4bc7 100644
--- a/include/volk/volk_complex.h
+++ b/include/volk/volk_complex.h
@@ -30,7 +30,7 @@
 #include <volk/volk_common.h>
 
 __VOLK_DECL_BEGIN
-#ifndef __STDC_NO_COMPLEX__
+#ifndef _MSC_VER
 // Obviously, we would love `typedef float complex lv_32fc_t` to work.
 // However, this clashes with C++ definitions.
 // error: expected initializer before ‘lv_32fc_t’

From 9011ff82bc16cdd4493c2e2420a30a692723b13a Mon Sep 17 00:00:00 2001
From: Johannes Demel <demel@uni-bremen.de>
Date: Sun, 10 Oct 2021 20:29:08 +0200
Subject: [PATCH 17/17] complex: Add more references

It is difficult to get MSVC to compile with complex numbers. Add more
references to sources that explain the reasons.

Signed-off-by: Johannes Demel <demel@uni-bremen.de>
---
 include/volk/volk_complex.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/volk/volk_complex.h b/include/volk/volk_complex.h
index 6ff4f4bc7..96d99405f 100644
--- a/include/volk/volk_complex.h
+++ b/include/volk/volk_complex.h
@@ -50,6 +50,7 @@ typedef double _Complex lv_64fc_t;
 // https://docs.microsoft.com/en-us/cpp/c-runtime-library/complex-math-support?view=msvc-160
 // Refer to `complex.h` in
 // https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk/
+// https://github.com/microsoft/STL/blob/main/stl/inc/complex
 
 typedef _Fcomplex lv_32fc_t;
 typedef _Dcomplex lv_64fc_t;