Skip to content

Commit

Permalink
Merge pull request #38 from DCM-UPB/templnet
Browse files Browse the repository at this point in the history
Templnet
  • Loading branch information
Ithanil authored Jul 23, 2019
2 parents 35594c6 + e30c604 commit 856b3c3
Show file tree
Hide file tree
Showing 143 changed files with 2,479 additions and 363 deletions.
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ matrix:
- USE_OPENMP="TRUE"

- os: osx
osx_image: xcode10.1
osx_image: xcode11
env:
- MYCXX="g++"
- USE_DOCKER="FALSE"
- USE_GCOV="TRUE"
- USE_OPENMP="FALSE"

- os: osx
osx_image: xcode10.1
osx_image: xcode11
env:
- MYCXX="clang++"
- USE_DOCKER="FALSE"
Expand Down
1 change: 1 addition & 0 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ link_libraries(qnets)
add_executable(bench_actfs_derivs bench_actfs_derivs/main.cpp)
add_executable(bench_actfs_ffprop bench_actfs_ffprop/main.cpp)
add_executable(bench_nunits_ffprop bench_nunits_ffprop/main.cpp)
add_executable(bench_templ_ffprop bench_templ_ffprop/main.cpp)
4 changes: 2 additions & 2 deletions benchmark/bench_actfs_derivs/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#include <iostream>
#include <random>

#include "qnets/actf/ActivationFunctionManager.hpp"
#include "qnets/poly/actf/ActivationFunctionManager.hpp"

#include "FFNNBenchmarks.hpp"

Expand All @@ -13,7 +13,7 @@ void run_single_benchmark(const string &label, const string &actf_id, const doub
pair<double, double> result;
const double time_scale = 1000000000.; //nanoseconds

result = sample_benchmark_actf_derivs(std_actf::provideActivationFunction(actf_id), xdata, neval, nruns, flag_d1, flag_d2, flag_d3, flag_fad);
result = sample_benchmark(benchmark_actf_derivs, nruns, std_actf::provideActivationFunction(actf_id), xdata, neval, flag_d1, flag_d2, flag_d3, flag_fad);
cout << label << ":" << setw(max(1, 11 - static_cast<int>(label.length()))) << setfill(' ') << " " << result.first/neval*time_scale << " +- " << result.second/neval*time_scale << " nanoseconds" << endl;
}

Expand Down
6 changes: 3 additions & 3 deletions benchmark/bench_actfs_ffprop/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
#include <iostream>
#include <random>

#include "qnets/actf/ActivationFunctionManager.hpp"
#include "qnets/io/PrintUtilities.hpp"
#include "qnets/poly/actf/ActivationFunctionManager.hpp"
#include "qnets/poly/io/PrintUtilities.hpp"

#include "FFNNBenchmarks.hpp"

Expand All @@ -14,7 +14,7 @@ void run_single_benchmark(const string &label, FeedForwardNeuralNetwork * const
pair<double, double> result;
const double time_scale = 1000000.; //microseconds

result = sample_benchmark_FFPropagate(ffnn, xdata, neval, nruns);
result = sample_benchmark(benchmark_FFPropagate, nruns, ffnn, xdata, neval);
cout << label << ":" << setw(max(1, 20 - static_cast<int>(label.length()))) << setfill(' ') << " " << result.first/neval*time_scale << " +- " << result.second/neval*time_scale << " microseconds" << endl;
}

Expand Down
4 changes: 2 additions & 2 deletions benchmark/bench_nunits_ffprop/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#include <iostream>
#include <random>

#include "qnets/io/PrintUtilities.hpp"
#include "qnets/poly/io/PrintUtilities.hpp"

#include "FFNNBenchmarks.hpp"

Expand All @@ -13,7 +13,7 @@ void run_single_benchmark(const string &label, FeedForwardNeuralNetwork * const
pair<double, double> result;
const double time_scale = 1000000.; //microseconds

result = sample_benchmark_FFPropagate(ffnn, xdata, neval, nruns);
result = sample_benchmark(benchmark_FFPropagate, nruns, ffnn, xdata, neval);
cout << label << ":" << setw(max(1, 20 - static_cast<int>(label.length()))) << setfill(' ') << " " << result.first/neval*time_scale << " +- " << result.second/neval*time_scale << " microseconds" << endl;
}

Expand Down
134 changes: 134 additions & 0 deletions benchmark/bench_templ_ffprop/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#include <iomanip>
#include <iostream>
#include <random>
#include <memory>

#include "qnets/templ/TemplNet.hpp"
#include "qnets/actf/Sigmoid.hpp"

#include "FFNNBenchmarks.hpp"

using namespace std;

template <class TemplNet>
void run_single_benchmark(const string &label, TemplNet &tnet, const double xdata[], const int neval, const int nruns)
{
pair<double, double> result;
const double time_scale = 1000000.; //microseconds

result = sample_benchmark(benchmark_TemplProp<TemplNet>, nruns, tnet, xdata, neval);
cout << label << ":" << setw(max(1, 20 - static_cast<int>(label.length()))) << setfill(' ') << " " << result.first/neval*time_scale << " +- " << result.second/neval*time_scale << " microseconds" << endl;
}

// Recursion terminator for run_benchmark_netpack: invoked once the parameter
// pack of networks is exhausted; intentionally does nothing.
template <int I>
void run_benchmark_netpack(const double xdata[], const int ndata[], const int xoffset, const int neval[], const int nruns) {}

template <int I, class TNet, class ... Args>
void run_benchmark_netpack(const double xdata[], const int ndata[], const int xoffset, const int neval[], const int nruns, TNet &tnet, Args& ... tnets)
{
using namespace templ;
cout << "FFPropagate benchmark with " << nruns << " runs of " << neval[I] << " FF-Propagations, for a FFNN of shape " << TNet::getNInput() << "x" << TNet::getNUnit(0) << "x" << TNet::getNUnit(1) << "x" << TNet::getNOutput() << " ." << endl;
cout << "=========================================================================================" << endl << endl;
cout << "Benchmark results (time per propagation):" << endl;

tnet.dflags.set(DerivConfig::OFF);
run_single_benchmark("f", tnet, xdata + xoffset, neval[I], nruns);

tnet.dflags.set(DerivConfig::D1);
run_single_benchmark("f+d1", tnet, xdata + xoffset, neval[I], nruns);

tnet.dflags.set(DerivConfig::VD1);
run_single_benchmark("f+vd1", tnet, xdata + xoffset, neval[I], nruns);

tnet.dflags.set(DerivConfig::D1_VD1);
run_single_benchmark("f+d1+vd1", tnet, xdata + xoffset, neval[I], nruns);

tnet.dflags.set(DerivConfig::D12);
run_single_benchmark("f+d1+d2", tnet, xdata + xoffset, neval[I], nruns);

tnet.dflags.set(DerivConfig::D12_VD1);
run_single_benchmark("f+d1+d2+vd1", tnet, xdata + xoffset, neval[I], nruns);

tnet.dflags.set(DerivConfig::D12_VD12);
run_single_benchmark("f+d1+d2+vd1+vd2", tnet, xdata + xoffset, neval[I], nruns);

cout << "=========================================================================================" << endl << endl << endl;

run_benchmark_netpack<I + 1, Args...>(xdata, ndata, xoffset + ndata[I], neval, nruns, tnets...);
}

int main()
{
using namespace templ;

const int neval[3] = {200000, 20000, 1000};
const int nruns = 5;

const int yndim = 1;
constexpr int xndim[3] = {6, 24, 96}, nhu1[3] = {12, 48, 192}, nhu2[3] = {6, 24, 96};

constexpr auto dconf = DerivConfig::D12_VD12; // "allocate" for all derivatives

using RealT = double;

// Small Net
using L1Type_s = LayerConfig<nhu1[0], actf::Sigmoid>;
using L2Type_s = LayerConfig<nhu2[0], actf::Sigmoid>;
using L3Type_s = LayerConfig<yndim, actf::Sigmoid>;
using NetType_s = TemplNet<RealT, dconf, xndim[0], L1Type_s, L2Type_s, L3Type_s>;
auto tnet_s_ptr = std::make_unique<NetType_s>();
auto &tnet_s = *tnet_s_ptr;

// Medium Net
using L1Type_m = LayerConfig<nhu1[1], actf::Sigmoid>;
using L2Type_m = LayerConfig<nhu2[1], actf::Sigmoid>;
using L3Type_m = LayerConfig<yndim, actf::Sigmoid>;
using NetType_m = TemplNet<RealT, dconf, xndim[1], L1Type_m, L2Type_m, L3Type_m>;
auto tnet_m_ptr = std::make_unique<NetType_m>();
auto &tnet_m = *tnet_m_ptr;

// Large Net
using L1Type_l = LayerConfig<nhu1[2], actf::Sigmoid>;
using L2Type_l = LayerConfig<nhu2[2], actf::Sigmoid>;
using L3Type_l = LayerConfig<yndim, actf::Sigmoid>;
using NetType_l = TemplNet<RealT, dconf, xndim[2], L1Type_l, L2Type_l, L3Type_l>;
auto tnet_l_ptr = std::make_unique<NetType_l>();
auto &tnet_l = *tnet_l_ptr;

// Data
int ndata[3], ndata_full = 0;
for (int i = 0; i < 3; ++i) {
ndata[i] = neval[i]*xndim[i];
ndata_full += ndata[i];
}
auto * xdata = new double[ndata_full]; // xndim input data for propagate bench

// generate some random input
random_device rdev;
mt19937_64 rgen;
uniform_real_distribution<double> rd;
rgen = mt19937_64(rdev());
rgen.seed(18984687);
rd = uniform_real_distribution<double>(-sqrt(3.), sqrt(3.)); // uniform with variance 1
for (int i = 0; i < ndata_full; ++i) {
xdata[i] = rd(rgen);
}

for (int i=0; i<tnet_s.getNBeta(); ++i) {
tnet_s.setBeta(i, rd(rgen));
}
for (int i=0; i<tnet_m.getNBeta(); ++i) {
tnet_m.setBeta(i, rd(rgen));
}
for (int i=0; i<tnet_l.getNBeta(); ++i) {
tnet_l.setBeta(i, rd(rgen));
}

// FFPropagate benchmark
run_benchmark_netpack<0>(xdata, ndata, 0, neval, nruns, tnet_s, tnet_m, tnet_l);

delete[] xdata;

return 0;
}

115 changes: 115 additions & 0 deletions benchmark/bench_templ_ffprop/plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
from pylab import *

class benchmark_nunits_ffprop:
    """Parser for FFPropagate benchmark output files.

    Reads a benchmark log and stores the results in self.data, a dict mapping
    a net shape string (e.g. '6x12x6x1') to a dict that maps a derivative
    configuration label (e.g. 'f+d1') to a (value, error) tuple, both in
    microseconds per propagation.
    """

    def __init__(self, filename, label):
        self.label = label
        self.data = {}

        net_shape = None  # shape of the net currently being parsed (None until first header)
        net_data = {}
        with open(filename) as bmfile:
            for line in bmfile:

                lsplit = line.split()

                if len(lsplit) < 5:  # too short to be a header or a result line
                    continue

                if lsplit[0] == 'FFPropagate':
                    if net_shape is not None:
                        self.data[net_shape] = net_data  # store previous net's data

                    net_shape = lsplit[13]  # shape string, e.g. '6x12x6x1'
                    net_data = {}
                    continue

                # result lines look like: "f+d1:  12.3 +- 0.4 microseconds"
                if lsplit[0][0:2] == 'f:' or lsplit[0][0:2] == 'f+':
                    net_data[lsplit[0][:-1]] = (float(lsplit[1]), float(lsplit[3]))

        # store last net's data; guard against files with no benchmark header,
        # which previously raised NameError on net_shape
        if net_shape is not None:
            self.data[net_shape] = net_data


def plot_compare_nets(benchmark_list, **kwargs):
    """Plot every benchmark in one figure, one subplot per benchmark.

    Each subplot shows, for every net shape found in that benchmark, the time
    per propagation (log scale) over the derivative-config labels.
    Returns the created matplotlib figure.
    """
    nbm = len(benchmark_list)
    # take the x-axis labels (derivative configs) from the first net of the first benchmark
    xlabels = benchmark_list[0].data[list(benchmark_list[0].data.keys())[0]].keys()

    fig = figure()
    fig.suptitle('FFPropagate benchmark, comparing different net sizes',fontsize=14)

    itp=0
    for benchmark in benchmark_list:

        itp+=1
        ax = fig.add_subplot(nbm, 1, itp)
        for net in benchmark.data.keys():
            values = [v[0] for v in benchmark.data[net].values()]
            errors = [v[1] for v in benchmark.data[net].values()]
            ax.errorbar(xlabels, values, xerr=None, yerr=errors, **kwargs)

        ax.set_yscale('log')
        ax.set_title(benchmark.label + ' version')
        # raw string: '\m' is an invalid escape sequence in a normal string literal
        ax.set_ylabel(r'Time per propagation [$\mu s$]')
        ax.legend(benchmark.data.keys())

    return fig


def plot_compare_runs(benchmark_list, net_list, width = 0.8, **kwargs):
    # Compare benchmarks against the first (reference) entry of benchmark_list:
    # one subplot per net shape in net_list, with horizontal bars showing time
    # per propagation as a percentage of the reference result (100 == as fast
    # as the reference; lower is faster). Returns the figure, or None if fewer
    # than two benchmarks were given.
    nbm = len(benchmark_list)-1  # number of benchmarks compared against the reference
    if nbm <= 0:
        print('Error: Not enough benchmarks for comparison plot.')
        return None

    bwidth = width/float(nbm)  # height of a single bar within a group of total height `width`
    nnet = len(net_list)
    # y positions of the bar groups, top-to-bottom; with a single compared
    # benchmark the bars are shifted by half a bar height for centering
    if nbm > 1:
        ind = arange(len(benchmark_list[0].data[net_list[0]]), 0, -1)
    else:
        ind = arange(len(benchmark_list[0].data[net_list[0]]), 0, -1) - 0.5*bwidth
    xlabels = benchmark_list[0].data[net_list[0]].keys()

    fig = figure()
    fig.suptitle('FFPropagate benchmark, comparing against ' + benchmark_list[0].label + ' version',fontsize=14)

    itp = 0
    for ita, net in enumerate(net_list):

        itp+=1
        ax = fig.add_subplot(nnet, 1, itp)
        scales = array([100./v[0] for v in benchmark_list[0].data[net].values()]) # we will normalize data to the first benchmark's results
        for itb, benchmark in enumerate(benchmark_list[1:]):
            values = array([v[0] for v in benchmark.data[net].values()])*scales
            errors = array([v[1] for v in benchmark.data[net].values()])*scales
            rects = ax.barh(ind - itb*bwidth, values, bwidth, xerr=errors, **kwargs)
            # annotate each bar with its (truncated) percentage value
            for rect in rects:
                ax.text(1., rect.get_y() + rect.get_height()/2., '%d' % int(rect.get_width()), ha='left', va='center', fontsize=8)

        ax.set_title(net + ' net')
        if ita==len(net_list)-1:  # x label only on the bottom subplot
            ax.set_xlabel('Time per propagation [%]')
        ax.set_xlim([0,200])
        ax.set_yticks(ind - 0.5*(nbm-1)*bwidth)  # center the tick on each bar group
        ax.set_yticklabels(xlabels)
        ax.legend([benchmark.label for benchmark in benchmark_list[1:]])

    return fig

# Script: load every benchmark file given on the command line and plot.

benchmark_list = []
for benchmark_file in sys.argv[1:]:
    try:
        # label comes from the file name, e.g. 'benchmark_poly.out' -> 'poly';
        # also catch IndexError for file names without an underscore, which
        # previously escaped the narrow except(OSError)
        benchmark = benchmark_nunits_ffprop(benchmark_file, benchmark_file.split('_')[1].split('.')[0])
        benchmark_list.append(benchmark)
    except (OSError, IndexError):
        print("Warning: Couldn't load benchmark file " + benchmark_file + "!")

if not benchmark_list:
    print("Error: Not even one benchmark loaded!")
else:
    fig1 = plot_compare_nets(benchmark_list, fmt='o--')
    # plot_compare_runs itself warns and returns None when there is only one benchmark
    fig2 = plot_compare_runs(benchmark_list, ['6x12x6x1', '24x48x24x1', '96x192x96x1'])

    show()
31 changes: 14 additions & 17 deletions benchmark/common/FFNNBenchmarks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include <tuple>

#include "Timer.hpp"
#include "qnets/net/FeedForwardNeuralNetwork.hpp"
#include "qnets/poly/FeedForwardNeuralNetwork.hpp"

inline double benchmark_FFPropagate(FeedForwardNeuralNetwork * const ffnn, const double * const xdata, const int neval)
{
Expand All @@ -19,26 +19,22 @@ inline double benchmark_FFPropagate(FeedForwardNeuralNetwork * const ffnn, const
return timer.elapsed();
}

inline std::pair<double, double> sample_benchmark_FFPropagate(FeedForwardNeuralNetwork * const ffnn, const double * const xdata, const int neval, const int nruns)
template <class TemplNet>
inline double benchmark_TemplProp(TemplNet &tnet, const double xdata[], const int neval)
{
double times[nruns];
double mean = 0., err = 0.;
Timer timer(1.);
const int ninput = tnet.getNInput();

for (int i = 0; i < nruns; ++i) {
times[i] = benchmark_FFPropagate(ffnn, xdata, neval);
mean += times[i];
}
mean /= nruns;
for (int i = 0; i < nruns; ++i) {
err += pow(times[i] - mean, 2);
timer.reset();
for (int i = 0; i < neval; ++i) {
tnet.setInput(xdata + i*ninput, xdata + (i+1)*ninput);
tnet.FFPropagate();
}
err /= (nruns - 1)*nruns; // variance of the mean
err = sqrt(err); // standard error of the mean

const std::pair<double, double> result(mean, err);
return result;
return timer.elapsed();
}


inline double benchmark_actf_derivs(ActivationFunctionInterface * const actf, const double * const xdata, const int neval, const bool flag_d1 = true, const bool flag_d2 = true, const bool flag_d3 = true, const bool flag_fad = true)
{
Timer timer(1.);
Expand All @@ -62,13 +58,14 @@ inline double benchmark_actf_derivs(ActivationFunctionInterface * const actf, co
return timer.elapsed();
}

inline std::pair<double, double> sample_benchmark_actf_derivs(ActivationFunctionInterface * const actf, const double * const xdata, const int neval, const int nruns, const bool flag_d1 = true, const bool flag_d2 = true, const bool flag_d3 = true, const bool flag_fad = true)
template <class BenchT, class ... Args>
inline std::pair<double, double> sample_benchmark(BenchT bench, const int nruns, Args&& ... args)
{
double times[nruns];
double mean = 0., err = 0.;

for (int i = 0; i < nruns; ++i) {
times[i] = benchmark_actf_derivs(actf, xdata, neval, flag_d1, flag_d2, flag_d3, flag_fad);
times[i] = bench(args...);
mean += times[i];
}
mean /= nruns;
Expand Down
Loading

0 comments on commit 856b3c3

Please sign in to comment.