Skip to content

Commit

Permalink
Merge pull request #38 from DCM-UPB/templnet
Browse files Browse the repository at this point in the history
Templnet
  • Loading branch information
Ithanil authored Jul 23, 2019
2 parents 35594c6 + e30c604 commit 856b3c3
Show file tree
Hide file tree
Showing 143 changed files with 2,479 additions and 363 deletions.
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ matrix:
- USE_OPENMP="TRUE"

- os: osx
osx_image: xcode10.1
osx_image: xcode11
env:
- MYCXX="g++"
- USE_DOCKER="FALSE"
- USE_GCOV="TRUE"
- USE_OPENMP="FALSE"

- os: osx
osx_image: xcode10.1
osx_image: xcode11
env:
- MYCXX="clang++"
- USE_DOCKER="FALSE"
Expand Down
1 change: 1 addition & 0 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ link_libraries(qnets)
add_executable(bench_actfs_derivs bench_actfs_derivs/main.cpp)
add_executable(bench_actfs_ffprop bench_actfs_ffprop/main.cpp)
add_executable(bench_nunits_ffprop bench_nunits_ffprop/main.cpp)
add_executable(bench_templ_ffprop bench_templ_ffprop/main.cpp)
4 changes: 2 additions & 2 deletions benchmark/bench_actfs_derivs/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#include <iostream>
#include <random>

#include "qnets/actf/ActivationFunctionManager.hpp"
#include "qnets/poly/actf/ActivationFunctionManager.hpp"

#include "FFNNBenchmarks.hpp"

Expand All @@ -13,7 +13,7 @@ void run_single_benchmark(const string &label, const string &actf_id, const doub
pair<double, double> result;
const double time_scale = 1000000000.; //nanoseconds

result = sample_benchmark_actf_derivs(std_actf::provideActivationFunction(actf_id), xdata, neval, nruns, flag_d1, flag_d2, flag_d3, flag_fad);
result = sample_benchmark(benchmark_actf_derivs, nruns, std_actf::provideActivationFunction(actf_id), xdata, neval, flag_d1, flag_d2, flag_d3, flag_fad);
cout << label << ":" << setw(max(1, 11 - static_cast<int>(label.length()))) << setfill(' ') << " " << result.first/neval*time_scale << " +- " << result.second/neval*time_scale << " nanoseconds" << endl;
}

Expand Down
6 changes: 3 additions & 3 deletions benchmark/bench_actfs_ffprop/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
#include <iostream>
#include <random>

#include "qnets/actf/ActivationFunctionManager.hpp"
#include "qnets/io/PrintUtilities.hpp"
#include "qnets/poly/actf/ActivationFunctionManager.hpp"
#include "qnets/poly/io/PrintUtilities.hpp"

#include "FFNNBenchmarks.hpp"

Expand All @@ -14,7 +14,7 @@ void run_single_benchmark(const string &label, FeedForwardNeuralNetwork * const
pair<double, double> result;
const double time_scale = 1000000.; //microseconds

result = sample_benchmark_FFPropagate(ffnn, xdata, neval, nruns);
result = sample_benchmark(benchmark_FFPropagate, nruns, ffnn, xdata, neval);
cout << label << ":" << setw(max(1, 20 - static_cast<int>(label.length()))) << setfill(' ') << " " << result.first/neval*time_scale << " +- " << result.second/neval*time_scale << " microseconds" << endl;
}

Expand Down
4 changes: 2 additions & 2 deletions benchmark/bench_nunits_ffprop/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#include <iostream>
#include <random>

#include "qnets/io/PrintUtilities.hpp"
#include "qnets/poly/io/PrintUtilities.hpp"

#include "FFNNBenchmarks.hpp"

Expand All @@ -13,7 +13,7 @@ void run_single_benchmark(const string &label, FeedForwardNeuralNetwork * const
pair<double, double> result;
const double time_scale = 1000000.; //microseconds

result = sample_benchmark_FFPropagate(ffnn, xdata, neval, nruns);
result = sample_benchmark(benchmark_FFPropagate, nruns, ffnn, xdata, neval);
cout << label << ":" << setw(max(1, 20 - static_cast<int>(label.length()))) << setfill(' ') << " " << result.first/neval*time_scale << " +- " << result.second/neval*time_scale << " microseconds" << endl;
}

Expand Down
134 changes: 134 additions & 0 deletions benchmark/bench_templ_ffprop/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#include <iomanip>
#include <iostream>
#include <random>
#include <memory>

#include "qnets/templ/TemplNet.hpp"
#include "qnets/actf/Sigmoid.hpp"

#include "FFNNBenchmarks.hpp"

using namespace std;

template <class TemplNet>
void run_single_benchmark(const string &label, TemplNet &tnet, const double xdata[], const int neval, const int nruns)
{
pair<double, double> result;
const double time_scale = 1000000.; //microseconds

result = sample_benchmark(benchmark_TemplProp<TemplNet>, nruns, tnet, xdata, neval);
cout << label << ":" << setw(max(1, 20 - static_cast<int>(label.length()))) << setfill(' ') << " " << result.first/neval*time_scale << " +- " << result.second/neval*time_scale << " microseconds" << endl;
}

// Recursion terminator for run_benchmark_netpack: invoked once the parameter
// pack of networks is exhausted; intentionally does nothing.
template <int I>
void run_benchmark_netpack(const double xdata[], const int ndata[], const int xoffset, const int neval[], const int nruns) {}

template <int I, class TNet, class ... Args>
void run_benchmark_netpack(const double xdata[], const int ndata[], const int xoffset, const int neval[], const int nruns, TNet &tnet, Args& ... tnets)
{
using namespace templ;
cout << "FFPropagate benchmark with " << nruns << " runs of " << neval[I] << " FF-Propagations, for a FFNN of shape " << TNet::getNInput() << "x" << TNet::getNUnit(0) << "x" << TNet::getNUnit(1) << "x" << TNet::getNOutput() << " ." << endl;
cout << "=========================================================================================" << endl << endl;
cout << "Benchmark results (time per propagation):" << endl;

tnet.dflags.set(DerivConfig::OFF);
run_single_benchmark("f", tnet, xdata + xoffset, neval[I], nruns);

tnet.dflags.set(DerivConfig::D1);
run_single_benchmark("f+d1", tnet, xdata + xoffset, neval[I], nruns);

tnet.dflags.set(DerivConfig::VD1);
run_single_benchmark("f+vd1", tnet, xdata + xoffset, neval[I], nruns);

tnet.dflags.set(DerivConfig::D1_VD1);
run_single_benchmark("f+d1+vd1", tnet, xdata + xoffset, neval[I], nruns);

tnet.dflags.set(DerivConfig::D12);
run_single_benchmark("f+d1+d2", tnet, xdata + xoffset, neval[I], nruns);

tnet.dflags.set(DerivConfig::D12_VD1);
run_single_benchmark("f+d1+d2+vd1", tnet, xdata + xoffset, neval[I], nruns);

tnet.dflags.set(DerivConfig::D12_VD12);
run_single_benchmark("f+d1+d2+vd1+vd2", tnet, xdata + xoffset, neval[I], nruns);

cout << "=========================================================================================" << endl << endl << endl;

run_benchmark_netpack<I + 1, Args...>(xdata, ndata, xoffset + ndata[I], neval, nruns, tnets...);
}

int main()
{
using namespace templ;

const int neval[3] = {200000, 20000, 1000};
const int nruns = 5;

const int yndim = 1;
constexpr int xndim[3] = {6, 24, 96}, nhu1[3] = {12, 48, 192}, nhu2[3] = {6, 24, 96};

constexpr auto dconf = DerivConfig::D12_VD12; // "allocate" for all derivatives

using RealT = double;

// Small Net
using L1Type_s = LayerConfig<nhu1[0], actf::Sigmoid>;
using L2Type_s = LayerConfig<nhu2[0], actf::Sigmoid>;
using L3Type_s = LayerConfig<yndim, actf::Sigmoid>;
using NetType_s = TemplNet<RealT, dconf, xndim[0], L1Type_s, L2Type_s, L3Type_s>;
auto tnet_s_ptr = std::make_unique<NetType_s>();
auto &tnet_s = *tnet_s_ptr;

// Medium Net
using L1Type_m = LayerConfig<nhu1[1], actf::Sigmoid>;
using L2Type_m = LayerConfig<nhu2[1], actf::Sigmoid>;
using L3Type_m = LayerConfig<yndim, actf::Sigmoid>;
using NetType_m = TemplNet<RealT, dconf, xndim[1], L1Type_m, L2Type_m, L3Type_m>;
auto tnet_m_ptr = std::make_unique<NetType_m>();
auto &tnet_m = *tnet_m_ptr;

// Large Net
using L1Type_l = LayerConfig<nhu1[2], actf::Sigmoid>;
using L2Type_l = LayerConfig<nhu2[2], actf::Sigmoid>;
using L3Type_l = LayerConfig<yndim, actf::Sigmoid>;
using NetType_l = TemplNet<RealT, dconf, xndim[2], L1Type_l, L2Type_l, L3Type_l>;
auto tnet_l_ptr = std::make_unique<NetType_l>();
auto &tnet_l = *tnet_l_ptr;

// Data
int ndata[3], ndata_full = 0;
for (int i = 0; i < 3; ++i) {
ndata[i] = neval[i]*xndim[i];
ndata_full += ndata[i];
}
auto * xdata = new double[ndata_full]; // xndim input data for propagate bench

// generate some random input
random_device rdev;
mt19937_64 rgen;
uniform_real_distribution<double> rd;
rgen = mt19937_64(rdev());
rgen.seed(18984687);
rd = uniform_real_distribution<double>(-sqrt(3.), sqrt(3.)); // uniform with variance 1
for (int i = 0; i < ndata_full; ++i) {
xdata[i] = rd(rgen);
}

for (int i=0; i<tnet_s.getNBeta(); ++i) {
tnet_s.setBeta(i, rd(rgen));
}
for (int i=0; i<tnet_m.getNBeta(); ++i) {
tnet_m.setBeta(i, rd(rgen));
}
for (int i=0; i<tnet_l.getNBeta(); ++i) {
tnet_l.setBeta(i, rd(rgen));
}

// FFPropagate benchmark
run_benchmark_netpack<0>(xdata, ndata, 0, neval, nruns, tnet_s, tnet_m, tnet_l);

delete[] xdata;

return 0;
}

115 changes: 115 additions & 0 deletions benchmark/bench_templ_ffprop/plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
from pylab import *

class benchmark_nunits_ffprop:
    """Parser for FFPropagate benchmark output files.

    Reads a benchmark log and stores the results in self.data, a dict mapping
    a net shape string (e.g. '6x12x6x1') to a dict that maps a derivative
    configuration label (e.g. 'f+d1') to a (value, error) tuple, both in
    microseconds per propagation.
    """

    def __init__(self, filename, label):
        self.label = label
        self.data = {}

        net_shape = None  # shape of the net currently being parsed (None until first header)
        net_data = {}
        with open(filename) as bmfile:
            for line in bmfile:

                lsplit = line.split()

                if len(lsplit) < 5:  # too short to be a header or a result line
                    continue

                if lsplit[0] == 'FFPropagate':
                    if net_shape is not None:
                        self.data[net_shape] = net_data  # store previous net's data

                    net_shape = lsplit[13]  # shape string, e.g. '6x12x6x1'
                    net_data = {}
                    continue

                # result lines look like: "f+d1:  12.3 +- 0.4 microseconds"
                if lsplit[0][0:2] == 'f:' or lsplit[0][0:2] == 'f+':
                    net_data[lsplit[0][:-1]] = (float(lsplit[1]), float(lsplit[3]))

        # store last net's data; guard against files with no benchmark header,
        # which previously raised NameError on net_shape
        if net_shape is not None:
            self.data[net_shape] = net_data


def plot_compare_nets(benchmark_list, **kwargs):
    """Plot every benchmark in one figure, one subplot per benchmark.

    Each subplot shows, for every net shape found in that benchmark, the time
    per propagation (log scale) over the derivative-config labels.
    Returns the created matplotlib figure.
    """
    nbm = len(benchmark_list)
    # take the x-axis labels (derivative configs) from the first net of the first benchmark
    xlabels = benchmark_list[0].data[list(benchmark_list[0].data.keys())[0]].keys()

    fig = figure()
    fig.suptitle('FFPropagate benchmark, comparing different net sizes',fontsize=14)

    itp=0
    for benchmark in benchmark_list:

        itp+=1
        ax = fig.add_subplot(nbm, 1, itp)
        for net in benchmark.data.keys():
            values = [v[0] for v in benchmark.data[net].values()]
            errors = [v[1] for v in benchmark.data[net].values()]
            ax.errorbar(xlabels, values, xerr=None, yerr=errors, **kwargs)

        ax.set_yscale('log')
        ax.set_title(benchmark.label + ' version')
        # raw string: '\m' is an invalid escape sequence in a normal string literal
        ax.set_ylabel(r'Time per propagation [$\mu s$]')
        ax.legend(benchmark.data.keys())

    return fig


def plot_compare_runs(benchmark_list, net_list, width = 0.8, **kwargs):
    # Compare benchmarks against the first (reference) entry of benchmark_list:
    # one subplot per net shape in net_list, with horizontal bars showing time
    # per propagation as a percentage of the reference result (100 == as fast
    # as the reference; lower is faster). Returns the figure, or None if fewer
    # than two benchmarks were given.
    nbm = len(benchmark_list)-1  # number of benchmarks compared against the reference
    if nbm <= 0:
        print('Error: Not enough benchmarks for comparison plot.')
        return None

    bwidth = width/float(nbm)  # height of a single bar within a group of total height `width`
    nnet = len(net_list)
    # y positions of the bar groups, top-to-bottom; with a single compared
    # benchmark the bars are shifted by half a bar height for centering
    if nbm > 1:
        ind = arange(len(benchmark_list[0].data[net_list[0]]), 0, -1)
    else:
        ind = arange(len(benchmark_list[0].data[net_list[0]]), 0, -1) - 0.5*bwidth
    xlabels = benchmark_list[0].data[net_list[0]].keys()

    fig = figure()
    fig.suptitle('FFPropagate benchmark, comparing against ' + benchmark_list[0].label + ' version',fontsize=14)

    itp = 0
    for ita, net in enumerate(net_list):

        itp+=1
        ax = fig.add_subplot(nnet, 1, itp)
        scales = array([100./v[0] for v in benchmark_list[0].data[net].values()]) # we will normalize data to the first benchmark's results
        for itb, benchmark in enumerate(benchmark_list[1:]):
            values = array([v[0] for v in benchmark.data[net].values()])*scales
            errors = array([v[1] for v in benchmark.data[net].values()])*scales
            rects = ax.barh(ind - itb*bwidth, values, bwidth, xerr=errors, **kwargs)
            # annotate each bar with its (truncated) percentage value
            for rect in rects:
                ax.text(1., rect.get_y() + rect.get_height()/2., '%d' % int(rect.get_width()), ha='left', va='center', fontsize=8)

        ax.set_title(net + ' net')
        if ita==len(net_list)-1:  # x label only on the bottom subplot
            ax.set_xlabel('Time per propagation [%]')
        ax.set_xlim([0,200])
        ax.set_yticks(ind - 0.5*(nbm-1)*bwidth)  # center the tick on each bar group
        ax.set_yticklabels(xlabels)
        ax.legend([benchmark.label for benchmark in benchmark_list[1:]])

    return fig

# Script: load every benchmark file given on the command line and plot.

benchmark_list = []
for benchmark_file in sys.argv[1:]:
    try:
        # label comes from the file name, e.g. 'benchmark_poly.out' -> 'poly';
        # also catch IndexError for file names without an underscore, which
        # previously escaped the narrow except(OSError)
        benchmark = benchmark_nunits_ffprop(benchmark_file, benchmark_file.split('_')[1].split('.')[0])
        benchmark_list.append(benchmark)
    except (OSError, IndexError):
        print("Warning: Couldn't load benchmark file " + benchmark_file + "!")

if not benchmark_list:
    print("Error: Not even one benchmark loaded!")
else:
    fig1 = plot_compare_nets(benchmark_list, fmt='o--')
    # plot_compare_runs itself warns and returns None when there is only one benchmark
    fig2 = plot_compare_runs(benchmark_list, ['6x12x6x1', '24x48x24x1', '96x192x96x1'])

    show()
31 changes: 14 additions & 17 deletions benchmark/common/FFNNBenchmarks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include <tuple>

#include "Timer.hpp"
#include "qnets/net/FeedForwardNeuralNetwork.hpp"
#include "qnets/poly/FeedForwardNeuralNetwork.hpp"

inline double benchmark_FFPropagate(FeedForwardNeuralNetwork * const ffnn, const double * const xdata, const int neval)
{
Expand All @@ -19,26 +19,22 @@ inline double benchmark_FFPropagate(FeedForwardNeuralNetwork * const ffnn, const
return timer.elapsed();
}

inline std::pair<double, double> sample_benchmark_FFPropagate(FeedForwardNeuralNetwork * const ffnn, const double * const xdata, const int neval, const int nruns)
template <class TemplNet>
inline double benchmark_TemplProp(TemplNet &tnet, const double xdata[], const int neval)
{
double times[nruns];
double mean = 0., err = 0.;
Timer timer(1.);
const int ninput = tnet.getNInput();

for (int i = 0; i < nruns; ++i) {
times[i] = benchmark_FFPropagate(ffnn, xdata, neval);
mean += times[i];
}
mean /= nruns;
for (int i = 0; i < nruns; ++i) {
err += pow(times[i] - mean, 2);
timer.reset();
for (int i = 0; i < neval; ++i) {
tnet.setInput(xdata + i*ninput, xdata + (i+1)*ninput);
tnet.FFPropagate();
}
err /= (nruns - 1)*nruns; // variance of the mean
err = sqrt(err); // standard error of the mean

const std::pair<double, double> result(mean, err);
return result;
return timer.elapsed();
}


inline double benchmark_actf_derivs(ActivationFunctionInterface * const actf, const double * const xdata, const int neval, const bool flag_d1 = true, const bool flag_d2 = true, const bool flag_d3 = true, const bool flag_fad = true)
{
Timer timer(1.);
Expand All @@ -62,13 +58,14 @@ inline double benchmark_actf_derivs(ActivationFunctionInterface * const actf, co
return timer.elapsed();
}

inline std::pair<double, double> sample_benchmark_actf_derivs(ActivationFunctionInterface * const actf, const double * const xdata, const int neval, const int nruns, const bool flag_d1 = true, const bool flag_d2 = true, const bool flag_d3 = true, const bool flag_fad = true)
template <class BenchT, class ... Args>
inline std::pair<double, double> sample_benchmark(BenchT bench, const int nruns, Args&& ... args)
{
double times[nruns];
double mean = 0., err = 0.;

for (int i = 0; i < nruns; ++i) {
times[i] = benchmark_actf_derivs(actf, xdata, neval, flag_d1, flag_d2, flag_d3, flag_fad);
times[i] = bench(args...);
mean += times[i];
}
mean /= nruns;
Expand Down
Loading

0 comments on commit 856b3c3

Please sign in to comment.