Skip to content

Commit

Permalink
Add option to use FFTW_MEASURE and wisdom in tests
Browse files Browse the repository at this point in the history
  • Loading branch information
malcolmroberts authored Jun 1, 2020
1 parent 37d7c79 commit 899dd34
Show file tree
Hide file tree
Showing 3 changed files with 206 additions and 28 deletions.
62 changes: 62 additions & 0 deletions clients/tests/fftw_transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ inline typename fftw_trait<Tfloat>::fftw_plan_type
typename fftw_trait<Tfloat>::fftw_complex_type* out,
int sign,
unsigned flags);

template <>
inline typename fftw_trait<float>::fftw_plan_type
fftw_plan_guru64_dft<float>(int rank,
Expand All @@ -196,6 +197,7 @@ inline typename fftw_trait<float>::fftw_plan_type
{
return fftwf_plan_guru64_dft(rank, dims, howmany_rank, howmany_dims, in, out, sign, flags);
}

template <>
inline typename fftw_trait<double>::fftw_plan_type
fftw_plan_guru64_dft<double>(int rank,
Expand All @@ -210,6 +212,26 @@ inline typename fftw_trait<double>::fftw_plan_type
return fftw_plan_guru64_dft(rank, dims, howmany_rank, howmany_dims, in, out, sign, flags);
}

// Template wrappers for FFTW c2c executors.
//
// fftw_plan_execute_c2c<Tfloat>(plan, in, out) runs an already-created
// complex-to-complex plan on the supplied input/output arrays.  Only the
// primary template is declared here; each supported precision provides a
// full specialization that forwards to the matching FFTW routine.
//
// NOTE(review): FFTW's new-array execute interface expects `in`/`out` to be
// compatible (alignment, in-place vs. out-of-place) with the arrays the plan
// was created with -- confirm against the call sites.
template <typename Tfloat>
inline void fftw_plan_execute_c2c(typename fftw_trait<Tfloat>::fftw_plan_type     plan,
                                  typename fftw_trait<Tfloat>::fftw_complex_type* in,
                                  typename fftw_trait<Tfloat>::fftw_complex_type* out);

// Single precision: forwards to fftwf_execute_dft.
template <>
inline void fftw_plan_execute_c2c<float>(fftw_trait<float>::fftw_plan_type     plan,
                                         fftw_trait<float>::fftw_complex_type* in,
                                         fftw_trait<float>::fftw_complex_type* out)
{
    fftwf_execute_dft(plan, in, out);
}

// Double precision: forwards to fftw_execute_dft.
template <>
inline void fftw_plan_execute_c2c<double>(fftw_trait<double>::fftw_plan_type     plan,
                                          fftw_trait<double>::fftw_complex_type* in,
                                          fftw_trait<double>::fftw_complex_type* out)
{
    fftw_execute_dft(plan, in, out);
}

// Template wrappers for FFTW r2c planners:
template <typename Tfloat>
inline typename fftw_trait<Tfloat>::fftw_plan_type
Expand Down Expand Up @@ -245,6 +267,26 @@ inline typename fftw_trait<double>::fftw_plan_type
return fftw_plan_guru64_dft_r2c(rank, dims, howmany_rank, howmany_dims, in, out, flags);
}

// Template wrappers for FFTW r2c executors.
//
// fftw_plan_execute_r2c<Tfloat>(plan, in, out) runs an already-created
// real-to-complex plan, reading real samples from `in` and writing complex
// values to `out`.  Only the primary template is declared here; each
// supported precision provides a full specialization that forwards to the
// matching FFTW routine.
//
// NOTE(review): FFTW's new-array execute interface expects `in`/`out` to be
// compatible (alignment, in-place vs. out-of-place) with the arrays the plan
// was created with -- confirm against the call sites.
template <typename Tfloat>
inline void fftw_plan_execute_r2c(typename fftw_trait<Tfloat>::fftw_plan_type     plan,
                                  Tfloat*                                         in,
                                  typename fftw_trait<Tfloat>::fftw_complex_type* out);

// Single precision: forwards to fftwf_execute_dft_r2c.
template <>
inline void fftw_plan_execute_r2c<float>(fftw_trait<float>::fftw_plan_type     plan,
                                         float*                                in,
                                         fftw_trait<float>::fftw_complex_type* out)
{
    fftwf_execute_dft_r2c(plan, in, out);
}

// Double precision: forwards to fftw_execute_dft_r2c.
template <>
inline void fftw_plan_execute_r2c<double>(fftw_trait<double>::fftw_plan_type     plan,
                                          double*                                in,
                                          fftw_trait<double>::fftw_complex_type* out)
{
    fftw_execute_dft_r2c(plan, in, out);
}

// Template wrappers for FFTW c2r planners:
template <typename Tfloat>
inline typename fftw_trait<Tfloat>::fftw_plan_type
Expand Down Expand Up @@ -280,6 +322,26 @@ inline typename fftw_trait<double>::fftw_plan_type
return fftw_plan_guru64_dft_c2r(rank, dims, howmany_rank, howmany_dims, in, out, flags);
}

// Template wrappers for FFTW c2r executors.
//
// fftw_plan_execute_c2r<Tfloat>(plan, in, out) runs an already-created
// complex-to-real plan, reading complex values from `in` and writing real
// samples to `out`.  Only the primary template is declared here; each
// supported precision provides a full specialization that forwards to the
// matching FFTW routine.
//
// NOTE(review): FFTW documents that c2r transforms may overwrite their input
// array unless the plan was created with FFTW_PRESERVE_INPUT -- confirm that
// callers do not rely on `in` being intact after this call.
template <typename Tfloat>
inline void fftw_plan_execute_c2r(typename fftw_trait<Tfloat>::fftw_plan_type     plan,
                                  typename fftw_trait<Tfloat>::fftw_complex_type* in,
                                  Tfloat*                                         out);

// Single precision: forwards to fftwf_execute_dft_c2r.
template <>
inline void fftw_plan_execute_c2r<float>(fftw_trait<float>::fftw_plan_type     plan,
                                         fftw_trait<float>::fftw_complex_type* in,
                                         float*                                out)
{
    fftwf_execute_dft_c2r(plan, in, out);
}

// Double precision: forwards to fftw_execute_dft_c2r.
template <>
inline void fftw_plan_execute_c2r<double>(fftw_trait<double>::fftw_plan_type     plan,
                                          fftw_trait<double>::fftw_complex_type* in,
                                          double*                                out)
{
    fftw_execute_dft_c2r(plan, in, out);
}

// Allocator / deallocator for FFTW arrays.
template <typename Tdata>
class fftwAllocator : public std::allocator<Tdata>
Expand Down
88 changes: 83 additions & 5 deletions clients/tests/gtest_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,11 @@
/// @brief googletest based unit tester for rocfft
///

#include <cstdlib>
#include <fstream>
#include <gtest/gtest.h>
#include <iostream>
#include <sstream>
#include <streambuf>
#include <string>

#include "accuracy_test.h"
#include "fftw_transform.h"
Expand All @@ -50,20 +53,26 @@ std::vector<size_t> length;
size_t istride0;
size_t ostride0;

// Control whether we use FFTW's wisdom (which we use to imply FFTW_MEASURE).
bool use_fftw_wisdom = false;

int main(int argc, char* argv[])
{
// NB: If we initialize gtest first, then it removes all of its own command-line
// arguments and sets argc and argv correctly; no need to jump through hoops for
// boost::program_options.
::testing::InitGoogleTest(&argc, argv);

// Filename for fftw and fftwf wisdom.
std::string fftw_wisdom_filename;

// Declare the supported options.
// clang-format doesn't handle boost program options very well:
// clang-format off
po::options_description opdesc("rocFFT Runtime Test command line options");
opdesc.add_options()("help,h", "produces this help message")(
"verbose,v",
po::value<int>()->default_value(0),
opdesc.add_options()
("help,h", "produces this help message")
("verbose,v", po::value<int>()->default_value(0),
"print out detailed information for the tests.")
("transformType,t", po::value<rocfft_transform_type>(&transformType)
->default_value(rocfft_transform_type_complex_forward),
Expand All @@ -85,8 +94,13 @@ int main(int argc, char* argv[])
( "istride0", po::value<size_t>(&istride0)->default_value(1),
"Input stride ")
( "ostride0", po::value<size_t>(&ostride0)->default_value(1),
"Output stride ");
"Output stride ")
("wise,w", "use FFTW wisdom")
("wisdomfile,W",
po::value<std::string>(&fftw_wisdom_filename)->default_value("wisdom3.txt"),
"FFTW3 wisdom filename");
// clang-format on

po::variables_map vm;
po::store(po::parse_command_line(argc, argv, opdesc), vm);
po::notify(vm);
Expand All @@ -101,6 +115,11 @@ int main(int argc, char* argv[])

verbose = vm["verbose"].as<int>();

if(vm.count("wise"))
{
use_fftw_wisdom = true;
}

if(length.size() == 0)
{
length.push_back(8);
Expand All @@ -111,7 +130,66 @@ int main(int argc, char* argv[])
rocfft_get_version_string(v, 256);
std::cout << "rocFFT version: " << v << std::endl;

return RUN_ALL_TESTS();
if(use_fftw_wisdom)
{
if(verbose)
{
std::cout << "Using " << fftw_wisdom_filename << " wisdom file\n";
}
std::ifstream fftw_wisdom_file(fftw_wisdom_filename);
std::string allwisdom = std::string(std::istreambuf_iterator<char>(fftw_wisdom_file),
std::istreambuf_iterator<char>());

std::string fftw_wisdom;
std::string fftwf_wisdom;

bool load_wisdom = false;
bool load_fwisdom = false;
std::istringstream input;
input.str(allwisdom);
// Separate the single-precision and double-precision wisdom:
for(std::string line; std::getline(input, line);)
{
if(line.rfind("(fftw", 0) == 0 && line.find("fftw_wisdom") != std::string::npos)
{
load_wisdom = true;
}
if(line.rfind("(fftw", 0) == 0 && line.find("fftwf_wisdom") != std::string::npos)
{
load_fwisdom = true;
}
if(load_wisdom)
{
fftw_wisdom.append(line + "\n");
}
if(load_fwisdom)
{
fftwf_wisdom.append(line + "\n");
}
if(line.rfind(")", 0) == 0)
{
load_wisdom = false;
load_fwisdom = false;
}
}
fftw_import_wisdom_from_string(fftw_wisdom.c_str());
fftwf_import_wisdom_from_string(fftwf_wisdom.c_str());
}

auto retval = RUN_ALL_TESTS();

if(use_fftw_wisdom)
{
    // Persist the wisdom accumulated during the test run so that later runs
    // can re-use it.
    //
    // fftw_export_wisdom_to_string() / fftwf_export_wisdom_to_string() hand
    // back a heap-allocated C string that the caller must release with
    // free(), and may return NULL on failure.  Copy each result into a
    // std::string (treating NULL as empty) and free the raw buffer.
    char* const double_raw = fftw_export_wisdom_to_string();
    char* const single_raw = fftwf_export_wisdom_to_string();

    const std::string fftw_wisdom  = double_raw ? double_raw : "";
    const std::string fftwf_wisdom = single_raw ? single_raw : "";

    std::free(double_raw); // free(NULL) is a no-op
    std::free(single_raw);

    // Write the double-precision wisdom followed by the single-precision
    // wisdom, each exactly once.  The loader splits the file back into the
    // two precisions by looking at the "(fftw... fftw_wisdom" and
    // "(fftw... fftwf_wisdom" header lines.
    std::ofstream fftw_wisdom_file(fftw_wisdom_filename);
    fftw_wisdom_file << fftw_wisdom;
    fftw_wisdom_file << fftwf_wisdom;
    fftw_wisdom_file.close();
}

return retval;
}

TEST(manual, vs_fftw)
Expand Down
84 changes: 61 additions & 23 deletions clients/tests/rocfft_against_fftw.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,71 +80,101 @@ inline bool
return true;
}

extern bool use_fftw_wisdom;

// Perform an out-of-place computation on contiguous data and then return this in an
// object which will destruct correctly.
template <typename Tfloat, typename Tallocator>
inline std::vector<std::vector<char, Tallocator>>
fftw_transform(const std::vector<fftw_iodim64> dims,
const std::vector<fftw_iodim64> howmany_dims,
const rocfft_transform_type transformType,
const size_t isize,
const size_t osize,
void* cpu_in)
{
typename fftw_trait<Tfloat>::fftw_plan_type cpu_plan = NULL;
using fftw_complex_type = typename fftw_trait<Tfloat>::fftw_complex_type;

// NB: Using FFTW_MEASURE implies that the input buffer's data may be destroyed during plan
// creation. Therefore, we create a dummy input buffer.
std::vector<char, Tallocator> dummy_input(1);

std::vector<std::vector<char, Tallocator>> output(1);

switch(transformType)
{
case rocfft_transform_type_complex_forward:
{
dummy_input.resize(isize * sizeof(fftw_complex_type));
output[0].resize(osize * sizeof(fftw_complex_type));
cpu_plan
= fftw_plan_guru64_dft<Tfloat>(dims.size(),
dims.data(),
howmany_dims.size(),
howmany_dims.data(),
reinterpret_cast<fftw_complex_type*>(cpu_in),
reinterpret_cast<fftw_complex_type*>(dummy_input.data()),
reinterpret_cast<fftw_complex_type*>(output[0].data()),
-1,
FFTW_ESTIMATE);
break;
use_fftw_wisdom ? FFTW_MEASURE : FFTW_ESTIMATE);
fftw_plan_execute_c2c<Tfloat>(cpu_plan,
reinterpret_cast<fftw_complex_type*>(cpu_in),
reinterpret_cast<fftw_complex_type*>(output[0].data()));
}
break;
case rocfft_transform_type_complex_inverse:
{
dummy_input.resize(isize * sizeof(fftw_complex_type));
output[0].resize(osize * sizeof(fftw_complex_type));
cpu_plan
= fftw_plan_guru64_dft<Tfloat>(dims.size(),
dims.data(),
howmany_dims.size(),
howmany_dims.data(),
reinterpret_cast<fftw_complex_type*>(cpu_in),
reinterpret_cast<fftw_complex_type*>(dummy_input.data()),
reinterpret_cast<fftw_complex_type*>(output[0].data()),
1,
FFTW_ESTIMATE);
break;
use_fftw_wisdom ? FFTW_MEASURE : FFTW_ESTIMATE);
fftw_plan_execute_c2c<Tfloat>(cpu_plan,
reinterpret_cast<fftw_complex_type*>(cpu_in),
reinterpret_cast<fftw_complex_type*>(output[0].data()));
}
break;
case rocfft_transform_type_real_forward:
{
dummy_input.resize(isize * sizeof(Tfloat));
output[0].resize(osize * sizeof(fftw_complex_type));
cpu_plan
= fftw_plan_guru64_r2c<Tfloat>(dims.size(),
dims.data(),
howmany_dims.size(),
howmany_dims.data(),
reinterpret_cast<Tfloat*>(cpu_in),
reinterpret_cast<Tfloat*>(dummy_input.data()),
reinterpret_cast<fftw_complex_type*>(output[0].data()),
FFTW_ESTIMATE);
use_fftw_wisdom ? FFTW_MEASURE : FFTW_ESTIMATE);
fftw_plan_execute_r2c<Tfloat>(cpu_plan,
reinterpret_cast<Tfloat*>(cpu_in),
reinterpret_cast<fftw_complex_type*>(output[0].data()));
break;
}
case rocfft_transform_type_real_inverse:
{
dummy_input.resize(isize * sizeof(fftw_complex_type));
output[0].resize(osize * sizeof(Tfloat));
cpu_plan = fftw_plan_guru64_c2r<Tfloat>(dims.size(),
dims.data(),
howmany_dims.size(),
howmany_dims.data(),
reinterpret_cast<fftw_complex_type*>(cpu_in),
reinterpret_cast<Tfloat*>(output[0].data()),
FFTW_ESTIMATE);
break;
cpu_plan
= fftw_plan_guru64_c2r<Tfloat>(dims.size(),
dims.data(),
howmany_dims.size(),
howmany_dims.data(),
reinterpret_cast<fftw_complex_type*>(dummy_input.data()),
reinterpret_cast<Tfloat*>(output[0].data()),
use_fftw_wisdom ? FFTW_MEASURE : FFTW_ESTIMATE);
fftw_plan_execute_c2r<Tfloat>(cpu_plan,
reinterpret_cast<fftw_complex_type*>(cpu_in),
reinterpret_cast<Tfloat*>(output[0].data()));
}
break;
}

// Execute the CPU transform:
fftw_execute_type<Tfloat>(cpu_plan);

fftw_destroy_plan_type(cpu_plan);
return output;
Expand Down Expand Up @@ -184,12 +214,20 @@ inline std::vector<std::vector<char, Tallocator>>
switch(precision)
{
case rocfft_precision_single:
return fftw_transform<float, Tallocator>(
dims, howmany_dims, transformType, odist * nbatch, (void*)input[0].data());
return fftw_transform<float, Tallocator>(dims,
howmany_dims,
transformType,
idist * nbatch,
odist * nbatch,
(void*)input[0].data());
break;
case rocfft_precision_double:
return fftw_transform<double, Tallocator>(
dims, howmany_dims, transformType, odist * nbatch, (void*)input[0].data());
return fftw_transform<double, Tallocator>(dims,
howmany_dims,
transformType,
idist * nbatch,
odist * nbatch,
(void*)input[0].data());
break;
}
}
Expand Down

0 comments on commit 899dd34

Please sign in to comment.