diff --git a/clients/tests/fftw_transform.h b/clients/tests/fftw_transform.h index 6dc3859f..8ece0e3a 100644 --- a/clients/tests/fftw_transform.h +++ b/clients/tests/fftw_transform.h @@ -183,6 +183,7 @@ inline typename fftw_trait::fftw_plan_type typename fftw_trait::fftw_complex_type* out, int sign, unsigned flags); + template <> inline typename fftw_trait::fftw_plan_type fftw_plan_guru64_dft(int rank, @@ -196,6 +197,7 @@ inline typename fftw_trait::fftw_plan_type { return fftwf_plan_guru64_dft(rank, dims, howmany_rank, howmany_dims, in, out, sign, flags); } + template <> inline typename fftw_trait::fftw_plan_type fftw_plan_guru64_dft(int rank, @@ -210,6 +212,26 @@ inline typename fftw_trait::fftw_plan_type return fftw_plan_guru64_dft(rank, dims, howmany_rank, howmany_dims, in, out, sign, flags); } +// Template wrappers for FFTW c2c executors: +template +inline void fftw_plan_execute_c2c(typename fftw_trait::fftw_plan_type plan, + typename fftw_trait::fftw_complex_type* in, + typename fftw_trait::fftw_complex_type* out); +template <> +inline void fftw_plan_execute_c2c(typename fftw_trait::fftw_plan_type plan, + typename fftw_trait::fftw_complex_type* in, + typename fftw_trait::fftw_complex_type* out) +{ + fftwf_execute_dft(plan, in, out); +} +template <> +inline void fftw_plan_execute_c2c(typename fftw_trait::fftw_plan_type plan, + typename fftw_trait::fftw_complex_type* in, + typename fftw_trait::fftw_complex_type* out) +{ + fftw_execute_dft(plan, in, out); +} + // Template wrappers for FFTW r2c planners: template inline typename fftw_trait::fftw_plan_type @@ -245,6 +267,26 @@ inline typename fftw_trait::fftw_plan_type return fftw_plan_guru64_dft_r2c(rank, dims, howmany_rank, howmany_dims, in, out, flags); } +// Template wrappers for FFTW r2c executors: +template +inline void fftw_plan_execute_r2c(typename fftw_trait::fftw_plan_type plan, + Tfloat* in, + typename fftw_trait::fftw_complex_type* out); +template <> +inline void fftw_plan_execute_r2c(typename 
fftw_trait::fftw_plan_type plan, + float* in, + typename fftw_trait::fftw_complex_type* out) +{ + fftwf_execute_dft_r2c(plan, in, out); +} +template <> +inline void fftw_plan_execute_r2c(typename fftw_trait::fftw_plan_type plan, + double* in, + typename fftw_trait::fftw_complex_type* out) +{ + fftw_execute_dft_r2c(plan, in, out); +} + // Template wrappers for FFTW c2r planners: template inline typename fftw_trait::fftw_plan_type @@ -280,6 +322,26 @@ inline typename fftw_trait::fftw_plan_type return fftw_plan_guru64_dft_c2r(rank, dims, howmany_rank, howmany_dims, in, out, flags); } +// Template wrappers for FFTW c2r executors: +template +inline void fftw_plan_execute_c2r(typename fftw_trait::fftw_plan_type plan, + typename fftw_trait::fftw_complex_type* in, + Tfloat* out); +template <> +inline void fftw_plan_execute_c2r(typename fftw_trait::fftw_plan_type plan, + typename fftw_trait::fftw_complex_type* in, + float* out) +{ + fftwf_execute_dft_c2r(plan, in, out); +} +template <> +inline void fftw_plan_execute_c2r(typename fftw_trait::fftw_plan_type plan, + typename fftw_trait::fftw_complex_type* in, + double* out) +{ + fftw_execute_dft_c2r(plan, in, out); +} + // Allocator / deallocator for FFTW arrays. template class fftwAllocator : public std::allocator diff --git a/clients/tests/gtest_main.cpp b/clients/tests/gtest_main.cpp index 902d1fcc..a96af1c5 100644 --- a/clients/tests/gtest_main.cpp +++ b/clients/tests/gtest_main.cpp @@ -22,8 +22,11 @@ /// @brief googletest based unit tester for rocfft /// +#include #include #include +#include +#include #include "accuracy_test.h" #include "fftw_transform.h" @@ -50,6 +53,9 @@ std::vector length; size_t istride0; size_t ostride0; +// Control whether we use FFTW's wisdom (which we use to imply FFTW_MEASURE). 
+bool use_fftw_wisdom = false; + int main(int argc, char* argv[]) { // NB: If we initialize gtest first, then it removes all of its own command-line @@ -57,13 +63,16 @@ int main(int argc, char* argv[]) // boost::program_options. ::testing::InitGoogleTest(&argc, argv); + // Filename for fftw and fftwf wisdom. + std::string fftw_wisdom_filename; + // Declare the supported options. // clang-format doesn't handle boost program options very well: // clang-format off po::options_description opdesc("rocFFT Runtime Test command line options"); - opdesc.add_options()("help,h", "produces this help message")( - "verbose,v", - po::value()->default_value(0), + opdesc.add_options() + ("help,h", "produces this help message") + ("verbose,v", po::value()->default_value(0), "print out detailed information for the tests.") ("transformType,t", po::value(&transformType) ->default_value(rocfft_transform_type_complex_forward), @@ -85,8 +94,13 @@ int main(int argc, char* argv[]) ( "istride0", po::value(&istride0)->default_value(1), "Input stride ") ( "ostride0", po::value(&ostride0)->default_value(1), - "Output stride "); + "Output stride ") + ("wise,w", "use FFTW wisdom") + ("wisdomfile,W", + po::value(&fftw_wisdom_filename)->default_value("wisdom3.txt"), + "FFTW3 wisdom filename"); // clang-format on + po::variables_map vm; po::store(po::parse_command_line(argc, argv, opdesc), vm); po::notify(vm); @@ -101,6 +115,11 @@ int main(int argc, char* argv[]) verbose = vm["verbose"].as(); + if(vm.count("wise")) + { + use_fftw_wisdom = true; + } + if(length.size() == 0) { length.push_back(8); @@ -111,7 +130,66 @@ int main(int argc, char* argv[]) rocfft_get_version_string(v, 256); std::cout << "rocFFT version: " << v << std::endl; - return RUN_ALL_TESTS(); + if(use_fftw_wisdom) + { + if(verbose) + { + std::cout << "Using " << fftw_wisdom_filename << " wisdom file\n"; + } + std::ifstream fftw_wisdom_file(fftw_wisdom_filename); + std::string allwisdom = 
std::string(std::istreambuf_iterator(fftw_wisdom_file), + std::istreambuf_iterator()); + + std::string fftw_wisdom; + std::string fftwf_wisdom; + + bool load_wisdom = false; + bool load_fwisdom = false; + std::istringstream input; + input.str(allwisdom); + // Separate the single-precision and double-precision wisdom: + for(std::string line; std::getline(input, line);) + { + if(line.rfind("(fftw", 0) == 0 && line.find("fftw_wisdom") != std::string::npos) + { + load_wisdom = true; + } + if(line.rfind("(fftw", 0) == 0 && line.find("fftwf_wisdom") != std::string::npos) + { + load_fwisdom = true; + } + if(load_wisdom) + { + fftw_wisdom.append(line + "\n"); + } + if(load_fwisdom) + { + fftwf_wisdom.append(line + "\n"); + } + if(line.rfind(")", 0) == 0) + { + load_wisdom = false; + load_fwisdom = false; + } + } + fftw_import_wisdom_from_string(fftw_wisdom.c_str()); + fftwf_import_wisdom_from_string(fftwf_wisdom.c_str()); + } + + auto retval = RUN_ALL_TESTS(); + + if(use_fftw_wisdom) + { + // Write each precision's wisdom exactly once (double, then single). NOTE(review): the FFTW manual says the exported strings are caller-owned and should be released with free(); they are not freed here. + std::string fftw_wisdom = std::string(fftw_export_wisdom_to_string()); + std::string fftwf_wisdom = std::string(fftwf_export_wisdom_to_string()); + std::ofstream fftw_wisdom_file(fftw_wisdom_filename); + fftw_wisdom_file << fftw_wisdom; + fftw_wisdom_file << fftwf_wisdom; + fftw_wisdom_file.close(); + } + + return retval; } TEST(manual, vs_fftw) diff --git a/clients/tests/rocfft_against_fftw.h b/clients/tests/rocfft_against_fftw.h index c4c8b08f..d50f0a7f 100644 --- a/clients/tests/rocfft_against_fftw.h +++ b/clients/tests/rocfft_against_fftw.h @@ -80,6 +80,8 @@ inline bool return true; } +extern bool use_fftw_wisdom; + // Perform and out-of-place computation on contiguous data and then return this in an // an object which will destruct correctly. 
template @@ -87,64 +89,92 @@ inline std::vector> fftw_transform(const std::vector dims, const std::vector howmany_dims, const rocfft_transform_type transformType, + const size_t isize, const size_t osize, void* cpu_in) { typename fftw_trait::fftw_plan_type cpu_plan = NULL; using fftw_complex_type = typename fftw_trait::fftw_complex_type; + // NB: Using FFTW_MEASURE implies that the input buffer's data may be destroyed during plan + // creation. Therefore, we create a dummy input buffer. + std::vector dummy_input(1); + std::vector> output(1); + switch(transformType) { case rocfft_transform_type_complex_forward: + { + dummy_input.resize(isize * sizeof(fftw_complex_type)); output[0].resize(osize * sizeof(fftw_complex_type)); cpu_plan = fftw_plan_guru64_dft(dims.size(), dims.data(), howmany_dims.size(), howmany_dims.data(), - reinterpret_cast(cpu_in), + reinterpret_cast(dummy_input.data()), reinterpret_cast(output[0].data()), -1, - FFTW_ESTIMATE); - break; + use_fftw_wisdom ? FFTW_MEASURE : FFTW_ESTIMATE); + fftw_plan_execute_c2c(cpu_plan, + reinterpret_cast(cpu_in), + reinterpret_cast(output[0].data())); + } + break; case rocfft_transform_type_complex_inverse: + { + dummy_input.resize(isize * sizeof(fftw_complex_type)); output[0].resize(osize * sizeof(fftw_complex_type)); cpu_plan = fftw_plan_guru64_dft(dims.size(), dims.data(), howmany_dims.size(), howmany_dims.data(), - reinterpret_cast(cpu_in), + reinterpret_cast(dummy_input.data()), reinterpret_cast(output[0].data()), 1, - FFTW_ESTIMATE); - break; + use_fftw_wisdom ? 
FFTW_MEASURE : FFTW_ESTIMATE); + fftw_plan_execute_c2c(cpu_plan, + reinterpret_cast(cpu_in), + reinterpret_cast(output[0].data())); + } + break; case rocfft_transform_type_real_forward: + { + dummy_input.resize(isize * sizeof(Tfloat)); output[0].resize(osize * sizeof(fftw_complex_type)); cpu_plan = fftw_plan_guru64_r2c(dims.size(), dims.data(), howmany_dims.size(), howmany_dims.data(), - reinterpret_cast(cpu_in), + reinterpret_cast(dummy_input.data()), reinterpret_cast(output[0].data()), - FFTW_ESTIMATE); + use_fftw_wisdom ? FFTW_MEASURE : FFTW_ESTIMATE); + fftw_plan_execute_r2c(cpu_plan, + reinterpret_cast(cpu_in), + reinterpret_cast(output[0].data())); break; + } case rocfft_transform_type_real_inverse: + { + dummy_input.resize(isize * sizeof(fftw_complex_type)); output[0].resize(osize * sizeof(Tfloat)); - cpu_plan = fftw_plan_guru64_c2r(dims.size(), - dims.data(), - howmany_dims.size(), - howmany_dims.data(), - reinterpret_cast(cpu_in), - reinterpret_cast(output[0].data()), - FFTW_ESTIMATE); - break; + cpu_plan + = fftw_plan_guru64_c2r(dims.size(), + dims.data(), + howmany_dims.size(), + howmany_dims.data(), + reinterpret_cast(dummy_input.data()), + reinterpret_cast(output[0].data()), + use_fftw_wisdom ? 
FFTW_MEASURE : FFTW_ESTIMATE); + fftw_plan_execute_c2r(cpu_plan, + reinterpret_cast(cpu_in), + reinterpret_cast(output[0].data())); + } + break; } - - // Execute the CPU transform: - fftw_execute_type(cpu_plan); fftw_destroy_plan_type(cpu_plan); return output; @@ -184,12 +214,20 @@ inline std::vector> switch(precision) { case rocfft_precision_single: - return fftw_transform( - dims, howmany_dims, transformType, odist * nbatch, (void*)input[0].data()); + return fftw_transform(dims, + howmany_dims, + transformType, + idist * nbatch, + odist * nbatch, + (void*)input[0].data()); break; case rocfft_precision_double: - return fftw_transform( - dims, howmany_dims, transformType, odist * nbatch, (void*)input[0].data()); + return fftw_transform(dims, + howmany_dims, + transformType, + idist * nbatch, + odist * nbatch, + (void*)input[0].data()); break; } }