diff --git a/include/enum_quda.h b/include/enum_quda.h index 3f83ab6a9d..f4c91406b5 100644 --- a/include/enum_quda.h +++ b/include/enum_quda.h @@ -549,6 +549,12 @@ typedef enum QudaWFlowType_s { QUDA_WFLOW_TYPE_INVALID = QUDA_INVALID_ENUM } QudaWFlowType; +typedef enum QudaGaugeFixType_s { + QUDA_GAUGEFIX_TYPE_OVR = 0, + QUDA_GAUGEFIX_TYPE_FFT = 1, + QUDA_GAUGEFIX_TYPE_INVALID = QUDA_INVALID_ENUM +} QudaGaugeFixType; + // Allows to choose an appropriate external library typedef enum QudaExtLibType_s { QUDA_CUSOLVE_EXTLIB, diff --git a/include/enum_quda_fortran.h b/include/enum_quda_fortran.h index 867de94d7d..4f9660cafd 100644 --- a/include/enum_quda_fortran.h +++ b/include/enum_quda_fortran.h @@ -492,6 +492,16 @@ #define QUDA_CONTRACT_GAMMA_S34 15 #define QUDA_CONTRACT_GAMMA_INVALID QUDA_INVALID_ENUM +#define QudaWFlowType integer(4) +#define QUDA_WFLOW_TYPE_WILSON 0 +#define QUDA_WFLOW_TYPE_SYMANZIK 1 +#define QUDA_WFLOW_TYPE_INVALID QUDA_INVALID_ENUM + +#define QudaGaugeFixType integer(4) +#define QUDA_GAUGEFIX_TYPE_OVR 0 +#define QUDA_GAUGEFIX_TYPE_FFT 1 +#define QUDA_GAUGEFIX_TYPE_INVALID QUDA_INVALID_ENUM + #define QudaExtLibType integer(4) #define QUDA_CUSOLVE_EXTLIB 0 #define QUDA_EIGEN_EXTLIB 1 diff --git a/include/gauge_tools.h b/include/gauge_tools.h index e32f38f5b1..e2393cf239 100644 --- a/include/gauge_tools.h +++ b/include/gauge_tools.h @@ -113,34 +113,16 @@ namespace quda /** * @brief Gauge fixing with overrelaxation with support for single and multi GPU. * @param[in,out] data, quda gauge field - * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing - * @param[in] Nsteps, maximum number of steps to perform gauge fixing - * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this - * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7. - * @param[in] tolerance, torelance value to stop the method, if this - * value is zero then the method stops when iteration reachs the - * maximum number of steps defined by Nsteps - * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this - * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value + * @param[in] fix_param Parameter struct that defines the gauge fixing */ - void gaugeFixingOVR(GaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval, - const double relax_boost, const double tolerance, const int reunit_interval, const int stopWtheta); + void gaugeFixingOVR(GaugeField &data, QudaGaugeFixParam &fix_param); /** * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only. * @param[in,out] data, quda gauge field - * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing - * @param[in] Nsteps, maximum number of steps to perform gauge fixing - * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this - * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08 - * @param[in] autotune, 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value - * @param[in] tolerance, torelance value to stop the method, if this - * value is zero then the method stops when iteration reachs the - * maximum number of steps defined by Nsteps - * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value + * @param[in] fix_param Parameter struct that defines the gauge fixing */ - void gaugeFixingFFT(GaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval, - const double alpha, const int autotune, const double tolerance, const int stopWtheta); + void gaugeFixingFFT(GaugeField &data, QudaGaugeFixParam &fix_param); /** @brief Compute the Fmunu tensor diff --git a/include/quda.h b/include/quda.h index 9ba0035988..ced9d2dc25 100644 --- a/include/quda.h +++ b/include/quda.h @@ -778,6 +778,24 @@ extern "C" { QudaBLASDataOrder data_order; /**< Specifies if using Row or Column major */ } QudaBLASParam; + typedef struct QudaGaugeFixParam_s { + size_t struct_size; /**< Size of this struct in bytes. Used to ensure that the host application and QUDA see the same struct size */ + + QudaGaugeFixType fix_type; /**< The aglorithm to use for gauge fixing */ + int gauge_dir; /**< The orthogonal direction of the gauge fixing, 3=Coulomb, 4=Landau. (default 4) */ + int maxiter; /**< The maximum number of gauge fixing iterations to be applied (default 10000) */ + int verbosity_interval; /**< Print the gauge fixing progress every N steps (default 100) */ + double ovr_relaxation_boost; /**< The overrelaxation boost parameter for the overrelaxation method (default 1.5) */ + double fft_alpha; /**< The Alpha parameter in the FFT method (default 0.8) */ + QudaBoolean fft_autotune; /**< Autotune the Alpha parameter in the FFT method (default true) */ + int reunit_interval; /**< Reunitarise the gauge field every N steps (default 10) */ + double tolerance; /**< The tolerance of the gauge fixing quality (default 1e-6) */ + QudaBoolean theta_condition; /**< "Use the theta value to determine the gauge fixing if true. If false, use the + delta value (default false)" */ + QudaPrecision precision; /**< The precision used by the algorithm */ + + } QudaGaugeFixParam; + /* * Interface functions, found in interface_quda.cpp */ @@ -954,6 +972,15 @@ extern "C" { */ QudaBLASParam newQudaBLASParam(void); + /** + * A new QudaGaugeFixParam should always be initialized immediately + * after it's defined (and prior to explicitly setting its members) + * using this function. Typical usage is as follows: + * + * QudaGaugeFixParam fix_param = newQudaGaugeFixParam(); + */ + QudaGaugeFixParam newQudaGaugeFixParam(void); + /** * Print the members of QudaGaugeParam. * @param param The QudaGaugeParam whose elements we are to print. @@ -990,6 +1017,12 @@ extern "C" { */ void printQudaBLASParam(QudaBLASParam *param); + /** + * Print the members of QudaGaugeFixParam. + * @param param The QudaGaugeFixParam whose elements we are to print. + */ + void printQudaGaugeFixParam(QudaGaugeFixParam *param); + /** * Load the gauge field from the host. * @param h_gauge Base pointer to host gauge field (regardless of dimensionality) @@ -1519,41 +1552,14 @@ extern "C" { const int *X); /** - * @brief Gauge fixing with overrelaxation with support for single and multi GPU. - * @param[in,out] gauge, gauge field to be fixed - * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing - * @param[in] Nsteps, maximum number of steps to perform gauge fixing - * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this - * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7. - * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when - * iteration reachs the maximum number of steps defined by Nsteps - * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this - * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value - * @param[in] param The parameters of the external fields and the computation settings - * @param[out] timeinfo - */ - int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, - const unsigned int verbose_interval, const double relax_boost, const double tolerance, - const unsigned int reunit_interval, const unsigned int stopWtheta, - QudaGaugeParam *param, double *timeinfo); - /** - * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only. + * @brief Gauge fixing with overrelaxation with support for single and multi GPU, and steepest descent FFT with + * support for single GPU only. * @param[in,out] gauge, gauge field to be fixed - * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing - * @param[in] Nsteps, maximum number of steps to perform gauge fixing - * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this - * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08 - * @param[in] autotune, 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value - * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when - * iteration reachs the maximum number of steps defined by Nsteps - * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value - * @param[in] param The parameters of the external fields and the computation settings - * @param[out] timeinfo + * @param[in] gauge_param The parameters of the external fields and the computation settings + * @param[in] fix_param Container for the gauge fixing algorithm and parameters to use. + * @param[out] timeinfo Array to track timings */ - int computeGaugeFixingFFTQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, - const unsigned int verbose_interval, const double alpha, const unsigned int autotune, - const double tolerance, const unsigned int stopWtheta, QudaGaugeParam *param, - double *timeinfo); + int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *gauge_param, QudaGaugeFixParam *fix_param, double *timeinfo); /** * @brief Strided Batched GEMM diff --git a/lib/check_params.h b/lib/check_params.h index 1566506917..b49cf04cb0 100644 --- a/lib/check_params.h +++ b/lib/check_params.h @@ -268,7 +268,7 @@ void printQudaCloverParam(QudaInvertParam *param) #if defined CHECK_PARAM if (param->struct_size != (size_t)INVALID_INT && param->struct_size != sizeof(*param)) - errorQuda("Unexpected QudaInvertParam struct size %lu, expected %lu", param->struct_size, sizeof(*param)); + errorQuda("Unexpected QudaCloverParam struct size %lu, expected %lu", param->struct_size, sizeof(*param)); #else P(struct_size, (size_t)INVALID_INT); #endif @@ -1047,8 +1047,45 @@ void printQudaBLASParam(QudaBLASParam *param) #endif } -// clean up +#if defined INIT_PARAM +QudaGaugeFixParam newQudaGaugeFixParam(void) +{ + QudaGaugeFixParam ret; +#elif defined CHECK_PARAM +static void checkGaugeFixParam(QudaGaugeFixParam *param) +{ +#else +void printQudaGaugeFixParam(QudaGaugeFixParam *param) +{ + printfQuda("QUDA gauge fix parameters:\n"); +#endif + +#if defined CHECK_PARAM + if (param->struct_size != (size_t)INVALID_INT && param->struct_size != sizeof(*param)) + errorQuda("Unexpected QudaGaugeFixParam struct size %lu, expected %lu", param->struct_size, sizeof(*param)); +#else + P(struct_size, (size_t)INVALID_INT); +#endif + + P(gauge_dir, INVALID_INT); + P(maxiter, INVALID_INT); + P(verbosity_interval, INVALID_INT); + P(reunit_interval, INVALID_INT); + P(ovr_relaxation_boost, INVALID_DOUBLE); + P(fft_alpha, INVALID_DOUBLE); + P(tolerance, INVALID_DOUBLE); + +#ifndef CHECK_PARAM + P(fft_autotune, QUDA_BOOLEAN_FALSE); + P(theta_condition, QUDA_BOOLEAN_FALSE); +#endif +#ifdef INIT_PARAM + return ret; +#endif +} + +// clean up #undef INVALID_INT #undef INVALID_DOUBLE #undef P diff --git a/lib/gauge_fix_fft.cu b/lib/gauge_fix_fft.cu index a44f9368c3..b9aec49975 100644 --- a/lib/gauge_fix_fft.cu +++ b/lib/gauge_fix_fft.cu @@ -182,19 +182,26 @@ namespace quda { }; template - void gaugeFixingFFT(GaugeField& data, int Nsteps, int verbose_interval, - double alpha0, int autotune, double tolerance, int stopWtheta) + void gaugeFixingFFT(GaugeField& data, QudaGaugeFixParam &fix_param) { TimeProfile profileInternalGaugeFixFFT("InternalGaugeFixQudaFFT", false); - + + QudaBoolean autotune = fix_param.fft_autotune; + double alpha0 = fix_param.fft_alpha; + double tolerance = fix_param.tolerance; + QudaBoolean theta_condition = fix_param.theta_condition; + int steps = fix_param.maxiter; + int verbose_interval = fix_param.verbosity_interval; + profileInternalGaugeFixFFT.TPSTART(QUDA_PROFILE_COMPUTE); if (getVerbosity() >= QUDA_SUMMARIZE) { - printfQuda("\tAuto tune active: %s\n", autotune ? "true" : "false"); + if(autotune == QUDA_BOOLEAN_TRUE) printfQuda("\tAuto tune active: alpha will be adjusted as the algorithm progresses\n"); + else printfQuda("\tAuto tune not active: alpha will remain constant as the algorithm progresses\n"); printfQuda("\tAlpha parameter of the Steepest Descent Method: %e\n", alpha0); - printfQuda("\tTolerance: %lf\n", tolerance); - printfQuda("\tStop criterion method: %s\n", stopWtheta ? "Theta" : "Delta"); - printfQuda("\tMaximum number of iterations: %d\n", Nsteps); + printfQuda("\tTolerance: %e\n", tolerance); + printfQuda("\tStop criterion method: %s\n", theta_condition == QUDA_BOOLEAN_TRUE ? "Theta" : "Delta"); + printfQuda("\tMaximum number of iterations: %d\n", steps); printfQuda("\tPrint convergence results at every %d steps\n", verbose_interval); } @@ -217,11 +224,11 @@ namespace quda { GaugeFixQuality gfixquality(argQ, data); gfixquality.apply(device::get_default_stream()); double action0 = argQ.getAction(); - if(getVerbosity() >= QUDA_SUMMARIZE) printf("Step: %d\tAction: %.16e\ttheta: %.16e\n", 0, argQ.getAction(), argQ.getTheta()); + if(getVerbosity() >= QUDA_SUMMARIZE) printf("Step: %05d\tAction: %.16e\ttheta: %.16e\n", 0, argQ.getAction(), argQ.getTheta()); double diff = 0.0; int iter = 0; - for (iter = 0; iter < Nsteps; iter++) { + for (iter = 0; iter < steps; iter++) { for (int k = 0; k < 6; k++) { //------------------------------------------------------------------------ // Set a pointer do the element k in lattice volume @@ -285,23 +292,23 @@ namespace quda { double action = argQ.getAction(); diff = abs(action0 - action); if ((iter % verbose_interval) == (verbose_interval - 1) && getVerbosity() >= QUDA_SUMMARIZE) - printf("Step: %d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff); - if ( autotune && ((action - action0) < -1e-14) ) { + printf("Step: %05d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff); + if ( autotune == QUDA_BOOLEAN_TRUE && ((action - action0) < -1e-14) ) { if ( arg.alpha > 0.01 ) { arg.alpha = 0.95 * arg.alpha; - if(getVerbosity() >= QUDA_SUMMARIZE) printf(">>>>>>>>>>>>>> Warning: changing alpha down -> %.4e\n", arg.alpha); + if(getVerbosity() >= QUDA_SUMMARIZE) printf("Changing alpha down -> %.4e\n", arg.alpha); } } //------------------------------------------------------------------------ // Check gauge fix quality criterion //------------------------------------------------------------------------ - if ( stopWtheta ) { if ( argQ.getTheta() < tolerance ) break; } + if ( theta_condition == QUDA_BOOLEAN_TRUE ) { if ( argQ.getTheta() < tolerance ) break; } else { if ( diff < tolerance ) break; } action0 = action; } if ((iter % verbose_interval) != 0 && getVerbosity() >= QUDA_SUMMARIZE) - printf("Step: %d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter, argQ.getAction(), argQ.getTheta(), diff); + printf("Step: %05d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter, argQ.getAction(), argQ.getTheta(), diff); // Reunitarize at end const double unitarize_eps = 1e-14; @@ -356,21 +363,23 @@ namespace quda { gflops = (gflops * 1e-9) / (secs); gbytes = gbytes / (secs * 1e9); - if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Time: %6.6f s, Gflop/s = %6.1f, GB/s = %6.1f\n", secs, gflops, gbytes); - + if (getVerbosity() > QUDA_SUMMARIZE) + printfQuda("Time: %6.6f s, Gflop/s = %6.1f, GB/s = %6.1f\n", secs, gflops, gbytes); + host_free(num_failures_h); } template struct GaugeFixingFFT { - GaugeFixingFFT(GaugeField& data, int gauge_dir, int Nsteps, int verbose_interval, - double alpha, int autotune, double tolerance, int stopWtheta) + GaugeFixingFFT(GaugeField& data, QudaGaugeFixParam &fix_param) { - if (gauge_dir != 3) { - if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Starting Landau gauge fixing with FFTs...\n"); - gaugeFixingFFT(data, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta); + if (fix_param.gauge_dir == 4) { + if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Starting Landau gauge fixing with FFTs\n"); + gaugeFixingFFT(data, fix_param); + } else if (fix_param.gauge_dir == 3) { + if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Starting Coulomb gauge fixing with FFTs\n"); + gaugeFixingFFT(data, fix_param); } else { - if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Starting Coulomb gauge fixing with FFTs...\n"); - gaugeFixingFFT(data, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta); + errorQuda("Unexpected gauge_dir = %d", fix_param.gauge_dir); } } }; @@ -378,23 +387,16 @@ namespace quda { /** * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only. * @param[in,out] data, quda gauge field - * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing - * @param[in] Nsteps, maximum number of steps to perform gauge fixing - * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this - * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08 - * @param[in] autotune, 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value - * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps - * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value + * @param[in] fix_param Parameter struct defining the gauge fixing */ #if defined(GPU_GAUGE_ALG) - void gaugeFixingFFT(GaugeField& data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double alpha, - const int autotune, const double tolerance, const int stopWtheta) + void gaugeFixingFFT(GaugeField& data, QudaGaugeFixParam &fix_param) { if (comm_partitioned()) errorQuda("Gauge Fixing with FFTs in multi-GPU support NOT implemented yet!"); - instantiate(data, gauge_dir, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta); + instantiate(data, fix_param); } #else - void gaugeFixingFFT(GaugeField&, const int, const int, const int, const double, const int, const double, const int) + void gaugeFixingFFT(GaugeField&, QudaGaugeFixParam &) { errorQuda("Gauge fixing has bot been built"); } diff --git a/lib/gauge_fix_ovr.cu b/lib/gauge_fix_ovr.cu index b97772f43d..38b7948f99 100644 --- a/lib/gauge_fix_ovr.cu +++ b/lib/gauge_fix_ovr.cu @@ -223,12 +223,17 @@ namespace quda { }; template - void gaugeFixingOVR(GaugeField &data,const int Nsteps, const int verbose_interval, - const double relax_boost, const double tolerance, - const int reunit_interval, const int stopWtheta) + void gaugeFixingOVR(GaugeField &data, QudaGaugeFixParam &fix_param) { TimeProfile profileInternalGaugeFixOVR("InternalGaugeFixQudaOVR", false); + double relax_boost = fix_param.ovr_relaxation_boost; + double tolerance = fix_param.tolerance; + QudaBoolean theta_condition = fix_param.theta_condition; + int steps = fix_param.maxiter; + int reunit_interval = fix_param.reunit_interval; + int verbose_interval = fix_param.verbosity_interval; + profileInternalGaugeFixOVR.TPSTART(QUDA_PROFILE_COMPUTE); double flop = 0; double byte = 0; @@ -236,10 +241,11 @@ namespace quda { if (getVerbosity() >= QUDA_SUMMARIZE) { printfQuda("\tOverrelaxation boost parameter: %e\n", relax_boost); printfQuda("\tTolerance: %le\n", tolerance); - printfQuda("\tStop criterion method: %s\n", stopWtheta ? "Theta" : "Delta"); - printfQuda("\tMaximum number of iterations: %d\n", Nsteps); + printfQuda("\tStop criterion method: %s\n", theta_condition == QUDA_BOOLEAN_TRUE ? "Theta" : "Delta"); + printfQuda("\tMaximum number of iterations: %d\n", steps); printfQuda("\tReunitarize at every %d steps\n", reunit_interval); printfQuda("\tPrint convergence results at every %d steps\n", verbose_interval); + printfQuda("\tComputing in %s precision\n", sizeof(Float) == sizeof(double) ? "double" : "single"); } const double unitarize_eps = 1e-14; @@ -313,7 +319,7 @@ namespace quda { flop += (double)GaugeFixQuality.flops(); byte += (double)GaugeFixQuality.bytes(); double action0 = argQ.getAction(); - if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Step: %d\tAction: %.16e\ttheta: %.16e\n", 0, argQ.getAction(), argQ.getTheta()); + if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Step: %05d\tAction: %.16e\ttheta: %.16e\n", 0, argQ.getAction(), argQ.getTheta()); *num_failures_h = 0; unitarizeLinks(data, data, num_failures_d); @@ -324,7 +330,7 @@ namespace quda { GaugeFix gfixBorderPoints(data, relax_boost, borderpoints, true, threads); int iter = 0; - for (iter = 0; iter < Nsteps; iter++) { + for (iter = 0; iter < steps; iter++) { for (int p = 0; p < 2; p++) { if (comm_partitioned()) { gfixBorderPoints.setParity(p); //compute border points @@ -413,8 +419,8 @@ namespace quda { double action = argQ.getAction(); double diff = abs(action0 - action); if ((iter % verbose_interval) == (verbose_interval - 1) && getVerbosity() >= QUDA_VERBOSE) - printfQuda("Step: %d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff); - if (stopWtheta) { + printfQuda("Step: %05d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff); + if (theta_condition == QUDA_BOOLEAN_TRUE) { if (argQ.getTheta() < tolerance) break; } else { if ( diff < tolerance ) break; @@ -436,7 +442,7 @@ namespace quda { byte += (double)GaugeFixQuality.bytes(); double action = argQ.getAction(); double diff = abs(action0 - action); - if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Step: %d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff); + if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Step: %05d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff); } for (int i = 0; i < 2 && nlinksfaces; i++) managed_free(borderpoints[i]); @@ -470,17 +476,16 @@ namespace quda { } template struct GaugeFixingOVR { - GaugeFixingOVR(GaugeField& data, const int gauge_dir, const int Nsteps, const int verbose_interval, - const double relax_boost, const double tolerance, const int reunit_interval, const int stopWtheta) + GaugeFixingOVR(GaugeField& data, QudaGaugeFixParam &fix_param) { - if (gauge_dir == 4) { - if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Starting Landau gauge fixing...\n"); - gaugeFixingOVR(data, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta); - } else if (gauge_dir == 3) { - if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Starting Coulomb gauge fixing...\n"); - gaugeFixingOVR(data, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta); + if (fix_param.gauge_dir == 4) { + if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Starting Landau gauge fixing with Overrelaxation\n"); + gaugeFixingOVR(data, fix_param); + } else if (fix_param.gauge_dir == 3) { + if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Starting Coulomb gauge fixing with Overrelaxation\n"); + gaugeFixingOVR(data, fix_param); } else { - errorQuda("Unexpected gauge_dir = %d", gauge_dir); + errorQuda("Unexpected gauge_dir = %d", fix_param.gauge_dir); } } }; @@ -488,22 +493,15 @@ namespace quda { /** * @brief Gauge fixing with overrelaxation with support for single and multi GPU. * @param[in,out] data, quda gauge field - * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing - * @param[in] Nsteps, maximum number of steps to perform gauge fixing - * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this - * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7. - * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps - * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this - * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value + * @param[in] fix_param Parameter struct defining the gauge fixing */ #ifdef GPU_GAUGE_ALG - void gaugeFixingOVR(GaugeField& data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double relax_boost, - const double tolerance, const int reunit_interval, const int stopWtheta) + void gaugeFixingOVR(GaugeField& data, QudaGaugeFixParam &fix_param) { - instantiate(data, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta); + instantiate(data, fix_param); } #else - void gaugeFixingOVR(GaugeField&, const int, const int, const int, const double, const double, const int, const int) + void gaugeFixingOVR(GaugeField&, QudaGaugeFixParam &) { errorQuda("Gauge fixing has not been built"); } diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index ddb13df03e..59c63d9b1a 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -237,8 +237,7 @@ static TimeProfile profileMomAction("momActionQuda"); static TimeProfile profileEnd("endQuda"); //!< Profiler for GaugeFixing -static TimeProfile GaugeFixFFTQuda("GaugeFixFFTQuda"); -static TimeProfile GaugeFixOVRQuda("GaugeFixOVRQuda"); +static TimeProfile profileGaugeFix("gaugeFixQuda"); //!< Profiler for toal time spend between init and end static TimeProfile profileInit2End("initQuda-endQuda",false); @@ -1483,6 +1482,7 @@ void endQuda(void) profileProject.Print(); profilePhase.Print(); profileMomAction.Print(); + profileGaugeFix.Print(); profileEnd.Print(); profileInit2End.Print(); @@ -5519,141 +5519,88 @@ void performWFlownStep(unsigned int n_steps, double step_size, int meas_interval popOutputPrefix(); } -int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, - const unsigned int verbose_interval, const double relax_boost, const double tolerance, - const unsigned int reunit_interval, const unsigned int stopWtheta, QudaGaugeParam *param, - double *timeinfo) +int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixParam *fix_param, double *timeinfo) { - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_TOTAL); + profileGaugeFix.TPSTART(QUDA_PROFILE_TOTAL); - checkGaugeParam(param); - - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_INIT); - GaugeFieldParam gParam(*param, gauge); - auto *cpuGauge = new cpuGaugeField(gParam); - - // gParam.pad = getFatLinkPadding(param->X); - gParam.create = QUDA_NULL_FIELD_CREATE; - gParam.link_type = param->type; - gParam.reconstruct = param->reconstruct; - gParam.setPrecision(gParam.Precision(), true); - auto *cudaInGauge = new cudaGaugeField(gParam); - - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_INIT); - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_H2D); - - ///if (!param->use_resident_gauge) { // load fields onto the device - cudaInGauge->loadCPUField(*cpuGauge); - /* } else { // or use resident fields already present - if (!gaugePrecise) errorQuda("No resident gauge field allocated"); - cudaInGauge = gaugePrecise; - gaugePrecise = nullptr; - } */ - - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_H2D); - - if (comm_size() == 1) { - // perform the update - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_COMPUTE); - gaugeFixingOVR(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, - stopWtheta); - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_COMPUTE); - } else { - cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, GaugeFixOVRQuda); - - // perform the update - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_COMPUTE); - gaugeFixingOVR(*cudaInGaugeEx, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, - stopWtheta); - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_COMPUTE); - - //HOW TO COPY BACK TO CPU: cudaInGaugeEx->cpuGauge - copyExtendedGauge(*cudaInGauge, *cudaInGaugeEx, QUDA_CUDA_FIELD_LOCATION); + if (!initialized) errorQuda("QUDA not initialized"); + if (getVerbosity() == QUDA_DEBUG_VERBOSE) { + printQudaGaugeParam(g_param); + printQudaGaugeFixParam(fix_param); } - // copy the gauge field back to the host - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_D2H); - cudaInGauge->saveCPUField(*cpuGauge); - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_D2H); - - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_TOTAL); + // Check parameters + checkGaugeParam(g_param); + checkGaugeFixParam(fix_param); - if (param->make_resident_gauge) { - if (gaugePrecise != nullptr) delete gaugePrecise; - gaugePrecise = cudaInGauge; - } else { - delete cudaInGauge; + if (g_param->location == QUDA_CUDA_FIELD_LOCATION) { + errorQuda("GPU gauge fixing not supported via QUDA interface. Please use direct kernel call: " + "gaugeFixingOVR/gaugeFixingFFT"); } - if(timeinfo){ - timeinfo[0] = GaugeFixOVRQuda.Last(QUDA_PROFILE_H2D); - timeinfo[1] = GaugeFixOVRQuda.Last(QUDA_PROFILE_COMPUTE); - timeinfo[2] = GaugeFixOVRQuda.Last(QUDA_PROFILE_D2H); - } - - return 0; -} - -int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir, const unsigned int Nsteps, \ - const unsigned int verbose_interval, const double alpha, const unsigned int autotune, const double tolerance, \ - const unsigned int stopWtheta, QudaGaugeParam* param , double* timeinfo) -{ - GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_TOTAL); - - checkGaugeParam(param); - - GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_INIT); - - GaugeFieldParam gParam(*param, gauge); - auto *cpuGauge = new cpuGaugeField(gParam); + profileGaugeFix.TPSTART(QUDA_PROFILE_INIT); + GaugeFieldParam gauge_param(*g_param, gauge); + auto *cpu_gauge = new cpuGaugeField(gauge_param); - //gParam.pad = getFatLinkPadding(param->X); - gParam.create = QUDA_NULL_FIELD_CREATE; - gParam.link_type = param->type; - gParam.reconstruct = param->reconstruct; - gParam.setPrecision(gParam.Precision(), true); - auto *cudaInGauge = new cudaGaugeField(gParam); - - - GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_INIT); - - GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_H2D); - - //if (!param->use_resident_gauge) { // load fields onto the device - cudaInGauge->loadCPUField(*cpuGauge); - /*} else { // or use resident fields already present - if (!gaugePrecise) errorQuda("No resident gauge field allocated"); - cudaInGauge = gaugePrecise; - gaugePrecise = nullptr; - } */ - - GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_H2D); - - // perform the update - GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_COMPUTE); + // Make GPU field + gauge_param.create = QUDA_NULL_FIELD_CREATE; + gauge_param.link_type = g_param->type; + gauge_param.reconstruct = g_param->reconstruct; + gauge_param.setPrecision(fix_param->precision, true); + auto *device_gauge = new cudaGaugeField(gauge_param); + profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT); + + // Load gauge to device + profileGaugeFix.TPSTART(QUDA_PROFILE_H2D); + device_gauge->loadCPUField(*cpu_gauge); + profileGaugeFix.TPSTOP(QUDA_PROFILE_H2D); + + // Perform the update + switch (fix_param->fix_type) { + + case QUDA_GAUGEFIX_TYPE_OVR: + if (comm_size() == 1) { + profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE); + gaugeFixingOVR(*device_gauge, *fix_param); + profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE); + } else { + // For MPI, we must perform a halo exchange + cudaGaugeField *device_gauge_extended = createExtendedGauge(*device_gauge, R, profileGaugeFix); + profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE); + gaugeFixingOVR(*device_gauge_extended, *fix_param); + profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE); + copyExtendedGauge(*device_gauge, *device_gauge_extended, QUDA_CUDA_FIELD_LOCATION); + } + break; - gaugeFixingFFT(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta); + case QUDA_GAUGEFIX_TYPE_FFT: + profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE); + gaugeFixingFFT(*device_gauge, *fix_param); + profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE); + break; - GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_COMPUTE); + default: errorQuda("Unkown gauge fix type %d", fix_param->fix_type); + } - // copy the gauge field back to the host - GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_D2H); - cudaInGauge->saveCPUField(*cpuGauge); - GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_D2H); + // Copy the fixed gauge field back to the host. + profileGaugeFix.TPSTART(QUDA_PROFILE_D2H); + device_gauge->saveCPUField(*cpu_gauge); + profileGaugeFix.TPSTOP(QUDA_PROFILE_D2H); - GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_TOTAL); + profileGaugeFix.TPSTOP(QUDA_PROFILE_TOTAL); - if (param->make_resident_gauge) { + if (g_param->make_resident_gauge) { if (gaugePrecise != nullptr) delete gaugePrecise; - gaugePrecise = cudaInGauge; + gaugePrecise = device_gauge; } else { - delete cudaInGauge; + delete device_gauge; } + delete cpu_gauge; if (timeinfo) { - timeinfo[0] = GaugeFixFFTQuda.Last(QUDA_PROFILE_H2D); - timeinfo[1] = GaugeFixFFTQuda.Last(QUDA_PROFILE_COMPUTE); - timeinfo[2] = GaugeFixFFTQuda.Last(QUDA_PROFILE_D2H); + timeinfo[0] = profileGaugeFix.Last(QUDA_PROFILE_H2D); + timeinfo[1] = profileGaugeFix.Last(QUDA_PROFILE_COMPUTE); + timeinfo[2] = profileGaugeFix.Last(QUDA_PROFILE_D2H); } return 0; diff --git a/lib/milc_interface.cpp b/lib/milc_interface.cpp index 64ad1e4efc..04862243b6 100644 --- a/lib/milc_interface.cpp +++ b/lib/milc_interface.cpp @@ -2692,15 +2692,22 @@ void qudaCloverMultishiftInvert(int external_precision, int quda_precision, int void qudaGaugeFixingOVR(int precision, unsigned int gauge_dir, int Nsteps, int verbose_interval, double relax_boost, double tolerance, unsigned int reunit_interval, unsigned int stopWtheta, void *milc_sitelink) { - QudaGaugeParam qudaGaugeParam = newMILCGaugeParam(localDim, - (precision==1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION, - QUDA_SU3_LINKS); - qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_NO; + QudaGaugeParam gauge_param + = newMILCGaugeParam(localDim, (precision == 1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION, QUDA_SU3_LINKS); + gauge_param.reconstruct = QUDA_RECONSTRUCT_NO; //qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_12; + QudaGaugeFixParam fix_param = newQudaGaugeFixParam(); + fix_param.gauge_dir = gauge_dir; + fix_param.maxiter = Nsteps; + fix_param.verbosity_interval = verbose_interval; + fix_param.ovr_relaxation_boost = relax_boost; + fix_param.tolerance = tolerance; + fix_param.reunit_interval = reunit_interval; + fix_param.theta_condition = stopWtheta == 0 ? QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE; + double timeinfo[3]; - computeGaugeFixingOVRQuda(milc_sitelink, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta, \ - &qudaGaugeParam, timeinfo); + computeGaugeFixingQuda(milc_sitelink, &gauge_param, &fix_param, timeinfo); printfQuda("Time H2D: %lf\n", timeinfo[0]); printfQuda("Time to Compute: %lf\n", timeinfo[1]); @@ -2719,16 +2726,22 @@ void qudaGaugeFixingFFT( int precision, void* milc_sitelink ) { - QudaGaugeParam qudaGaugeParam = newMILCGaugeParam(localDim, - (precision==1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION, - QUDA_GENERAL_LINKS); - qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_NO; + QudaGaugeParam gauge_param + = newMILCGaugeParam(localDim, (precision == 1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION, QUDA_GENERAL_LINKS); + gauge_param.reconstruct = QUDA_RECONSTRUCT_NO; //qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_12; + QudaGaugeFixParam fix_param = newQudaGaugeFixParam(); + fix_param.gauge_dir = gauge_dir; + fix_param.maxiter = Nsteps; + fix_param.verbosity_interval = verbose_interval; + fix_param.fft_alpha = alpha; + fix_param.tolerance = tolerance; + fix_param.theta_condition = stopWtheta == 0 ? QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE; + fix_param.fft_autotune = autotune == 0 ? QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE; double timeinfo[3]; - computeGaugeFixingFFTQuda(milc_sitelink, gauge_dir, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta, \ - &qudaGaugeParam, timeinfo); + computeGaugeFixingQuda(milc_sitelink, &gauge_param, &fix_param, timeinfo); printfQuda("Time H2D: %lf\n", timeinfo[0]); printfQuda("Time to Compute: %lf\n", timeinfo[1]); diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp index 0ab7163b3d..e4ef084b2d 100644 --- a/tests/gauge_alg_test.cpp +++ b/tests/gauge_alg_test.cpp @@ -33,20 +33,22 @@ using namespace quda; //***********************************************************// bool execute = true; +// Gauge IO related bool gauge_load; bool gauge_store; - void *host_gauge[4]; +// Define the command line options and option group for this test int gf_gauge_dir = 4; int gf_maxiter = 10000; int gf_verbosity_interval = 100; double gf_ovr_relaxation_boost = 1.5; double gf_fft_alpha = 0.8; +bool gf_fft_autotune = true; int gf_reunit_interval = 10; double gf_tolerance = 1e-6; bool gf_theta_condition = false; -bool gf_fft_autotune = false; +QudaGaugeFixType gf_fix_type = QUDA_GAUGEFIX_TYPE_OVR; void display_test_info() { @@ -54,8 +56,9 @@ void display_test_info() switch (test_type) { case 0: printfQuda("\n Google testing\n"); break; - case 1: printfQuda("\nOVR gauge fix\n"); break; - case 2: printfQuda("\nFFT gauge fix\n"); break; + case 1: + printfQuda("\n%s %s gauge fix\n", get_gaugefix_str(gf_fix_type), gf_gauge_dir == 4 ? "Landau" : "Coulomb"); + break; default: errorQuda("Undefined test type %d given", test_type); } @@ -69,22 +72,66 @@ void display_test_info() dimPartitioned(3)); } +void add_gaugefix_option_group(std::shared_ptr quda_app) +{ + CLI::TransformPairs fix_type_map {{"ovr", QUDA_GAUGEFIX_TYPE_OVR}, {"fft", QUDA_GAUGEFIX_TYPE_FFT}}; + + // Option group for gauge fixing related options + auto opgroup = quda_app->add_option_group("gaugefix", "Options controlling gauge fixing tests"); + opgroup->add_option("--gf-dir", gf_gauge_dir, + "The orthogonal direction of the gauge fixing, 3=Coulomb, 4=Landau. (default 4)"); + opgroup->add_option("--gf-maxiter", gf_maxiter, + "The maximun number of gauge fixing iterations to be applied (default 10000)"); + opgroup->add_option("--gf-verbosity-interval", gf_verbosity_interval, + "Print the gauge fixing progress every N steps (default 100)"); + opgroup->add_option("--gf-ovr-relaxation-boost", gf_ovr_relaxation_boost, + "The overrelaxation boost parameter for the overrelaxation method (default 1.5)"); + opgroup->add_option("--gf-fft-alpha", gf_fft_alpha, "The Alpha parameter in the FFT method (default 0.8)"); + opgroup->add_option("--gf-fft-autotune", gf_fft_autotune, + "Autotune the Alpha parameter in the FFT method (default true)"); + opgroup->add_option("--gf-reunit-interval", gf_reunit_interval, + "Reunitarise the gauge field every N steps (default 10)"); + opgroup->add_option("--gf-tol", gf_tolerance, "The tolerance of the gauge fixing quality (default 1e-6)"); + opgroup->add_option( + "--gf-theta-condition", gf_theta_condition, + "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)"); + opgroup->add_option("--gf-fix-type", gf_fix_type, "The type of algorithm to use for fixing (default ovr)") + ->transform(CLI::QUDACheckedTransformer(fix_type_map)); +} + +void setGaugeFixParam(QudaGaugeFixParam &fix_param) +{ + fix_param.fix_type = gf_fix_type; + fix_param.gauge_dir = gf_gauge_dir; + fix_param.maxiter = gf_maxiter; + fix_param.verbosity_interval = gf_verbosity_interval; + fix_param.reunit_interval = gf_reunit_interval; + fix_param.tolerance = gf_tolerance; + fix_param.ovr_relaxation_boost = gf_ovr_relaxation_boost; + fix_param.fft_alpha = gf_fft_alpha; + fix_param.fft_autotune = gf_fft_alpha ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; + fix_param.theta_condition = gf_theta_condition ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; + fix_param.precision = cuda_prec; +} + class GaugeAlgTest : public ::testing::Test { -protected: - QudaGaugeParam param; - Timer a0, a1; - double2 detu; - double3 plaq; - GaugeField *U; +protected: + QudaGaugeParam gauge_param; + QudaGaugeFixParam fix_param; + + host_timer_t host_timer_1, host_timer_2; + double2 det_u; + double2 trace_u; + double3 plaq_u; + cudaGaugeField *U; int nsteps; int nhbsteps; int novrsteps; bool coldstart; double beta_value; - - bool unit_test; + RNG *randstates; void SetReunitarizationConsts() { @@ -97,6 +144,13 @@ class GaugeAlgTest : public ::testing::Test setUnitarizeLinksConstants(unitarize_eps, max_error, reunit_allow_svd, reunit_svd_only, svd_rel_error, svd_abs_error); } + bool checkDimsPartitioned() + { + if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3)) + return true; + return false; + } + bool comparePlaquette(double3 a, double3 b) { double a0, a1, a2; @@ -104,56 +158,55 @@ class GaugeAlgTest : public ::testing::Test a1 = std::abs(a.y - b.y); a2 = std::abs(a.z - b.z); double prec_val = 1.0e-5; - if (prec == QUDA_DOUBLE_PRECISION) prec_val = gf_tolerance * 1e2; + if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1e-10; return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)); } - bool CheckDeterminant(double2 detu) + bool checkDeterminant(double2 det) { - double prec_val = 5e-8; - if (prec == QUDA_DOUBLE_PRECISION) prec_val = gf_tolerance * 1e2; - return (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val); + double prec_val = 1.0e-5; + if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1e-8; + return (std::abs(1.0 - det.x) < prec_val && std::abs(det.y) < prec_val); } virtual void SetUp() { if (execute) { - setVerbosity(QUDA_VERBOSE); - param = newQudaGaugeParam(); // Setup gauge container. - setWilsonGaugeParam(param); - param.t_boundary = QUDA_PERIODIC_T; + gauge_param = newQudaGaugeParam(); + setWilsonGaugeParam(gauge_param); + gauge_param.t_boundary = QUDA_PERIODIC_T; // Reunitarization setup int *num_failures_h = (int *)mapped_malloc(sizeof(int)); int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h); SetReunitarizationConsts(); - a0.start(); + host_timer_1.start(); // If no field is loaded, create a physical quenched field on the device if (!gauge_load) { - GaugeFieldParam gParam(param); - gParam.ghostExchange = QUDA_GHOST_EXCHANGE_EXTENDED; - gParam.create = QUDA_NULL_FIELD_CREATE; - gParam.reconstruct = link_recon; - gParam.setPrecision(prec, true); + GaugeFieldParam device_gauge_param(gauge_param); + device_gauge_param.ghostExchange = QUDA_GHOST_EXCHANGE_EXTENDED; + device_gauge_param.create = QUDA_NULL_FIELD_CREATE; + device_gauge_param.reconstruct = link_recon; + device_gauge_param.setPrecision(cuda_prec, true); for (int d = 0; d < 4; d++) { - if (comm_dim_partitioned(d)) gParam.r[d] = 2; - gParam.x[d] += 2 * gParam.r[d]; + if (comm_dim_partitioned(d)) device_gauge_param.r[d] = 2; + device_gauge_param.x[d] += 2 * device_gauge_param.r[d]; } - U = new cudaGaugeField(gParam); + U = new cudaGaugeField(device_gauge_param); - RNG randstates(*U, 1234); + RNG randstates(*U, quda_seed); nsteps = heatbath_num_steps; nhbsteps = heatbath_num_heatbath_per_step; novrsteps = heatbath_num_overrelax_per_step; coldstart = heatbath_coldstart; beta_value = heatbath_beta_value; - a1.start(); + host_timer_2.start(); if (coldstart) InitGaugeField(*U); @@ -169,37 +222,37 @@ class GaugeAlgTest : public ::testing::Test unitarizeLinks(*U, num_failures_d); qudaDeviceSynchronize(); if (*num_failures_h > 0) errorQuda("Error in the unitarization (%d errors)", *num_failures_h); - plaq = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); + plaq_u = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_u.x, plaq_u.y, plaq_u.z); } - a1.stop(); - printfQuda("Time Monte -> %.6f s\n", a1.last()); + host_timer_2.stop(); + printfQuda("Time Monte -> %.6f s\n", host_timer_2.last()); } else { // If a field is loaded, create a device field and copy printfQuda("Copying gauge field from host\n"); - param.location = QUDA_CPU_FIELD_LOCATION; - GaugeFieldParam gauge_field_param(param, host_gauge); - gauge_field_param.ghostExchange = QUDA_GHOST_EXCHANGE_NO; - GaugeField *host = GaugeField::Create(gauge_field_param); + gauge_param.location = QUDA_CPU_FIELD_LOCATION; + GaugeFieldParam host_gauge_param(gauge_param, host_gauge); + host_gauge_param.ghostExchange = QUDA_GHOST_EXCHANGE_NO; + GaugeField *host = GaugeField::Create(host_gauge_param); // switch the parameters for creating the mirror precise cuda gauge field - gauge_field_param.create = QUDA_NULL_FIELD_CREATE; - gauge_field_param.reconstruct = param.reconstruct; - gauge_field_param.setPrecision(param.cuda_prec, true); + host_gauge_param.create = QUDA_NULL_FIELD_CREATE; + host_gauge_param.reconstruct = gauge_param.reconstruct; + host_gauge_param.setPrecision(gauge_param.cuda_prec, true); if (comm_partitioned()) { int R[4] = {0, 0, 0, 0}; for (int d = 0; d < 4; d++) if (comm_dim_partitioned(d)) R[d] = 2; static TimeProfile GaugeFix("GaugeFix"); - cudaGaugeField *tmp = new cudaGaugeField(gauge_field_param); + cudaGaugeField *tmp = new cudaGaugeField(host_gauge_param); tmp->copy(*host); U = createExtendedGauge(*tmp, R, GaugeFix); delete tmp; } else { - U = new cudaGaugeField(gauge_field_param); + U = new cudaGaugeField(host_gauge_param); U->copy(*host); } @@ -210,20 +263,29 @@ class GaugeAlgTest : public ::testing::Test unitarizeLinks(*U, num_failures_d); qudaDeviceSynchronize(); if (*num_failures_h > 0) errorQuda("Error in the unitarization (%d errors)", *num_failures_h); - - plaq = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); } - // If a specific test type is requested, perfrom it now and then + // Unfixed Gauge data + plaq_u = plaquette(*U); + det_u = getLinkDeterminant(*U); + trace_u = getLinkTrace(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_u.x, plaq_u.y, plaq_u.z); + printfQuda("Det: %.16e:%.16e\n", det_u.x, det_u.y); + printfQuda("Tr: %.16e:%.16e\n", trace_u.x / 3.0, trace_u.y / 3.0); + + // If a specific test type is requested, perform it now and then // turn off all Google tests in the tear down. switch (test_type) { - case 0: - // Do the Google testing + case 0: // Do the Google testing + // Set gauge fixing params from the command line + // and adjust for this test type + fix_param = newQudaGaugeFixParam(); + setGaugeFixParam(fix_param); + break; + case 1: // Do a specific test + run(); break; - case 1: run_ovr(); break; - case 2: run_fft(); break; - default: errorQuda("Invalid test type %d", test_type); + default: errorQuda("Invalid test type %d ", test_type); } host_free(num_failures_h); @@ -233,55 +295,92 @@ class GaugeAlgTest : public ::testing::Test virtual void TearDown() { if (execute) { - detu = getLinkDeterminant(*U); - double2 tru = getLinkTrace(*U); - printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y); - printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0); + + // Compare gauge fixed data with original data + auto plaq_gf = plaquette(*U); + auto det_gf = getLinkDeterminant(*U); + auto trace_gf = getLinkTrace(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_u.x, plaq_u.y, plaq_u.z); + printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + printfQuda("Det: %.16e, %.16e\n", det_u.x, det_u.y); + printfQuda("Det GF: %.16e, %.16e\n", det_gf.x, det_gf.y); + printfQuda("Trace: %.16e, %.16e\n", trace_u.x / 3.0, trace_u.y / 3.0); + printfQuda("Trace GF: %.16e, %.16e\n", trace_gf.x / 3.0, trace_gf.y / 3.0); + + // As an observable, the plaquette value must remain invariant after + // gauge fixing. + ASSERT_TRUE(comparePlaquette(plaq_u, plaq_gf)); + + // The determinant of any SU(N) gauge field element must be (1.0,0.0) to + // machine precision + ASSERT_TRUE(checkDeterminant(det_gf)); delete U; // Release all temporary memory used for data exchange between GPUs in multi-GPU mode PGaugeExchangeFree(); - a0.stop(); - printfQuda("Time -> %.6f s\n", a0.last()); + host_timer_1.stop(); + printfQuda("Time -> %.6f s\n", host_timer_1.last()); } // If we performed a specific instance, switch off the // Google testing. if (test_type != 0) execute = false; + saveTuneCache(); } - virtual void run_ovr() + virtual void run() { if (execute) { - gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, - gf_reunit_interval, gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - saveTuneCache(); + // Set gauge fixing params from the command line + fix_param = newQudaGaugeFixParam(); + setGaugeFixParam(fix_param); + + printfQuda("%s gauge fixing with %s method\n", fix_param.gauge_dir == 4 ? "Landau" : "Coulomb", + get_gaugefix_str(fix_param.fix_type)); + + // Setup CPU gauge container. + gauge_param = newQudaGaugeParam(); + setWilsonGaugeParam(gauge_param); + gauge_param.t_boundary = QUDA_PERIODIC_T; + gauge_param.location = QUDA_CPU_FIELD_LOCATION; + + void *cpu_gauge[4]; + for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * cpu_prec); } + + GaugeFieldParam param(gauge_param); + param.ghostExchange = QUDA_GHOST_EXCHANGE_NO; + param.create = QUDA_NULL_FIELD_CREATE; + param.link_type = gauge_param.type; + param.reconstruct = gauge_param.reconstruct; + param.setPrecision(cuda_prec, true); + + auto *gauge = new cudaGaugeField(param); + + // Copy the target U field (extended) into regular GPU field, then + // save to a CPU field. This is done to test the CPU interface function + // and instructs the user how to use void pointers for the gauge data, + // and the gauge_param container. + copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION); + saveGaugeFieldQuda((void *)cpu_gauge, (void *)gauge, &gauge_param); + delete gauge; + + // Compute gauge fixing via interface + computeGaugeFixingQuda(cpu_gauge, &gauge_param, &fix_param, nullptr); + + // cpu_gauge now contains the fixed gauge on the CPU. We now load that gauge + // to the device for inspection in the TearDown. + GaugeFieldParam fixed_param(gauge_param, cpu_gauge); + auto *fixed_cpu_gauge = new cpuGaugeField(fixed_param); + + // Copy the CPU field to U. + U->loadCPUField(*fixed_cpu_gauge); + + for (int dir = 0; dir < 4; dir++) host_free(cpu_gauge[dir]); + delete fixed_cpu_gauge; + // Save if output string is specified if (gauge_store) save_gauge(); - } - } - virtual void run_fft() - { - if (execute) { - if (!comm_partitioned()) { - printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); - - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - saveTuneCache(); - // Save if output string is specified - if (gauge_store) save_gauge(); - } else { - errorQuda("Cannot perform FFT gauge fixing with MPI partitions."); - } + saveTuneCache(); } } @@ -289,20 +388,18 @@ class GaugeAlgTest : public ::testing::Test { printfQuda("Saving the gauge field to file %s\n", gauge_outfile); - QudaGaugeParam gauge_param = newQudaGaugeParam(); - setWilsonGaugeParam(gauge_param); - void *cpu_gauge[4]; for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * gauge_param.cpu_prec); } - GaugeFieldParam gParam(param); - gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; - gParam.create = QUDA_NULL_FIELD_CREATE; - gParam.link_type = param.type; - gParam.reconstruct = param.reconstruct; - gParam.setPrecision(gParam.Precision(), true); + GaugeFieldParam param(gauge_param); + param.ghostExchange = QUDA_GHOST_EXCHANGE_NO; + param.create = QUDA_NULL_FIELD_CREATE; + param.link_type = gauge_param.type; + param.reconstruct = gauge_param.reconstruct; + param.setPrecision(param.Precision(), true); - cudaGaugeField *gauge = new cudaGaugeField(gParam); + cudaGaugeField *gauge; + gauge = new cudaGaugeField(param); // copy into regular field copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION); @@ -319,21 +416,22 @@ class GaugeAlgTest : public ::testing::Test TEST_F(GaugeAlgTest, Generation) { if (execute && !gauge_load) { - detu = getLinkDeterminant(*U); - ASSERT_TRUE(CheckDeterminant(detu)); + // Assert that the generated gauge is + // on the SU(N) manifold + det_u = getLinkDeterminant(*U); + ASSERT_TRUE(checkDeterminant(det_u)); } } TEST_F(GaugeAlgTest, Landau_Overrelaxation) { if (execute) { - printfQuda("Landau gauge fixing with overrelaxation\n"); - gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, - gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + printfQuda("Landau gauge fixing with overrelaxation method\n"); + + fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR; + fix_param.gauge_dir = 4; + + gaugeFixingOVR(*U, fix_param); saveTuneCache(); } } @@ -341,13 +439,12 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation) TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) { if (execute) { - printfQuda("Coulomb gauge fixing with overrelaxation\n"); - gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, - gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + printfQuda("Coulomb gauge fixing with overrelaxation method\n"); + + fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR; + fix_param.gauge_dir = 3; + + gaugeFixingOVR(*U, fix_param); saveTuneCache(); } } @@ -356,13 +453,12 @@ TEST_F(GaugeAlgTest, Landau_FFT) { if (execute) { if (!comm_partitioned()) { - printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + printfQuda("Landau gauge fixing with steepest descent method with FFT\n"); + + fix_param.fix_type = QUDA_GAUGEFIX_TYPE_FFT; + fix_param.gauge_dir = 4; + + gaugeFixingFFT(*U, fix_param); saveTuneCache(); } } @@ -372,42 +468,17 @@ TEST_F(GaugeAlgTest, Coulomb_FFT) { if (execute) { if (!comm_partitioned()) { - printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + printfQuda("Coulomb gauge fixing with steepest descent method with FFT\n"); + + fix_param.fix_type = QUDA_GAUGEFIX_TYPE_FFT; + fix_param.gauge_dir = 3; + + gaugeFixingFFT(*U, fix_param); saveTuneCache(); } } } -void add_gaugefix_option_group(std::shared_ptr quda_app) -{ - // Option group for gauge fixing related options - auto opgroup = quda_app->add_option_group("gaugefix", "Options controlling gauge fixing tests"); - opgroup->add_option("--gf-dir", gf_gauge_dir, - "The orthogonal direction of teh gauge fixing, 3=Coulomb, 4=Landau. (default 4)"); - opgroup->add_option("--gf-maxiter", gf_maxiter, - "The maximun number of gauge fixing iterations to be applied (default 10000) "); - opgroup->add_option("--gf-verbosity-interval", gf_verbosity_interval, - "Print the gauge fixing progress every N steps (default 100)"); - opgroup->add_option("--gf-ovr-relaxation-boost", gf_ovr_relaxation_boost, - "The overrelaxation boost parameter for the overrelaxation method (default 1.5)"); - opgroup->add_option("--gf-fft-alpha", gf_fft_alpha, "The Alpha parameter in the FFT method (default 0.8)"); - opgroup->add_option("--gf-reunit-interval", gf_reunit_interval, - "Reunitarise the gauge field every N steps (default 10)"); - opgroup->add_option("--gf-tol", gf_tolerance, "The tolerance of the gauge fixing quality (default 1e-6)"); - opgroup->add_option( - "--gf-theta-condition", gf_theta_condition, - "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)"); - opgroup->add_option( - "--gf-fft-autotune", gf_fft_autotune, - "In the FFT method, automatically adjust the alpha parameter if the quality begins to diverge (default false)"); -} - int main(int argc, char **argv) { // initalize google test, includes command line options @@ -429,21 +500,25 @@ int main(int argc, char **argv) // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) initComms(argc, argv, gridsize_from_cmdline); - QudaGaugeParam gauge_param = newQudaGaugeParam(); - if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; - if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; - + setVerbosity(verbosity); + setQudaPrecisions(); setWilsonGaugeParam(gauge_param); + gauge_param.t_boundary = QUDA_PERIODIC_T; setDims(gauge_param.X); + // call srand() with a rank-dependent seed + initRand(); + // initialize the QUDA library + initQuda(device_ordinal); + display_test_info(); + // If we are passing a gauge field to the test, we must allocate host memory. + // If no gauge is passed, we generate a quenched field on the device. gauge_load = strcmp(latfile, ""); gauge_store = strcmp(gauge_outfile, ""); - // If we are passing a gauge field to the test, we must allocate host memory. - // If no gauge is passed, we generate a quenched field on the device. if (gauge_load) { printfQuda("Loading gauge field from host\n"); for (int dir = 0; dir < 4; dir++) { @@ -452,11 +527,8 @@ int main(int argc, char **argv) constructHostGaugeField(host_gauge, gauge_param, argc, argv); } - // call srand() with a rank-dependent seed - initRand(); - - // initialize the QUDA library - initQuda(device_ordinal); + // initalize google test, includes command line options + ::testing::InitGoogleTest(&argc, argv); // Ensure gtest prints only from rank 0 ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners(); @@ -464,7 +536,6 @@ int main(int argc, char **argv) // return code for google test int test_rc = RUN_ALL_TESTS(); - if (gauge_load) { // release memory for (int dir = 0; dir < 4; dir++) host_free(host_gauge[dir]); diff --git a/tests/heatbath_test.cpp b/tests/heatbath_test.cpp index f87005c063..300f0b1012 100644 --- a/tests/heatbath_test.cpp +++ b/tests/heatbath_test.cpp @@ -56,6 +56,7 @@ int main(int argc, char **argv) { // command line options auto app = make_app(); + add_heatbath_option_group(app); try { app->parse(argc, argv); } catch (const CLI::ParseError &e) { @@ -124,7 +125,7 @@ int main(int argc, char **argv) for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir]; cudaGaugeField *gaugeEx = new cudaGaugeField(gParamEx); // CURAND random generator initialization - RNG *randstates = new RNG(*gauge, 1234); + RNG *randstates = new RNG(*gauge, quda_seed); int nsteps = heatbath_num_steps; int nwarm = heatbath_warmup_steps; diff --git a/tests/invert_test.cpp b/tests/invert_test.cpp index e559f04520..1ba40618c9 100644 --- a/tests/invert_test.cpp +++ b/tests/invert_test.cpp @@ -304,7 +304,7 @@ int main(int argc, char **argv) std::vector gflops(Nsrc); std::vector iter(Nsrc); - auto *rng = new quda::RNG(*check, 1234); + auto *rng = new quda::RNG(*check, quda_seed); for (int i = 0; i < Nsrc; i++) { // Populate the host spinor with random numbers. diff --git a/tests/multigrid_evolve_test.cpp b/tests/multigrid_evolve_test.cpp index de6e3f22d9..fb63443ae8 100644 --- a/tests/multigrid_evolve_test.cpp +++ b/tests/multigrid_evolve_test.cpp @@ -244,7 +244,7 @@ int main(int argc, char **argv) obs_param.compute_qcharge = QUDA_BOOLEAN_TRUE; // CURAND random generator initialization - RNG *randstates = new RNG(*gauge, 1234); + RNG *randstates = new RNG(*gauge, quda_seed); int nsteps = 10; int nhbsteps = 1; int novrsteps = 1; diff --git a/tests/staggered_invert_test.cpp b/tests/staggered_invert_test.cpp index b2f4e96588..a0a39c7adb 100644 --- a/tests/staggered_invert_test.cpp +++ b/tests/staggered_invert_test.cpp @@ -299,7 +299,7 @@ int main(int argc, char **argv) //----------------------------------------------------------------------------------- // Prepare rng - auto *rng = new quda::RNG(*ref, 1234); + auto *rng = new quda::RNG(*ref, quda_seed); // Performance measuring std::vector time(Nsrc); diff --git a/tests/su3_test.cpp b/tests/su3_test.cpp index c62ca10a09..44b12bb528 100644 --- a/tests/su3_test.cpp +++ b/tests/su3_test.cpp @@ -94,7 +94,6 @@ void add_su3_option_group(std::shared_ptr quda_app) opgroup->add_option("--su3-wflow-type", wflow_type, "The type of action to use in the wilson flow (default wilson)") ->transform(CLI::QUDACheckedTransformer(wflow_type_map)); - ; opgroup->add_option("--su3-measurement-interval", measurement_interval, "Measure the field energy and topological charge every Nth step (default 5) "); @@ -117,28 +116,30 @@ int main(int argc, char **argv) // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) initComms(argc, argv, gridsize_from_cmdline); - QudaGaugeParam gauge_param = newQudaGaugeParam(); if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; + initQuda(device_ordinal); + setVerbosity(verbosity); + + // call srand() with a rank-dependent seed + initRand(); + + QudaGaugeParam gauge_param = newQudaGaugeParam(); setWilsonGaugeParam(gauge_param); gauge_param.t_boundary = QUDA_PERIODIC_T; setDims(gauge_param.X); - void *gauge[4], *new_gauge[4]; + // All user inputs now defined + display_test_info(); + // *** QUDA parameters begin here. + void *gauge[4], *new_gauge[4]; for (int dir = 0; dir < 4; dir++) { gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); new_gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); } - initQuda(device_ordinal); - - setVerbosity(verbosity); - - // call srand() with a rank-dependent seed - initRand(); - constructHostGaugeField(gauge, gauge_param, argc, argv); // Load the gauge field to the device loadGaugeQuda((void *)gauge, &gauge_param); @@ -151,9 +152,6 @@ int main(int argc, char **argv) #ifdef GPU_GAUGE_TOOLS - // All user inputs now defined - display_test_info(); - // Topological charge and gauge energy double q_charge_check = 0.0; // Size of floating point data diff --git a/tests/utils/command_line_params.cpp b/tests/utils/command_line_params.cpp index 5dd4ab8665..4a500bdab7 100644 --- a/tests/utils/command_line_params.cpp +++ b/tests/utils/command_line_params.cpp @@ -9,6 +9,8 @@ int device_ordinal = -1; int device_ordinal = 0; #endif +int quda_seed = 1234; + int rank_order; std::array gridsize_from_cmdline = {1, 1, 1, 1}; auto &grid_x = gridsize_from_cmdline[0]; @@ -501,6 +503,8 @@ std::shared_ptr make_app(std::string app_description, std::string app_n quda_app->add_option("--save-gauge", gauge_outfile, "Save gauge field \" file \" for the test (requires QIO, heatbath test only)"); + quda_app->add_option("--seed", quda_seed, "Seed value for use in test suite (default 1234)")->check(CLI::PositiveNumber); + quda_app->add_option("--solution-pipeline", solution_accumulator_pipeline, "The pipeline length for fused solution accumulation (default 0, no pipelining)"); diff --git a/tests/utils/command_line_params.h b/tests/utils/command_line_params.h index 10c8d775f2..77ac51f4cf 100644 --- a/tests/utils/command_line_params.h +++ b/tests/utils/command_line_params.h @@ -151,6 +151,7 @@ template std::string inline get_string(CLI::TransformPairs &map, // } // parameters +extern int quda_seed; extern int device_ordinal; extern int rank_order; extern bool native_blas_lapack; diff --git a/tests/utils/host_utils.cpp b/tests/utils/host_utils.cpp index f34bf7bf92..982ebd66cd 100644 --- a/tests/utils/host_utils.cpp +++ b/tests/utils/host_utils.cpp @@ -69,6 +69,7 @@ void setQudaPrecisions() if (prec_eigensolver == QUDA_INVALID_PRECISION) prec_eigensolver = prec_sloppy; if (prec_precondition == QUDA_INVALID_PRECISION) prec_precondition = prec_sloppy; if (prec_null == QUDA_INVALID_PRECISION) prec_null = prec_precondition; + if (prec_refinement_sloppy == QUDA_INVALID_PRECISION) prec_refinement_sloppy = prec_precondition; if (smoother_halo_prec == QUDA_INVALID_PRECISION) smoother_halo_prec = prec_null; if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; if (link_recon_precondition == QUDA_RECONSTRUCT_INVALID) link_recon_precondition = link_recon_sloppy; @@ -319,7 +320,7 @@ void initRand() MPI_Comm_rank(MPI_COMM_WORLD, &rank); #endif - srand(17 * rank + 137); + srand(17 * rank + 137 + quda_seed); } void setDims(int *X) @@ -979,8 +980,11 @@ int x4_from_full_index(int i) template void applyGaugeFieldScaling(Float **gauge, int Vh, QudaGaugeParam *param) { // Apply spatial scaling factor (u0) to spatial links - for (int d = 0; d < 3; d++) { - for (int i = 0; i < gauge_site_size * Vh * 2; i++) { gauge[d][i] /= param->anisotropy; } + if (param->anisotropy != 1.0) { + double aniso_inv = 1.0 / param->anisotropy; + for (int d = 0; d < 3; d++) { + for (int i = 0; i < gauge_site_size * Vh * 2; i++) { gauge[d][i] *= aniso_inv; } + } } // Apply boundary conditions to temporal links diff --git a/tests/utils/misc.cpp b/tests/utils/misc.cpp index 3747b08605..714b75f862 100644 --- a/tests/utils/misc.cpp +++ b/tests/utils/misc.cpp @@ -151,6 +151,19 @@ const char *get_contract_str(QudaContractType type) return ret; } +const char *get_gaugefix_str(QudaGaugeFixType type) +{ + const char *ret; + + switch (type) { + case QUDA_GAUGEFIX_TYPE_OVR: ret = "Overrelaxation"; break; + case QUDA_GAUGEFIX_TYPE_FFT: ret = "FFT"; break; + default: ret = "unknown"; break; + } + + return ret; +} + const char *get_eig_spectrum_str(QudaEigSpectrumType type) { const char *ret; diff --git a/tests/utils/misc.h b/tests/utils/misc.h index 5c35480840..bc4c8a2e4b 100644 --- a/tests/utils/misc.h +++ b/tests/utils/misc.h @@ -21,6 +21,7 @@ const char *get_eig_type_str(QudaEigType type); const char *get_ritz_location_str(QudaFieldLocation type); const char *get_memory_type_str(QudaMemoryType type); const char *get_contract_str(QudaContractType type); +const char *get_gaugefix_str(QudaGaugeFixType type); #define XUP 0 #define YUP 1