diff --git a/include/enum_quda.h b/include/enum_quda.h
index 3f83ab6a9d..f4c91406b5 100644
--- a/include/enum_quda.h
+++ b/include/enum_quda.h
@@ -549,6 +549,12 @@ typedef enum QudaWFlowType_s {
   QUDA_WFLOW_TYPE_INVALID = QUDA_INVALID_ENUM
 } QudaWFlowType;
 
+typedef enum QudaGaugeFixType_s {
+  QUDA_GAUGEFIX_TYPE_OVR = 0,
+  QUDA_GAUGEFIX_TYPE_FFT = 1,
+  QUDA_GAUGEFIX_TYPE_INVALID = QUDA_INVALID_ENUM
+} QudaGaugeFixType;
+
 // Allows to choose an appropriate external library
 typedef enum QudaExtLibType_s {
   QUDA_CUSOLVE_EXTLIB,
diff --git a/include/enum_quda_fortran.h b/include/enum_quda_fortran.h
index 867de94d7d..4f9660cafd 100644
--- a/include/enum_quda_fortran.h
+++ b/include/enum_quda_fortran.h
@@ -492,6 +492,16 @@
 #define QUDA_CONTRACT_GAMMA_S34 15
 #define QUDA_CONTRACT_GAMMA_INVALID QUDA_INVALID_ENUM
 
+#define QudaWFlowType integer(4)
+#define QUDA_WFLOW_TYPE_WILSON 0
+#define QUDA_WFLOW_TYPE_SYMANZIK 1
+#define QUDA_WFLOW_TYPE_INVALID QUDA_INVALID_ENUM
+
+#define QudaGaugeFixType integer(4)
+#define QUDA_GAUGEFIX_TYPE_OVR 0
+#define QUDA_GAUGEFIX_TYPE_FFT 1
+#define QUDA_GAUGEFIX_TYPE_INVALID QUDA_INVALID_ENUM
+
 #define QudaExtLibType integer(4)
 #define QUDA_CUSOLVE_EXTLIB 0
 #define QUDA_EIGEN_EXTLIB 1
diff --git a/include/gauge_tools.h b/include/gauge_tools.h
index e32f38f5b1..e2393cf239 100644
--- a/include/gauge_tools.h
+++ b/include/gauge_tools.h
@@ -113,34 +113,16 @@ namespace quda
   /**
    * @brief Gauge fixing with overrelaxation with support for single and multi GPU.
    * @param[in,out] data, quda gauge field
-   * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing
-   * @param[in] Nsteps, maximum number of steps to perform gauge fixing
-   * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
-   * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7.
-   * @param[in] tolerance, torelance value to stop the method, if this
-   * value is zero then the method stops when iteration reachs the
-   * maximum number of steps defined by Nsteps
-   * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this
-   * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
+   * @param[in] fix_param Parameter struct that defines the gauge fixing
    */
-  void gaugeFixingOVR(GaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval,
-                      const double relax_boost, const double tolerance, const int reunit_interval, const int stopWtheta);
+  void gaugeFixingOVR(GaugeField &data, QudaGaugeFixParam &fix_param);
 
   /**
    * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only.
    * @param[in,out] data, quda gauge field
-   * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing
-   * @param[in] Nsteps, maximum number of steps to perform gauge fixing
-   * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
-   * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08
-   * @param[in] autotune, 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value
-   * @param[in] tolerance, torelance value to stop the method, if this
-   * value is zero then the method stops when iteration reachs the
-   * maximum number of steps defined by Nsteps
-   * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
+   * @param[in] fix_param Parameter struct that defines the gauge fixing
    */
-  void gaugeFixingFFT(GaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval,
-                      const double alpha, const int autotune, const double tolerance, const int stopWtheta);
+  void gaugeFixingFFT(GaugeField &data, QudaGaugeFixParam &fix_param);
 
   /**
      @brief Compute the Fmunu tensor
diff --git a/include/quda.h b/include/quda.h
index 9ba0035988..ced9d2dc25 100644
--- a/include/quda.h
+++ b/include/quda.h
@@ -778,6 +778,24 @@ extern "C" {
     QudaBLASDataOrder data_order; /**< Specifies if using Row or Column major */
   } QudaBLASParam;
 
+  typedef struct QudaGaugeFixParam_s {
+    size_t struct_size; /**< Size of this struct in bytes.  Used to ensure that the host application and QUDA see the same struct size */
+
+    QudaGaugeFixType fix_type;   /**< The aglorithm to use for gauge fixing */
+    int gauge_dir;               /**< The orthogonal direction of the gauge fixing, 3=Coulomb, 4=Landau. (default 4) */
+    int maxiter;                 /**< The maximum number of gauge fixing iterations to be applied (default 10000) */
+    int verbosity_interval;      /**< Print the gauge fixing progress every N steps (default 100) */
+    double ovr_relaxation_boost; /**< The overrelaxation boost parameter for the overrelaxation method (default 1.5) */
+    double fft_alpha;            /**< The Alpha parameter in the FFT method (default 0.8) */
+    QudaBoolean fft_autotune;    /**< Autotune the Alpha parameter in the FFT method (default true) */
+    int reunit_interval;         /**< Reunitarise the gauge field every N steps (default 10) */
+    double tolerance;            /**< The tolerance of the gauge fixing quality (default 1e-6) */
+    QudaBoolean theta_condition; /**< "Use the theta value to determine the gauge fixing if true. If false, use the
+                                    delta value (default false)" */
+    QudaPrecision precision;     /**< The precision used by the algorithm */
+
+  } QudaGaugeFixParam;
+
   /*
    * Interface functions, found in interface_quda.cpp
    */
@@ -954,6 +972,15 @@ extern "C" {
    */
   QudaBLASParam newQudaBLASParam(void);
 
+  /**
+   * A new QudaGaugeFixParam should always be initialized immediately
+   * after it's defined (and prior to explicitly setting its members)
+   * using this function.  Typical usage is as follows:
+   *
+   *   QudaGaugeFixParam fix_param = newQudaGaugeFixParam();
+   */
+  QudaGaugeFixParam newQudaGaugeFixParam(void);
+
   /**
    * Print the members of QudaGaugeParam.
    * @param param The QudaGaugeParam whose elements we are to print.
@@ -990,6 +1017,12 @@ extern "C" {
    */
   void printQudaBLASParam(QudaBLASParam *param);
 
+  /**
+   * Print the members of QudaGaugeFixParam.
+   * @param param The QudaGaugeFixParam whose elements we are to print.
+   */
+  void printQudaGaugeFixParam(QudaGaugeFixParam *param);
+
   /**
    * Load the gauge field from the host.
    * @param h_gauge Base pointer to host gauge field (regardless of dimensionality)
@@ -1519,41 +1552,14 @@ extern "C" {
                     const int *X);
 
   /**
-   * @brief Gauge fixing with overrelaxation with support for single and multi GPU.
-   * @param[in,out] gauge, gauge field to be fixed
-   * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing
-   * @param[in] Nsteps, maximum number of steps to perform gauge fixing
-   * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
-   * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7.
-   * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when
-   * iteration reachs the maximum number of steps defined by Nsteps
-   * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this
-   * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
-   * @param[in] param The parameters of the external fields and the computation settings
-   * @param[out] timeinfo
-   */
-  int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps,
-                                const unsigned int verbose_interval, const double relax_boost, const double tolerance,
-                                const unsigned int reunit_interval, const unsigned int stopWtheta,
-                                QudaGaugeParam *param, double *timeinfo);
-  /**
-   * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only.
+   * @brief Gauge fixing with overrelaxation with support for single and multi GPU, and steepest descent FFT with
+   * support for single GPU only.
    * @param[in,out] gauge, gauge field to be fixed
-   * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing
-   * @param[in] Nsteps, maximum number of steps to perform gauge fixing
-   * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
-   * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08
-   * @param[in] autotune, 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value
-   * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when
-   * iteration reachs the maximum number of steps defined by Nsteps
-   * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
-   * @param[in] param The parameters of the external fields and the computation settings
-   * @param[out] timeinfo
+   * @param[in] gauge_param The parameters of the external fields and the computation settings
+   * @param[in] fix_param Container for the gauge fixing algorithm and parameters to use.
+   * @param[out] timeinfo Array to track timings
    */
-  int computeGaugeFixingFFTQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps,
-                                const unsigned int verbose_interval, const double alpha, const unsigned int autotune,
-                                const double tolerance, const unsigned int stopWtheta, QudaGaugeParam *param,
-                                double *timeinfo);
+  int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *gauge_param, QudaGaugeFixParam *fix_param, double *timeinfo);
 
   /**
    * @brief Strided Batched GEMM
diff --git a/lib/check_params.h b/lib/check_params.h
index 1566506917..b49cf04cb0 100644
--- a/lib/check_params.h
+++ b/lib/check_params.h
@@ -268,7 +268,7 @@ void printQudaCloverParam(QudaInvertParam *param)
 
 #if defined CHECK_PARAM
   if (param->struct_size != (size_t)INVALID_INT && param->struct_size != sizeof(*param))
-    errorQuda("Unexpected QudaInvertParam struct size %lu, expected %lu", param->struct_size, sizeof(*param));
+    errorQuda("Unexpected QudaCloverParam struct size %lu, expected %lu", param->struct_size, sizeof(*param));
 #else
   P(struct_size, (size_t)INVALID_INT);
 #endif
@@ -1047,8 +1047,45 @@ void printQudaBLASParam(QudaBLASParam *param)
 #endif
 }
 
-// clean up
+#if defined INIT_PARAM
+QudaGaugeFixParam newQudaGaugeFixParam(void)
+{
+  QudaGaugeFixParam ret;
+#elif defined CHECK_PARAM
+static void checkGaugeFixParam(QudaGaugeFixParam *param)
+{
+#else
+void printQudaGaugeFixParam(QudaGaugeFixParam *param)
+{
+  printfQuda("QUDA gauge fix parameters:\n");
+#endif
+
+#if defined CHECK_PARAM
+  if (param->struct_size != (size_t)INVALID_INT && param->struct_size != sizeof(*param))
+    errorQuda("Unexpected QudaGaugeFixParam struct size %lu, expected %lu", param->struct_size, sizeof(*param));
+#else
+  P(struct_size, (size_t)INVALID_INT);
+#endif
+
+  P(gauge_dir, INVALID_INT);
+  P(maxiter, INVALID_INT);
+  P(verbosity_interval, INVALID_INT);
+  P(reunit_interval, INVALID_INT);
+  P(ovr_relaxation_boost, INVALID_DOUBLE);
+  P(fft_alpha, INVALID_DOUBLE);
+  P(tolerance, INVALID_DOUBLE);
+
+#ifndef CHECK_PARAM
+  P(fft_autotune, QUDA_BOOLEAN_FALSE);
+  P(theta_condition, QUDA_BOOLEAN_FALSE);
+#endif
 
+#ifdef INIT_PARAM
+  return ret;
+#endif
+}
+
+// clean up
 #undef INVALID_INT
 #undef INVALID_DOUBLE
 #undef P
diff --git a/lib/gauge_fix_fft.cu b/lib/gauge_fix_fft.cu
index a44f9368c3..b9aec49975 100644
--- a/lib/gauge_fix_fft.cu
+++ b/lib/gauge_fix_fft.cu
@@ -182,19 +182,26 @@ namespace quda {
   };
 
   template <typename Float, QudaReconstructType recon, int gauge_dir>
-  void gaugeFixingFFT(GaugeField& data, int Nsteps, int verbose_interval,
-                      double alpha0, int autotune, double tolerance, int stopWtheta)
+  void gaugeFixingFFT(GaugeField& data, QudaGaugeFixParam &fix_param)
   {
     TimeProfile profileInternalGaugeFixFFT("InternalGaugeFixQudaFFT", false);
-
+    
+    QudaBoolean autotune = fix_param.fft_autotune;
+    double alpha0 = fix_param.fft_alpha;
+    double tolerance = fix_param.tolerance;
+    QudaBoolean theta_condition = fix_param.theta_condition;
+    int steps = fix_param.maxiter;
+    int verbose_interval = fix_param.verbosity_interval;
+    
     profileInternalGaugeFixFFT.TPSTART(QUDA_PROFILE_COMPUTE);
 
     if (getVerbosity() >= QUDA_SUMMARIZE) {
-      printfQuda("\tAuto tune active: %s\n", autotune ? "true" : "false");      
+      if(autotune == QUDA_BOOLEAN_TRUE) printfQuda("\tAuto tune active: alpha will be adjusted as the algorithm progresses\n");
+      else printfQuda("\tAuto tune not active: alpha will remain constant as the algorithm progresses\n");      
       printfQuda("\tAlpha parameter of the Steepest Descent Method: %e\n", alpha0);
-      printfQuda("\tTolerance: %lf\n", tolerance);
-      printfQuda("\tStop criterion method: %s\n", stopWtheta ? "Theta" : "Delta");
-      printfQuda("\tMaximum number of iterations: %d\n", Nsteps);
+      printfQuda("\tTolerance: %e\n", tolerance);
+      printfQuda("\tStop criterion method: %s\n", theta_condition == QUDA_BOOLEAN_TRUE ? "Theta" : "Delta");
+      printfQuda("\tMaximum number of iterations: %d\n", steps);
       printfQuda("\tPrint convergence results at every %d steps\n", verbose_interval);
     }
     
@@ -217,11 +224,11 @@ namespace quda {
     GaugeFixQuality<decltype(argQ)> gfixquality(argQ, data);
     gfixquality.apply(device::get_default_stream());
     double action0 = argQ.getAction();
-    if(getVerbosity() >= QUDA_SUMMARIZE) printf("Step: %d\tAction: %.16e\ttheta: %.16e\n", 0, argQ.getAction(), argQ.getTheta());
+    if(getVerbosity() >= QUDA_SUMMARIZE) printf("Step: %05d\tAction: %.16e\ttheta: %.16e\n", 0, argQ.getAction(), argQ.getTheta());
 
     double diff = 0.0;
     int iter = 0;
-    for (iter = 0; iter < Nsteps; iter++) {
+    for (iter = 0; iter < steps; iter++) {
       for (int k = 0; k < 6; k++) {
         //------------------------------------------------------------------------
         // Set a pointer do the element k in lattice volume
@@ -285,23 +292,23 @@ namespace quda {
       double action = argQ.getAction();
       diff = abs(action0 - action);
       if ((iter % verbose_interval) == (verbose_interval - 1) && getVerbosity() >= QUDA_SUMMARIZE)
-        printf("Step: %d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff);
-      if ( autotune && ((action - action0) < -1e-14) ) {
+        printf("Step: %05d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff);
+      if ( autotune == QUDA_BOOLEAN_TRUE && ((action - action0) < -1e-14) ) {
         if ( arg.alpha > 0.01 ) {
           arg.alpha = 0.95 * arg.alpha;
-          if(getVerbosity() >= QUDA_SUMMARIZE) printf(">>>>>>>>>>>>>> Warning: changing alpha down -> %.4e\n", arg.alpha);
+          if(getVerbosity() >= QUDA_SUMMARIZE) printf("Changing alpha down -> %.4e\n", arg.alpha);
         }
       }
       //------------------------------------------------------------------------
       // Check gauge fix quality criterion
       //------------------------------------------------------------------------
-      if ( stopWtheta ) {   if ( argQ.getTheta() < tolerance ) break; }
+      if ( theta_condition == QUDA_BOOLEAN_TRUE ) {   if ( argQ.getTheta() < tolerance ) break; }
       else { if ( diff < tolerance ) break; }
 
       action0 = action;
     }
     if ((iter % verbose_interval) != 0 && getVerbosity() >= QUDA_SUMMARIZE)
-      printf("Step: %d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter, argQ.getAction(), argQ.getTheta(), diff);
+      printf("Step: %05d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter, argQ.getAction(), argQ.getTheta(), diff);
     
     // Reunitarize at end
     const double unitarize_eps = 1e-14;
@@ -356,21 +363,23 @@ namespace quda {
     
     gflops = (gflops * 1e-9) / (secs);
     gbytes = gbytes / (secs * 1e9);
-    if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Time: %6.6f s, Gflop/s = %6.1f, GB/s = %6.1f\n", secs, gflops, gbytes);
-
+    if (getVerbosity() > QUDA_SUMMARIZE)
+      printfQuda("Time: %6.6f s, Gflop/s = %6.1f, GB/s = %6.1f\n", secs, gflops, gbytes);
+    
     host_free(num_failures_h);
   }
 
   template<typename Float, int nColors, QudaReconstructType recon> struct GaugeFixingFFT {
-    GaugeFixingFFT(GaugeField& data, int gauge_dir, int Nsteps, int verbose_interval,
-                   double alpha, int autotune, double tolerance, int stopWtheta)
+    GaugeFixingFFT(GaugeField& data, QudaGaugeFixParam &fix_param)
     {
-      if (gauge_dir != 3) {
-	if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Starting Landau gauge fixing with FFTs...\n");
-        gaugeFixingFFT<Float, recon, 4>(data, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta);
+      if (fix_param.gauge_dir == 4) {
+	if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Starting Landau gauge fixing with FFTs\n");
+        gaugeFixingFFT<Float, recon, 4>(data, fix_param);
+      } else if (fix_param.gauge_dir == 3) {
+	if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Starting Coulomb gauge fixing with FFTs\n");
+        gaugeFixingFFT<Float, recon, 3>(data, fix_param);	
       } else {
-	if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Starting Coulomb gauge fixing with FFTs...\n");
-        gaugeFixingFFT<Float, recon, 3>(data, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta);
+	errorQuda("Unexpected gauge_dir = %d", fix_param.gauge_dir); 
       }
     }
   };
@@ -378,23 +387,16 @@ namespace quda {
   /**
    * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only.
    * @param[in,out] data, quda gauge field
-   * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing
-   * @param[in] Nsteps, maximum number of steps to perform gauge fixing
-   * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
-   * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08
-   * @param[in] autotune, 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value
-   * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps
-   * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
+   * @param[in] fix_param Parameter struct defining the gauge fixing
    */
 #if defined(GPU_GAUGE_ALG)
-  void gaugeFixingFFT(GaugeField& data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double alpha,
-                      const int autotune, const double tolerance, const int stopWtheta)
+  void gaugeFixingFFT(GaugeField& data, QudaGaugeFixParam &fix_param)
   {
     if (comm_partitioned()) errorQuda("Gauge Fixing with FFTs in multi-GPU support NOT implemented yet!");
-    instantiate<GaugeFixingFFT, ReconstructNo12>(data, gauge_dir, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta);
+    instantiate<GaugeFixingFFT, ReconstructNo12>(data, fix_param);
   }
 #else
-  void gaugeFixingFFT(GaugeField&, const int, const int, const int, const double, const int, const double, const int)
+  void gaugeFixingFFT(GaugeField&, QudaGaugeFixParam &)
   {
     errorQuda("Gauge fixing has bot been built");
   }
diff --git a/lib/gauge_fix_ovr.cu b/lib/gauge_fix_ovr.cu
index b97772f43d..38b7948f99 100644
--- a/lib/gauge_fix_ovr.cu
+++ b/lib/gauge_fix_ovr.cu
@@ -223,12 +223,17 @@ namespace quda {
   };
 
   template <typename Float, QudaReconstructType recon, int gauge_dir>
-  void gaugeFixingOVR(GaugeField &data,const int Nsteps, const int verbose_interval,
-                      const double relax_boost, const double tolerance,
-                      const int reunit_interval, const int stopWtheta)
+  void gaugeFixingOVR(GaugeField &data, QudaGaugeFixParam &fix_param)
   {
     TimeProfile profileInternalGaugeFixOVR("InternalGaugeFixQudaOVR", false);
 
+    double relax_boost = fix_param.ovr_relaxation_boost;
+    double tolerance = fix_param.tolerance;
+    QudaBoolean theta_condition = fix_param.theta_condition;
+    int steps = fix_param.maxiter;
+    int reunit_interval = fix_param.reunit_interval;
+    int verbose_interval = fix_param.verbosity_interval;
+    
     profileInternalGaugeFixOVR.TPSTART(QUDA_PROFILE_COMPUTE);
     double flop = 0;
     double byte = 0;
@@ -236,10 +241,11 @@ namespace quda {
     if (getVerbosity() >= QUDA_SUMMARIZE) {
       printfQuda("\tOverrelaxation boost parameter: %e\n", relax_boost);
       printfQuda("\tTolerance: %le\n", tolerance);
-      printfQuda("\tStop criterion method: %s\n", stopWtheta ? "Theta" : "Delta");
-      printfQuda("\tMaximum number of iterations: %d\n", Nsteps);
+      printfQuda("\tStop criterion method: %s\n", theta_condition == QUDA_BOOLEAN_TRUE ? "Theta" : "Delta");
+      printfQuda("\tMaximum number of iterations: %d\n", steps);
       printfQuda("\tReunitarize at every %d steps\n", reunit_interval);
       printfQuda("\tPrint convergence results at every %d steps\n", verbose_interval);
+      printfQuda("\tComputing in %s precision\n", sizeof(Float) == sizeof(double) ? "double" : "single");
     }
     
     const double unitarize_eps = 1e-14;
@@ -313,7 +319,7 @@ namespace quda {
     flop += (double)GaugeFixQuality.flops();
     byte += (double)GaugeFixQuality.bytes();
     double action0 = argQ.getAction();
-    if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Step: %d\tAction: %.16e\ttheta: %.16e\n", 0, argQ.getAction(), argQ.getTheta());
+    if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Step: %05d\tAction: %.16e\ttheta: %.16e\n", 0, argQ.getAction(), argQ.getTheta());
 
     *num_failures_h = 0;
     unitarizeLinks(data, data, num_failures_d);
@@ -324,7 +330,7 @@ namespace quda {
     GaugeFix<Float, recon, gauge_dir> gfixBorderPoints(data, relax_boost, borderpoints, true, threads);
 
     int iter = 0;
-    for (iter = 0; iter < Nsteps; iter++) {
+    for (iter = 0; iter < steps; iter++) {
       for (int p = 0; p < 2; p++) {
         if (comm_partitioned()) {
           gfixBorderPoints.setParity(p); //compute border points
@@ -413,8 +419,8 @@ namespace quda {
       double action = argQ.getAction();
       double diff = abs(action0 - action);
       if ((iter % verbose_interval) == (verbose_interval - 1) && getVerbosity() >= QUDA_VERBOSE)
-        printfQuda("Step: %d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff);
-      if (stopWtheta) {
+        printfQuda("Step: %05d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff);
+      if (theta_condition == QUDA_BOOLEAN_TRUE) {
         if (argQ.getTheta() < tolerance) break;
       } else {
         if ( diff < tolerance ) break;
@@ -436,7 +442,7 @@ namespace quda {
       byte += (double)GaugeFixQuality.bytes();
       double action = argQ.getAction();
       double diff = abs(action0 - action);
-      if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Step: %d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff);
+      if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Step: %05d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff);
     }
 
     for (int i = 0; i < 2 && nlinksfaces; i++) managed_free(borderpoints[i]);
@@ -470,17 +476,16 @@ namespace quda {
   }
 
   template <typename Float, int nColor, QudaReconstructType recon> struct GaugeFixingOVR {
-  GaugeFixingOVR(GaugeField& data, const int gauge_dir, const int Nsteps, const int verbose_interval,
-                 const double relax_boost, const double tolerance, const int reunit_interval, const int stopWtheta)
+  GaugeFixingOVR(GaugeField& data, QudaGaugeFixParam &fix_param)
     {
-      if (gauge_dir == 4) {
-	if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Starting Landau gauge fixing...\n");
-        gaugeFixingOVR<Float, recon, 4>(data, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta);
-      } else if (gauge_dir == 3) {
-	if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Starting Coulomb gauge fixing...\n");
-        gaugeFixingOVR<Float, recon, 3>(data, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta);
+      if (fix_param.gauge_dir == 4) {
+	if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Starting Landau gauge fixing with Overrelaxation\n");
+        gaugeFixingOVR<Float, recon, 4>(data, fix_param);
+      } else if (fix_param.gauge_dir == 3) {
+	if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Starting Coulomb gauge fixing with Overrelaxation\n");
+        gaugeFixingOVR<Float, recon, 3>(data, fix_param);
       } else {
-        errorQuda("Unexpected gauge_dir = %d", gauge_dir);
+        errorQuda("Unexpected gauge_dir = %d", fix_param.gauge_dir);
       }
     }
   };
@@ -488,22 +493,15 @@ namespace quda {
   /**
    * @brief Gauge fixing with overrelaxation with support for single and multi GPU.
    * @param[in,out] data, quda gauge field
-   * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing
-   * @param[in] Nsteps, maximum number of steps to perform gauge fixing
-   * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
-   * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7.
-   * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps
-   * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this
-   * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
+   * @param[in] fix_param Parameter struct defining the gauge fixing
    */
 #ifdef GPU_GAUGE_ALG
-  void gaugeFixingOVR(GaugeField& data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double relax_boost,
-                      const double tolerance, const int reunit_interval, const int stopWtheta)
+  void gaugeFixingOVR(GaugeField& data, QudaGaugeFixParam &fix_param)
   {
-    instantiate<GaugeFixingOVR>(data, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta);
+    instantiate<GaugeFixingOVR>(data, fix_param);
   }
 #else
-  void gaugeFixingOVR(GaugeField&, const int, const int, const int, const double, const double, const int, const int)
+  void gaugeFixingOVR(GaugeField&, QudaGaugeFixParam &)
   {
     errorQuda("Gauge fixing has not been built");
   }
diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp
index ddb13df03e..59c63d9b1a 100644
--- a/lib/interface_quda.cpp
+++ b/lib/interface_quda.cpp
@@ -237,8 +237,7 @@ static TimeProfile profileMomAction("momActionQuda");
 static TimeProfile profileEnd("endQuda");
 
 //!< Profiler for GaugeFixing
-static TimeProfile GaugeFixFFTQuda("GaugeFixFFTQuda");
-static TimeProfile GaugeFixOVRQuda("GaugeFixOVRQuda");
+static TimeProfile profileGaugeFix("gaugeFixQuda");
 
 //!< Profiler for toal time spend between init and end
 static TimeProfile profileInit2End("initQuda-endQuda",false);
@@ -1483,6 +1482,7 @@ void endQuda(void)
     profileProject.Print();
     profilePhase.Print();
     profileMomAction.Print();
+    profileGaugeFix.Print();
     profileEnd.Print();
 
     profileInit2End.Print();
@@ -5519,141 +5519,88 @@ void performWFlownStep(unsigned int n_steps, double step_size, int meas_interval
   popOutputPrefix();
 }
 
-int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps,
-                              const unsigned int verbose_interval, const double relax_boost, const double tolerance,
-                              const unsigned int reunit_interval, const unsigned int stopWtheta, QudaGaugeParam *param,
-                              double *timeinfo)
+int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixParam *fix_param, double *timeinfo)
 {
-  GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_TOTAL);
+  profileGaugeFix.TPSTART(QUDA_PROFILE_TOTAL);
 
-  checkGaugeParam(param);
-
-  GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_INIT);
-  GaugeFieldParam gParam(*param, gauge);
-  auto *cpuGauge = new cpuGaugeField(gParam);
-
-  // gParam.pad = getFatLinkPadding(param->X);
-  gParam.create = QUDA_NULL_FIELD_CREATE;
-  gParam.link_type = param->type;
-  gParam.reconstruct = param->reconstruct;
-  gParam.setPrecision(gParam.Precision(), true);
-  auto *cudaInGauge = new cudaGaugeField(gParam);
-
-  GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_INIT);
-  GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_H2D);
-
-  ///if (!param->use_resident_gauge) {   // load fields onto the device
-  cudaInGauge->loadCPUField(*cpuGauge);
- /* } else { // or use resident fields already present
-    if (!gaugePrecise) errorQuda("No resident gauge field allocated");
-    cudaInGauge = gaugePrecise;
-    gaugePrecise = nullptr;
-  } */
-
-  GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_H2D);
-
-  if (comm_size() == 1) {
-    // perform the update
-    GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_COMPUTE);
-    gaugeFixingOVR(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval,
-                   stopWtheta);
-    GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_COMPUTE);
-  } else {
-    cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, GaugeFixOVRQuda);
-
-    // perform the update
-    GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_COMPUTE);
-    gaugeFixingOVR(*cudaInGaugeEx, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval,
-                   stopWtheta);
-    GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_COMPUTE);
-
-    //HOW TO COPY BACK TO CPU: cudaInGaugeEx->cpuGauge
-    copyExtendedGauge(*cudaInGauge, *cudaInGaugeEx, QUDA_CUDA_FIELD_LOCATION);
+  if (!initialized) errorQuda("QUDA not initialized");
+  if (getVerbosity() == QUDA_DEBUG_VERBOSE) {
+    printQudaGaugeParam(g_param);
+    printQudaGaugeFixParam(fix_param);
   }
 
-  // copy the gauge field back to the host
-  GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_D2H);
-  cudaInGauge->saveCPUField(*cpuGauge);
-  GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_D2H);
-
-  GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_TOTAL);
+  // Check parameters
+  checkGaugeParam(g_param);
+  checkGaugeFixParam(fix_param);
 
-  if (param->make_resident_gauge) {
-    if (gaugePrecise != nullptr) delete gaugePrecise;
-    gaugePrecise = cudaInGauge;
-  } else {
-    delete cudaInGauge;
+  if (g_param->location == QUDA_CUDA_FIELD_LOCATION) {
+    errorQuda("GPU gauge fixing not supported via QUDA interface. Please use direct kernel call: "
+              "gaugeFixingOVR/gaugeFixingFFT");
   }
 
-  if(timeinfo){
-    timeinfo[0] = GaugeFixOVRQuda.Last(QUDA_PROFILE_H2D);
-    timeinfo[1] = GaugeFixOVRQuda.Last(QUDA_PROFILE_COMPUTE);
-    timeinfo[2] = GaugeFixOVRQuda.Last(QUDA_PROFILE_D2H);
-  }
-
-  return 0;
-}
-
-int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir,  const unsigned int Nsteps, \
-  const unsigned int verbose_interval, const double alpha, const unsigned int autotune, const double tolerance, \
-  const unsigned int  stopWtheta, QudaGaugeParam* param , double* timeinfo)
-{
-  GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_TOTAL);
-
-  checkGaugeParam(param);
-
-  GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_INIT);
-
-  GaugeFieldParam gParam(*param, gauge);
-  auto *cpuGauge = new cpuGaugeField(gParam);
+  profileGaugeFix.TPSTART(QUDA_PROFILE_INIT);
+  GaugeFieldParam gauge_param(*g_param, gauge);
+  auto *cpu_gauge = new cpuGaugeField(gauge_param);
 
-  //gParam.pad = getFatLinkPadding(param->X);
-  gParam.create      = QUDA_NULL_FIELD_CREATE;
-  gParam.link_type   = param->type;
-  gParam.reconstruct = param->reconstruct;
-  gParam.setPrecision(gParam.Precision(), true);
-  auto *cudaInGauge = new cudaGaugeField(gParam);
-
-
-  GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_INIT);
-
-  GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_H2D);
-
-  //if (!param->use_resident_gauge) {   // load fields onto the device
-  cudaInGauge->loadCPUField(*cpuGauge);
-  /*} else { // or use resident fields already present
-    if (!gaugePrecise) errorQuda("No resident gauge field allocated");
-    cudaInGauge = gaugePrecise;
-    gaugePrecise = nullptr;
-  } */
-
-  GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_H2D);
-
-  // perform the update
-  GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_COMPUTE);
+  // Make GPU field
+  gauge_param.create = QUDA_NULL_FIELD_CREATE;
+  gauge_param.link_type = g_param->type;
+  gauge_param.reconstruct = g_param->reconstruct;
+  gauge_param.setPrecision(fix_param->precision, true);
+  auto *device_gauge = new cudaGaugeField(gauge_param);
+  profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT);
+
+  // Load gauge to device
+  profileGaugeFix.TPSTART(QUDA_PROFILE_H2D);
+  device_gauge->loadCPUField(*cpu_gauge);
+  profileGaugeFix.TPSTOP(QUDA_PROFILE_H2D);
+
+  // Perform the update
+  switch (fix_param->fix_type) {
+
+  case QUDA_GAUGEFIX_TYPE_OVR:
+    if (comm_size() == 1) {
+      profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE);
+      gaugeFixingOVR(*device_gauge, *fix_param);
+      profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE);
+    } else {
+      // For MPI, we must perform a halo exchange
+      cudaGaugeField *device_gauge_extended = createExtendedGauge(*device_gauge, R, profileGaugeFix);
+      profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE);
+      gaugeFixingOVR(*device_gauge_extended, *fix_param);
+      profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE);
+      copyExtendedGauge(*device_gauge, *device_gauge_extended, QUDA_CUDA_FIELD_LOCATION);
+    }
+    break;
 
-  gaugeFixingFFT(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta);
+  case QUDA_GAUGEFIX_TYPE_FFT:
+    profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE);
+    gaugeFixingFFT(*device_gauge, *fix_param);
+    profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE);
+    break;
 
-  GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_COMPUTE);
+  default: errorQuda("Unkown gauge fix type %d", fix_param->fix_type);
+  }
 
-  // copy the gauge field back to the host
-  GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_D2H);
-  cudaInGauge->saveCPUField(*cpuGauge);
-  GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_D2H);
+  // Copy the fixed gauge field back to the host.
+  profileGaugeFix.TPSTART(QUDA_PROFILE_D2H);
+  device_gauge->saveCPUField(*cpu_gauge);
+  profileGaugeFix.TPSTOP(QUDA_PROFILE_D2H);
 
-  GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_TOTAL);
+  profileGaugeFix.TPSTOP(QUDA_PROFILE_TOTAL);
 
-  if (param->make_resident_gauge) {
+  if (g_param->make_resident_gauge) {
     if (gaugePrecise != nullptr) delete gaugePrecise;
-    gaugePrecise = cudaInGauge;
+    gaugePrecise = device_gauge;
   } else {
-    delete cudaInGauge;
+    delete device_gauge;
   }
+  delete cpu_gauge;
 
   if (timeinfo) {
-    timeinfo[0] = GaugeFixFFTQuda.Last(QUDA_PROFILE_H2D);
-    timeinfo[1] = GaugeFixFFTQuda.Last(QUDA_PROFILE_COMPUTE);
-    timeinfo[2] = GaugeFixFFTQuda.Last(QUDA_PROFILE_D2H);
+    timeinfo[0] = profileGaugeFix.Last(QUDA_PROFILE_H2D);
+    timeinfo[1] = profileGaugeFix.Last(QUDA_PROFILE_COMPUTE);
+    timeinfo[2] = profileGaugeFix.Last(QUDA_PROFILE_D2H);
   }
 
   return 0;
diff --git a/lib/milc_interface.cpp b/lib/milc_interface.cpp
index 64ad1e4efc..04862243b6 100644
--- a/lib/milc_interface.cpp
+++ b/lib/milc_interface.cpp
@@ -2692,15 +2692,22 @@ void qudaCloverMultishiftInvert(int external_precision, int quda_precision, int
 void qudaGaugeFixingOVR(int precision, unsigned int gauge_dir, int Nsteps, int verbose_interval, double relax_boost,
                         double tolerance, unsigned int reunit_interval, unsigned int stopWtheta, void *milc_sitelink)
 {
-  QudaGaugeParam qudaGaugeParam = newMILCGaugeParam(localDim,
-      (precision==1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION,
-      QUDA_SU3_LINKS);
-  qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_NO;
+  QudaGaugeParam gauge_param
+    = newMILCGaugeParam(localDim, (precision == 1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION, QUDA_SU3_LINKS);
+  gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
   //qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_12;
 
+  QudaGaugeFixParam fix_param = newQudaGaugeFixParam();
+  fix_param.gauge_dir = gauge_dir;
+  fix_param.maxiter = Nsteps;
+  fix_param.verbosity_interval = verbose_interval;
+  fix_param.ovr_relaxation_boost = relax_boost;
+  fix_param.tolerance = tolerance;
+  fix_param.reunit_interval = reunit_interval;
+  fix_param.theta_condition = stopWtheta == 0 ? QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE;
+
   double timeinfo[3];
-  computeGaugeFixingOVRQuda(milc_sitelink, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta, \
-    &qudaGaugeParam, timeinfo);
+  computeGaugeFixingQuda(milc_sitelink, &gauge_param, &fix_param, timeinfo);
 
   printfQuda("Time H2D: %lf\n", timeinfo[0]);
   printfQuda("Time to Compute: %lf\n", timeinfo[1]);
@@ -2719,16 +2726,22 @@ void qudaGaugeFixingFFT( int precision,
     void* milc_sitelink
     )
 {
-  QudaGaugeParam qudaGaugeParam = newMILCGaugeParam(localDim,
-      (precision==1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION,
-      QUDA_GENERAL_LINKS);
-  qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_NO;
+  QudaGaugeParam gauge_param
+    = newMILCGaugeParam(localDim, (precision == 1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION, QUDA_GENERAL_LINKS);
+  gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
   //qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_12;
 
+  QudaGaugeFixParam fix_param = newQudaGaugeFixParam();
+  fix_param.gauge_dir = gauge_dir;
+  fix_param.maxiter = Nsteps;
+  fix_param.verbosity_interval = verbose_interval;
+  fix_param.fft_alpha = alpha;
+  fix_param.tolerance = tolerance;
+  fix_param.theta_condition = stopWtheta == 0 ? QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE;
+  fix_param.fft_autotune = autotune == 0 ? QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE;
 
   double timeinfo[3];
-  computeGaugeFixingFFTQuda(milc_sitelink, gauge_dir, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta, \
-    &qudaGaugeParam, timeinfo);
+  computeGaugeFixingQuda(milc_sitelink, &gauge_param, &fix_param, timeinfo);
 
   printfQuda("Time H2D: %lf\n", timeinfo[0]);
   printfQuda("Time to Compute: %lf\n", timeinfo[1]);
diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp
index 0ab7163b3d..e4ef084b2d 100644
--- a/tests/gauge_alg_test.cpp
+++ b/tests/gauge_alg_test.cpp
@@ -33,20 +33,22 @@ using namespace quda;
 //***********************************************************//
 bool execute = true;
 
+// Gauge IO related
 bool gauge_load;
 bool gauge_store;
-
 void *host_gauge[4];
 
+// Define the command line options and option group for this test
 int gf_gauge_dir = 4;
 int gf_maxiter = 10000;
 int gf_verbosity_interval = 100;
 double gf_ovr_relaxation_boost = 1.5;
 double gf_fft_alpha = 0.8;
+bool gf_fft_autotune = true;
 int gf_reunit_interval = 10;
 double gf_tolerance = 1e-6;
 bool gf_theta_condition = false;
-bool gf_fft_autotune = false;
+QudaGaugeFixType gf_fix_type = QUDA_GAUGEFIX_TYPE_OVR;
 
 void display_test_info()
 {
@@ -54,8 +56,9 @@ void display_test_info()
 
   switch (test_type) {
   case 0: printfQuda("\n Google testing\n"); break;
-  case 1: printfQuda("\nOVR gauge fix\n"); break;
-  case 2: printfQuda("\nFFT gauge fix\n"); break;
+  case 1:
+    printfQuda("\n%s %s gauge fix\n", get_gaugefix_str(gf_fix_type), gf_gauge_dir == 4 ? "Landau" : "Coulomb");
+    break;
   default: errorQuda("Undefined test type %d given", test_type);
   }
 
@@ -69,22 +72,66 @@ void display_test_info()
              dimPartitioned(3));
 }
 
+void add_gaugefix_option_group(std::shared_ptr<QUDAApp> quda_app)
+{
+  CLI::TransformPairs<QudaGaugeFixType> fix_type_map {{"ovr", QUDA_GAUGEFIX_TYPE_OVR}, {"fft", QUDA_GAUGEFIX_TYPE_FFT}};
+
+  // Option group for gauge fixing related options
+  auto opgroup = quda_app->add_option_group("gaugefix", "Options controlling gauge fixing tests");
+  opgroup->add_option("--gf-dir", gf_gauge_dir,
+                      "The orthogonal direction of the gauge fixing, 3=Coulomb, 4=Landau. (default 4)");
+  opgroup->add_option("--gf-maxiter", gf_maxiter,
+                      "The maximun number of gauge fixing iterations to be applied (default 10000)");
+  opgroup->add_option("--gf-verbosity-interval", gf_verbosity_interval,
+                      "Print the gauge fixing progress every N steps (default 100)");
+  opgroup->add_option("--gf-ovr-relaxation-boost", gf_ovr_relaxation_boost,
+                      "The overrelaxation boost parameter for the overrelaxation method (default 1.5)");
+  opgroup->add_option("--gf-fft-alpha", gf_fft_alpha, "The Alpha parameter in the FFT method (default 0.8)");
+  opgroup->add_option("--gf-fft-autotune", gf_fft_autotune,
+                      "Autotune the Alpha parameter in the FFT method (default true)");
+  opgroup->add_option("--gf-reunit-interval", gf_reunit_interval,
+                      "Reunitarise the gauge field every N steps (default 10)");
+  opgroup->add_option("--gf-tol", gf_tolerance, "The tolerance of the gauge fixing quality (default 1e-6)");
+  opgroup->add_option(
+    "--gf-theta-condition", gf_theta_condition,
+    "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)");
+  opgroup->add_option("--gf-fix-type", gf_fix_type, "The type of algorithm to use for fixing (default ovr)")
+    ->transform(CLI::QUDACheckedTransformer(fix_type_map));
+}
+
+void setGaugeFixParam(QudaGaugeFixParam &fix_param)
+{
+  fix_param.fix_type = gf_fix_type;
+  fix_param.gauge_dir = gf_gauge_dir;
+  fix_param.maxiter = gf_maxiter;
+  fix_param.verbosity_interval = gf_verbosity_interval;
+  fix_param.reunit_interval = gf_reunit_interval;
+  fix_param.tolerance = gf_tolerance;
+  fix_param.ovr_relaxation_boost = gf_ovr_relaxation_boost;
+  fix_param.fft_alpha = gf_fft_alpha;
+  fix_param.fft_autotune = gf_fft_alpha ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE;
+  fix_param.theta_condition = gf_theta_condition ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE;
+  fix_param.precision = cuda_prec;
+}
+
 class GaugeAlgTest : public ::testing::Test
 {
-protected:
-  QudaGaugeParam param;
 
-  Timer<false> a0, a1;
-  double2 detu;
-  double3 plaq;
-  GaugeField *U;
+protected:
+  QudaGaugeParam gauge_param;
+  QudaGaugeFixParam fix_param;
+
+  host_timer_t host_timer_1, host_timer_2;
+  double2 det_u;
+  double2 trace_u;
+  double3 plaq_u;
+  cudaGaugeField *U;
   int nsteps;
   int nhbsteps;
   int novrsteps;
   bool coldstart;
   double beta_value;
-
-  bool unit_test;
+  RNG *randstates;
 
   void SetReunitarizationConsts()
   {
@@ -97,6 +144,13 @@ class GaugeAlgTest : public ::testing::Test
     setUnitarizeLinksConstants(unitarize_eps, max_error, reunit_allow_svd, reunit_svd_only, svd_rel_error, svd_abs_error);
   }
 
+  bool checkDimsPartitioned()
+  {
+    if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3))
+      return true;
+    return false;
+  }
+
   bool comparePlaquette(double3 a, double3 b)
   {
     double a0, a1, a2;
@@ -104,56 +158,55 @@ class GaugeAlgTest : public ::testing::Test
     a1 = std::abs(a.y - b.y);
     a2 = std::abs(a.z - b.z);
     double prec_val = 1.0e-5;
-    if (prec == QUDA_DOUBLE_PRECISION) prec_val = gf_tolerance * 1e2;
+    if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1e-10;
     return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val));
   }
 
-  bool CheckDeterminant(double2 detu)
+  bool checkDeterminant(double2 det)
   {
-    double prec_val = 5e-8;
-    if (prec == QUDA_DOUBLE_PRECISION) prec_val = gf_tolerance * 1e2;
-    return (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val);
+    double prec_val = 1.0e-5;
+    if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1e-8;
+    return (std::abs(1.0 - det.x) < prec_val && std::abs(det.y) < prec_val);
   }
 
   virtual void SetUp()
   {
     if (execute) {
-      setVerbosity(QUDA_VERBOSE);
-      param = newQudaGaugeParam();
 
       // Setup gauge container.
-      setWilsonGaugeParam(param);
-      param.t_boundary = QUDA_PERIODIC_T;
+      gauge_param = newQudaGaugeParam();
+      setWilsonGaugeParam(gauge_param);
+      gauge_param.t_boundary = QUDA_PERIODIC_T;
 
       // Reunitarization setup
       int *num_failures_h = (int *)mapped_malloc(sizeof(int));
       int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h);
       SetReunitarizationConsts();
 
-      a0.start();
+      host_timer_1.start();
 
       // If no field is loaded, create a physical quenched field on the device
       if (!gauge_load) {
-        GaugeFieldParam gParam(param);
-        gParam.ghostExchange = QUDA_GHOST_EXCHANGE_EXTENDED;
-        gParam.create = QUDA_NULL_FIELD_CREATE;
-        gParam.reconstruct = link_recon;
-        gParam.setPrecision(prec, true);
+        GaugeFieldParam device_gauge_param(gauge_param);
+        device_gauge_param.ghostExchange = QUDA_GHOST_EXCHANGE_EXTENDED;
+        device_gauge_param.create = QUDA_NULL_FIELD_CREATE;
+        device_gauge_param.reconstruct = link_recon;
+        device_gauge_param.setPrecision(cuda_prec, true);
         for (int d = 0; d < 4; d++) {
-          if (comm_dim_partitioned(d)) gParam.r[d] = 2;
-          gParam.x[d] += 2 * gParam.r[d];
+          if (comm_dim_partitioned(d)) device_gauge_param.r[d] = 2;
+          device_gauge_param.x[d] += 2 * device_gauge_param.r[d];
         }
 
-        U = new cudaGaugeField(gParam);
+        U = new cudaGaugeField(device_gauge_param);
 
-        RNG randstates(*U, 1234);
+        RNG randstates(*U, quda_seed);
 
         nsteps = heatbath_num_steps;
         nhbsteps = heatbath_num_heatbath_per_step;
         novrsteps = heatbath_num_overrelax_per_step;
         coldstart = heatbath_coldstart;
         beta_value = heatbath_beta_value;
-        a1.start();
+        host_timer_2.start();
 
         if (coldstart)
           InitGaugeField(*U);
@@ -169,37 +222,37 @@ class GaugeAlgTest : public ::testing::Test
           unitarizeLinks(*U, num_failures_d);
           qudaDeviceSynchronize();
           if (*num_failures_h > 0) errorQuda("Error in the unitarization (%d errors)", *num_failures_h);
-          plaq = plaquette(*U);
-          printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
+          plaq_u = plaquette(*U);
+          printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_u.x, plaq_u.y, plaq_u.z);
         }
 
-        a1.stop();
-        printfQuda("Time Monte -> %.6f s\n", a1.last());
+        host_timer_2.stop();
+        printfQuda("Time Monte -> %.6f s\n", host_timer_2.last());
       } else {
 
         // If a field is loaded, create a device field and copy
         printfQuda("Copying gauge field from host\n");
-        param.location = QUDA_CPU_FIELD_LOCATION;
-        GaugeFieldParam gauge_field_param(param, host_gauge);
-        gauge_field_param.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
-        GaugeField *host = GaugeField::Create(gauge_field_param);
+        gauge_param.location = QUDA_CPU_FIELD_LOCATION;
+        GaugeFieldParam host_gauge_param(gauge_param, host_gauge);
+        host_gauge_param.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
+        GaugeField *host = GaugeField::Create(host_gauge_param);
 
         // switch the parameters for creating the mirror precise cuda gauge field
-        gauge_field_param.create = QUDA_NULL_FIELD_CREATE;
-        gauge_field_param.reconstruct = param.reconstruct;
-        gauge_field_param.setPrecision(param.cuda_prec, true);
+        host_gauge_param.create = QUDA_NULL_FIELD_CREATE;
+        host_gauge_param.reconstruct = gauge_param.reconstruct;
+        host_gauge_param.setPrecision(gauge_param.cuda_prec, true);
 
         if (comm_partitioned()) {
           int R[4] = {0, 0, 0, 0};
           for (int d = 0; d < 4; d++)
             if (comm_dim_partitioned(d)) R[d] = 2;
           static TimeProfile GaugeFix("GaugeFix");
-          cudaGaugeField *tmp = new cudaGaugeField(gauge_field_param);
+          cudaGaugeField *tmp = new cudaGaugeField(host_gauge_param);
           tmp->copy(*host);
           U = createExtendedGauge(*tmp, R, GaugeFix);
           delete tmp;
         } else {
-          U = new cudaGaugeField(gauge_field_param);
+          U = new cudaGaugeField(host_gauge_param);
           U->copy(*host);
         }
 
@@ -210,20 +263,29 @@ class GaugeAlgTest : public ::testing::Test
         unitarizeLinks(*U, num_failures_d);
         qudaDeviceSynchronize();
         if (*num_failures_h > 0) errorQuda("Error in the unitarization (%d errors)", *num_failures_h);
-
-        plaq = plaquette(*U);
-        printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
       }
 
-      // If a specific test type is requested, perfrom it now and then
+      // Unfixed Gauge data
+      plaq_u = plaquette(*U);
+      det_u = getLinkDeterminant(*U);
+      trace_u = getLinkTrace(*U);
+      printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_u.x, plaq_u.y, plaq_u.z);
+      printfQuda("Det: %.16e:%.16e\n", det_u.x, det_u.y);
+      printfQuda("Tr: %.16e:%.16e\n", trace_u.x / 3.0, trace_u.y / 3.0);
+
+      // If a specific test type is requested, perform it now and then
       // turn off all Google tests in the tear down.
       switch (test_type) {
-      case 0:
-        // Do the Google testing
+      case 0: // Do the Google testing
+        // Set gauge fixing params from the command line
+        // and adjust for this test type
+        fix_param = newQudaGaugeFixParam();
+        setGaugeFixParam(fix_param);
+        break;
+      case 1: // Do a specific test
+        run();
         break;
-      case 1: run_ovr(); break;
-      case 2: run_fft(); break;
-      default: errorQuda("Invalid test type %d", test_type);
+      default: errorQuda("Invalid test type %d ", test_type);
       }
 
       host_free(num_failures_h);
@@ -233,55 +295,92 @@ class GaugeAlgTest : public ::testing::Test
   virtual void TearDown()
   {
     if (execute) {
-      detu = getLinkDeterminant(*U);
-      double2 tru = getLinkTrace(*U);
-      printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y);
-      printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0);
+
+      // Compare gauge fixed data with original data
+      auto plaq_gf = plaquette(*U);
+      auto det_gf = getLinkDeterminant(*U);
+      auto trace_gf = getLinkTrace(*U);
+      printfQuda("Plaq:     %.16e, %.16e, %.16e\n", plaq_u.x, plaq_u.y, plaq_u.z);
+      printfQuda("Plaq GF:  %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+      printfQuda("Det:      %.16e, %.16e\n", det_u.x, det_u.y);
+      printfQuda("Det GF:   %.16e, %.16e\n", det_gf.x, det_gf.y);
+      printfQuda("Trace:    %.16e, %.16e\n", trace_u.x / 3.0, trace_u.y / 3.0);
+      printfQuda("Trace GF: %.16e, %.16e\n", trace_gf.x / 3.0, trace_gf.y / 3.0);
+
+      // As an observable, the plaquette value must remain invariant after
+      // gauge fixing.
+      ASSERT_TRUE(comparePlaquette(plaq_u, plaq_gf));
+
+      // The determinant of any SU(N) gauge field element must be (1.0,0.0) to
+      // machine precision
+      ASSERT_TRUE(checkDeterminant(det_gf));
 
       delete U;
       // Release all temporary memory used for data exchange between GPUs in multi-GPU mode
       PGaugeExchangeFree();
 
-      a0.stop();
-      printfQuda("Time -> %.6f s\n", a0.last());
+      host_timer_1.stop();
+      printfQuda("Time -> %.6f s\n", host_timer_1.last());
     }
     // If we performed a specific instance, switch off the
     // Google testing.
     if (test_type != 0) execute = false;
+    saveTuneCache();
   }
 
-  virtual void run_ovr()
+  virtual void run()
   {
     if (execute) {
-      gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance,
-                     gf_reunit_interval, gf_theta_condition);
-      auto plaq_gf = plaquette(*U);
-      printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-      printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-      ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-      saveTuneCache();
+      // Set gauge fixing params from the command line
+      fix_param = newQudaGaugeFixParam();
+      setGaugeFixParam(fix_param);
+
+      printfQuda("%s gauge fixing with %s method\n", fix_param.gauge_dir == 4 ? "Landau" : "Coulomb",
+                 get_gaugefix_str(fix_param.fix_type));
+
+      // Setup CPU gauge container.
+      gauge_param = newQudaGaugeParam();
+      setWilsonGaugeParam(gauge_param);
+      gauge_param.t_boundary = QUDA_PERIODIC_T;
+      gauge_param.location = QUDA_CPU_FIELD_LOCATION;
+
+      void *cpu_gauge[4];
+      for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * cpu_prec); }
+
+      GaugeFieldParam param(gauge_param);
+      param.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
+      param.create = QUDA_NULL_FIELD_CREATE;
+      param.link_type = gauge_param.type;
+      param.reconstruct = gauge_param.reconstruct;
+      param.setPrecision(cuda_prec, true);
+
+      auto *gauge = new cudaGaugeField(param);
+
+      // Copy the target U field (extended) into regular GPU field, then
+      // save to a CPU field. This is done to test the CPU interface function
+      // and instructs the user how to use void pointers for the gauge data,
+      // and the gauge_param container.
+      copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION);
+      saveGaugeFieldQuda((void *)cpu_gauge, (void *)gauge, &gauge_param);
+      delete gauge;
+
+      // Compute gauge fixing via interface
+      computeGaugeFixingQuda(cpu_gauge, &gauge_param, &fix_param, nullptr);
+
+      // cpu_gauge now contains the fixed gauge on the CPU. We now load that gauge
+      // to the device for inspection in the TearDown.
+      GaugeFieldParam fixed_param(gauge_param, cpu_gauge);
+      auto *fixed_cpu_gauge = new cpuGaugeField(fixed_param);
+
+      // Copy the CPU field to U.
+      U->loadCPUField(*fixed_cpu_gauge);
+
+      for (int dir = 0; dir < 4; dir++) host_free(cpu_gauge[dir]);
+      delete fixed_cpu_gauge;
+
       // Save if output string is specified
       if (gauge_store) save_gauge();
-    }
-  }
-  virtual void run_fft()
-  {
-    if (execute) {
-      if (!comm_partitioned()) {
-        printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
-        gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-                       gf_theta_condition);
-
-        auto plaq_gf = plaquette(*U);
-        printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-        printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-        ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-        saveTuneCache();
-        // Save if output string is specified
-        if (gauge_store) save_gauge();
-      } else {
-        errorQuda("Cannot perform FFT gauge fixing with MPI partitions.");
-      }
+      saveTuneCache();
     }
   }
 
@@ -289,20 +388,18 @@ class GaugeAlgTest : public ::testing::Test
   {
     printfQuda("Saving the gauge field to file %s\n", gauge_outfile);
 
-    QudaGaugeParam gauge_param = newQudaGaugeParam();
-    setWilsonGaugeParam(gauge_param);
-
     void *cpu_gauge[4];
     for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * gauge_param.cpu_prec); }
 
-    GaugeFieldParam gParam(param);
-    gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
-    gParam.create = QUDA_NULL_FIELD_CREATE;
-    gParam.link_type = param.type;
-    gParam.reconstruct = param.reconstruct;
-    gParam.setPrecision(gParam.Precision(), true);
+    GaugeFieldParam param(gauge_param);
+    param.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
+    param.create = QUDA_NULL_FIELD_CREATE;
+    param.link_type = gauge_param.type;
+    param.reconstruct = gauge_param.reconstruct;
+    param.setPrecision(param.Precision(), true);
 
-    cudaGaugeField *gauge = new cudaGaugeField(gParam);
+    cudaGaugeField *gauge;
+    gauge = new cudaGaugeField(param);
 
     // copy into regular field
     copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION);
@@ -319,21 +416,22 @@ class GaugeAlgTest : public ::testing::Test
 TEST_F(GaugeAlgTest, Generation)
 {
   if (execute && !gauge_load) {
-    detu = getLinkDeterminant(*U);
-    ASSERT_TRUE(CheckDeterminant(detu));
+    // Assert that the generated gauge is
+    // on the SU(N) manifold
+    det_u = getLinkDeterminant(*U);
+    ASSERT_TRUE(checkDeterminant(det_u));
   }
 }
 
 TEST_F(GaugeAlgTest, Landau_Overrelaxation)
 {
   if (execute) {
-    printfQuda("Landau gauge fixing with overrelaxation\n");
-    gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
-                   gf_theta_condition);
-    auto plaq_gf = plaquette(*U);
-    printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-    printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+    printfQuda("Landau gauge fixing with overrelaxation method\n");
+
+    fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR;
+    fix_param.gauge_dir = 4;
+
+    gaugeFixingOVR(*U, fix_param);
     saveTuneCache();
   }
 }
@@ -341,13 +439,12 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation)
 TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
 {
   if (execute) {
-    printfQuda("Coulomb gauge fixing with overrelaxation\n");
-    gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
-                   gf_theta_condition);
-    auto plaq_gf = plaquette(*U);
-    printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-    printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+    printfQuda("Coulomb gauge fixing with overrelaxation method\n");
+
+    fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR;
+    fix_param.gauge_dir = 3;
+
+    gaugeFixingOVR(*U, fix_param);
     saveTuneCache();
   }
 }
@@ -356,13 +453,12 @@ TEST_F(GaugeAlgTest, Landau_FFT)
 {
   if (execute) {
     if (!comm_partitioned()) {
-      printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
-      gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-                     gf_theta_condition);
-      auto plaq_gf = plaquette(*U);
-      printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-      printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-      ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+      printfQuda("Landau gauge fixing with steepest descent method with FFT\n");
+
+      fix_param.fix_type = QUDA_GAUGEFIX_TYPE_FFT;
+      fix_param.gauge_dir = 4;
+
+      gaugeFixingFFT(*U, fix_param);
       saveTuneCache();
     }
   }
@@ -372,42 +468,17 @@ TEST_F(GaugeAlgTest, Coulomb_FFT)
 {
   if (execute) {
     if (!comm_partitioned()) {
-      printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n");
-      gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-                     gf_theta_condition);
-      auto plaq_gf = plaquette(*U);
-      printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-      printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-      ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+      printfQuda("Coulomb gauge fixing with steepest descent method with FFT\n");
+
+      fix_param.fix_type = QUDA_GAUGEFIX_TYPE_FFT;
+      fix_param.gauge_dir = 3;
+
+      gaugeFixingFFT(*U, fix_param);
       saveTuneCache();
     }
   }
 }
 
-void add_gaugefix_option_group(std::shared_ptr<QUDAApp> quda_app)
-{
-  // Option group for gauge fixing related options
-  auto opgroup = quda_app->add_option_group("gaugefix", "Options controlling gauge fixing tests");
-  opgroup->add_option("--gf-dir", gf_gauge_dir,
-                      "The orthogonal direction of teh gauge fixing, 3=Coulomb, 4=Landau. (default 4)");
-  opgroup->add_option("--gf-maxiter", gf_maxiter,
-                      "The maximun number of gauge fixing iterations to be applied (default 10000) ");
-  opgroup->add_option("--gf-verbosity-interval", gf_verbosity_interval,
-                      "Print the gauge fixing progress every N steps (default 100)");
-  opgroup->add_option("--gf-ovr-relaxation-boost", gf_ovr_relaxation_boost,
-                      "The overrelaxation boost parameter for the overrelaxation method (default 1.5)");
-  opgroup->add_option("--gf-fft-alpha", gf_fft_alpha, "The Alpha parameter in the FFT method (default 0.8)");
-  opgroup->add_option("--gf-reunit-interval", gf_reunit_interval,
-                      "Reunitarise the gauge field every N steps (default 10)");
-  opgroup->add_option("--gf-tol", gf_tolerance, "The tolerance of the gauge fixing quality (default 1e-6)");
-  opgroup->add_option(
-    "--gf-theta-condition", gf_theta_condition,
-    "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)");
-  opgroup->add_option(
-    "--gf-fft-autotune", gf_fft_autotune,
-    "In the FFT method, automatically adjust the alpha parameter if the quality begins to diverge (default false)");
-}
-
 int main(int argc, char **argv)
 {
   // initalize google test, includes command line options
@@ -429,21 +500,25 @@ int main(int argc, char **argv)
 
   // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp)
   initComms(argc, argv, gridsize_from_cmdline);
-
   QudaGaugeParam gauge_param = newQudaGaugeParam();
-  if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec;
-  if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon;
-
+  setVerbosity(verbosity);
+  setQudaPrecisions();
   setWilsonGaugeParam(gauge_param);
+  gauge_param.t_boundary = QUDA_PERIODIC_T;
   setDims(gauge_param.X);
 
+  // call srand() with a rank-dependent seed
+  initRand();
+  // initialize the QUDA library
+  initQuda(device_ordinal);
+
   display_test_info();
 
+  // If we are passing a gauge field to the test, we must allocate host memory.
+  // If no gauge is passed, we generate a quenched field on the device.
   gauge_load = strcmp(latfile, "");
   gauge_store = strcmp(gauge_outfile, "");
 
-  // If we are passing a gauge field to the test, we must allocate host memory.
-  // If no gauge is passed, we generate a quenched field on the device.
   if (gauge_load) {
     printfQuda("Loading gauge field from host\n");
     for (int dir = 0; dir < 4; dir++) {
@@ -452,11 +527,8 @@ int main(int argc, char **argv)
     constructHostGaugeField(host_gauge, gauge_param, argc, argv);
   }
 
-  // call srand() with a rank-dependent seed
-  initRand();
-
-  // initialize the QUDA library
-  initQuda(device_ordinal);
+  // initalize google test, includes command line options
+  ::testing::InitGoogleTest(&argc, argv);
 
   // Ensure gtest prints only from rank 0
   ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners();
@@ -464,7 +536,6 @@ int main(int argc, char **argv)
 
   // return code for google test
   int test_rc = RUN_ALL_TESTS();
-
   if (gauge_load) {
     // release memory
     for (int dir = 0; dir < 4; dir++) host_free(host_gauge[dir]);
diff --git a/tests/heatbath_test.cpp b/tests/heatbath_test.cpp
index f87005c063..300f0b1012 100644
--- a/tests/heatbath_test.cpp
+++ b/tests/heatbath_test.cpp
@@ -56,6 +56,7 @@ int main(int argc, char **argv)
 {
   // command line options
   auto app = make_app();
+  add_heatbath_option_group(app);
   try {
     app->parse(argc, argv);
   } catch (const CLI::ParseError &e) {
@@ -124,7 +125,7 @@ int main(int argc, char **argv)
     for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir];
     cudaGaugeField *gaugeEx = new cudaGaugeField(gParamEx);
     // CURAND random generator initialization
-    RNG *randstates = new RNG(*gauge, 1234);
+    RNG *randstates = new RNG(*gauge, quda_seed);
 
     int nsteps = heatbath_num_steps;
     int nwarm = heatbath_warmup_steps;
diff --git a/tests/invert_test.cpp b/tests/invert_test.cpp
index e559f04520..1ba40618c9 100644
--- a/tests/invert_test.cpp
+++ b/tests/invert_test.cpp
@@ -304,7 +304,7 @@ int main(int argc, char **argv)
   std::vector<double> gflops(Nsrc);
   std::vector<int> iter(Nsrc);
 
-  auto *rng = new quda::RNG(*check, 1234);
+  auto *rng = new quda::RNG(*check, quda_seed);
 
   for (int i = 0; i < Nsrc; i++) {
     // Populate the host spinor with random numbers.
diff --git a/tests/multigrid_evolve_test.cpp b/tests/multigrid_evolve_test.cpp
index de6e3f22d9..fb63443ae8 100644
--- a/tests/multigrid_evolve_test.cpp
+++ b/tests/multigrid_evolve_test.cpp
@@ -244,7 +244,7 @@ int main(int argc, char **argv)
     obs_param.compute_qcharge = QUDA_BOOLEAN_TRUE;
 
     // CURAND random generator initialization
-    RNG *randstates = new RNG(*gauge, 1234);
+    RNG *randstates = new RNG(*gauge, quda_seed);
     int nsteps = 10;
     int nhbsteps = 1;
     int novrsteps = 1;
diff --git a/tests/staggered_invert_test.cpp b/tests/staggered_invert_test.cpp
index b2f4e96588..a0a39c7adb 100644
--- a/tests/staggered_invert_test.cpp
+++ b/tests/staggered_invert_test.cpp
@@ -299,7 +299,7 @@ int main(int argc, char **argv)
   //-----------------------------------------------------------------------------------
 
   // Prepare rng
-  auto *rng = new quda::RNG(*ref, 1234);
+  auto *rng = new quda::RNG(*ref, quda_seed);
 
   // Performance measuring
   std::vector<double> time(Nsrc);
diff --git a/tests/su3_test.cpp b/tests/su3_test.cpp
index c62ca10a09..44b12bb528 100644
--- a/tests/su3_test.cpp
+++ b/tests/su3_test.cpp
@@ -94,7 +94,6 @@ void add_su3_option_group(std::shared_ptr<QUDAApp> quda_app)
 
   opgroup->add_option("--su3-wflow-type", wflow_type, "The type of action to use in the wilson flow (default wilson)")
     ->transform(CLI::QUDACheckedTransformer(wflow_type_map));
-  ;
 
   opgroup->add_option("--su3-measurement-interval", measurement_interval,
                       "Measure the field energy and topological charge every Nth step (default 5) ");
@@ -117,28 +116,30 @@ int main(int argc, char **argv)
   // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp)
   initComms(argc, argv, gridsize_from_cmdline);
 
-  QudaGaugeParam gauge_param = newQudaGaugeParam();
   if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec;
   if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon;
 
+  initQuda(device_ordinal);
+  setVerbosity(verbosity);
+
+  // call srand() with a rank-dependent seed
+  initRand();
+
+  QudaGaugeParam gauge_param = newQudaGaugeParam();
   setWilsonGaugeParam(gauge_param);
   gauge_param.t_boundary = QUDA_PERIODIC_T;
   setDims(gauge_param.X);
 
-  void *gauge[4], *new_gauge[4];
+  // All user inputs now defined
+  display_test_info();
 
+  // *** QUDA parameters begin here.
+  void *gauge[4], *new_gauge[4];
   for (int dir = 0; dir < 4; dir++) {
     gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size);
     new_gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size);
   }
 
-  initQuda(device_ordinal);
-
-  setVerbosity(verbosity);
-
-  // call srand() with a rank-dependent seed
-  initRand();
-
   constructHostGaugeField(gauge, gauge_param, argc, argv);
   // Load the gauge field to the device
   loadGaugeQuda((void *)gauge, &gauge_param);
@@ -151,9 +152,6 @@ int main(int argc, char **argv)
 
 #ifdef GPU_GAUGE_TOOLS
 
-  // All user inputs now defined
-  display_test_info();
-
   // Topological charge and gauge energy
   double q_charge_check = 0.0;
   // Size of floating point data
diff --git a/tests/utils/command_line_params.cpp b/tests/utils/command_line_params.cpp
index 5dd4ab8665..4a500bdab7 100644
--- a/tests/utils/command_line_params.cpp
+++ b/tests/utils/command_line_params.cpp
@@ -9,6 +9,8 @@ int device_ordinal = -1;
 int device_ordinal = 0;
 #endif
 
+int quda_seed = 1234;
+
 int rank_order;
 std::array<int, 4> gridsize_from_cmdline = {1, 1, 1, 1};
 auto &grid_x = gridsize_from_cmdline[0];
@@ -501,6 +503,8 @@ std::shared_ptr<QUDAApp> make_app(std::string app_description, std::string app_n
   quda_app->add_option("--save-gauge", gauge_outfile,
                        "Save gauge field \" file \" for the test (requires QIO, heatbath test only)");
 
+  quda_app->add_option("--seed", quda_seed, "Seed value for use in test suite (default 1234)")->check(CLI::PositiveNumber);
+
   quda_app->add_option("--solution-pipeline", solution_accumulator_pipeline,
                        "The pipeline length for fused solution accumulation (default 0, no pipelining)");
 
diff --git a/tests/utils/command_line_params.h b/tests/utils/command_line_params.h
index 10c8d775f2..77ac51f4cf 100644
--- a/tests/utils/command_line_params.h
+++ b/tests/utils/command_line_params.h
@@ -151,6 +151,7 @@ template <typename T> std::string inline get_string(CLI::TransformPairs<T> &map,
 // }
 // parameters
 
+extern int quda_seed;
 extern int device_ordinal;
 extern int rank_order;
 extern bool native_blas_lapack;
diff --git a/tests/utils/host_utils.cpp b/tests/utils/host_utils.cpp
index f34bf7bf92..982ebd66cd 100644
--- a/tests/utils/host_utils.cpp
+++ b/tests/utils/host_utils.cpp
@@ -69,6 +69,7 @@ void setQudaPrecisions()
   if (prec_eigensolver == QUDA_INVALID_PRECISION) prec_eigensolver = prec_sloppy;
   if (prec_precondition == QUDA_INVALID_PRECISION) prec_precondition = prec_sloppy;
   if (prec_null == QUDA_INVALID_PRECISION) prec_null = prec_precondition;
+  if (prec_refinement_sloppy == QUDA_INVALID_PRECISION) prec_refinement_sloppy = prec_precondition;
   if (smoother_halo_prec == QUDA_INVALID_PRECISION) smoother_halo_prec = prec_null;
   if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon;
   if (link_recon_precondition == QUDA_RECONSTRUCT_INVALID) link_recon_precondition = link_recon_sloppy;
@@ -319,7 +320,7 @@ void initRand()
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 #endif
 
-  srand(17 * rank + 137);
+  srand(17 * rank + 137 + quda_seed);
 }
 
 void setDims(int *X)
@@ -979,8 +980,11 @@ int x4_from_full_index(int i)
 template <typename Float> void applyGaugeFieldScaling(Float **gauge, int Vh, QudaGaugeParam *param)
 {
   // Apply spatial scaling factor (u0) to spatial links
-  for (int d = 0; d < 3; d++) {
-    for (int i = 0; i < gauge_site_size * Vh * 2; i++) { gauge[d][i] /= param->anisotropy; }
+  if (param->anisotropy != 1.0) {
+    double aniso_inv = 1.0 / param->anisotropy;
+    for (int d = 0; d < 3; d++) {
+      for (int i = 0; i < gauge_site_size * Vh * 2; i++) { gauge[d][i] *= aniso_inv; }
+    }
   }
 
   // Apply boundary conditions to temporal links
diff --git a/tests/utils/misc.cpp b/tests/utils/misc.cpp
index 3747b08605..714b75f862 100644
--- a/tests/utils/misc.cpp
+++ b/tests/utils/misc.cpp
@@ -151,6 +151,19 @@ const char *get_contract_str(QudaContractType type)
   return ret;
 }
 
+const char *get_gaugefix_str(QudaGaugeFixType type)
+{
+  const char *ret;
+
+  switch (type) {
+  case QUDA_GAUGEFIX_TYPE_OVR: ret = "Overrelaxation"; break;
+  case QUDA_GAUGEFIX_TYPE_FFT: ret = "FFT"; break;
+  default: ret = "unknown"; break;
+  }
+
+  return ret;
+}
+
 const char *get_eig_spectrum_str(QudaEigSpectrumType type)
 {
   const char *ret;
diff --git a/tests/utils/misc.h b/tests/utils/misc.h
index 5c35480840..bc4c8a2e4b 100644
--- a/tests/utils/misc.h
+++ b/tests/utils/misc.h
@@ -21,6 +21,7 @@ const char *get_eig_type_str(QudaEigType type);
 const char *get_ritz_location_str(QudaFieldLocation type);
 const char *get_memory_type_str(QudaMemoryType type);
 const char *get_contract_str(QudaContractType type);
+const char *get_gaugefix_str(QudaGaugeFixType type);
 
 #define XUP 0
 #define YUP 1