MATH-1597: LowDiscrepancySequence supplier/jump for Halton and Sobol

apache · Jul 15, 2021 · ac65dca · ac65dca
1 parent 7f42535
commit ac65dca
Show file tree

Hide file tree

Showing 31 changed files with 1,273 additions and 1,769 deletions.
diff --git a/...rg/apache/commons/math4/legacy/analysis/interpolation/UnivariatePeriodicInterpolator.java b/...rg/apache/commons/math4/legacy/analysis/interpolation/UnivariatePeriodicInterpolator.java
@@ -114,7 +114,7 @@ public UnivariateFunction interpolate(double[] xval,
             y[index] = yval[i];
         }
 
-        SortInPlace.ASCENDING.accept(x, y);
+        SortInPlace.ASCENDING.apply(x, y);
 
         final UnivariateFunction f = interpolator.interpolate(x, y);
         return new UnivariateFunction() {

diff --git a/.../org/apache/commons/math4/legacy/analysis/polynomials/PolynomialFunctionLagrangeForm.java b/.../org/apache/commons/math4/legacy/analysis/polynomials/PolynomialFunctionLagrangeForm.java
@@ -78,7 +78,7 @@ public PolynomialFunctionLagrangeForm(double x[], double y[])
         coefficientsComputed = false;
 
         if (!verifyInterpolationArray(x, y, false)) {
-            SortInPlace.ASCENDING.accept(this.x, this.y);
+            SortInPlace.ASCENDING.apply(this.x, this.y);
             // Second check in case some abscissa is duplicated.
             verifyInterpolationArray(this.x, this.y, true);
         }
@@ -183,7 +183,7 @@ public static double evaluate(double x[], double y[], double z)
         System.arraycopy(x, 0, xNew, 0, x.length);
         System.arraycopy(y, 0, yNew, 0, y.length);
 
-        SortInPlace.ASCENDING.accept(xNew, yNew);
+        SortInPlace.ASCENDING.apply(xNew, yNew);
         // Second check in case some abscissa is duplicated.
         verifyInterpolationArray(xNew, yNew, true);
         return evaluateInternal(xNew, yNew, z);

diff --git a/.../src/main/java/org/apache/commons/math4/legacy/analysis/polynomials/PolynomialsUtils.java b/.../src/main/java/org/apache/commons/math4/legacy/analysis/polynomials/PolynomialsUtils.java
@@ -366,7 +366,9 @@ public static double[] shift(final double[] coefficients,
     private static PolynomialFunction buildPolynomial(final int degree,
                                                       final List<BigFraction> coefficients,
                                                       final RecurrenceCoefficientsGenerator generator) {
-
+        // Synchronizing on a method parameter is not safe; however, in this
+        // case, the lock object is an immutable field that belongs to this
+        // class.
         synchronized (coefficients) {
             final int maxDegree = (int) AccurateMath.floor(AccurateMath.sqrt(2 * coefficients.size())) - 1;
             if (degree > maxDegree) {

diff --git a/...acy/src/main/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistribution.java b/...acy/src/main/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistribution.java
@@ -46,40 +46,46 @@
 
 /**
  * <p>Represents an <a href="http://en.wikipedia.org/wiki/Empirical_distribution_function">
- * empirical probability distribution</a> -- a probability distribution derived
+ * empirical probability distribution</a>: Probability distribution derived
  * from observed data without making any assumptions about the functional form
  * of the population distribution that the data come from.</p>
  *
- * <p>An <code>EmpiricalDistribution</code> maintains data structures, called
- * <i>distribution digests</i>, that describe empirical distributions and
- * support the following operations: <ul>
- * <li>loading the distribution from a file of observed data values</li>
- * <li>dividing the input data into "bin ranges" and reporting bin frequency
- *     counts (data for histogram)</li>
- * <li>reporting univariate statistics describing the full set of data values
- *     as well as the observations within each bin</li>
- * <li>generating random values from the distribution</li>
+ * <p>An {@code EmpiricalDistribution} maintains data structures called
+ * <i>distribution digests</i> that describe empirical distributions and
+ * support the following operations:
+ * <ul>
+ *  <li>loading the distribution from a file of observed data values</li>
+ *  <li>dividing the input data into "bin ranges" and reporting bin frequency
+ *      counts (data for histogram)</li>
+ *  <li>reporting univariate statistics describing the full set of data values
+ *      as well as the observations within each bin</li>
+ *  <li>generating random values from the distribution</li>
  * </ul>
- * Applications can use <code>EmpiricalDistribution</code> to build grouped
+ *
+ * Applications can use {@code EmpiricalDistribution} to build grouped
  * frequency histograms representing the input data or to generate random values
- * "like" those in the input file -- i.e., the values generated will follow the
+ * "like" those in the input file, i.e. the values generated will follow the
  * distribution of the values in the file.
  *
  * <p>The implementation uses what amounts to the
  * <a href="http://nedwww.ipac.caltech.edu/level5/March02/Silverman/Silver2_6.html">
  * Variable Kernel Method</a> with Gaussian smoothing:<p>
  * <strong>Digesting the input file</strong>
- * <ol><li>Pass the file once to compute min and max.</li>
- * <li>Divide the range from min-max into <code>binCount</code> "bins."</li>
- * <li>Pass the data file again, computing bin counts and univariate
- *     statistics (mean, std dev.) for each of the bins </li>
- * <li>Divide the interval (0,1) into subintervals associated with the bins,
- *     with the length of a bin's subinterval proportional to its count.</li></ol>
- * <strong>Generating random values from the distribution</strong><ol>
- * <li>Generate a uniformly distributed value in (0,1) </li>
- * <li>Select the subinterval to which the value belongs.
- * <li>Generate a random Gaussian value with mean = mean of the associated
- *     bin and std dev = std dev of associated bin.</li></ol>
+ * <ol>
+ *  <li>Pass the file once to compute min and max.</li>
+ *  <li>Divide the range from min to max into {@code binCount} bins.</li>
+ *  <li>Pass the data file again, computing bin counts and univariate
+ *      statistics (mean and std dev.) for each bin.</li>
+ *  <li>Divide the interval (0,1) into subintervals associated with the bins,
+ *      with the length of a bin's subinterval proportional to its count.</li>
+ * </ol>
+ * <strong>Generating random values from the distribution</strong>
+ * <ol>
+ *  <li>Generate a uniformly distributed value in (0,1) </li>
+ *  <li>Select the subinterval to which the value belongs.</li>
+ *  <li>Generate a random Gaussian value with mean = mean of the associated
+ *      bin and std dev = std dev of associated bin.</li>
+ * </ol>
  *
  * <p>EmpiricalDistribution implements the {@link ContinuousDistribution} interface
  * as follows.  Given x within the range of values in the dataset, let B
@@ -91,49 +97,36 @@
  * grouped frequency distribution at the bin endpoints and interpolates within
  * bins using within-bin kernels.</p>
  *
- *<strong>USAGE NOTES:</strong><ul>
- *<li>The <code>binCount</code> is set by default to 1000.  A good rule of thumb
- *    is to set the bin count to approximately the length of the input file divided
- *    by 10. </li>
- *<li>The input file <i>must</i> be a plain text file containing one valid numeric
- *    entry per line.</li>
+ * <strong>USAGE NOTES:</strong>
+ * <ul>
+ * <li>The {@code binCount} is set by default to 1000.  A good rule of thumb
+ *     is to set the bin count to approximately the length of the input file divided
+ *     by 10. </li>
+ * <li>The input file <i>must</i> be a plain text file containing one valid numeric
+ *     entry per line.</li>
  * </ul>
- *
  */
 public class EmpiricalDistribution extends AbstractRealDistribution
     implements ContinuousDistribution {
-
     /** Default bin count. */
     public static final int DEFAULT_BIN_COUNT = 1000;
-
     /** Character set for file input. */
     private static final String FILE_CHARSET = "US-ASCII";
-
-    /** Serializable version identifier. */
-    private static final long serialVersionUID = 5729073523949762654L;
-
-    /** List of SummaryStatistics objects characterizing the bins. */
+    /** Bins' characteristics. */
     private final List<SummaryStatistics> binStats;
-
     /** Sample statistics. */
     private SummaryStatistics sampleStats;
-
     /** Max loaded value. */
     private double max = Double.NEGATIVE_INFINITY;
-
     /** Min loaded value. */
     private double min = Double.POSITIVE_INFINITY;
-
     /** Grid size. */
     private double delta;
-
-    /** number of bins. */
+    /** Number of bins. */
     private final int binCount;
-
-    /** is the distribution loaded? */
+    /** Whether the distribution is loaded. */
     private boolean loaded;
-
-    /** upper bounds of subintervals in (0,1) "belonging" to the bins. */
+    /** Upper bounds of subintervals in (0,1) belonging to the bins. */
     private double[] upperBounds;
 
     /**
@@ -247,11 +240,10 @@ public void load(File file) throws IOException {
     }
 
     /**
-     * Provides methods for computing <code>sampleStats</code> and
-     * <code>beanStats</code> abstracting the source of data.
+     * Provides methods for computing {@code sampleStats} and
+     * {@code binStats} abstracting the source of data.
      */
-    private abstract class DataAdapter{
-
+    private abstract class DataAdapter {
         /**
          * Compute bin stats.
          *
@@ -265,24 +257,21 @@ private abstract class DataAdapter{
          * @throws IOException if an error occurs computing sample stats
          */
         public abstract void computeStats() throws IOException;
-
     }
 
     /**
-     * <code>DataAdapter</code> for data provided through some input stream.
+     * {@code DataAdapter} for data provided through some input stream.
      */
-    private class StreamDataAdapter extends DataAdapter{
-
+    private class StreamDataAdapter extends DataAdapter {
         /** Input stream providing access to the data. */
-        private BufferedReader inputStream;
+        private final BufferedReader inputStream;
 
         /**
          * Create a StreamDataAdapter from a BufferedReader.
          *
          * @param in BufferedReader input stream
          */
         StreamDataAdapter(BufferedReader in){
-            super();
             inputStream = in;
         }
 
@@ -298,7 +287,6 @@ public void computeBinStats() throws IOException {
             }
 
             inputStream.close();
-            inputStream = null;
         }
 
         /** {@inheritDoc} */
@@ -312,15 +300,13 @@ public void computeStats() throws IOException {
                 sampleStats.addValue(val);
             }
             inputStream.close();
-            inputStream = null;
         }
     }
 
     /**
-     * <code>DataAdapter</code> for data provided as array of doubles.
+     * {@code DataAdapter} for data provided as array of doubles.
      */
     private class ArrayDataAdapter extends DataAdapter {
-
         /** Array of input  data values. */
         private final double[] inputArray;
 
@@ -331,7 +317,6 @@ private class ArrayDataAdapter extends DataAdapter {
          * @throws NullArgumentException if in is null
          */
         ArrayDataAdapter(double[] in) {
-            super();
             NullArgumentException.check(in);
             inputArray = in;
         }
@@ -349,8 +334,7 @@ public void computeStats() throws IOException {
         @Override
         public void computeBinStats() throws IOException {
             for (int i = 0; i < inputArray.length; i++) {
-                SummaryStatistics stats =
-                    binStats.get(findBin(inputArray[i]));
+                SummaryStatistics stats = binStats.get(findBin(inputArray[i]));
                 stats.addValue(inputArray[i]);
             }
         }
@@ -362,34 +346,32 @@ public void computeBinStats() throws IOException {
      * @param da object providing access to the data
      * @throws IOException  if an IO error occurs
      */
-    private void fillBinStats(final DataAdapter da)
-        throws IOException {
+    private void fillBinStats(final DataAdapter da) throws IOException {
         // Set up grid
         min = sampleStats.getMin();
         max = sampleStats.getMax();
-        delta = (max - min)/binCount;
+        delta = (max - min) / binCount;
 
         // Initialize binStats ArrayList
         if (!binStats.isEmpty()) {
             binStats.clear();
         }
         for (int i = 0; i < binCount; i++) {
             SummaryStatistics stats = new SummaryStatistics();
-            binStats.add(i,stats);
+            binStats.add(i, stats);
         }
 
         // Filling data in binStats Array
         da.computeBinStats();
 
         // Assign upperBounds based on bin counts
         upperBounds = new double[binCount];
-        upperBounds[0] =
-        ((double) binStats.get(0).getN()) / (double) sampleStats.getN();
-        for (int i = 1; i < binCount-1; i++) {
-            upperBounds[i] = upperBounds[i-1] +
-            ((double) binStats.get(i).getN()) / (double) sampleStats.getN();
+        upperBounds[0] = binStats.get(0).getN() / (double) sampleStats.getN();
+        for (int i = 1; i < binCount - 1; i++) {
+            upperBounds[i] = upperBounds[i - 1] +
+                binStats.get(i).getN() / (double) sampleStats.getN();
         }
-        upperBounds[binCount-1] = 1.0d;
+        upperBounds[binCount - 1] = 1d;
     }
 
     /**
@@ -399,9 +381,8 @@ private void fillBinStats(final DataAdapter da)
      * @return the index of the bin containing the value
      */
     private int findBin(double value) {
-        return AccurateMath.min(
-                AccurateMath.max((int) AccurateMath.ceil((value - min) / delta) - 1, 0),
-                binCount - 1);
+        return AccurateMath.min(AccurateMath.max((int) AccurateMath.ceil((value - min) / delta) - 1, 0),
+                                binCount - 1);
     }
 
     /**
@@ -490,7 +471,7 @@ public boolean isLoaded() {
         return loaded;
     }
 
-    // Distribution methods ---------------------------
+    // Distribution methods.
 
     /**
      * {@inheritDoc}
@@ -588,21 +569,22 @@ public double cumulativeProbability(double x) {
      */
     @Override
     public double inverseCumulativeProbability(final double p) {
-        if (p < 0.0 || p > 1.0) {
+        if (p < 0 ||
+            p > 1) {
             throw new OutOfRangeException(p, 0, 1);
         }
 
-        if (p == 0.0) {
+        if (p == 0) {
             return getSupportLowerBound();
         }
 
-        if (p == 1.0) {
+        if (p == 1) {
             return getSupportUpperBound();
         }
 
         int i = 0;
         while (cumBinP(i) < p) {
-            i++;
+            ++i;
         }
 
         final ContinuousDistribution kernel = getKernel(binStats.get(i));

diff --git a/commons-math-legacy/src/main/java/org/apache/commons/math4/legacy/ml/clustering/Cluster.java b/commons-math-legacy/src/main/java/org/apache/commons/math4/legacy/ml/clustering/Cluster.java
@@ -17,7 +17,6 @@
 
 package org.apache.commons.math4.legacy.ml.clustering;
 
-import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -26,10 +25,7 @@
  * @param <T> the type of points that can be clustered
  * @since 3.2
  */
-public class Cluster<T extends Clusterable> implements Serializable {
-
-    /** Serializable version identifier. */
-    private static final long serialVersionUID = -3442297081515880464L;
+public class Cluster<T extends Clusterable> {
 
     /** The points contained in this cluster. */
     private final List<T> points;

diff --git a/...ath-legacy/src/main/java/org/apache/commons/math4/legacy/ml/distance/DistanceMeasure.java b/...ath-legacy/src/main/java/org/apache/commons/math4/legacy/ml/distance/DistanceMeasure.java
@@ -16,16 +16,14 @@
  */
 package org.apache.commons.math4.legacy.ml.distance;
 
-import java.io.Serializable;
-
 import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
 
 /**
  * Interface for distance measures of n-dimensional vectors.
  *
  * @since 3.2
  */
-public interface DistanceMeasure extends Serializable {
+public interface DistanceMeasure {
 
     /**
      * Compute the distance between two n-dimensional vectors.