Skip to content

Commit

Permalink
MATH-1597: LowDiscrepancySequence supplier/jump for Halton and Sobol
Browse files Browse the repository at this point in the history
  • Loading branch information
samyBadjoudj committed Jul 15, 2021
1 parent 7f42535 commit ac65dca
Show file tree
Hide file tree
Showing 31 changed files with 1,273 additions and 1,769 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ public UnivariateFunction interpolate(double[] xval,
y[index] = yval[i];
}

SortInPlace.ASCENDING.accept(x, y);
SortInPlace.ASCENDING.apply(x, y);

final UnivariateFunction f = interpolator.interpolate(x, y);
return new UnivariateFunction() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ public PolynomialFunctionLagrangeForm(double x[], double y[])
coefficientsComputed = false;

if (!verifyInterpolationArray(x, y, false)) {
SortInPlace.ASCENDING.accept(this.x, this.y);
SortInPlace.ASCENDING.apply(this.x, this.y);
// Second check in case some abscissa is duplicated.
verifyInterpolationArray(this.x, this.y, true);
}
Expand Down Expand Up @@ -183,7 +183,7 @@ public static double evaluate(double x[], double y[], double z)
System.arraycopy(x, 0, xNew, 0, x.length);
System.arraycopy(y, 0, yNew, 0, y.length);

SortInPlace.ASCENDING.accept(xNew, yNew);
SortInPlace.ASCENDING.apply(xNew, yNew);
// Second check in case some abscissa is duplicated.
verifyInterpolationArray(xNew, yNew, true);
return evaluateInternal(xNew, yNew, z);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,9 @@ public static double[] shift(final double[] coefficients,
private static PolynomialFunction buildPolynomial(final int degree,
final List<BigFraction> coefficients,
final RecurrenceCoefficientsGenerator generator) {

// Synchronizing on a method parameter is not safe; however, in this
// case, the lock object is an immutable field that belongs to this
// class.
synchronized (coefficients) {
final int maxDegree = (int) AccurateMath.floor(AccurateMath.sqrt(2 * coefficients.size())) - 1;
if (degree > maxDegree) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,40 +46,46 @@

/**
* <p>Represents an <a href="http://en.wikipedia.org/wiki/Empirical_distribution_function">
* empirical probability distribution</a> -- a probability distribution derived
* empirical probability distribution</a>: Probability distribution derived
* from observed data without making any assumptions about the functional form
* of the population distribution that the data come from.</p>
*
* <p>An <code>EmpiricalDistribution</code> maintains data structures, called
* <i>distribution digests</i>, that describe empirical distributions and
* support the following operations: <ul>
* <li>loading the distribution from a file of observed data values</li>
* <li>dividing the input data into "bin ranges" and reporting bin frequency
* counts (data for histogram)</li>
* <li>reporting univariate statistics describing the full set of data values
* as well as the observations within each bin</li>
* <li>generating random values from the distribution</li>
* <p>An {@code EmpiricalDistribution} maintains data structures called
* <i>distribution digests</i> that describe empirical distributions and
* support the following operations:
* <ul>
* <li>loading the distribution from a file of observed data values</li>
* <li>dividing the input data into "bin ranges" and reporting bin frequency
* counts (data for histogram)</li>
* <li>reporting univariate statistics describing the full set of data values
* as well as the observations within each bin</li>
* <li>generating random values from the distribution</li>
* </ul>
* Applications can use <code>EmpiricalDistribution</code> to build grouped
*
* Applications can use {@code EmpiricalDistribution} to build grouped
* frequency histograms representing the input data or to generate random values
* "like" those in the input file -- i.e., the values generated will follow the
* "like" those in the input file, i.e. the values generated will follow the
* distribution of the values in the file.
*
* <p>The implementation uses what amounts to the
* <a href="http://nedwww.ipac.caltech.edu/level5/March02/Silverman/Silver2_6.html">
* Variable Kernel Method</a> with Gaussian smoothing:</p>
* <strong>Digesting the input file</strong>
* <ol><li>Pass the file once to compute min and max.</li>
* <li>Divide the range from min-max into <code>binCount</code> "bins."</li>
* <li>Pass the data file again, computing bin counts and univariate
* statistics (mean, std dev.) for each of the bins </li>
* <li>Divide the interval (0,1) into subintervals associated with the bins,
* with the length of a bin's subinterval proportional to its count.</li></ol>
* <strong>Generating random values from the distribution</strong><ol>
* <li>Generate a uniformly distributed value in (0,1) </li>
* <li>Select the subinterval to which the value belongs.
* <li>Generate a random Gaussian value with mean = mean of the associated
* bin and std dev = std dev of associated bin.</li></ol>
* <ol>
* <li>Pass the file once to compute min and max.</li>
* <li>Divide the range from min to max into {@code binCount} bins.</li>
* <li>Pass the data file again, computing bin counts and univariate
* statistics (mean and std dev.) for each bin.</li>
* <li>Divide the interval (0,1) into subintervals associated with the bins,
* with the length of a bin's subinterval proportional to its count.</li>
* </ol>
* <strong>Generating random values from the distribution</strong>
* <ol>
* <li>Generate a uniformly distributed value in (0,1) </li>
* <li>Select the subinterval to which the value belongs.</li>
* <li>Generate a random Gaussian value with mean = mean of the associated
* bin and std dev = std dev of associated bin.</li>
* </ol>
*
* <p>EmpiricalDistribution implements the {@link ContinuousDistribution} interface
* as follows. Given x within the range of values in the dataset, let B
Expand All @@ -91,49 +97,36 @@
* grouped frequency distribution at the bin endpoints and interpolates within
* bins using within-bin kernels.</p>
*
*<strong>USAGE NOTES:</strong><ul>
*<li>The <code>binCount</code> is set by default to 1000. A good rule of thumb
* is to set the bin count to approximately the length of the input file divided
* by 10. </li>
*<li>The input file <i>must</i> be a plain text file containing one valid numeric
* entry per line.</li>
* <strong>USAGE NOTES:</strong>
* <ul>
* <li>The {@code binCount} is set by default to 1000. A good rule of thumb
* is to set the bin count to approximately the length of the input file divided
* by 10. </li>
* <li>The input file <i>must</i> be a plain text file containing one valid numeric
* entry per line.</li>
* </ul>
*
*/
public class EmpiricalDistribution extends AbstractRealDistribution
implements ContinuousDistribution {

/** Default bin count. */
public static final int DEFAULT_BIN_COUNT = 1000;

/** Character set for file input. */
private static final String FILE_CHARSET = "US-ASCII";

/** Serializable version identifier. */
private static final long serialVersionUID = 5729073523949762654L;

/** List of SummaryStatistics objects characterizing the bins. */
/** Bins' characteristics. */
private final List<SummaryStatistics> binStats;

/** Sample statistics. */
private SummaryStatistics sampleStats;

/** Max loaded value. */
private double max = Double.NEGATIVE_INFINITY;

/** Min loaded value. */
private double min = Double.POSITIVE_INFINITY;

/** Grid size. */
private double delta;

/** number of bins. */
/** Number of bins. */
private final int binCount;

/** is the distribution loaded? */
/** Whether the distribution is loaded. */
private boolean loaded;

/** upper bounds of subintervals in (0,1) "belonging" to the bins. */
/** Upper bounds of subintervals in (0,1) belonging to the bins. */
private double[] upperBounds;

/**
Expand Down Expand Up @@ -247,11 +240,10 @@ public void load(File file) throws IOException {
}

/**
* Provides methods for computing <code>sampleStats</code> and
* <code>beanStats</code> abstracting the source of data.
* Provides methods for computing {@code sampleStats} and
* {@code binStats} abstracting the source of data.
*/
private abstract class DataAdapter{

private abstract class DataAdapter {
/**
* Compute bin stats.
*
Expand All @@ -265,24 +257,21 @@ private abstract class DataAdapter{
* @throws IOException if an error occurs computing sample stats
*/
public abstract void computeStats() throws IOException;

}

/**
* <code>DataAdapter</code> for data provided through some input stream.
* {@code DataAdapter} for data provided through some input stream.
*/
private class StreamDataAdapter extends DataAdapter{

private class StreamDataAdapter extends DataAdapter {
/** Input stream providing access to the data. */
private BufferedReader inputStream;
private final BufferedReader inputStream;

/**
* Create a StreamDataAdapter from a BufferedReader.
*
* @param in BufferedReader input stream
*/
StreamDataAdapter(BufferedReader in){
super();
inputStream = in;
}

Expand All @@ -298,7 +287,6 @@ public void computeBinStats() throws IOException {
}

inputStream.close();
inputStream = null;
}

/** {@inheritDoc} */
Expand All @@ -312,15 +300,13 @@ public void computeStats() throws IOException {
sampleStats.addValue(val);
}
inputStream.close();
inputStream = null;
}
}

/**
* <code>DataAdapter</code> for data provided as array of doubles.
* {@code DataAdapter} for data provided as array of doubles.
*/
private class ArrayDataAdapter extends DataAdapter {

/** Array of input data values. */
private final double[] inputArray;

Expand All @@ -331,7 +317,6 @@ private class ArrayDataAdapter extends DataAdapter {
* @throws NullArgumentException if in is null
*/
ArrayDataAdapter(double[] in) {
super();
NullArgumentException.check(in);
inputArray = in;
}
Expand All @@ -349,8 +334,7 @@ public void computeStats() throws IOException {
@Override
public void computeBinStats() throws IOException {
for (int i = 0; i < inputArray.length; i++) {
SummaryStatistics stats =
binStats.get(findBin(inputArray[i]));
SummaryStatistics stats = binStats.get(findBin(inputArray[i]));
stats.addValue(inputArray[i]);
}
}
Expand All @@ -362,34 +346,32 @@ public void computeBinStats() throws IOException {
* @param da object providing access to the data
* @throws IOException if an IO error occurs
*/
private void fillBinStats(final DataAdapter da)
throws IOException {
private void fillBinStats(final DataAdapter da) throws IOException {
// Set up grid
min = sampleStats.getMin();
max = sampleStats.getMax();
delta = (max - min)/binCount;
delta = (max - min) / binCount;

// Initialize binStats ArrayList
if (!binStats.isEmpty()) {
binStats.clear();
}
for (int i = 0; i < binCount; i++) {
SummaryStatistics stats = new SummaryStatistics();
binStats.add(i,stats);
binStats.add(i, stats);
}

// Filling data in binStats Array
da.computeBinStats();

// Assign upperBounds based on bin counts
upperBounds = new double[binCount];
upperBounds[0] =
((double) binStats.get(0).getN()) / (double) sampleStats.getN();
for (int i = 1; i < binCount-1; i++) {
upperBounds[i] = upperBounds[i-1] +
((double) binStats.get(i).getN()) / (double) sampleStats.getN();
upperBounds[0] = binStats.get(0).getN() / (double) sampleStats.getN();
for (int i = 1; i < binCount - 1; i++) {
upperBounds[i] = upperBounds[i - 1] +
binStats.get(i).getN() / (double) sampleStats.getN();
}
upperBounds[binCount-1] = 1.0d;
upperBounds[binCount - 1] = 1d;
}

/**
Expand All @@ -399,9 +381,8 @@ private void fillBinStats(final DataAdapter da)
* @return the index of the bin containing the value
*/
private int findBin(double value) {
return AccurateMath.min(
AccurateMath.max((int) AccurateMath.ceil((value - min) / delta) - 1, 0),
binCount - 1);
return AccurateMath.min(AccurateMath.max((int) AccurateMath.ceil((value - min) / delta) - 1, 0),
binCount - 1);
}

/**
Expand Down Expand Up @@ -490,7 +471,7 @@ public boolean isLoaded() {
return loaded;
}

// Distribution methods ---------------------------
// Distribution methods.

/**
* {@inheritDoc}
Expand Down Expand Up @@ -588,21 +569,22 @@ public double cumulativeProbability(double x) {
*/
@Override
public double inverseCumulativeProbability(final double p) {
if (p < 0.0 || p > 1.0) {
if (p < 0 ||
p > 1) {
throw new OutOfRangeException(p, 0, 1);
}

if (p == 0.0) {
if (p == 0) {
return getSupportLowerBound();
}

if (p == 1.0) {
if (p == 1) {
return getSupportUpperBound();
}

int i = 0;
while (cumBinP(i) < p) {
i++;
++i;
}

final ContinuousDistribution kernel = getKernel(binStats.get(i));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

package org.apache.commons.math4.legacy.ml.clustering;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

Expand All @@ -26,10 +25,7 @@
* @param <T> the type of points that can be clustered
* @since 3.2
*/
public class Cluster<T extends Clusterable> implements Serializable {

/** Serializable version identifier. */
private static final long serialVersionUID = -3442297081515880464L;
public class Cluster<T extends Clusterable> {

/** The points contained in this cluster. */
private final List<T> points;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,14 @@
*/
package org.apache.commons.math4.legacy.ml.distance;

import java.io.Serializable;

import org.apache.commons.math4.legacy.exception.DimensionMismatchException;

/**
* Interface for distance measures of n-dimensional vectors.
*
* @since 3.2
*/
public interface DistanceMeasure extends Serializable {
public interface DistanceMeasure {

/**
* Compute the distance between two n-dimensional vectors.
Expand Down
Loading

0 comments on commit ac65dca

Please sign in to comment.