diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/pq/KMeansPlusPlusClusterer.java b/jvector-base/src/main/java/io/github/jbellis/jvector/pq/KMeansPlusPlusClusterer.java index 2aebd384a..be61d1f43 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/pq/KMeansPlusPlusClusterer.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/pq/KMeansPlusPlusClusterer.java @@ -20,7 +20,6 @@ import java.util.Arrays; import java.util.List; -import java.util.Random; import java.util.concurrent.ThreadLocalRandom; /** @@ -40,9 +39,9 @@ public class KMeansPlusPlusClusterer { private final float[][] centroidNums; /** - * Constructs a KMeansPlusPlusFloatClusterer with the specified number of clusters, - * maximum iterations, and distance function. + * Constructs a KMeansPlusPlusClusterer with the specified points and number of clusters. * + * @param points the points to cluster. * @param k number of clusters. */ public KMeansPlusPlusClusterer(float[][] points, int k) { @@ -50,10 +49,12 @@ public KMeansPlusPlusClusterer(float[][] points, int k) { } /** - * Constructs a KMeansPlusPlusFloatClusterer with the specified number of clusters, - * maximum iterations, and distance function. + * Constructs a KMeansPlusPlusClusterer with the specified points and initial centroids. *

* The initial centroids provided as a parameter are copied before modification. + * + * @param points the points to cluster. + * @param centroids the initial centroids. */ public KMeansPlusPlusClusterer(float[][] points, float[][] centroids) { this.points = points; diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/pq/ProductQuantization.java b/jvector-base/src/main/java/io/github/jbellis/jvector/pq/ProductQuantization.java index 887adedba..7dd0d54ef 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/pq/ProductQuantization.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/pq/ProductQuantization.java @@ -19,9 +19,7 @@ import io.github.jbellis.jvector.disk.Io; import io.github.jbellis.jvector.disk.RandomAccessReader; import io.github.jbellis.jvector.graph.RandomAccessVectorValues; -import io.github.jbellis.jvector.util.ExplicitThreadLocal; import io.github.jbellis.jvector.util.PhysicalCoreExecutor; -import io.github.jbellis.jvector.util.ExplicitThreadLocal; import io.github.jbellis.jvector.util.RamUsageEstimator; import io.github.jbellis.jvector.vector.VectorUtil; @@ -47,7 +45,7 @@ public class ProductQuantization implements VectorCompressor { static final int CLUSTERS = 256; // number of clusters per subspace = one byte's worth static final int K_MEANS_ITERATIONS = 6; - static final int MAX_PQ_TRAINING_SET_SIZE = 128000; + public static final int MAX_PQ_TRAINING_SET_SIZE = 128000; final float[][][] codebooks; private final int M; // codebooks.length, redundantly reproduced for convenience