Skip to content

Commit

Permalink
Get rid of VectorByte graphs. We only use VectorBytes as sequences of bytes now.
Browse files Browse the repository at this point in the history
  • Loading branch information
jkni committed Feb 7, 2024
1 parent 9791307 commit e55fe6b
Show file tree
Hide file tree
Showing 47 changed files with 376 additions and 908 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public CachedNode(VectorFloat<?> vector, int[] neighbors, VectorByte<?> packedNe
/** return the cached node if present, or null if not */
public abstract CachedNode getNode(int ordinal);

public static ADCGraphCache load(OnDiskADCGraphIndex<VectorFloat<?>> graph, int distance) throws IOException
public static ADCGraphCache load(OnDiskADCGraphIndex graph, int distance) throws IOException
{
if (distance < 0)
return new EmptyGraphCache();
Expand Down Expand Up @@ -74,7 +74,7 @@ private static final class HMGraphCache extends ADCGraphCache
private final Int2ObjectHashMap<CachedNode> cache;
private long ramBytesUsed = 0;

public HMGraphCache(OnDiskADCGraphIndex<VectorFloat<?>> graph, int distance) {
public HMGraphCache(OnDiskADCGraphIndex graph, int distance) {
try (var view = graph.getView()) {
Int2ObjectHashMap<CachedNode> tmpCache = new Int2ObjectHashMap<>();
cacheNeighborsOf(tmpCache, view, view.entryNode(), distance);
Expand All @@ -85,7 +85,7 @@ public HMGraphCache(OnDiskADCGraphIndex<VectorFloat<?>> graph, int distance) {
}
}

private void cacheNeighborsOf(Int2ObjectHashMap<CachedNode> tmpCache, ADCView<VectorFloat<?>> view, int ordinal, int distance) {
private void cacheNeighborsOf(Int2ObjectHashMap<CachedNode> tmpCache, ADCView view, int ordinal, int distance) {
// cache this node
var it = view.getNeighborsIterator(ordinal);
int[] neighbors = new int[it.size()];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,14 @@
* TODO: Refactor so that caching is pluggable for different GraphIndex implementations.
*/
@Experimental
public class CachingADCGraphIndex implements GraphIndex<VectorFloat<?>>, AutoCloseable, Accountable
public class CachingADCGraphIndex implements GraphIndex, AutoCloseable, Accountable
{
private static final int CACHE_DISTANCE = 3;

private final ADCGraphCache cache;
private final OnDiskADCGraphIndex<VectorFloat<?>> graph;
private final OnDiskADCGraphIndex graph;

public CachingADCGraphIndex(OnDiskADCGraphIndex<VectorFloat<?>> graph)
public CachingADCGraphIndex(OnDiskADCGraphIndex graph)
{
this.graph = graph;
try {
Expand Down Expand Up @@ -86,10 +86,10 @@ public void close() throws IOException {
graph.close();
}

public class CachedView implements ADCView<VectorFloat<?>>, ApproximateScoreProvider {
private final ADCView<VectorFloat<?>> view;
public class CachedView implements ADCView, ApproximateScoreProvider {
private final ADCView view;

public CachedView(ADCView<VectorFloat<?>> view) {
public CachedView(ADCView view) {
this.view = view;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,19 @@
import java.io.IOException;
import java.io.UncheckedIOException;

public class CachingGraphIndex implements GraphIndex<VectorFloat<?>>, AutoCloseable, Accountable
public class CachingGraphIndex implements GraphIndex, AutoCloseable, Accountable
{
private static final int CACHE_DISTANCE = 3;

private final GraphCache cache;
private final OnDiskGraphIndex<VectorFloat<?>> graph;
private final OnDiskGraphIndex graph;

public CachingGraphIndex(OnDiskGraphIndex<VectorFloat<?>> graph)
public CachingGraphIndex(OnDiskGraphIndex graph)
{
this(graph, CACHE_DISTANCE);
}

public CachingGraphIndex(OnDiskGraphIndex<VectorFloat<?>> graph, int cacheDistance)
public CachingGraphIndex(OnDiskGraphIndex graph, int cacheDistance)
{
this.graph = graph;
try {
Expand All @@ -58,7 +58,7 @@ public NodesIterator getNodes() {
}

@Override
public View<VectorFloat<?>> getView() {
public View getView() {
return new CachedView(graph.getView());
}

Expand All @@ -77,10 +77,10 @@ public void close() throws IOException {
graph.close();
}

private class CachedView implements View<VectorFloat<?>> {
private final View<VectorFloat<?>> view;
private class CachedView implements View {
private final View view;

public CachedView(View<VectorFloat<?>> view) {
public CachedView(View view) {
this.view = view;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public CachedNode(VectorFloat<?> vector, int[] neighbors) {
/** return the cached node if present, or null if not */
public abstract CachedNode getNode(int ordinal);

public static GraphCache load(GraphIndex<VectorFloat<?>> graph, int distance) throws IOException
public static GraphCache load(GraphIndex graph, int distance) throws IOException
{
if (distance < 0)
return new EmptyGraphCache();
Expand Down Expand Up @@ -68,7 +68,7 @@ private static final class HMGraphCache extends GraphCache
private final Int2ObjectHashMap<CachedNode> cache;
private long ramBytesUsed = 0;

public HMGraphCache(GraphIndex<VectorFloat<?>> graph, int distance) {
public HMGraphCache(GraphIndex graph, int distance) {
try (var view = graph.getView()) {
Int2ObjectHashMap<CachedNode> tmpCache = new Int2ObjectHashMap<>();
cacheNeighborsOf(tmpCache, view, view.entryNode(), distance);
Expand All @@ -79,7 +79,7 @@ public HMGraphCache(GraphIndex<VectorFloat<?>> graph, int distance) {
}
}

private void cacheNeighborsOf(Int2ObjectHashMap<CachedNode> tmpCache, GraphIndex.View<VectorFloat<?>> view, int ordinal, int distance) {
private void cacheNeighborsOf(Int2ObjectHashMap<CachedNode> tmpCache, GraphIndex.View view, int ordinal, int distance) {
// cache this node
var it = view.getNeighborsIterator(ordinal);
int[] neighbors = new int[it.size()];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,9 @@
* entry points and it's fine to go to disk.
* TODO: Permit maxDegree != 32.
* TODO: Permit 256 PQ clusters by quantizing floats to one byte.
* @param <T> vector type
*/
@Experimental
public class OnDiskADCGraphIndex<T> implements GraphIndex<T>, AutoCloseable, Accountable
public class OnDiskADCGraphIndex implements GraphIndex, AutoCloseable, Accountable
{
private static final VectorTypeSupport vectorTypeSupport = VectorizationProvider.getInstance().getVectorTypeSupport();
private final ReaderSupplier readerSupplier;
Expand Down Expand Up @@ -93,7 +92,7 @@ public OnDiskADCGraphIndex(ReaderSupplier readerSupplier, long offset)
* while preserving the original relative ordering in `graph`. That is, for all node ids i and j,
* if i &lt; j in `graph` then map[i] &lt; map[j] in the returned map.
*/
public static <T> Map<Integer, Integer> getSequentialRenumbering(GraphIndex<T> graph) {
public static <T> Map<Integer, Integer> getSequentialRenumbering(GraphIndex graph) {
try (var view = graph.getView()) {
Int2IntHashMap oldToNewMap = new Int2IntHashMap(-1);
int nextOrdinal = 0;
Expand Down Expand Up @@ -124,8 +123,7 @@ public OnDiskView getView()
return new OnDiskView(readerSupplier.get());
}

// TODO: This is fake generic until the reading functionality uses T
public class OnDiskView implements ADCView<T>, ApproximateScoreProvider, AutoCloseable
public class OnDiskView implements ADCView, ApproximateScoreProvider, AutoCloseable
{
private final RandomAccessReader reader;
private final int[] neighbors;
Expand All @@ -139,13 +137,13 @@ public OnDiskView(RandomAccessReader reader)
this.packedNeighbors = vectorTypeSupport.createByteVector(maxDegree * pqv.getCompressedSize());
}

public T getVector(int node) {
public VectorFloat<?> getVector(int node) {
try {
long offset = neighborsOffset +
node * (Integer.BYTES + (long) dimension * Float.BYTES + pqv.getCompressedSize() * maxDegree + (long) Integer.BYTES * (maxDegree + 1)) // earlier entries
+ Integer.BYTES; // skip the ID
reader.seek(offset);
return (T) vectorTypeSupport.readFloatVector(reader, dimension);
return vectorTypeSupport.readFloatVector(reader, dimension);
}
catch (IOException e) {
throw new UncheckedIOException(e);
Expand Down Expand Up @@ -182,7 +180,7 @@ public VectorByte<?> getPackedNeighbors(int node) {

@Override
public NodeSimilarity.ApproximateScoreFunction approximateScoreFunctionFor(VectorFloat<?> query, VectorSimilarityFunction similarityFunction) {
return QuickADCPQDecoder.newDecoder((ADCView<VectorFloat<?>>) this, query, similarityFunction);
return QuickADCPQDecoder.newDecoder(this, query, similarityFunction);
}

public VectorFloat<?> reusableResults() {
Expand Down Expand Up @@ -236,7 +234,7 @@ public void close() throws IOException {
*
* If any nodes have been deleted, you must use the overload specifying `oldToNewOrdinals` instead.
*/
public static <T> void write(GraphIndex<T> graph, RandomAccessVectorValues<T> vectors, PQVectors pqVectors, DataOutput out)
public static <T> void write(GraphIndex graph, RandomAccessVectorValues vectors, PQVectors pqVectors, DataOutput out)
throws IOException
{
try (var view = graph.getView()) {
Expand All @@ -259,8 +257,8 @@ public static <T> void write(GraphIndex<T> graph, RandomAccessVectorValues<T> ve
* compressed representations are embedded in the serialized graph to support accelerated ADC.
* @param out the output to write to
*/
public static <T> void write(GraphIndex<T> graph,
RandomAccessVectorValues<T> vectors,
public static <T> void write(GraphIndex graph,
RandomAccessVectorValues vectors,
Map<Integer, Integer> oldToNewOrdinals,
PQVectors pqVectors,
DataOutput out)
Expand All @@ -275,7 +273,7 @@ public static <T> void write(GraphIndex<T> graph,
}

if (graph instanceof OnHeapGraphIndex) {
var ohgi = (OnHeapGraphIndex<T>) graph;
var ohgi = (OnHeapGraphIndex) graph;
if (ohgi.getDeletedNodes().cardinality() > 0) {
throw new IllegalArgumentException("Run builder.cleanup() before writing the graph");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
import java.util.Map;
import java.util.stream.IntStream;

public class OnDiskGraphIndex<T> implements GraphIndex<T>, AutoCloseable, Accountable
public class OnDiskGraphIndex implements GraphIndex, AutoCloseable, Accountable
{
private static final VectorTypeSupport vectorTypeSupport = VectorizationProvider.getInstance().getVectorTypeSupport();
private final ReaderSupplier readerSupplier;
Expand Down Expand Up @@ -65,7 +65,7 @@ public OnDiskGraphIndex(ReaderSupplier readerSupplier, long offset)
* while preserving the original relative ordering in `graph`. That is, for all node ids i and j,
* if i &lt; j in `graph` then map[i] &lt; map[j] in the returned map.
*/
public static <T> Map<Integer, Integer> getSequentialRenumbering(GraphIndex<T> graph) {
public static <T> Map<Integer, Integer> getSequentialRenumbering(GraphIndex graph) {
try (var view = graph.getView()) {
Int2IntHashMap oldToNewMap = new Int2IntHashMap(-1);
int nextOrdinal = 0;
Expand All @@ -91,13 +91,12 @@ public int maxDegree() {
}

/** return a Graph that can be safely queried concurrently */
public OnDiskGraphIndex<T>.OnDiskView getView()
public OnDiskGraphIndex.OnDiskView getView()
{
return new OnDiskView(readerSupplier.get());
}

// TODO: This is fake generic until the reading functionality uses T
public class OnDiskView implements GraphIndex.View<T>, AutoCloseable
public class OnDiskView implements GraphIndex.View, AutoCloseable
{
private final RandomAccessReader reader;
private final int[] neighbors;
Expand All @@ -109,13 +108,13 @@ public OnDiskView(RandomAccessReader reader)
this.neighbors = new int[maxDegree];
}

public T getVector(int node) {
public VectorFloat<?> getVector(int node) {
try {
long offset = neighborsOffset +
node * (Integer.BYTES + (long) dimension * Float.BYTES + (long) Integer.BYTES * (maxDegree + 1)) // earlier entries
+ Integer.BYTES; // skip the ID
reader.seek(offset);
return (T) vectorTypeSupport.readFloatVector(reader, dimension);
return vectorTypeSupport.readFloatVector(reader, dimension);
}
catch (IOException e) {
throw new UncheckedIOException(e);
Expand Down Expand Up @@ -180,7 +179,7 @@ public void close() throws IOException {
*
* If any nodes have been deleted, you must use the overload specifying `oldToNewOrdinals` instead.
*/
public static <T> void write(GraphIndex<T> graph, RandomAccessVectorValues<T> vectors, DataOutput out)
public static <T> void write(GraphIndex graph, RandomAccessVectorValues vectors, DataOutput out)
throws IOException
{
try (var view = graph.getView()) {
Expand All @@ -201,14 +200,14 @@ public static <T> void write(GraphIndex<T> graph, RandomAccessVectorValues<T> ve
* any deleted nodes.
* @param out the output to write to
*/
public static <T> void write(GraphIndex<T> graph,
RandomAccessVectorValues<T> vectors,
public static void write(GraphIndex graph,
RandomAccessVectorValues vectors,
Map<Integer, Integer> oldToNewOrdinals,
DataOutput out)
throws IOException
{
if (graph instanceof OnHeapGraphIndex) {
var ohgi = (OnHeapGraphIndex<T>) graph;
var ohgi = (OnHeapGraphIndex) graph;
if (ohgi.getDeletedNodes().cardinality() > 0) {
throw new IllegalArgumentException("Run builder.cleanup() before writing the graph");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import io.github.jbellis.jvector.vector.types.VectorByte;
import io.github.jbellis.jvector.vector.types.VectorFloat;

public interface ADCView<T> extends GraphIndex.View<T> {
public interface ADCView extends GraphIndex.View {
VectorByte<?> getPackedNeighbors(int node);
VectorFloat<?> reusableResults();
PQVectors getPQVectors();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@
package io.github.jbellis.jvector.graph;

import io.github.jbellis.jvector.util.Bits;
import io.github.jbellis.jvector.vector.types.VectorFloat;

import java.io.IOException;
import java.lang.ref.Cleaner;

/**
* Represents a graph-based vector index. Nodes are represented as ints, and edges are
Expand All @@ -39,11 +39,7 @@
* All methods are threadsafe. Operations that require persistent state are wrapped
* in a View that should be created per accessing thread.
*/
public interface GraphIndex<T> extends AutoCloseable {
// Cleaner used for closing GraphIndex Views in unstructured contexts.
// This includes Views used for score functions as well as searchers.
Cleaner cleaner = Cleaner.create();

public interface GraphIndex extends AutoCloseable {
/** Returns the number of nodes in the graph */
int size();

Expand All @@ -58,7 +54,7 @@ public interface GraphIndex<T> extends AutoCloseable {
/**
* Return a View with which to navigate the graph. Views are not threadsafe.
*/
View<T> getView();
View getView();

/**
* @return the maximum number of edges per node
Expand All @@ -83,7 +79,7 @@ default boolean containsNode(int nodeId) {
@Override
void close() throws IOException;

interface View<T> extends AutoCloseable {
interface View extends AutoCloseable {
/**
* Iterator over the neighbors of a given node. Only the most recently instantiated iterator
* is guaranteed to be valid.
Expand All @@ -107,7 +103,7 @@ interface View<T> extends AutoCloseable {
* In that situation, we will want to reorder the results by the exact similarity
* at the end of the search.
*/
T getVector(int node);
VectorFloat<?> getVector(int node);

/**
* Return a Bits instance indicating which nodes are live. The result is undefined for
Expand All @@ -123,7 +119,7 @@ default int getIdUpperBound() {
}
}

static <T> String prettyPrint(GraphIndex<T> graph) {
static <T> String prettyPrint(GraphIndex graph) {
StringBuilder sb = new StringBuilder();
sb.append(graph);
sb.append("\n");
Expand Down
Loading

0 comments on commit e55fe6b

Please sign in to comment.