Skip to content

Commit

Permalink
Merge branch 'opensearch-project:main' into knn-2263
Browse files Browse the repository at this point in the history
  • Loading branch information
markwu-sde authored Feb 7, 2025
2 parents 9964c91 + 55652ca commit 058e9d8
Show file tree
Hide file tree
Showing 39 changed files with 3,515 additions and 56 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Add expand_nested_docs Parameter support to NMSLIB engine (#2331)[https://github.com/opensearch-project/k-NN/pull/2331]
- Add a new build mode, `FAISS_OPT_LEVEL=avx512_spr`, which enables the use of advanced AVX-512 instructions introduced with Intel(R) Sapphire Rapids (#2404)[https://github.com/opensearch-project/k-NN/pull/2404]
- Add cosine similarity support for faiss engine (#2376)[https://github.com/opensearch-project/k-NN/pull/2376]
- Add derived source feature for vector fields (#2449)[https://github.com/opensearch-project/k-NN/pull/2449]
### Enhancements
- Introduced a writing layer in native engines where relies on the writing interface to process IO. (#2241)[https://github.com/opensearch-project/k-NN/pull/2241]
- Allow method parameter override for training based indices (#2290) https://github.com/opensearch-project/k-NN/pull/2290]
Expand All @@ -44,6 +45,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
* Fixing it to retrieve space_type from index setting when both method and top level don't have the value. [#2374](https://github.com/opensearch-project/k-NN/pull/2374)
* Fixing the bug where setting rescore as false for on_disk knn_vector query is a no-op (#2399)[https://github.com/opensearch-project/k-NN/pull/2399]
* Fixing bug where mapping accepts both dimension and model-id (#2410)[https://github.com/opensearch-project/k-NN/pull/2410]
* Add version check for full field name validation (#2477)[https://github.com/opensearch-project/k-NN/pull/2477]
### Infrastructure
* Updated C++ version in JNI from c++11 to c++17 [#2259](https://github.com/opensearch-project/k-NN/pull/2259)
* Upgrade bytebuddy and objenesis version to match OpenSearch core and, update github ci runner for macos [#2279](https://github.com/opensearch-project/k-NN/pull/2279)
Expand Down
2 changes: 2 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,7 @@ integTest {
systemProperty("https", is_https)
systemProperty("user", user)
systemProperty("password", password)
systemProperty("test.exhaustive", System.getProperty("test.exhaustive"))

doFirst {
// Tell the test JVM if the cluster JVM is running under a debugger so that tests can
Expand Down Expand Up @@ -466,6 +467,7 @@ task integTestRemote(type: RestIntegTestTask) {
systemProperty 'cluster.number_of_nodes', "${_numNodes}"

systemProperty 'tests.security.manager', 'false'
systemProperty("test.exhaustive", System.getProperty("test.exhaustive"))

// Run tests with remote cluster only if rest case is defined
if (System.getProperty("tests.rest.cluster") != null) {
Expand Down
4 changes: 4 additions & 0 deletions src/main/java/org/opensearch/knn/common/KNNConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -161,4 +161,8 @@ public class KNNConstants {

public static final String MODE_PARAMETER = "mode";
public static final String COMPRESSION_LEVEL_PARAMETER = "compression_level";

public static final String DERIVED_VECTOR_FIELD_ATTRIBUTE_KEY = "knn-derived-source-enabled";
public static final String DERIVED_VECTOR_FIELD_ATTRIBUTE_TRUE_VALUE = "true";
public static final String DERIVED_VECTOR_FIELD_ATTRIBUTE_FALSE_VALUE = "false";
}
24 changes: 23 additions & 1 deletion src/main/java/org/opensearch/knn/index/KNNSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import static org.opensearch.common.settings.Setting.Property.IndexScope;
import static org.opensearch.common.settings.Setting.Property.NodeScope;
import static org.opensearch.common.settings.Setting.Property.Final;
import static org.opensearch.common.settings.Setting.Property.UnmodifiableOnRestore;
import static org.opensearch.common.unit.MemorySizeValue.parseBytesSizeValueOrHeapRatio;
import static org.opensearch.core.common.unit.ByteSizeValue.parseBytesSizeValue;
import static org.opensearch.knn.common.featureflags.KNNFeatureFlags.getFeatureFlags;
Expand Down Expand Up @@ -93,6 +94,7 @@ public class KNNSettings {
public static final String KNN_FAISS_AVX512_DISABLED = "knn.faiss.avx512.disabled";
public static final String KNN_FAISS_AVX512_SPR_DISABLED = "knn.faiss.avx512_spr.disabled";
public static final String KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED = "index.knn.disk.vector.shard_level_rescoring_disabled";
public static final String KNN_DERIVED_SOURCE_ENABLED = "index.knn.derived_source.enabled";

/**
* Default setting values
Expand Down Expand Up @@ -270,6 +272,14 @@ public class KNNSettings {
Setting.Property.Dynamic
);

public static final Setting<Boolean> KNN_DERIVED_SOURCE_ENABLED_SETTING = Setting.boolSetting(
KNN_DERIVED_SOURCE_ENABLED,
false,
IndexScope,
Final,
UnmodifiableOnRestore
);

/**
* This setting identifies KNN index.
*/
Expand Down Expand Up @@ -528,6 +538,9 @@ private Setting<?> getSetting(String key) {
if (KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED.equals(key)) {
return KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED_SETTING;
}
if (KNN_DERIVED_SOURCE_ENABLED.equals(key)) {
return KNN_DERIVED_SOURCE_ENABLED_SETTING;
}

throw new IllegalArgumentException("Cannot find setting by key [" + key + "]");
}
Expand All @@ -553,7 +566,8 @@ public List<Setting<?>> getSettings() {
KNN_FAISS_AVX512_SPR_DISABLED_SETTING,
QUANTIZATION_STATE_CACHE_SIZE_LIMIT_SETTING,
QUANTIZATION_STATE_CACHE_EXPIRY_TIME_MINUTES_SETTING,
KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED_SETTING
KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED_SETTING,
KNN_DERIVED_SOURCE_ENABLED_SETTING
);
return Stream.concat(settings.stream(), Stream.concat(getFeatureFlags().stream(), dynamicCacheSettings.values().stream()))
.collect(Collectors.toList());
Expand Down Expand Up @@ -704,6 +718,14 @@ public static boolean isFaissAVX2Disabled() {
}
}

/**
* check this index enabled/disabled derived source
* @param settings Settings
*/
public static boolean isKNNDerivedSourceEnabled(Settings settings) {
return KNN_DERIVED_SOURCE_ENABLED_SETTING.get(settings);
}

public static boolean isFaissAVX512Disabled() {
return Booleans.parseBoolean(
Objects.requireNonNullElse(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,13 @@
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.knn.index.codec.KNN9120Codec.DerivedSourceStoredFieldsFormat;
import org.opensearch.knn.index.codec.KNNCodecVersion;
import org.opensearch.knn.index.codec.KNNFormatFacade;
import org.opensearch.knn.index.codec.derivedsource.DerivedSourceReadersSupplier;

/**
* KNN Codec that wraps the Lucene Codec which is part of Lucene 10.0.1
Expand All @@ -24,12 +28,15 @@ public class KNN10010Codec extends FilterCodec {
private static final KNNCodecVersion VERSION = KNNCodecVersion.V_10_01_0;
private final KNNFormatFacade knnFormatFacade;
private final PerFieldKnnVectorsFormat perFieldKnnVectorsFormat;
private final StoredFieldsFormat storedFieldsFormat;

private final MapperService mapperService;

/**
* No arg constructor that uses Lucene99 as the delegate
*/
public KNN10010Codec() {
this(VERSION.getDefaultCodecDelegate(), VERSION.getPerFieldKnnVectorsFormat());
this(VERSION.getDefaultCodecDelegate(), VERSION.getPerFieldKnnVectorsFormat(), null);
}

/**
Expand All @@ -40,10 +47,12 @@ public KNN10010Codec() {
* @param knnVectorsFormat per field format for KnnVector
*/
@Builder
protected KNN10010Codec(Codec delegate, PerFieldKnnVectorsFormat knnVectorsFormat) {
protected KNN10010Codec(Codec delegate, PerFieldKnnVectorsFormat knnVectorsFormat, MapperService mapperService) {
super(VERSION.getCodecName(), delegate);
knnFormatFacade = VERSION.getKnnFormatFacadeSupplier().apply(delegate);
perFieldKnnVectorsFormat = knnVectorsFormat;
this.mapperService = mapperService;
this.storedFieldsFormat = getStoredFieldsFormat();
}

@Override
Expand All @@ -60,4 +69,36 @@ public CompoundFormat compoundFormat() {
public KnnVectorsFormat knnVectorsFormat() {
return perFieldKnnVectorsFormat;
}

@Override
public StoredFieldsFormat storedFieldsFormat() {
return storedFieldsFormat;
}

private StoredFieldsFormat getStoredFieldsFormat() {
DerivedSourceReadersSupplier derivedSourceReadersSupplier = new DerivedSourceReadersSupplier((segmentReadState) -> {
if (segmentReadState.fieldInfos.hasVectorValues()) {
return knnVectorsFormat().fieldsReader(segmentReadState);
}
return null;
}, (segmentReadState) -> {
if (segmentReadState.fieldInfos.hasDocValues()) {
return docValuesFormat().fieldsProducer(segmentReadState);
}
return null;

}, (segmentReadState) -> {
if (segmentReadState.fieldInfos.hasPostings()) {
return postingsFormat().fieldsProducer(segmentReadState);
}
return null;

}, (segmentReadState -> {
if (segmentReadState.fieldInfos.hasNorms()) {
return normsFormat().normsProducer(segmentReadState);
}
return null;
}));
return new DerivedSourceStoredFieldsFormat(delegate.storedFieldsFormat(), derivedSourceReadersSupplier, mapperService);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.index.codec.KNN9120Codec;

import lombok.AllArgsConstructor;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.opensearch.common.Nullable;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.codec.derivedsource.DerivedSourceReadersSupplier;
import org.opensearch.knn.index.mapper.KNNVectorFieldType;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import static org.opensearch.knn.common.KNNConstants.DERIVED_VECTOR_FIELD_ATTRIBUTE_KEY;
import static org.opensearch.knn.common.KNNConstants.DERIVED_VECTOR_FIELD_ATTRIBUTE_TRUE_VALUE;

@AllArgsConstructor
public class DerivedSourceStoredFieldsFormat extends StoredFieldsFormat {

private final StoredFieldsFormat delegate;
private final DerivedSourceReadersSupplier derivedSourceReadersSupplier;
// IMPORTANT Do not rely on this for the reader, it will be null if SPI is used
@Nullable
private final MapperService mapperService;

@Override
public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext ioContext)
throws IOException {
List<FieldInfo> derivedVectorFields = null;
for (FieldInfo fieldInfo : fieldInfos) {
if (DERIVED_VECTOR_FIELD_ATTRIBUTE_TRUE_VALUE.equals(fieldInfo.attributes().get(DERIVED_VECTOR_FIELD_ATTRIBUTE_KEY))) {
// Lazily initialize the list of fields
if (derivedVectorFields == null) {
derivedVectorFields = new ArrayList<>();
}
derivedVectorFields.add(fieldInfo);
}
}
// If no fields have it enabled, we can just short-circuit and return the delegate's fieldReader
if (derivedVectorFields == null || derivedVectorFields.isEmpty()) {
return delegate.fieldsReader(directory, segmentInfo, fieldInfos, ioContext);
}
return new DerivedSourceStoredFieldsReader(
delegate.fieldsReader(directory, segmentInfo, fieldInfos, ioContext),
derivedVectorFields,
derivedSourceReadersSupplier,
new SegmentReadState(directory, segmentInfo, fieldInfos, ioContext)
);
}

@Override
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo segmentInfo, IOContext ioContext) throws IOException {
StoredFieldsWriter delegateWriter = delegate.fieldsWriter(directory, segmentInfo, ioContext);
if (mapperService != null && KNNSettings.isKNNDerivedSourceEnabled(mapperService.getIndexSettings().getSettings())) {
List<String> vectorFieldTypes = new ArrayList<>();
for (MappedFieldType fieldType : mapperService.fieldTypes()) {
if (fieldType instanceof KNNVectorFieldType) {
vectorFieldTypes.add(fieldType.name());
}
}
if (vectorFieldTypes.isEmpty() == false) {
return new DerivedSourceStoredFieldsWriter(delegateWriter, vectorFieldTypes);
}
}
return delegateWriter;
}
}
Loading

0 comments on commit 058e9d8

Please sign in to comment.