From 7cc66c85e95b948a438b400b098c3511ab970983 Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Wed, 26 Feb 2025 22:04:55 -0500 Subject: [PATCH 1/6] chore: Making embedding optional in retrieve_online_documents Signed-off-by: Francisco Javier Arceo --- .../infra/online_stores/milvus_online_store/milvus.py | 3 ++- sdk/python/feast/infra/online_stores/online_store.py | 7 +++++-- sdk/python/feast/infra/online_stores/sqlite.py | 4 ++-- sdk/python/feast/infra/passthrough_provider.py | 2 +- sdk/python/feast/infra/provider.py | 4 ++-- sdk/python/tests/foo_provider.py | 2 +- .../tests/unit/online_store/test_online_retrieval.py | 4 ++-- 7 files changed, 15 insertions(+), 11 deletions(-) diff --git a/sdk/python/feast/infra/online_stores/milvus_online_store/milvus.py b/sdk/python/feast/infra/online_stores/milvus_online_store/milvus.py index 9b9f003ebb..e9087094b5 100644 --- a/sdk/python/feast/infra/online_stores/milvus_online_store/milvus.py +++ b/sdk/python/feast/infra/online_stores/milvus_online_store/milvus.py @@ -460,7 +460,7 @@ def retrieve_online_documents_v2( config: RepoConfig, table: FeatureView, requested_features: List[str], - embedding: List[float], + embedding: Optional[List[float]], top_k: int, distance_metric: Optional[str] = None, query_string: Optional[str] = None, @@ -471,6 +471,7 @@ def retrieve_online_documents_v2( Optional[Dict[str, ValueProto]], ] ]: + assert embedding is not None, "Key Word Search not yet implemented for Milvus" entity_name_feast_primitive_type_map = { k.name: k.dtype for k in table.entity_columns } diff --git a/sdk/python/feast/infra/online_stores/online_store.py b/sdk/python/feast/infra/online_stores/online_store.py index 7165ef59a3..5111bcd47b 100644 --- a/sdk/python/feast/infra/online_stores/online_store.py +++ b/sdk/python/feast/infra/online_stores/online_store.py @@ -436,7 +436,7 @@ def retrieve_online_documents_v2( config: RepoConfig, table: FeatureView, requested_features: List[str], - embedding: List[float], + 
embedding: Optional[List[float]], top_k: int, distance_metric: Optional[str] = None, query_string: Optional[str] = None, @@ -455,7 +455,7 @@ def retrieve_online_documents_v2( config: The config for the current feature store. table: The feature view whose feature values should be read. requested_features: The list of features whose embeddings should be used for retrieval. - embedding: The embeddings to use for retrieval. + embedding: The embeddings to use for retrieval (optional) top_k: The number of documents to retrieve. query_string: The query string to search for using keyword search (bm25) (optional) @@ -464,6 +464,9 @@ def retrieve_online_documents_v2( where the first item is the event timestamp for the row, and the second item is a dict of feature name to embeddings. """ + assert embedding is not None or query_string is not None, ( + "Either embedding or query_string must be specified" + ) raise NotImplementedError( f"Online store {self.__class__.__name__} does not support online retrieval" ) diff --git a/sdk/python/feast/infra/online_stores/sqlite.py b/sdk/python/feast/infra/online_stores/sqlite.py index 87da851ce5..046666d5d8 100644 --- a/sdk/python/feast/infra/online_stores/sqlite.py +++ b/sdk/python/feast/infra/online_stores/sqlite.py @@ -439,7 +439,7 @@ def retrieve_online_documents_v2( config: RepoConfig, table: FeatureView, requested_features: List[str], - query: List[float], + query: Optional[List[float]], top_k: int, distance_metric: Optional[str] = None, query_string: Optional[str] = None, @@ -456,7 +456,7 @@ def retrieve_online_documents_v2( config: Feast configuration object table: FeatureView object as the table to search requested_features: List of requested features to retrieve - query: Query embedding to search for + query: Query embedding to search for (optional) top_k: Number of items to return distance_metric: Distance metric to use (optional) query_string: The query string to search for using keyword search (bm25) (optional) diff --git 
a/sdk/python/feast/infra/passthrough_provider.py b/sdk/python/feast/infra/passthrough_provider.py index 2a896a58b0..c0c5412928 100644 --- a/sdk/python/feast/infra/passthrough_provider.py +++ b/sdk/python/feast/infra/passthrough_provider.py @@ -318,7 +318,7 @@ def retrieve_online_documents_v2( config: RepoConfig, table: FeatureView, requested_features: Optional[List[str]], - query: List[float], + query: Optional[List[float]], top_k: int, distance_metric: Optional[str] = None, query_string: Optional[str] = None, diff --git a/sdk/python/feast/infra/provider.py b/sdk/python/feast/infra/provider.py index 5155e76ddd..78039e1b87 100644 --- a/sdk/python/feast/infra/provider.py +++ b/sdk/python/feast/infra/provider.py @@ -456,7 +456,7 @@ def retrieve_online_documents_v2( config: RepoConfig, table: FeatureView, requested_features: List[str], - query: List[float], + query: Optional[List[float]], top_k: int, distance_metric: Optional[str] = None, query_string: Optional[str] = None, @@ -475,7 +475,7 @@ def retrieve_online_documents_v2( config: The config for the current feature store. table: The feature view whose embeddings should be searched. requested_features: the requested document feature names. - query: The query embedding to search for. + query: The query embedding to search for (optional). top_k: The number of documents to return. 
query_string: The query string to search for using keyword search (bm25) (optional) diff --git a/sdk/python/tests/foo_provider.py b/sdk/python/tests/foo_provider.py index df8edf1232..2aa674c0aa 100644 --- a/sdk/python/tests/foo_provider.py +++ b/sdk/python/tests/foo_provider.py @@ -169,7 +169,7 @@ def retrieve_online_documents_v2( config: RepoConfig, table: FeatureView, requested_features: List[str], - query: List[float], + query: Optional[List[float]], top_k: int, distance_metric: Optional[str] = None, query_string: Optional[str] = None, diff --git a/sdk/python/tests/unit/online_store/test_online_retrieval.py b/sdk/python/tests/unit/online_store/test_online_retrieval.py index c88cc57ef1..e874023d66 100644 --- a/sdk/python/tests/unit/online_store/test_online_retrieval.py +++ b/sdk/python/tests/unit/online_store/test_online_retrieval.py @@ -1279,12 +1279,12 @@ def test_milvus_native_from_feast_data() -> None: search_res = client.search( collection_name=COLLECTION_NAME, data=[query_embedding], - limit=3, # Top 3 results + limit=5, # Top 3 results output_fields=["item_id", "author_id", "sentence_chunks"], ) # Validate the search results - assert len(search_res[0]) == 3 + assert len(search_res[0]) == 5 print("Search Results:", search_res[0]) # Clean up the collection From d8d0416862ae789221a04eea9310361d39feee0c Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Wed, 26 Feb 2025 22:17:25 -0500 Subject: [PATCH 2/6] updated Signed-off-by: Francisco Javier Arceo --- sdk/python/feast/feature_store.py | 11 ++++------- .../tests/unit/online_store/test_online_retrieval.py | 5 +++-- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index c3a8cd7a2b..f75ddcfeea 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -1863,7 +1863,7 @@ def retrieve_online_documents( def retrieve_online_documents_v2( self, - query: Union[str, List[float]], + query: 
Optional[List[float]], top_k: int, features: List[str], distance_metric: Optional[str] = "L2", @@ -1876,15 +1876,12 @@ def retrieve_online_documents_v2( features: The list of features that should be retrieved from the online document store. These features can be specified either as a list of string document feature references or as a feature service. String feature references must have format "feature_view:feature", e.g, "document_fv:document_embeddings". - query: The query to retrieve the closest document features for. + query: The query to retrieve the closest document features for (optional) top_k: The number of closest document features to retrieve. distance_metric: The distance metric to use for retrieval. query_string: The query string to retrieve the closest document features using keyword search (bm25). """ - if isinstance(query, str): - raise ValueError( - "Using embedding functionality is not supported for document retrieval. Please embed the query before calling retrieve_online_documents." - ) + assert query or query_string, "Either query or query_string must be provided." 
( available_feature_views, @@ -1988,7 +1985,7 @@ def _retrieve_from_online_store_v2( provider: Provider, table: FeatureView, requested_features: List[str], - query: List[float], + query: Optional[List[float]], top_k: int, distance_metric: Optional[str], query_string: Optional[str], diff --git a/sdk/python/tests/unit/online_store/test_online_retrieval.py b/sdk/python/tests/unit/online_store/test_online_retrieval.py index e874023d66..0d6f1b5fab 100644 --- a/sdk/python/tests/unit/online_store/test_online_retrieval.py +++ b/sdk/python/tests/unit/online_store/test_online_retrieval.py @@ -979,14 +979,15 @@ def test_sqlite_get_online_documents_v2_search() -> None: ) # Test vector similarity search - query_embedding = [float(x) for x in np.random.random(vector_length)] + # query_embedding = [float(x) for x in np.random.random(vector_length)] result = store.retrieve_online_documents_v2( features=[ "document_embeddings:Embeddings", "document_embeddings:content", "document_embeddings:title", ], - query=query_embedding, + query=None, + # query=query_embedding, query_string="(content: 5) OR (title: 1) OR (title: 3)", top_k=3, ).to_dict() From 7821b1bb647cc7dd51d6dcbe3a3eded2bdc925ed Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Thu, 27 Feb 2025 05:41:47 -0500 Subject: [PATCH 3/6] adjusting syntax Signed-off-by: Francisco Javier Arceo --- sdk/python/feast/feature_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index f75ddcfeea..0242cad936 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -1881,7 +1881,7 @@ def retrieve_online_documents_v2( distance_metric: The distance metric to use for retrieval. query_string: The query string to retrieve the closest document features using keyword search (bm25). """ - assert query or query_string, "Either query or query_string must be provided." 
+ assert query is not None or query_string is not None, "Either query or query_string must be provided." ( available_feature_views, From a62d5255665b2137cbc7754682d0cad7787b979f Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Thu, 27 Feb 2025 08:09:04 -0500 Subject: [PATCH 4/6] linter Signed-off-by: Francisco Javier Arceo --- sdk/python/feast/feature_store.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 0242cad936..4f630e2761 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -1881,7 +1881,9 @@ def retrieve_online_documents_v2( distance_metric: The distance metric to use for retrieval. query_string: The query string to retrieve the closest document features using keyword search (bm25). """ - assert query is not None or query_string is not None, "Either query or query_string must be provided." + assert query is not None or query_string is not None, ( + "Either query or query_string must be provided." 
+ ) ( available_feature_views, From 4be462dcff935d867af84092c888944d17ec3ee0 Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Thu, 27 Feb 2025 08:47:51 -0500 Subject: [PATCH 5/6] adjusted order and set query with default Signed-off-by: Francisco Javier Arceo --- sdk/python/feast/feature_store.py | 6 +++--- sdk/python/tests/unit/online_store/test_online_retrieval.py | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 4f630e2761..5f2619fa77 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -1863,11 +1863,11 @@ def retrieve_online_documents( def retrieve_online_documents_v2( self, - query: Optional[List[float]], - top_k: int, features: List[str], - distance_metric: Optional[str] = "L2", + top_k: int, + query: Optional[List[float]] = None, query_string: Optional[str] = None, + distance_metric: Optional[str] = "L2", ) -> OnlineResponse: """ Retrieves the top k closest document features. Note, embeddings are a subset of features. 
diff --git a/sdk/python/tests/unit/online_store/test_online_retrieval.py b/sdk/python/tests/unit/online_store/test_online_retrieval.py index 0d6f1b5fab..ea76ed6f54 100644 --- a/sdk/python/tests/unit/online_store/test_online_retrieval.py +++ b/sdk/python/tests/unit/online_store/test_online_retrieval.py @@ -986,8 +986,6 @@ def test_sqlite_get_online_documents_v2_search() -> None: "document_embeddings:content", "document_embeddings:title", ], - query=None, - # query=query_embedding, query_string="(content: 5) OR (title: 1) OR (title: 3)", top_k=3, ).to_dict() From c87a2bd8a4d5ba1ce0ec303fe1e8539d207bb077 Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Thu, 27 Feb 2025 10:24:39 -0500 Subject: [PATCH 6/6] updated doc string Signed-off-by: Francisco Javier Arceo --- sdk/python/feast/feature_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 5f2619fa77..20db8292ca 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -1876,7 +1876,7 @@ def retrieve_online_documents_v2( features: The list of features that should be retrieved from the online document store. These features can be specified either as a list of string document feature references or as a feature service. String feature references must have format "feature_view:feature", e.g, "document_fv:document_embeddings". - query: The query to retrieve the closest document features for (optional) + query: The embedded query to retrieve the closest document features for (optional) top_k: The number of closest document features to retrieve. distance_metric: The distance metric to use for retrieval. query_string: The query string to retrieve the closest document features using keyword search (bm25).