Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Making embedding optional in retrieve_online_documents #5104

Merged
merged 6 commits into from
Feb 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 8 additions & 9 deletions sdk/python/feast/feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -1863,11 +1863,11 @@ def retrieve_online_documents(

def retrieve_online_documents_v2(
self,
query: Union[str, List[float]],
top_k: int,
features: List[str],
distance_metric: Optional[str] = "L2",
top_k: int,
query: Optional[List[float]] = None,
query_string: Optional[str] = None,
distance_metric: Optional[str] = "L2",
) -> OnlineResponse:
"""
Retrieves the top k closest document features. Note, embeddings are a subset of features.
Expand All @@ -1876,15 +1876,14 @@ def retrieve_online_documents_v2(
features: The list of features that should be retrieved from the online document store. These features can be
specified either as a list of string document feature references or as a feature service. String feature
references must have format "feature_view:feature", e.g., "document_fv:document_embeddings".
query: The query to retrieve the closest document features for.
query: The embedded query to retrieve the closest document features for (optional).
top_k: The number of closest document features to retrieve.
distance_metric: The distance metric to use for retrieval.
query_string: The query string to retrieve the closest document features using keyword search (bm25).
"""
if isinstance(query, str):
raise ValueError(
"Using embedding functionality is not supported for document retrieval. Please embed the query before calling retrieve_online_documents."
)
assert query is not None or query_string is not None, (
"Either query or query_string must be provided."
)

(
available_feature_views,
Expand Down Expand Up @@ -1988,7 +1987,7 @@ def _retrieve_from_online_store_v2(
provider: Provider,
table: FeatureView,
requested_features: List[str],
query: List[float],
query: Optional[List[float]],
top_k: int,
distance_metric: Optional[str],
query_string: Optional[str],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ def retrieve_online_documents_v2(
config: RepoConfig,
table: FeatureView,
requested_features: List[str],
embedding: List[float],
embedding: Optional[List[float]],
top_k: int,
distance_metric: Optional[str] = None,
query_string: Optional[str] = None,
Expand All @@ -471,6 +471,7 @@ def retrieve_online_documents_v2(
Optional[Dict[str, ValueProto]],
]
]:
assert embedding is not None, "Key Word Search not yet implemented for Milvus"
entity_name_feast_primitive_type_map = {
k.name: k.dtype for k in table.entity_columns
}
Expand Down
7 changes: 5 additions & 2 deletions sdk/python/feast/infra/online_stores/online_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ def retrieve_online_documents_v2(
config: RepoConfig,
table: FeatureView,
requested_features: List[str],
embedding: List[float],
embedding: Optional[List[float]],
top_k: int,
distance_metric: Optional[str] = None,
query_string: Optional[str] = None,
Expand All @@ -455,7 +455,7 @@ def retrieve_online_documents_v2(
config: The config for the current feature store.
table: The feature view whose feature values should be read.
requested_features: The list of features whose embeddings should be used for retrieval.
embedding: The embeddings to use for retrieval.
embedding: The embeddings to use for retrieval (optional)
top_k: The number of documents to retrieve.
query_string: The query string to search for using keyword search (bm25) (optional)

Expand All @@ -464,6 +464,9 @@ def retrieve_online_documents_v2(
where the first item is the event timestamp for the row, and the second item is a dict of feature
name to embeddings.
"""
assert embedding is not None or query_string is not None, (
"Either embedding or query_string must be specified"
)
raise NotImplementedError(
f"Online store {self.__class__.__name__} does not support online retrieval"
)
Expand Down
4 changes: 2 additions & 2 deletions sdk/python/feast/infra/online_stores/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ def retrieve_online_documents_v2(
config: RepoConfig,
table: FeatureView,
requested_features: List[str],
query: List[float],
query: Optional[List[float]],
top_k: int,
distance_metric: Optional[str] = None,
query_string: Optional[str] = None,
Expand All @@ -456,7 +456,7 @@ def retrieve_online_documents_v2(
config: Feast configuration object
table: FeatureView object as the table to search
requested_features: List of requested features to retrieve
query: Query embedding to search for
query: Query embedding to search for (optional)
top_k: Number of items to return
distance_metric: Distance metric to use (optional)
query_string: The query string to search for using keyword search (bm25) (optional)
Expand Down
2 changes: 1 addition & 1 deletion sdk/python/feast/infra/passthrough_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ def retrieve_online_documents_v2(
config: RepoConfig,
table: FeatureView,
requested_features: Optional[List[str]],
query: List[float],
query: Optional[List[float]],
top_k: int,
distance_metric: Optional[str] = None,
query_string: Optional[str] = None,
Expand Down
4 changes: 2 additions & 2 deletions sdk/python/feast/infra/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ def retrieve_online_documents_v2(
config: RepoConfig,
table: FeatureView,
requested_features: List[str],
query: List[float],
query: Optional[List[float]],
top_k: int,
distance_metric: Optional[str] = None,
query_string: Optional[str] = None,
Expand All @@ -475,7 +475,7 @@ def retrieve_online_documents_v2(
config: The config for the current feature store.
table: The feature view whose embeddings should be searched.
requested_features: the requested document feature names.
query: The query embedding to search for.
query: The query embedding to search for (optional).
top_k: The number of documents to return.
query_string: The query string to search for using keyword search (bm25) (optional)

Expand Down
2 changes: 1 addition & 1 deletion sdk/python/tests/foo_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def retrieve_online_documents_v2(
config: RepoConfig,
table: FeatureView,
requested_features: List[str],
query: List[float],
query: Optional[List[float]],
top_k: int,
distance_metric: Optional[str] = None,
query_string: Optional[str] = None,
Expand Down
7 changes: 3 additions & 4 deletions sdk/python/tests/unit/online_store/test_online_retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -979,14 +979,13 @@ def test_sqlite_get_online_documents_v2_search() -> None:
)

# Test keyword search (only query_string is passed; no query embedding)
query_embedding = [float(x) for x in np.random.random(vector_length)]
# query_embedding = [float(x) for x in np.random.random(vector_length)]
result = store.retrieve_online_documents_v2(
features=[
"document_embeddings:Embeddings",
"document_embeddings:content",
"document_embeddings:title",
],
query=query_embedding,
query_string="(content: 5) OR (title: 1) OR (title: 3)",
top_k=3,
).to_dict()
Expand Down Expand Up @@ -1279,12 +1278,12 @@ def test_milvus_native_from_feast_data() -> None:
search_res = client.search(
collection_name=COLLECTION_NAME,
data=[query_embedding],
limit=3, # Top 3 results
limit=5, # Top 5 results
output_fields=["item_id", "author_id", "sentence_chunks"],
)

# Validate the search results
assert len(search_res[0]) == 3
assert len(search_res[0]) == 5
print("Search Results:", search_res[0])

# Clean up the collection
Expand Down
Loading