Skip to content

Commit

Permalink
feat: Updating retrieve online documents v2 to work for other fields …
Browse files Browse the repository at this point in the history
…for sqlite (#5082)

* Updating retrieve online documents v2 to work for other fields for sqlite

Signed-off-by: Francisco Javier Arceo <[email protected]>

* Updating tests (not fully working yet, but close)

Signed-off-by: Francisco Javier Arceo <[email protected]>

* bug fix for addition of new features

Signed-off-by: Francisco Javier Arceo <[email protected]>

* Updated to implement full-text search

Signed-off-by: Francisco Javier Arceo <[email protected]>

---------

Signed-off-by: Francisco Javier Arceo <[email protected]>
  • Loading branch information
franciscojavierarceo authored Feb 26, 2025
1 parent 1705922 commit fc121c3
Show file tree
Hide file tree
Showing 12 changed files with 504 additions and 136 deletions.
2 changes: 2 additions & 0 deletions sdk/python/feast/feature_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class GetOnlineFeaturesRequest(BaseModel):
features: Optional[List[str]] = None
full_feature_names: bool = False
query_embedding: Optional[List[float]] = None
query_string: Optional[str] = None


def _get_features(request: GetOnlineFeaturesRequest, store: "feast.FeatureStore"):
Expand Down Expand Up @@ -195,6 +196,7 @@ async def retrieve_online_documents(
entity_rows=request.entities,
full_feature_names=request.full_feature_names,
query=request.query_embedding,
query_string=request.query_string,
)

response = await run_in_threadpool(
Expand Down
5 changes: 5 additions & 0 deletions sdk/python/feast/feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -1867,6 +1867,7 @@ def retrieve_online_documents_v2(
top_k: int,
features: List[str],
distance_metric: Optional[str] = "L2",
query_string: Optional[str] = None,
) -> OnlineResponse:
"""
Retrieves the top k closest document features. Note, embeddings are a subset of features.
Expand All @@ -1878,6 +1879,7 @@ def retrieve_online_documents_v2(
query: The query to retrieve the closest document features for.
top_k: The number of closest document features to retrieve.
distance_metric: The distance metric to use for retrieval.
query_string: The query string to retrieve the closest document features using keyword search (bm25).
"""
if isinstance(query, str):
raise ValueError(
Expand Down Expand Up @@ -1919,6 +1921,7 @@ def retrieve_online_documents_v2(
query,
top_k,
distance_metric,
query_string,
)

def _retrieve_from_online_store(
Expand Down Expand Up @@ -1988,6 +1991,7 @@ def _retrieve_from_online_store_v2(
query: List[float],
top_k: int,
distance_metric: Optional[str],
query_string: Optional[str],
) -> OnlineResponse:
"""
Search and return document features from the online document store.
Expand All @@ -2003,6 +2007,7 @@ def _retrieve_from_online_store_v2(
query=query,
top_k=top_k,
distance_metric=distance_metric,
query_string=query_string,
)

entity_key_dict: Dict[str, List[ValueProto]] = {}
Expand Down
4 changes: 4 additions & 0 deletions sdk/python/feast/feature_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,10 @@ def __init__(
else:
features.append(field)

assert len([f for f in features if f.vector_index]) < 2, (
f"Only one vector feature is allowed per feature view. Please update {self.name}."
)

# TODO(felixwang9817): Add more robust validation of features.
cols = [field.name for field in schema]
for col in cols:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,7 @@ def retrieve_online_documents_v2(
embedding: List[float],
top_k: int,
distance_metric: Optional[str] = None,
query_string: Optional[str] = None,
) -> List[
Tuple[
Optional[datetime],
Expand Down
2 changes: 2 additions & 0 deletions sdk/python/feast/infra/online_stores/online_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,7 @@ def retrieve_online_documents_v2(
embedding: List[float],
top_k: int,
distance_metric: Optional[str] = None,
query_string: Optional[str] = None,
) -> List[
Tuple[
Optional[datetime],
Expand All @@ -456,6 +457,7 @@ def retrieve_online_documents_v2(
requested_features: The list of features whose embeddings should be used for retrieval.
embedding: The embeddings to use for retrieval.
top_k: The number of documents to retrieve.
query_string: The query string to search for using keyword search (bm25) (optional)
Returns:
object: A list of top k closest documents to the specified embedding. Each item in the list is a tuple
Expand Down
Loading

0 comments on commit fc121c3

Please sign in to comment.