Skip to content

Commit 66ddd3e

Browse files
authored
fix: Multiple fixes in retrieval of online documents (#5168)
* Fixed Retrieve online documents for serialization ver 3 Signed-off-by: jyejare <jyejare@redhat.com>
* Dynamic vector field name Signed-off-by: jyejare <jyejare@redhat.com>
* Requested only features with values Signed-off-by: jyejare <jyejare@redhat.com>
* Removing the singular feature parameter for doc retrieval Signed-off-by: jyejare <jyejare@redhat.com>
* Documentation and other DBs test fixes Signed-off-by: jyejare <jyejare@redhat.com>
* Review Fixes Signed-off-by: jyejare <jyejare@redhat.com>
---------
Signed-off-by: jyejare <jyejare@redhat.com>
1 parent cc7fd47 commit 66ddd3e

File tree

13 files changed

+61
-81
lines changed

13 files changed

+61
-81
lines changed

docs/reference/online-stores/elasticsearch.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ top_k = 5
6868
# Retrieve the top k closest features to the query vector
6969

7070
feature_values = feature_store.retrieve_online_documents(
71-
feature="my_feature",
71+
features=["my_feature"],
7272
query=query_vector,
7373
top_k=top_k
7474
)

docs/reference/online-stores/qdrant.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ top_k = 5
7070
# the vector to use can be specified in the repo config.
7171
# Reference: https://qdrant.tech/documentation/concepts/vectors/#named-vectors
7272
feature_values = feature_store.retrieve_online_documents(
73-
feature="my_feature",
73+
features=["my_feature"],
7474
query=query_vector,
7575
top_k=top_k
7676
)

sdk/python/feast/feature_store.py

Lines changed: 38 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1831,19 +1831,15 @@ async def get_online_features_async(
18311831

18321832
def retrieve_online_documents(
18331833
self,
1834-
feature: Optional[str],
18351834
query: Union[str, List[float]],
18361835
top_k: int,
1837-
features: Optional[List[str]] = None,
1836+
features: List[str],
18381837
distance_metric: Optional[str] = "L2",
18391838
) -> OnlineResponse:
18401839
"""
18411840
Retrieves the top k closest document features. Note, embeddings are a subset of features.
18421841
18431842
Args:
1844-
feature: The list of document features that should be retrieved from the online document store. These features can be
1845-
specified either as a list of string document feature references or as a feature service. String feature
1846-
references must have format "feature_view:feature", e.g, "document_fv:document_embeddings".
18471843
features: The list of features that should be retrieved from the online store.
18481844
query: The query to retrieve the closest document features for.
18491845
top_k: The number of closest document features to retrieve.
@@ -1853,68 +1849,55 @@ def retrieve_online_documents(
18531849
raise ValueError(
18541850
"Using embedding functionality is not supported for document retrieval. Please embed the query before calling retrieve_online_documents."
18551851
)
1856-
feature_list: List[str] = (
1857-
features
1858-
if features is not None
1859-
else ([feature] if feature is not None else [])
1860-
)
18611852

18621853
(
18631854
available_feature_views,
18641855
_,
18651856
) = utils._get_feature_views_to_use(
18661857
registry=self._registry,
18671858
project=self.project,
1868-
features=feature_list,
1859+
features=features,
18691860
allow_cache=True,
18701861
hide_dummy_entity=False,
18711862
)
1872-
if features:
1873-
feature_view_set = set()
1874-
for feature in features:
1875-
feature_view_name = feature.split(":")[0]
1876-
feature_view = self.get_feature_view(feature_view_name)
1877-
feature_view_set.add(feature_view.name)
1878-
if len(feature_view_set) > 1:
1879-
raise ValueError(
1880-
"Document retrieval only supports a single feature view."
1881-
)
1882-
requested_feature = None
1883-
requested_features = [
1884-
f.split(":")[1] for f in features if isinstance(f, str) and ":" in f
1885-
]
1886-
else:
1887-
requested_feature = (
1888-
feature.split(":")[1] if isinstance(feature, str) else feature
1889-
)
1890-
requested_features = [requested_feature] if requested_feature else []
1891-
1892-
requested_feature_view_name = (
1893-
feature.split(":")[0] if feature else list(feature_view_set)[0]
1894-
)
1863+
feature_view_set = set()
1864+
for _feature in features:
1865+
feature_view_name = _feature.split(":")[0]
1866+
feature_view = self.get_feature_view(feature_view_name)
1867+
feature_view_set.add(feature_view.name)
1868+
if len(feature_view_set) > 1:
1869+
raise ValueError("Document retrieval only supports a single feature view.")
1870+
requested_features = [
1871+
f.split(":")[1] for f in features if isinstance(f, str) and ":" in f
1872+
]
1873+
requested_feature_view_name = list(feature_view_set)[0]
18951874
for feature_view in available_feature_views:
18961875
if feature_view.name == requested_feature_view_name:
18971876
requested_feature_view = feature_view
1898-
if not requested_feature_view:
1877+
break
1878+
else:
18991879
raise ValueError(
19001880
f"Feature view {requested_feature_view} not found in the registry."
19011881
)
19021882

1903-
requested_feature_view = available_feature_views[0]
1904-
19051883
provider = self._get_provider()
19061884
document_features = self._retrieve_from_online_store(
19071885
provider,
19081886
requested_feature_view,
1909-
requested_feature,
19101887
requested_features,
19111888
query,
19121889
top_k,
19131890
distance_metric,
19141891
)
1892+
19151893
# TODO: the vector value is currently not returned since it is the same as the feature value; if embedding is supported,
19161894
# the feature value could be the raw text before it is embedded.
1917-
entity_key_vals = [feature[1] for feature in document_features]
1895+
def _doc_feature(x):
1896+
return [feature[x] for feature in document_features]
1897+
1898+
entity_key_vals, document_feature_vals, document_feature_distance_vals = map(
1899+
_doc_feature, (1, 4, 5)
1900+
)
19181901
join_key_values: Dict[str, List[ValueProto]] = {}
19191902
for entity_key_val in entity_key_vals:
19201903
if entity_key_val is not None:
@@ -1924,18 +1907,25 @@ def retrieve_online_documents(
19241907
if join_key not in join_key_values:
19251908
join_key_values[join_key] = []
19261909
join_key_values[join_key].append(entity_value)
1927-
1928-
document_feature_vals = [feature[4] for feature in document_features]
1929-
document_feature_distance_vals = [feature[5] for feature in document_features]
19301910
online_features_response = GetOnlineFeaturesResponse(results=[])
1931-
requested_feature = requested_feature or requested_features[0]
1911+
if vector_field_metadata := _get_feature_view_vector_field_metadata(
1912+
requested_feature_view
1913+
):
1914+
vector_field_name = vector_field_metadata.name
1915+
data = {
1916+
**join_key_values,
1917+
vector_field_name: document_feature_vals,
1918+
"distance": document_feature_distance_vals,
1919+
}
1920+
_requested_features = [_feature.split(":")[-1] for _feature in features]
1921+
requested_features_data = {
1922+
_feature: data[_feature]
1923+
for _feature in _requested_features
1924+
if _feature in data
1925+
}
19321926
utils._populate_result_rows_from_columnar(
19331927
online_features_response=online_features_response,
1934-
data={
1935-
**join_key_values,
1936-
requested_feature: document_feature_vals,
1937-
"distance": document_feature_distance_vals,
1938-
},
1928+
data=requested_features_data,
19391929
)
19401930
return OnlineResponse(online_features_response)
19411931

@@ -2012,7 +2002,6 @@ def _retrieve_from_online_store(
20122002
self,
20132003
provider: Provider,
20142004
table: FeatureView,
2015-
requested_feature: Optional[str],
20162005
requested_features: Optional[List[str]],
20172006
query: List[float],
20182007
top_k: int,
@@ -2032,7 +2021,6 @@ def _retrieve_from_online_store(
20322021
documents = provider.retrieve_online_documents(
20332022
config=self.config,
20342023
table=table,
2035-
requested_feature=requested_feature,
20362024
requested_features=requested_features,
20372025
query=query,
20382026
top_k=top_k,

sdk/python/feast/infra/online_stores/elasticsearch_online_store/elasticsearch.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,7 @@ def retrieve_online_documents(
213213
self,
214214
config: RepoConfig,
215215
table: FeatureView,
216-
requested_feature: Optional[str],
217-
requested_features: Optional[List[str]],
216+
requested_features: List[str],
218217
embedding: List[float],
219218
top_k: int,
220219
*args,

sdk/python/feast/infra/online_stores/faiss_online_store.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,7 @@ def retrieve_online_documents(
176176
self,
177177
config: RepoConfig,
178178
table: FeatureView,
179-
requested_feature: Optional[str],
180-
requested_featres: Optional[List[str]],
179+
requested_featres: List[str],
181180
embedding: List[float],
182181
top_k: int,
183182
distance_metric: Optional[str] = None,

sdk/python/feast/infra/online_stores/online_store.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -392,8 +392,7 @@ def retrieve_online_documents(
392392
self,
393393
config: RepoConfig,
394394
table: FeatureView,
395-
requested_feature: Optional[str],
396-
requested_features: Optional[List[str]],
395+
requested_features: List[str],
397396
embedding: List[float],
398397
top_k: int,
399398
distance_metric: Optional[str] = None,
@@ -413,7 +412,6 @@ def retrieve_online_documents(
413412
distance_metric: distance metric to use for retrieval.
414413
config: The config for the current feature store.
415414
table: The feature view whose feature values should be read.
416-
requested_feature: The name of the feature whose embeddings should be used for retrieval.
417415
requested_features: The list of features whose embeddings should be used for retrieval.
418416
embedding: The embeddings to use for retrieval.
419417
top_k: The number of documents to retrieve.
@@ -423,10 +421,8 @@ def retrieve_online_documents(
423421
where the first item is the event timestamp for the row, and the second item is a dict of feature
424422
name to embeddings.
425423
"""
426-
if not requested_feature and not requested_features:
427-
raise ValueError(
428-
"Either requested_feature or requested_features must be specified"
429-
)
424+
if not requested_features:
425+
raise ValueError("Requested_features must be specified")
430426
raise NotImplementedError(
431427
f"Online store {self.__class__.__name__} does not support online retrieval"
432428
)

sdk/python/feast/infra/online_stores/postgres_online_store/postgres.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,6 @@ def retrieve_online_documents(
354354
self,
355355
config: RepoConfig,
356356
table: FeatureView,
357-
requested_feature: Optional[str],
358357
requested_features: Optional[List[str]],
359358
embedding: List[float],
360359
top_k: int,
@@ -373,7 +372,6 @@ def retrieve_online_documents(
373372
Args:
374373
config: Feast configuration object
375374
table: FeatureView object as the table to search
376-
requested_feature: The requested feature as the column to search
377375
requested_features: The list of features whose embeddings should be used for retrieval.
378376
embedding: The query embedding to search for
379377
top_k: The number of items to return
@@ -394,6 +392,11 @@ def retrieve_online_documents(
394392
f"Distance metric {distance_metric} is not supported. Supported distance metrics are {SUPPORTED_DISTANCE_METRICS_DICT.keys()}"
395393
)
396394

395+
if requested_features:
396+
required_feature_names = ", ".join(
397+
[feature for feature in requested_features]
398+
)
399+
397400
distance_metric_sql = SUPPORTED_DISTANCE_METRICS_DICT[distance_metric]
398401

399402
result: List[
@@ -415,19 +418,18 @@ def retrieve_online_documents(
415418
"""
416419
SELECT
417420
entity_key,
418-
feature_name,
421+
{feature_names},
419422
value,
420423
vector_value,
421424
vector_value {distance_metric_sql} %s::vector as distance,
422425
event_ts FROM {table_name}
423-
WHERE feature_name = {feature_name}
424426
ORDER BY distance
425427
LIMIT {top_k};
426428
"""
427429
).format(
428430
distance_metric_sql=sql.SQL(distance_metric_sql),
429431
table_name=sql.Identifier(table_name),
430-
feature_name=sql.Literal(requested_feature),
432+
feature_names=required_feature_names,
431433
top_k=sql.Literal(top_k),
432434
),
433435
(embedding,),

sdk/python/feast/infra/online_stores/qdrant_online_store/qdrant.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -248,8 +248,7 @@ def retrieve_online_documents(
248248
self,
249249
config: RepoConfig,
250250
table: FeatureView,
251-
requested_feature: Optional[str],
252-
requested_features: Optional[List[str]],
251+
requested_features: List[str],
253252
embedding: List[float],
254253
top_k: int,
255254
distance_metric: Optional[str] = "cosine",

sdk/python/feast/infra/online_stores/sqlite.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -322,8 +322,7 @@ def retrieve_online_documents(
322322
self,
323323
config: RepoConfig,
324324
table: FeatureView,
325-
requested_feature: Optional[str],
326-
requested_featuers: Optional[List[str]],
325+
requested_features: List[str],
327326
embedding: List[float],
328327
top_k: int,
329328
distance_metric: Optional[str] = None,
@@ -341,7 +340,7 @@ def retrieve_online_documents(
341340
Args:
342341
config: Feast configuration object
343342
table: FeatureView object as the table to search
344-
requested_feature: The requested feature as the column to search
343+
requested_features: The list of requested features to retrieve
345344
embedding: The query embedding to search for
346345
top_k: The number of items to return
347346
Returns:

sdk/python/feast/infra/passthrough_provider.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,6 @@ def retrieve_online_documents(
294294
self,
295295
config: RepoConfig,
296296
table: FeatureView,
297-
requested_feature: Optional[str],
298297
requested_features: Optional[List[str]],
299298
query: List[float],
300299
top_k: int,
@@ -305,7 +304,6 @@ def retrieve_online_documents(
305304
result = self.online_store.retrieve_online_documents(
306305
config,
307306
table,
308-
requested_feature,
309307
requested_features,
310308
query,
311309
top_k,

0 commit comments

Comments
 (0)