@@ -1831,19 +1831,15 @@ async def get_online_features_async(
18311831
18321832 def retrieve_online_documents (
18331833 self ,
1834- feature : Optional [str ],
18351834 query : Union [str , List [float ]],
18361835 top_k : int ,
1837- features : Optional [ List [str ]] = None ,
1836+ features : List [str ],
18381837 distance_metric : Optional [str ] = "L2" ,
18391838 ) -> OnlineResponse :
18401839 """
18411840 Retrieves the top k closest document features. Note, embeddings are a subset of features.
18421841
18431842 Args:
1844- feature: The list of document features that should be retrieved from the online document store. These features can be
1845- specified either as a list of string document feature references or as a feature service. String feature
1846- references must have format "feature_view:feature", e.g, "document_fv:document_embeddings".
18471843 features: The list of features that should be retrieved from the online store.
18481844 query: The query to retrieve the closest document features for.
18491845 top_k: The number of closest document features to retrieve.
@@ -1853,68 +1849,55 @@ def retrieve_online_documents(
18531849 raise ValueError (
18541850 "Using embedding functionality is not supported for document retrieval. Please embed the query before calling retrieve_online_documents."
18551851 )
1856- feature_list : List [str ] = (
1857- features
1858- if features is not None
1859- else ([feature ] if feature is not None else [])
1860- )
18611852
18621853 (
18631854 available_feature_views ,
18641855 _ ,
18651856 ) = utils ._get_feature_views_to_use (
18661857 registry = self ._registry ,
18671858 project = self .project ,
1868- features = feature_list ,
1859+ features = features ,
18691860 allow_cache = True ,
18701861 hide_dummy_entity = False ,
18711862 )
1872- if features :
1873- feature_view_set = set ()
1874- for feature in features :
1875- feature_view_name = feature .split (":" )[0 ]
1876- feature_view = self .get_feature_view (feature_view_name )
1877- feature_view_set .add (feature_view .name )
1878- if len (feature_view_set ) > 1 :
1879- raise ValueError (
1880- "Document retrieval only supports a single feature view."
1881- )
1882- requested_feature = None
1883- requested_features = [
1884- f .split (":" )[1 ] for f in features if isinstance (f , str ) and ":" in f
1885- ]
1886- else :
1887- requested_feature = (
1888- feature .split (":" )[1 ] if isinstance (feature , str ) else feature
1889- )
1890- requested_features = [requested_feature ] if requested_feature else []
1891-
1892- requested_feature_view_name = (
1893- feature .split (":" )[0 ] if feature else list (feature_view_set )[0 ]
1894- )
1863+ feature_view_set = set ()
1864+ for _feature in features :
1865+ feature_view_name = _feature .split (":" )[0 ]
1866+ feature_view = self .get_feature_view (feature_view_name )
1867+ feature_view_set .add (feature_view .name )
1868+ if len (feature_view_set ) > 1 :
1869+ raise ValueError ("Document retrieval only supports a single feature view." )
1870+ requested_features = [
1871+ f .split (":" )[1 ] for f in features if isinstance (f , str ) and ":" in f
1872+ ]
1873+ requested_feature_view_name = list (feature_view_set )[0 ]
18951874 for feature_view in available_feature_views :
18961875 if feature_view .name == requested_feature_view_name :
18971876 requested_feature_view = feature_view
1898- if not requested_feature_view :
1877+ break
1878+ else :
18991879 raise ValueError (
19001880 f"Feature view { requested_feature_view } not found in the registry."
19011881 )
19021882
1903- requested_feature_view = available_feature_views [0 ]
1904-
19051883 provider = self ._get_provider ()
19061884 document_features = self ._retrieve_from_online_store (
19071885 provider ,
19081886 requested_feature_view ,
1909- requested_feature ,
19101887 requested_features ,
19111888 query ,
19121889 top_k ,
19131890 distance_metric ,
19141891 )
1892+
19151893 # TODO: the vector value is currently not returned because it is the same as the feature value; if embedding is supported,
19161894 # the feature value could instead be the raw text before it is embedded.
1917- entity_key_vals = [feature [1 ] for feature in document_features ]
1895+ def _doc_feature (x ):
1896+ return [feature [x ] for feature in document_features ]
1897+
1898+ entity_key_vals , document_feature_vals , document_feature_distance_vals = map (
1899+ _doc_feature , (1 , 4 , 5 )
1900+ )
19181901 join_key_values : Dict [str , List [ValueProto ]] = {}
19191902 for entity_key_val in entity_key_vals :
19201903 if entity_key_val is not None :
@@ -1924,18 +1907,25 @@ def retrieve_online_documents(
19241907 if join_key not in join_key_values :
19251908 join_key_values [join_key ] = []
19261909 join_key_values [join_key ].append (entity_value )
1927-
1928- document_feature_vals = [feature [4 ] for feature in document_features ]
1929- document_feature_distance_vals = [feature [5 ] for feature in document_features ]
19301910 online_features_response = GetOnlineFeaturesResponse (results = [])
1931- requested_feature = requested_feature or requested_features [0 ]
1911+ if vector_field_metadata := _get_feature_view_vector_field_metadata (
1912+ requested_feature_view
1913+ ):
1914+ vector_field_name = vector_field_metadata .name
1915+ data = {
1916+ ** join_key_values ,
1917+ vector_field_name : document_feature_vals ,
1918+ "distance" : document_feature_distance_vals ,
1919+ }
1920+ _requested_features = [_feature .split (":" )[- 1 ] for _feature in features ]
1921+ requested_features_data = {
1922+ _feature : data [_feature ]
1923+ for _feature in _requested_features
1924+ if _feature in data
1925+ }
19321926 utils ._populate_result_rows_from_columnar (
19331927 online_features_response = online_features_response ,
1934- data = {
1935- ** join_key_values ,
1936- requested_feature : document_feature_vals ,
1937- "distance" : document_feature_distance_vals ,
1938- },
1928+ data = requested_features_data ,
19391929 )
19401930 return OnlineResponse (online_features_response )
19411931
@@ -2012,7 +2002,6 @@ def _retrieve_from_online_store(
20122002 self ,
20132003 provider : Provider ,
20142004 table : FeatureView ,
2015- requested_feature : Optional [str ],
20162005 requested_features : Optional [List [str ]],
20172006 query : List [float ],
20182007 top_k : int ,
@@ -2032,7 +2021,6 @@ def _retrieve_from_online_store(
20322021 documents = provider .retrieve_online_documents (
20332022 config = self .config ,
20342023 table = table ,
2035- requested_feature = requested_feature ,
20362024 requested_features = requested_features ,
20372025 query = query ,
20382026 top_k = top_k ,
0 commit comments