[MOD-12647] fix: handle the case in Coordinator when SCORE is sent alone without extra fields. (#7492)

JoanFM · github-actions[bot] · commit 70656cd77e64 · 2025-11-27T13:30:59.000Z
* fix: do not send the score alone if the Expired flag is set * test: add test_expire_ft_hybrid * improve test * do not send NULL; simply avoid serializing the result if it is expired * improve test * make the hybrid expire test more complete * small change in test * revert changes in hybrid exec * remove an assertion that cannot be guaranteed, and handle a missing extra_attributes in the response from the shard * handle both RESP protocols (cherry picked from commit b916f07)
diff --git a/src/coord/rpnet.c b/src/coord/rpnet.c
@@ -347,15 +347,18 @@ int rpnetNext(ResultProcessor *self, SearchResult *r) {
 
   MRReply *score = NULL;
   MRReply *fields = MRReply_ArrayElement(rows, nc->curIdx++);
+  bool has_fields = false;
   if (resp3) {
     RS_LOG_ASSERT(fields && MRReply_Type(fields) == MR_REPLY_MAP, "invalid result record");
     // extract score if it exists, WITHSCORES was specified
     score = MRReply_MapElement(fields, "score");
     fields = MRReply_MapElement(fields, "extra_attributes");
-    RS_LOG_ASSERT(fields && MRReply_Type(fields) == MR_REPLY_MAP, "invalid fields record");
+    // If Result_ExpiredDoc is set by the Loader on the shard, the reply may contain no extra attributes.
+    // In that case there are no keys to return for this result.
+    has_fields = fields && MRReply_Type(fields) == MR_REPLY_MAP;
   } else {
-    RS_LOG_ASSERT(fields && MRReply_Type(fields) == MR_REPLY_ARRAY, "invalid result record");
-    RS_LOG_ASSERT(MRReply_Length(fields) % 2 == 0, "invalid fields record");
+    has_fields = fields && MRReply_Type(fields) == MR_REPLY_ARRAY;
+    RS_LOG_ASSERT(!has_fields || has_fields && MRReply_Length(fields) % 2 == 0, "invalid fields record");
   }
 
   // The score is optional, in hybrid we need the score for the sorter and hybrid merger
@@ -366,12 +369,14 @@ int rpnetNext(ResultProcessor *self, SearchResult *r) {
     SearchResult_SetScore(r, MRReply_Double(score));
   }
 
-  for (size_t i = 0; i < MRReply_Length(fields); i += 2) {
-    size_t len;
-    const char *field = MRReply_String(MRReply_ArrayElement(fields, i), &len);
-    MRReply *val = MRReply_ArrayElement(fields, i + 1);
-    RSValue *v = MRReply_ToValue(val);
-    RLookup_WriteOwnKeyByName(nc->lookup, field, len, SearchResult_GetRowDataMut(r), v);
+  if (has_fields) {
+    for (size_t i = 0; i < MRReply_Length(fields); i += 2) {
+      size_t len;
+      const char *field = MRReply_String(MRReply_ArrayElement(fields, i), &len);
+      MRReply *val = MRReply_ArrayElement(fields, i + 1);
+      RSValue *v = MRReply_ToValue(val);
+      RLookup_WriteOwnKeyByName(nc->lookup, field, len, SearchResult_GetRowDataMut(r), v);
+    }
   }
   return RS_RESULT_OK;
 }
diff --git a/tests/pytests/common.py b/tests/pytests/common.py
@@ -1001,7 +1001,16 @@ def get_results_from_hybrid_response(response) -> Dict[str, Dict[str, any]]:
         Dict mapping key -> dict of all fields from the results list
         Example: {'doc:1': {'__score': '0.5', 'vector_distance': '0.3'}}
     """
-    # return dict mapping key -> all fields from the results list
+    # Handle RESP3 format (dict)
+    if isinstance(response, dict):
+        results = {}
+        for result in response.get('results', []):
+            if '__key' in result:
+                key = result['__key']
+                results[key] = result
+        total_results = response.get('total_results', 0)
+        return results, total_results
+
     res_results_index = recursive_index(response, 'results')
     res_count_index = recursive_index(response, 'total_results')
     res_results_index[-1] += 1
diff --git a/tests/pytests/test_expire.py b/tests/pytests/test_expire.py
@@ -253,6 +253,72 @@ def test_expire_aggregate(env):
     # The result count is not accurate in aggregation, for now we compare res to the expected results with the wrong count
     env.assertEqual(res, [1, ['t', 'arr'], ['t', 'bar']])
 
+
+def expire_ft_hybrid_test(protocol):
+    env = Env(protocol=protocol)
+    # Use "lazy" expire (expire only when key is accessed) on all shards
+    env.cmd('DEBUG', 'SET-ACTIVE-EXPIRE', '0')
+
+    # Create index with text, vector, and numeric fields
+    env.expect('FT.CREATE', 'idx', 'SCHEMA', 't', 'TEXT', 'n', 'NUMERIC', 'v', 'VECTOR', 'FLAT', '6', 'TYPE', 'FLOAT32', 'DIM', '2', 'DISTANCE_METRIC', 'L2').ok()
+
+    # Create test vectors (2-dimensional float32)
+    import numpy as np
+    query_vector = np.array([0.5, 0.5]).astype(np.float32).tobytes()
+
+    # Use cluster-aware connection for data insertion
+    with env.getClusterConnectionIfNeeded() as conn:
+        # Create 1000 documents
+        for i in range(1000):
+            # Create a unique vector for each document
+            vector = np.array([float(i % 100) / 100.0, float((i + 1) % 100) / 100.0]).astype(np.float32).tobytes()
+            doc_key = f'doc{i}'
+            text_value = f'text{i}'
+            numeric_value = str(i)
+
+            conn.execute_command('HSET', doc_key, 't', text_value, 'n', numeric_value, 'v', vector)
+
+            # Expire the first 990 documents (doc0 to doc989)
+            if i < 990:
+                conn.execute_command('PEXPIRE', doc_key, 1)
+
+    # Ensure expiration before query
+    time.sleep(0.01)
+
+    # Test FT.HYBRID requesting 1000 results but expecting only 10 (non-expired documents)
+    hybrid_query = ['FT.HYBRID', 'idx', 'SEARCH', '*', 'VSIM', '@v', query_vector, 'LIMIT', '0', '1000', 'COMBINE', 'RRF', '2', 'CONSTANT', '60', 'LOAD', '4', '@__key', '@__score', '@t', '@n']
+
+    # Execute query using cluster-aware command to get the actual results
+    actual_res = env.cmd(*hybrid_query)
+    from common import get_results_from_hybrid_response
+    actual_results_dict, actual_total_results = get_results_from_hybrid_response(actual_res)
+
+    # Validate that only 10 documents are returned (doc990 to doc999)
+    env.assertEqual(actual_total_results, 10)
+
+    # Verify that only non-expired documents are present
+    expected_doc_keys = {f'doc{i}' for i in range(990, 1000)}
+    actual_doc_keys = set(actual_results_dict.keys())
+    env.assertEqual(actual_doc_keys, expected_doc_keys)
+
+    # Verify that each returned document has the correct attributes
+    for doc_key in actual_results_dict:
+        doc_num = int(doc_key[3:])  # Extract number from 'docXXX'
+        env.assertTrue('__key' in actual_results_dict[doc_key])
+        env.assertTrue('__score' in actual_results_dict[doc_key])
+        env.assertTrue('t' in actual_results_dict[doc_key])
+        env.assertTrue('n' in actual_results_dict[doc_key])
+        env.assertEqual(actual_results_dict[doc_key]['__key'], doc_key)
+        env.assertEqual(actual_results_dict[doc_key]['t'], f'text{doc_num}')
+        env.assertEqual(actual_results_dict[doc_key]['n'], str(doc_num))
+        env.assertTrue(float(actual_results_dict[doc_key]['__score']) >= 0)
+
+def test_expire_ft_hybrid_resp2():
+    expire_ft_hybrid_test(protocol=2)
+
+def test_expire_ft_hybrid_resp3():
+    expire_ft_hybrid_test(protocol=3)
+
 def createTextualSchema(field_to_additional_schema_keywords):
     schema = []
     for field, additional_schema_words in field_to_additional_schema_keywords.items():