Skip to content

Commit 005186a

Browse files
authored
[MOD-12414] Add Internal cursor reads metric to cluster FT.PROFILE output (#7709)
* Add cursor_reads to the AREQ struct, update it in runCursor, and print it in Profile_Print for cursor requests. * Fix: preserve the timeout flag across cursor reads in cluster FT.PROFILE. Use |= instead of = to ensure has_timedout remains true once set, preventing earlier cursor-read warnings from being lost in the final output. * Fix test * Fix test * Add an assertion message to the test
1 parent a6a9904 commit 005186a

6 files changed

Lines changed: 132 additions & 5 deletions

File tree

src/aggregate/aggregate.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,10 @@ typedef struct AREQ {
283283
// Indicates whether the query has timed out.
284284
// Useful for query with cursor and RETURN policy
285285
bool has_timedout;
286+
287+
// Number of cursor reads: 1 for the initial FT.AGGREGATE WITHCURSOR,
288+
// plus 1 for each subsequent FT.CURSOR READ call.
289+
size_t cursor_reads;
286290
} AREQ;
287291

288292
/**

src/aggregate/aggregate_exec.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ static void sendChunk_Resp2(AREQ *req, RedisModule_Reply *reply, size_t limit,
472472
&& req->reqConfig.timeoutPolicy == TimeoutPolicy_Return));
473473

474474
bool has_timedout = (rc == RS_RESULT_TIMEDOUT) || hasTimeoutError(qctx->err);
475-
req->has_timedout = has_timedout;
475+
req->has_timedout |= has_timedout;
476476
if (has_timedout) {
477477
// Track warnings in global statistics
478478
// Assuming that if we reached here, timeout is not an error.
@@ -489,7 +489,7 @@ static void sendChunk_Resp2(AREQ *req, RedisModule_Reply *reply, size_t limit,
489489
RedisSearchCtx *sctx = AREQ_SearchCtx(req);
490490
ProfilePrinterCtx profileCtx = {
491491
.req = req,
492-
.timedout = has_timedout,
492+
.timedout = req->has_timedout,
493493
.reachedMaxPrefixExpansions = QueryError_HasReachedMaxPrefixExpansionsWarning(qctx->err),
494494
.bgScanOOM = sctx->spec && sctx->spec->scan_failed_OOM,
495495
.queryOOM = QueryError_HasQueryOOMWarning(qctx->err),
@@ -638,15 +638,15 @@ static void sendChunk_Resp3(AREQ *req, RedisModule_Reply *reply, size_t limit,
638638
&& req->reqConfig.timeoutPolicy == TimeoutPolicy_Return));
639639

640640
bool has_timedout = (rc == RS_RESULT_TIMEDOUT) || hasTimeoutError(qctx->err);
641-
req->has_timedout = has_timedout;
641+
req->has_timedout |= has_timedout;
642642

643643
if (IsProfile(req)) {
644644
RedisModule_Reply_MapEnd(reply); // >Results
645645
if (!(AREQ_RequestFlags(req) & QEXEC_F_IS_CURSOR) || cursor_done) {
646646
// Prepare profile printer context
647647
ProfilePrinterCtx profileCtx = {
648648
.req = req,
649-
.timedout = has_timedout,
649+
.timedout = req->has_timedout,
650650
.reachedMaxPrefixExpansions = QueryError_HasReachedMaxPrefixExpansionsWarning(qctx->err),
651651
.bgScanOOM = sctx->spec && sctx->spec->scan_failed_OOM,
652652
.queryOOM = QueryError_HasQueryOOMWarning(qctx->err),
@@ -1261,6 +1261,7 @@ int AREQ_StartCursor(AREQ *r, RedisModule_Reply *reply, StrongRef spec_ref, Quer
12611261
// Assumes that the cursor has a strong ref to the relevant spec and that it is already locked.
12621262
static void runCursor(RedisModule_Reply *reply, Cursor *cursor, size_t num) {
12631263
AREQ *req = cursor->execState;
1264+
req->cursor_reads++;
12641265

12651266
// update timeout for current cursor read
12661267
SearchCtx_UpdateTime(AREQ_SearchCtx(req), req->reqConfig.queryTimeoutMS);

src/profile.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,13 @@ void Profile_Print(RedisModule_Reply *reply, void *ctx) {
194194
RedisModule_ReplyKV_SimpleString(reply, "Warning", "None");
195195
}
196196

197+
// Print cursor reads count if this is a cursor request.
198+
if (IsCursor(req)) {
199+
// Only internal requests can use profile with cursor.
200+
RS_ASSERT(IsInternal(req));
201+
RedisModule_ReplyKV_LongLong(reply, "Internal cursor reads", req->cursor_reads);
202+
}
203+
197204
// Print profile of iterators
198205
QueryIterator *root = QITR_GetRootFilter(qctx);
199206
// Coordinator does not have iterators

tests/pytests/test_aggregate_count.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def _get_cluster_RP_profile(env, res) -> list:
5454

5555
else:
5656
for i in range(len(res[1][1])):
57-
shard = res[1][1][i][13]
57+
shard = res[1][1][i][15]
5858
shard_RP_and_count.append([(item[1], item[5]) for item in shard])
5959

6060
# sort shard by the number of results processed by the first RP

tests/pytests/test_profile.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# -*- coding: utf-8 -*-
22

3+
import math
34
import unittest
45
from includes import *
56
from common import *
@@ -589,6 +590,119 @@ def testTimedOutWarningCoordResp3():
589590
def testTimedOutWarningCoordResp2():
590591
TimedOutWarningtestCoord(Env(protocol=2))
591592

593+
def get_shards_profile(env, res):
594+
"""Extract shard profiles from FT.PROFILE AGGREGATE response."""
595+
if env.protocol == 3:
596+
return res['Profile']['Shards']
597+
else:
598+
return [to_dict(p) for p in res[-1][1]]
599+
600+
def InternalCursorReadsInProfile(protocol):
601+
"""Tests that 'Internal cursor reads' appears in shard profiles for AGGREGATE."""
602+
# Limit number of shards to avoid creating too many docs
603+
env = Env(shardsCount=2, protocol=protocol)
604+
conn = getConnectionByEnv(env)
605+
env.cmd(config_cmd(), 'SET', '_PRINT_PROFILE_CLOCK', 'false')
606+
607+
env.expect('FT.CREATE', 'idx', 'SCHEMA', 't', 'TEXT').ok()
608+
609+
# Insert docs - with default cursorReadSize=1000, each shard needs more than 1000 to require 2 reads
610+
num_docs = int(1000 * 1.1 * env.shardsCount)
611+
for i in range(num_docs):
612+
conn.execute_command('HSET', f'doc{i}', 't', f'hello{i}')
613+
614+
# Run FT.PROFILE AGGREGATE - coordinator uses internal cursors to shards
615+
res = env.cmd('FT.PROFILE', 'idx', 'AGGREGATE', 'QUERY', '*')
616+
617+
shards_profile = get_shards_profile(env, res)
618+
env.assertEqual(len(shards_profile), env.shardsCount, message=f"unexpected number of shards. full reply output: {res}")
619+
620+
# Each shard should have exactly 2 cursor reads (1000+ docs per shard, default cursorReadSize=1000)
621+
for shard_profile in shards_profile:
622+
env.assertContains('Internal cursor reads', shard_profile)
623+
env.assertEqual(shard_profile['Internal cursor reads'], 2)
624+
625+
@skip(cluster=False)
626+
def testInternalCursorReadsInProfileResp3():
627+
InternalCursorReadsInProfile(protocol=3)
628+
629+
@skip(cluster=False)
630+
def testInternalCursorReadsInProfileResp2():
631+
InternalCursorReadsInProfile(protocol=2)
632+
633+
@skip(cluster=False)
634+
def testInternalCursorReadsWithTimeoutResp3():
635+
"""Tests 'Internal cursor reads' with timeout - RESP3 coordinator detects timeout and stops early."""
636+
env = Env(protocol=3)
637+
conn = getConnectionByEnv(env)
638+
run_command_on_all_shards(env, config_cmd(), 'SET', '_PRINT_PROFILE_CLOCK', 'false')
639+
640+
env.expect('FT.CREATE', 'idx', 'SCHEMA', 't', 'TEXT').ok()
641+
642+
num_docs = 100
643+
for i in range(num_docs):
644+
conn.execute_command('HSET', f'doc{i}', 't', f'hello{i}')
645+
646+
# Run FT.PROFILE AGGREGATE with simulated timeout on shards only
647+
query = ['FT.PROFILE', 'idx', 'AGGREGATE', 'QUERY', '*']
648+
timeout_after_n = 5
649+
res = runDebugQueryCommandTimeoutAfterN(env, query, timeout_after_n, internal_only=True)
650+
651+
# RESP3: coordinator detects shard timeout and stops early after reading first shard's reply
652+
# Results count equals first shard's reply length (timeout_after_n)
653+
env.assertEqual(len(res['Results']['results']), timeout_after_n)
654+
655+
shards_profile = get_shards_profile(env, res)
656+
for shard_profile in shards_profile:
657+
env.assertContains('Internal cursor reads', shard_profile, message=f"full reply output: {res}")
658+
# Coordinator stops after first timeout, so only 1 cursor read per shard
659+
env.assertEqual(shard_profile['Internal cursor reads'], 1, message=f"full reply output: {res}")
660+
env.assertEqual(shard_profile['Warning'], 'Timeout limit was reached', message=f"full reply output: {res}")
661+
662+
@skip(cluster=False)
663+
def testInternalCursorReadsWithTimeoutResp2():
664+
"""Tests 'Internal cursor reads' with timeout - RESP2 coordinator doesn't detect timeout, reads until EOF."""
665+
env = Env(shardsCount=2, protocol=2)
666+
conn = getConnectionByEnv(env)
667+
run_command_on_all_shards(env, config_cmd(), 'SET', '_PRINT_PROFILE_CLOCK', 'false')
668+
669+
env.expect('FT.CREATE', 'idx', 'SCHEMA', 't', 'TEXT').ok()
670+
671+
num_docs = 100
672+
for i in range(num_docs):
673+
conn.execute_command('HSET', f'doc{i}', 't', f'hello{i}')
674+
675+
# Run FT.PROFILE AGGREGATE with simulated timeout on shards only
676+
query = ['FT.PROFILE', 'idx', 'AGGREGATE', 'QUERY', '*']
677+
timeout_after_n = 5
678+
res = runDebugQueryCommandTimeoutAfterN(env, query, timeout_after_n, internal_only=True)
679+
680+
# RESP2: coordinator doesn't check shard timeout, reads until EOF
681+
# All docs are returned
682+
env.assertEqual(len(res[0]) - 1, num_docs)
683+
684+
shards_profile = get_shards_profile(env, res)
685+
env.assertEqual(len(shards_profile), env.shardsCount, message=f"unexpected number of shards. full reply output: {res}")
686+
687+
# Verify total cursor reads matches expected (order of shards may differ)
688+
total_expected_reads = 0
689+
for shard_conn in env.getOSSMasterNodesConnectionList():
690+
docs_on_shard = shard_conn.execute_command('DBSIZE')
691+
total_expected_reads += math.ceil(docs_on_shard / timeout_after_n)
692+
693+
# The order of shards in the profile response may differ, so we can't check per-shard
694+
total_actual_reads = sum(sp['Internal cursor reads'] for sp in shards_profile)
695+
env.assertEqual(total_actual_reads, total_expected_reads, message=f"full reply output: {res}")
696+
697+
# Verify each shard has warning
698+
for shard_profile in shards_profile:
699+
env.assertContains('Internal cursor reads', shard_profile, message=f"full reply output: {res}")
700+
env.assertEqual(shard_profile['Warning'], 'Timeout limit was reached', message=f"full reply output: {res}")
701+
702+
# Coordinator should NOT have timeout warning (it doesn't detect it in RESP2)
703+
coord_profile = to_dict(res[-1][-1])
704+
env.assertEqual(coord_profile['Warning'], 'None', message=f"full reply output: {res}")
705+
592706
# This test is currently skipped due to flaky behavior of some of the machines'
593707
# timers. MOD-6436
594708
@skip()

tests/pytests/test_resp3.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ def test_coord_profile():
252252
'Pipeline creation time': ANY,
253253
'Total GIL time': ANY,
254254
'Warning': 'None',
255+
'Internal cursor reads': ANY,
255256
'Iterators profile': {'Type': 'WILDCARD', 'Time': ANY, 'Number of reading operations': ANY},
256257
'Result processors profile': [{'Type': 'Index', 'Time': ANY, 'Results processed': ANY},]
257258
}

0 commit comments

Comments (0)