Skip to content

box: read snapshot in a distinct thread to improve recovery performance#11861

Merged
locker merged 3 commits intotarantool:masterfrom
nshy:recovery-thread
Oct 23, 2025
Merged

box: read snapshot in a distinct thread to improve recovery performance#11861
locker merged 3 commits intotarantool:masterfrom
nshy:recovery-thread

Conversation

@nshy
Copy link
Contributor

@nshy nshy commented Sep 29, 2025

Validating requests MsgPack and parsing the requests body turns out to be significant part of recovery in terms of CPU. Let's move it to a distinct thread. This is similar to iproto and applier where we have a distinct thread(s) for that.

Snapshot is written in batches of about 128k
(XLOG_TX_AUTOCOMMIT_THRESHOLD). The recovery thread reads the batch, parses its requests and send to TX to process. The maximum number of read but not processed batches is limited to 2 to make a flow control.

Closes #11888

Performance test results

Recovery performance is evaluated using below perf test. The 1.10.5 performance is taken from the previous evaluations for the issue.

tarantool perf/lua/recovery.lua \
  --row_count 10000000 --column_count 10 --recovery_count 5 \
  --preload_script "require('compat').box_recovery_triggers_deprecation = 'new'"
Run                 | Perf, krps |
---------------------------------|
master, current     |    1060    |
---------------------------------|
master, patched     |    1800    |
---------------------------------|
1.10.5              |    1315    |

So introducing snapshot reader thread gives 70% boost for recovery from snapshot. Now master is 37% faster than 1.10.5.

If box_recovery_triggers_deprecation = 'old' then performance is only 990 krps.

@nshy nshy force-pushed the recovery-thread branch 2 times, most recently from 9f5dae6 to 40d271e Compare September 29, 2025 14:04
@nshy nshy requested a review from locker September 29, 2025 14:19
@coveralls
Copy link

coveralls commented Sep 29, 2025

Coverage Status

coverage: 87.642% (+0.01%) from 87.632%
when pulling 712d6b9 on nshy:recovery-thread
into a854a65
on tarantool:master
.

@locker locker assigned nshy and unassigned locker Sep 30, 2025
@nshy nshy force-pushed the recovery-thread branch from 40d271e to 52272ea Compare October 7, 2025 10:13
@nshy nshy requested a review from a team as a code owner October 7, 2025 10:13
@nshy nshy force-pushed the recovery-thread branch 2 times, most recently from c8e3679 to 48230f8 Compare October 8, 2025 07:17
@nshy nshy assigned locker and unassigned nshy Oct 8, 2025
@locker locker assigned nshy and unassigned locker Oct 8, 2025
@nshy
Copy link
Contributor Author

nshy commented Oct 13, 2025

I also avoid using mempool to keep batches data as discussed f2f. For that we start to keep vclock in struct synchro_request and struct raft_request right in these structures (done in distinct commit). Also we need to fix pointer to header for struct request in batch (see xlog_batch_get()).

diff
diff --git a/src/box/xlog.c b/src/box/xlog.c
index bab4bd4bb1..b1bde679aa 100644
--- a/src/box/xlog.c
+++ b/src/box/xlog.c
@@ -2245,7 +2245,6 @@ xlog_batch_destroy(struct xlog_batch *batch)
 		xlog_batch_get(batch, i, &entry);
 		if (entry->error != NULL)
 			error_unref(entry->error);
-		mempool_free(&batch->cursor->entry_pool, entry);
 	}
 	ibuf_destroy(&batch->data);
 	ibuf_destroy(&batch->entries);
@@ -2260,8 +2259,6 @@ xlog_entry_cursor_create(struct xlog_entry_cursor *cursor,
 	cursor->search_magic = false;
 	cursor->eof = false;
 	cursor->error = NULL;
-	mempool_create(&cursor->entry_pool, cord_slab_cache(),
-		       sizeof(struct xlog_entry));
 }
 
 void
@@ -2271,7 +2268,6 @@ xlog_entry_cursor_destroy(struct xlog_entry_cursor *cursor)
 		xlog_cursor_close(cursor->raw_cursor, false);
 	if (cursor->error != NULL)
 		error_unref(cursor->error);
-	mempool_destroy(&cursor->entry_pool);
 	TRASH(cursor);
 }
 
@@ -2330,19 +2326,18 @@ xlog_entry_cursor_next_batch(struct xlog_entry_cursor *cursor,
 		}
 		return 1;
 	}
-	batch->cursor = cursor;
 	batch->entry_count = 0;
 	ibuf_create(&batch->entries, &cord()->slabc,
 		    /*start_capacity=*/16 * 1024);
 	while (ibuf_used(&batch->data) != 0) {
 		/* Sic: alignment should be correct here. */
-		struct xlog_entry *entry = xmempool_alloc(&cursor->entry_pool);
+		struct xlog_entry *entry =
+			ibuf_alloc(&batch->entries, sizeof(*entry));
 		rc = xrow_decode(&entry->header,
 				 (const char **)&batch->data.rpos,
 				 (const char *)batch->data.wpos,
 				 /*end_is_exact=*/false);
 		if (rc != 0) {
-			mempool_free(&cursor->entry_pool, entry);
 			diag_log();
 			diag_set(XlogError, "can't parse row");
 			if (batch->entry_count == 0) {
@@ -2356,9 +2351,6 @@ xlog_entry_cursor_next_batch(struct xlog_entry_cursor *cursor,
 		}
 		batch->entry_count++;
 		entry->error = NULL;
-		struct xlog_entry **entry_ptr =
-			xibuf_alloc(&batch->entries, sizeof(*entry_ptr));
-		*entry_ptr = entry;
 		if (xlog_entry_decode(entry) != 0) {
 			entry->error = diag_last_error(diag_get());
 			error_ref(entry->error);
diff --git a/src/box/xlog.h b/src/box/xlog.h
index b3ead949e2..ff743cf344 100644
--- a/src/box/xlog.h
+++ b/src/box/xlog.h
@@ -42,7 +42,6 @@
 
 #include "small/ibuf.h"
 #include "small/obuf.h"
-#include "small/mempool.h"
 
 struct iovec;
 struct xrow_header;
@@ -925,13 +924,21 @@ struct xlog_entry {
 	struct error *error;
 };
 
+/** Batch of `struct xlog_entry`. */
+struct xlog_batch {
+	/** Raw batch data. */
+	struct ibuf data;
+	/** Buffer holding array of `struct xlog_entry`. */
+	struct ibuf entries;
+	/** Number of entries. */
+	size_t entry_count;
+};
+
 /** Cursor to read `struct xlog_entry`. */
 struct
 xlog_entry_cursor {
 	/** Cursor used to get raw batch data. */
 	struct xlog_cursor *raw_cursor;
-	/** Pool for `struct xlog_entry *` objects. */
-	struct mempool entry_pool;
 	/** Set if last batch reading failed. */
 	bool search_magic;
 	/** Set if EOF is reached. */
@@ -940,18 +947,6 @@ xlog_entry_cursor {
 	struct error *error;
 };
 
-/** Batch of `struct xlog_entry`. */
-struct xlog_batch {
-	/** The cursor this batch is read from. */
-	struct xlog_entry_cursor *cursor;
-	/** Raw batch data. */
-	struct ibuf data;
-	/** Buffer holding array of `struct xlog_entry *`. */
-	struct ibuf entries;
-	/** Number of entries. */
-	size_t entry_count;
-};
-
 /**
  * Get xlog entry at `index` from the `batch`.
  *
@@ -965,8 +960,14 @@ xlog_batch_get(struct xlog_batch *batch, size_t index,
 	       struct xlog_entry **entry)
 {
 	assert(index < batch->entry_count);
-	struct xlog_entry **entries = (struct xlog_entry **)batch->entries.rpos;
-	*entry = entries[index];
+	struct xlog_entry *entries = (struct xlog_entry *)batch->entries.rpos;
+	*entry = &entries[index];
+	/*
+	 * DML has header reference that may become invalid due to
+	 * reallocations during parsing the batch. Fix header reference.
+	 */
+	if (iproto_type_is_dml((*entry)->header.type))
+		(*entry)->dml.header = &(*entry)->header;
 	if ((*entry)->error != NULL) {
 		diag_set_error(diag_get(), (*entry)->error);
 		return -1;
@@ -974,12 +975,7 @@ xlog_batch_get(struct xlog_batch *batch, size_t index,
 	return 0;
 }
 
-/**
- * Destroys the batch.
- *
- * The batch should be destroyed before the cursor from which it is read from
- * is destroyed.
- */
+/** Destroys the batch. */
 void
 xlog_batch_destroy(struct xlog_batch *batch);
 
diff --git a/src/box/xlog_reader.c b/src/box/xlog_reader.c
index daf9b18403..7718b6e64e 100644
--- a/src/box/xlog_reader.c
+++ b/src/box/xlog_reader.c
@@ -303,12 +303,17 @@ xlog_reader_f(va_list ap)
 		rc = 0;
 	}
 
-	while (reader->spare_msg == NULL)
-		fiber_yield();
-
 	if (opened)
 		xlog_entry_cursor_destroy(&cursor);
 
+	/*
+	 * cbus_endpoint_destroy() below is not enough. This loop is required
+	 * to avoid race on creation. The thread may exit before even TX
+	 * is connected to its endpoint.
+	 */
+	while (reader->spare_msg == NULL)
+		fiber_yield();
+
 	cbus_endpoint_destroy(&endpoint, NULL);
 	cpipe_destroy(&reader->tx_pipe);
 

@nshy nshy requested a review from sergepetrenko October 13, 2025 10:07
@nshy nshy assigned locker and unassigned nshy Oct 13, 2025
@sergepetrenko sergepetrenko self-assigned this Oct 13, 2025
@locker locker assigned nshy and unassigned locker Oct 15, 2025
It is just `struct ibuf` actually. The size we add to it we use in
`xlog_tx_cursor_pos()` to calculate `ibuf_pos()`.

We don't need `xlog_tx_cursor_*` functions in header and can open code
it as they used only once. Exception is `xlog_tx_cursor_create()` which
is renamed to `xlog_read_tx()`.

Part of tarantool#11888

NO_TEST=refactoring
NO_CHANGELOG=refactoring
NO_DOC=refactoring
@nshy nshy force-pushed the recovery-thread branch 2 times, most recently from 1058e93 to 6aa7bcc Compare October 16, 2025 17:41
@nshy nshy assigned locker and unassigned nshy Oct 17, 2025
@locker locker assigned nshy and unassigned locker Oct 17, 2025
@nshy nshy removed their assignment Oct 20, 2025
Copy link
Collaborator

@sergepetrenko sergepetrenko left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the patch!
I've only looked over the box: keep vclock in synchro and raft request commit. LGTM.

@sergepetrenko sergepetrenko assigned nshy and unassigned sergepetrenko Oct 22, 2025
@nshy nshy added the full-ci Enables all tests for a pull request label Oct 23, 2025
@nshy
Copy link
Contributor Author

nshy commented Oct 23, 2025

Fixed fuzzer after PR changes (commit "box: keep vclock in synchro and raft request").

--- a/test/fuzz/xrow_decode_raft_fuzzer.c
+++ b/test/fuzz/xrow_decode_raft_fuzzer.c
@@ -39,8 +39,7 @@ LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
        row.bodycnt = 1;

        struct raft_request request = {0};
-       struct vclock vclock = {0};
-       xrow_decode_raft(&row, &request, &vclock);
+       xrow_decode_raft(&row, &request);

        return 0;
 }

nshy added 2 commits October 23, 2025 10:50
Currently we keep a pointer to vclock in `struct synchro_request` and
`struct raft_request` unlikely to the other requests. There we keep only
pointers to MsgPack data but keep parsed values right in the request
struct. Thus this makes these two requests memory management different
from the other requests. Let's align them with others.

Part of tarantool#11888

NO_CHANGELOG=refactoring
NO_DOC=refactoring
Validating requests MsgPack and parsing the requests body turns out to
be significant part of recovery in terms of CPU. Let's move it to
a distinct thread. This is similar to iproto and applier where we have
a distinct thread(s) for that.

Snapshot is written in batches of about 128k
(`XLOG_TX_AUTOCOMMIT_THRESHOLD`). The recovery thread reads the batch,
parses its requests and send to TX to process. The maximum number of
read but not processed batches is limited to 2 to make a flow control.

Closes tarantool#11888
Part of tarantool#11452

============================
= Performance test results =
============================

Recovery performance is evaluated using below perf test. The 1.10.5
performance is taken from the previous evaluations for the issue.

```
tarantool perf/lua/recovery.lua \
  --row_count 10000000 --column_count 10 --recovery_count 5 \
  --preload_script "require('compat').box_recovery_triggers_deprecation = 'new'"
```

Run                 | Perf, krps |
---------------------------------|
master, current     |    1060    |
---------------------------------|
master, patched     |    1800    |
---------------------------------|
1.10.5              |    1315    |

So introducing snapshot reader thread gives 70% boost for recovery from
snapshot. Now master is 37% faster than 1.10.5.

If `box_recovery_triggers_deprecation = 'old'` then performance is only
990 krps.

NO_TEST=performance
NO_DOC=performance
@nshy nshy requested a review from ligurio as a code owner October 23, 2025 07:50
@nshy nshy assigned locker and nshy and unassigned nshy and locker Oct 23, 2025
Copy link
Member

@ligurio ligurio left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

changes in test/fuzz/xrow_decode_raft_fuzzer.c - LGTM

@nshy nshy assigned locker and unassigned nshy Oct 23, 2025
@locker locker merged commit 08329dc into tarantool:master Oct 23, 2025
44 checks passed
@nshy nshy deleted the recovery-thread branch October 23, 2025 10:28
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

full-ci Enables all tests for a pull request

Projects

None yet

Development

Successfully merging this pull request may close these issues.

Use additional thread to improve snapshot recovery perf

6 participants