Skip to content

Commit 0c10bd5

Browse files
mergify[bot]ValarDragonmelekes
committed
perf(blockstore): Add LRU caches to blockstore operations used in consensus (backport cometbft#3003) (cometbft#3083)
Closes cometbft#2844 We are seeing that the blockstore loading operations get used in hot loops within gossip routines, and queryMaj23 routines. This PR reduces that overhead using an LRU cache. The LRU cache does have a mutex on every get, but the time with the LRU cache is 9x faster than without (before even adding in DB overheads), due to the proto unmarshalling saved. We could imagine a setup where we avoided a lock there entirely. I don't think this is worth right now, since the new code is 9x faster, and these mostly appear in catchup code which should not be highly contended for across peers at the same time. With the new benchmark I added: OLD: ``` BenchmarkRepeatedLoadSeenCommit-12 24447 54691 ns/op 46495 B/op 319 allocs/op ``` NEW: ``` BenchmarkRepeatedLoadSeenCommit-12 224131 6401 ns/op 8320 B/op 2 allocs/op ``` It turns out these gossip routines don't need mutative copies, so we could optimize out the large allocation in the future if we want. 1 hour cpu profile that shows this appearing in prod: ![image](https://github.com/cometbft/cometbft/assets/6440154/5a7e0f02-8385-4c01-aa6a-dba2a2bf376d) The state machine execution time here for context is 92 seconds. So this is adding up in system load (and GC! The GC load is 52GB, the entire trace is 200GB, with other parts being optimized down from recent PRs) --- - [ ] Tests written/updated - [ ] Changelog entry added in `.changelog` (we use [unclog](https://github.com/informalsystems/unclog) to manage our changelog) - [ ] Updated relevant documentation (`docs/` or `spec/`) and code comments - [ ] Title follows the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) spec <hr>This is an automatic backport of pull request cometbft#3003 done by [Mergify](https://mergify.com). --------- Co-authored-by: Dev Ojha <ValarDragon@users.noreply.github.com> Co-authored-by: Anton Kaliaev <anton.kalyaev@gmail.com>
1 parent 68c9c3f commit 0c10bd5

7 files changed

Lines changed: 85 additions & 3 deletions

File tree

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
- [`blockstore`] Use LRU caches in blockstore, significiantly improving consensus gossip routine performance
2+
([\#3003](https://github.com/cometbft/cometbft/issues/3003)

.golangci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ linters-settings:
6464
- github.com/google
6565
- github.com/gorilla/websocket
6666
- github.com/informalsystems/tm-load-test/pkg/loadtest
67+
- github.com/hashicorp/golang-lru/v2
6768
- github.com/lib/pq
6869
- github.com/libp2p/go-buffer-pool
6970
- github.com/Masterminds/semver/v3

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ require (
5252
github.com/cosmos/gogoproto v1.4.2
5353
github.com/go-git/go-git/v5 v5.11.0
5454
github.com/golang/protobuf v1.5.3
55+
github.com/hashicorp/golang-lru/v2 v2.0.7
5556
github.com/oasisprotocol/curve25519-voi v0.0.0-20220708102147-0a8a51822cae
5657
github.com/vektra/mockery/v2 v2.14.0
5758
golang.org/x/sync v0.3.0

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,8 @@ github.com/hashicorp/go-version v1.6.0 h1:feTTfFNnjP967rlCxM/I9g701jU+RN74YKx2mO
494494
github.com/hashicorp/go-version v1.6.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
495495
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
496496
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
497+
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
498+
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
497499
github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
498500
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
499501
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=

store/bench_test.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
package store
2+
3+
import (
4+
"bytes"
5+
"testing"
6+
7+
"github.com/stretchr/testify/require"
8+
9+
"github.com/cometbft/cometbft/internal/test"
10+
"github.com/cometbft/cometbft/libs/log"
11+
"github.com/cometbft/cometbft/types"
12+
cmttime "github.com/cometbft/cometbft/types/time"
13+
)
14+
15+
// TestLoadBlockExtendedCommit tests loading the extended commit for a previously
16+
// saved block. The load method should return nil when only a commit was saved and
17+
// return the extended commit otherwise.
18+
func BenchmarkRepeatedLoadSeenCommitSameBlock(b *testing.B) {
19+
state, bs, cleanup := makeStateAndBlockStore(log.NewTMLogger(new(bytes.Buffer)))
20+
defer cleanup()
21+
h := bs.Height() + 1
22+
block := state.MakeBlock(h, test.MakeNTxs(h, 10), new(types.Commit), nil, state.Validators.GetProposer().Address)
23+
seenCommit := makeTestCommit(block.Header.Height, cmttime.Now())
24+
ps, err := block.MakePartSet(types.BlockPartSizeBytes)
25+
require.NoError(b, err)
26+
bs.SaveBlock(block, ps, seenCommit)
27+
28+
// sanity check
29+
res := bs.LoadSeenCommit(block.Height)
30+
require.Equal(b, seenCommit, res)
31+
32+
b.ResetTimer()
33+
for i := 0; i < b.N; i++ {
34+
res := bs.LoadSeenCommit(block.Height)
35+
require.NotNil(b, res)
36+
}
37+
}

store/store.go

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import (
44
"fmt"
55
"strconv"
66

7+
lru "github.com/hashicorp/golang-lru/v2"
8+
79
dbm "github.com/cometbft/cometbft-db"
810
"github.com/cosmos/gogoproto/proto"
911

@@ -41,17 +43,35 @@ type BlockStore struct {
4143
mtx cmtsync.RWMutex
4244
base int64
4345
height int64
46+
47+
seenCommitCache *lru.Cache[int64, *types.Commit]
48+
blockCommitCache *lru.Cache[int64, *types.Commit]
4449
}
4550

4651
// NewBlockStore returns a new BlockStore with the given DB,
4752
// initialized to the last height that was committed to the DB.
4853
func NewBlockStore(db dbm.DB) *BlockStore {
4954
bs := LoadBlockStoreState(db)
50-
return &BlockStore{
55+
bStore := &BlockStore{
5156
base: bs.Base,
5257
height: bs.Height,
5358
db: db,
5459
}
60+
bStore.addCaches()
61+
return bStore
62+
}
63+
64+
func (bs *BlockStore) addCaches() {
65+
var err error
66+
// err can only occur if the argument is non-positive, so is impossible in context.
67+
bs.blockCommitCache, err = lru.New[int64, *types.Commit](100)
68+
if err != nil {
69+
panic(err)
70+
}
71+
bs.seenCommitCache, err = lru.New[int64, *types.Commit](100)
72+
if err != nil {
73+
panic(err)
74+
}
5575
}
5676

5777
func (bs *BlockStore) IsEmpty() bool {
@@ -224,6 +244,10 @@ func (bs *BlockStore) LoadBlockMetaByHash(hash []byte) *types.BlockMeta {
224244
// and it comes from the block.LastCommit for `height+1`.
225245
// If no commit is found for the given height, it returns nil.
226246
func (bs *BlockStore) LoadBlockCommit(height int64) *types.Commit {
247+
comm, ok := bs.blockCommitCache.Get(height)
248+
if ok {
249+
return comm.Clone()
250+
}
227251
pbc := new(cmtproto.Commit)
228252
bz, err := bs.db.Get(calcBlockCommitKey(height))
229253
if err != nil {
@@ -240,13 +264,18 @@ func (bs *BlockStore) LoadBlockCommit(height int64) *types.Commit {
240264
if err != nil {
241265
panic(fmt.Sprintf("Error reading block commit: %v", err))
242266
}
243-
return commit
267+
bs.blockCommitCache.Add(height, commit)
268+
return commit.Clone()
244269
}
245270

246271
// LoadSeenCommit returns the locally seen Commit for the given height.
247272
// This is useful when we've seen a commit, but there has not yet been
248273
// a new block at `height + 1` that includes this commit in its block.LastCommit.
249274
func (bs *BlockStore) LoadSeenCommit(height int64) *types.Commit {
275+
comm, ok := bs.seenCommitCache.Get(height)
276+
if ok {
277+
return comm.Clone()
278+
}
250279
pbc := new(cmtproto.Commit)
251280
bz, err := bs.db.Get(calcSeenCommitKey(height))
252281
if err != nil {
@@ -264,7 +293,8 @@ func (bs *BlockStore) LoadSeenCommit(height int64) *types.Commit {
264293
if err != nil {
265294
panic(fmt.Errorf("error from proto commit: %w", err))
266295
}
267-
return commit
296+
bs.seenCommitCache.Add(height, commit)
297+
return commit.Clone()
268298
}
269299

270300
// PruneBlocks removes block up to (but not including) a height. It returns number of blocks pruned.

types/block.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -801,6 +801,15 @@ func (commit *Commit) GetVote(valIdx int32) *Vote {
801801
}
802802
}
803803

804+
// Clone creates a deep copy of this commit.
805+
func (commit *Commit) Clone() *Commit {
806+
sigs := make([]CommitSig, len(commit.Signatures))
807+
copy(sigs, commit.Signatures)
808+
commCopy := *commit
809+
commCopy.Signatures = sigs
810+
return &commCopy
811+
}
812+
804813
// VoteSignBytes returns the bytes of the Vote corresponding to valIdx for
805814
// signing.
806815
//

0 commit comments

Comments
 (0)