Skip to content

Commit f94dea5

Browse files
authored
perf(types): 3x speedup MakePartSet (#3117)
<!-- Please add a reference to the issue that this PR addresses and indicate which files are most critical to review. If it fully addresses a particular issue, please include "Closes #XXX" (where "XXX" is the issue number). If this PR is non-trivial/large/complex, please ensure that you have either created an issue that the team's had a chance to respond to, or had some discussion with the team prior to submitting substantial pull requests. The team can be reached via GitHub Discussions or the Cosmos Network Discord server in the #cometbft channel. GitHub Discussions is preferred over Discord as it allows us to keep track of conversations topically. https://github.com/cometbft/cometbft/discussions If the work in this PR is not aligned with the team's current priorities, please be advised that it may take some time before it is merged - especially if it has not yet been discussed with the team. See the project board for the team's current priorities: https://github.com/orgs/cometbft/projects/1 --> This PR adds some benchmarks, and significantly speeds up types.MakePartSet, and Partset.AddPart. (Used by the block proposer, and every consensus instance) It does so by doing two things: - Saving mutexes on the newly created bit array, by defaulting every value to True (rather than setting it in a loop that goes through a mutex) - Uses the same hash object throughout, and avoids an extra copy of every leaf. (main speedup) I do the same hash optimization for proof.Verify, which is used in the add block part codepath for both the proposer and every full node. 
New: ``` BenchmarkMakePartSet/nParts=1-12 38616 29817 ns/op 568 B/op 12 allocs/op BenchmarkMakePartSet/nParts=2-12 19888 59866 ns/op 1000 B/op 22 allocs/op BenchmarkMakePartSet/nParts=3-12 12979 95691 ns/op 1528 B/op 33 allocs/op BenchmarkMakePartSet/nParts=4-12 8688 128192 ns/op 2024 B/op 44 allocs/op BenchmarkMakePartSet/nParts=5-12 7308 155224 ns/op 2888 B/op 57 allocs/op ``` Old: ``` BenchmarkMakePartSet/nParts=1-12 16647 106545 ns/op 74169 B/op 12 allocs/op BenchmarkMakePartSet/nParts=2-12 10000 106361 ns/op 148329 B/op 23 allocs/op BenchmarkMakePartSet/nParts=3-12 6992 337644 ns/op 222587 B/op 35 allocs/op BenchmarkMakePartSet/nParts=4-12 3488 480109 ns/op 296811 B/op 47 allocs/op BenchmarkMakePartSet/nParts=5-12 2228 557768 ns/op 371404 B/op 61 allocs/op ``` System wide, this is definitely not our issue (looks like roughly .1ms per blockpart), but still definitely useful time to remove --- #### PR checklist - [x] Tests written/updated - [x] Changelog entry added in `.changelog` (we use [unclog](https://github.com/informalsystems/unclog) to manage our changelog) - [x] Updated relevant documentation (`docs/` or `spec/`) and code comments - [x] Title follows the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) spec
1 parent baca084 commit f94dea5

6 files changed

Lines changed: 58 additions & 15 deletions

File tree

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
- [`types`] Significantly speed up types.MakePartSet and types.AddPart, which are used in creating a block proposal
2+
([\#3117](https://github.com/cometbft/cometbft/issues/3117))

crypto/merkle/bench_test.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,25 @@ func BenchmarkInnerHash(b *testing.B) {
4040
b.Fatal("Benchmark did not run!")
4141
}
4242
}
43+
44+
// Benchmark the time it takes to hash a 64kb leaf, which is the size of
45+
// a block part.
46+
// This helps determine whether it's worth parallelizing this hash for the proposer.
47+
func BenchmarkLeafHash64kb(b *testing.B) {
48+
b.ReportAllocs()
49+
leaf := make([]byte, 64*1024)
50+
hash := sha256.New()
51+
52+
for i := 0; i < b.N; i++ {
53+
leaf[0] = byte(i)
54+
got := leafHashOpt(hash, leaf)
55+
if g, w := len(got), sha256.Size; g != w {
56+
b.Fatalf("size discrepancy: got %d, want %d", g, w)
57+
}
58+
sink = got
59+
}
60+
61+
if sink == nil {
62+
b.Fatal("Benchmark did not run!")
63+
}
64+
}

crypto/merkle/proof.go

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"bytes"
55
"errors"
66
"fmt"
7+
"hash"
78

89
cmtcrypto "github.com/cometbft/cometbft/api/cometbft/crypto/v1"
910
"github.com/cometbft/cometbft/crypto/tmhash"
@@ -91,13 +92,14 @@ func (sp *Proof) Verify(rootHash []byte, leaf []byte) error {
9192
Err: errors.New("negative proof index"),
9293
}
9394
}
94-
leafHash := leafHash(leaf)
95+
hash := tmhash.New()
96+
leafHash := leafHashOpt(hash, leaf)
9597
if !bytes.Equal(sp.LeafHash, leafHash) {
9698
return ErrInvalidHash{
9799
Err: fmt.Errorf("leaf %x, want %x", sp.LeafHash, leafHash),
98100
}
99101
}
100-
computedHash, err := sp.computeRootHash()
102+
computedHash, err := sp.computeRootHash(hash)
101103
if err != nil {
102104
return ErrInvalidHash{
103105
Err: fmt.Errorf("compute root hash: %w", err),
@@ -112,8 +114,9 @@ func (sp *Proof) Verify(rootHash []byte, leaf []byte) error {
112114
}
113115

114116
// Compute the root hash given a leaf hash.
115-
func (sp *Proof) computeRootHash() ([]byte, error) {
117+
func (sp *Proof) computeRootHash(hash hash.Hash) ([]byte, error) {
116118
return computeHashFromAunts(
119+
hash,
117120
sp.Index,
118121
sp.Total,
119122
sp.LeafHash,
@@ -200,7 +203,7 @@ func ProofFromProto(pb *cmtcrypto.Proof) (*Proof, error) {
200203
// Use the leafHash and innerHashes to get the root merkle hash.
201204
// If the length of the innerHashes slice isn't exactly correct, the result is nil.
202205
// Recursive impl.
203-
func computeHashFromAunts(index, total int64, leafHash []byte, innerHashes [][]byte) ([]byte, error) {
206+
func computeHashFromAunts(hash hash.Hash, index, total int64, leafHash []byte, innerHashes [][]byte) ([]byte, error) {
204207
if index >= total || index < 0 || total <= 0 {
205208
return nil, fmt.Errorf("invalid index %d and/or total %d", index, total)
206209
}
@@ -218,18 +221,18 @@ func computeHashFromAunts(index, total int64, leafHash []byte, innerHashes [][]b
218221
}
219222
numLeft := getSplitPoint(total)
220223
if index < numLeft {
221-
leftHash, err := computeHashFromAunts(index, numLeft, leafHash, innerHashes[:len(innerHashes)-1])
224+
leftHash, err := computeHashFromAunts(hash, index, numLeft, leafHash, innerHashes[:len(innerHashes)-1])
222225
if err != nil {
223226
return nil, err
224227
}
225228

226-
return innerHash(leftHash, innerHashes[len(innerHashes)-1]), nil
229+
return innerHashOpt(hash, leftHash, innerHashes[len(innerHashes)-1]), nil
227230
}
228-
rightHash, err := computeHashFromAunts(index-numLeft, total-numLeft, leafHash, innerHashes[:len(innerHashes)-1])
231+
rightHash, err := computeHashFromAunts(hash, index-numLeft, total-numLeft, leafHash, innerHashes[:len(innerHashes)-1])
229232
if err != nil {
230233
return nil, err
231234
}
232-
return innerHash(innerHashes[len(innerHashes)-1], rightHash), nil
235+
return innerHashOpt(hash, innerHashes[len(innerHashes)-1], rightHash), nil
233236
}
234237
}
235238

@@ -266,18 +269,22 @@ func (spn *ProofNode) FlattenAunts() [][]byte {
266269
// trails[0].Hash is the leaf hash for items[0].
267270
// trails[i].Parent.Parent....Parent == root for all i.
268271
func trailsFromByteSlices(items [][]byte) (trails []*ProofNode, root *ProofNode) {
272+
return trailsFromByteSlicesInternal(tmhash.New(), items)
273+
}
274+
275+
func trailsFromByteSlicesInternal(hash hash.Hash, items [][]byte) (trails []*ProofNode, root *ProofNode) {
269276
// Recursive impl.
270277
switch len(items) {
271278
case 0:
272279
return []*ProofNode{}, &ProofNode{emptyHash(), nil, nil, nil}
273280
case 1:
274-
trail := &ProofNode{leafHash(items[0]), nil, nil, nil}
281+
trail := &ProofNode{leafHashOpt(hash, items[0]), nil, nil, nil}
275282
return []*ProofNode{trail}, trail
276283
default:
277284
k := getSplitPoint(int64(len(items)))
278-
lefts, leftRoot := trailsFromByteSlices(items[:k])
279-
rights, rightRoot := trailsFromByteSlices(items[k:])
280-
rootHash := innerHash(leftRoot.Hash, rightRoot.Hash)
285+
lefts, leftRoot := trailsFromByteSlicesInternal(hash, items[:k])
286+
rights, rightRoot := trailsFromByteSlicesInternal(hash, items[k:])
287+
rootHash := innerHashOpt(hash, leftRoot.Hash, rightRoot.Hash)
281288
root := &ProofNode{rootHash, nil, nil, nil}
282289
leftRoot.Parent = root
283290
leftRoot.Right = rightRoot

crypto/merkle/proof_value.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ func (op ValueOp) Run(args [][]byte) ([][]byte, error) {
104104
}
105105
}
106106

107-
rootHash, err := op.Proof.computeRootHash()
107+
rootHash, err := op.Proof.computeRootHash(tmhash.New())
108108
if err != nil {
109109
return nil, err
110110
}

types/part_set.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,21 +180,20 @@ func NewPartSetFromData(data []byte, partSize uint32) *PartSet {
180180
total := (uint32(len(data)) + partSize - 1) / partSize
181181
parts := make([]*Part, total)
182182
partsBytes := make([][]byte, total)
183-
partsBitArray := bits.NewBitArray(int(total))
184183
for i := uint32(0); i < total; i++ {
185184
part := &Part{
186185
Index: i,
187186
Bytes: data[i*partSize : cmtmath.MinInt(len(data), int((i+1)*partSize))],
188187
}
189188
parts[i] = part
190189
partsBytes[i] = part.Bytes
191-
partsBitArray.SetIndex(int(i), true)
192190
}
193191
// Compute merkle proofs
194192
root, proofs := merkle.ProofsFromByteSlices(partsBytes)
195193
for i := uint32(0); i < total; i++ {
196194
parts[i].Proof = *proofs[i]
197195
}
196+
partsBitArray := bits.NewBitArrayFromFn(int(total), func(int) bool { return true })
198197
return &PartSet{
199198
total: total,
200199
hash: root,

types/part_set_test.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package types
22

33
import (
4+
"fmt"
45
"io"
56
"testing"
67

@@ -219,3 +220,15 @@ func TestPartProtoBuf(t *testing.T) {
219220
}
220221
}
221222
}
223+
224+
func BenchmarkMakePartSet(b *testing.B) {
225+
for nParts := 1; nParts <= 5; nParts++ {
226+
b.Run(fmt.Sprintf("nParts=%d", nParts), func(b *testing.B) {
227+
data := cmtrand.Bytes(testPartSize * nParts)
228+
b.ResetTimer()
229+
for i := 0; i < b.N; i++ {
230+
NewPartSetFromData(data, testPartSize)
231+
}
232+
})
233+
}
234+
}

0 commit comments

Comments
 (0)