This document provides comprehensive guidance on optimizing performance when using the dynamic-ssz library.
Dynamic-ssz employs a hybrid approach that balances flexibility with performance:
- Static types: Automatically uses fastssz for optimal performance
- Dynamic types: Uses reflection-based processing for flexibility (see the sketch after this list)
- Caching: Extensive type caching reduces overhead
- Buffer management: Efficient memory usage patterns
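For illustration, a rough sketch of what makes a type static or dynamic, using the library's `ssz-size`/`dynssz-size` struct tag conventions (the container and field names here are made up for this example):

```go
// Static: the size is fully described by ssz-size, so dynamic-ssz
// can delegate to generated fastssz code.
type StaticContainer struct {
    BlockRoots [][]byte `ssz-size:"8192,32"`
}

// Dynamic: dynssz-size ties the length to a runtime spec value, so
// reflection is used whenever the preset overrides the default.
type DynamicContainer struct {
    BlockRoots [][]byte `ssz-size:"8192,32" dynssz-size:"SLOTS_PER_HISTORICAL_ROOT,32"`
}
```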
Based on testing with BeaconBlocks and BeaconStates from Kurtosis testnets:
Mainnet preset:

BeaconBlock (10,000 operations):
- fastssz only: [8ms unmarshal / 3ms marshal / 88ms hash]
- dynssz only: [27ms unmarshal / 12ms marshal / 63ms hash]
- dynssz + fastssz: [8ms unmarshal / 3ms marshal / 64ms hash] ← Recommended

BeaconState (10,000 operations):
- fastssz only: [5.8s unmarshal / 5.0s marshal / 73s hash]
- dynssz only: [22.5s unmarshal / 12.3s marshal / 40s hash]
- dynssz + fastssz: [5.7s unmarshal / 4.9s marshal / 37s hash] ← Recommended
Minimal preset (non-standard spec values, which fastssz's generated code cannot decode):

BeaconBlock (10,000 operations):
- fastssz only: Failed (unmarshal error)
- dynssz only: [44ms unmarshal / 29ms marshal / 90ms hash]
- dynssz + fastssz: [22ms unmarshal / 13ms marshal / 151ms hash] ← Only option

BeaconState (10,000 operations):
- fastssz only: Failed (unmarshal error)
- dynssz only: [796ms unmarshal / 407ms marshal / 1816ms hash]
- dynssz + fastssz: [459ms unmarshal / 244ms marshal / 4712ms hash] ← Only option
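No extra configuration is needed to get the recommended mode: it is the default. A minimal sketch of the three operations measured above (the `UnmarshalSSZ` and `HashTreeRoot` signatures are assumed here; the spec keys and values are illustrative):

```go
specs := map[string]any{"SLOTS_PER_HISTORICAL_ROOT": uint64(8192)}
ds := dynssz.NewDynSsz(specs) // fastssz delegation is on by default

data, err := ds.MarshalSSZ(block) // Marshal
if err != nil {
    log.Fatal(err)
}

decoded := new(phase0.SignedBeaconBlock)
if err := ds.UnmarshalSSZ(decoded, data); err != nil { // Unmarshal
    log.Fatal(err)
}

root, err := ds.HashTreeRoot(block) // Hash
if err != nil {
    log.Fatal(err)
}
_ = root
```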
Always reuse DynSsz instances for maximum performance:
```go
// ❌ Bad: Creates new instance every time
func processBlock(block *phase0.SignedBeaconBlock) {
    ds := dynssz.NewDynSsz(specs) // Expensive: rebuilds the type cache!
    data, _ := ds.MarshalSSZ(block)
    _ = data // Process data...
}

// ✅ Good: Reuse instance
type Processor struct {
    dynSsz *dynssz.DynSsz
}

func (p *Processor) processBlock(block *phase0.SignedBeaconBlock) {
    data, _ := p.dynSsz.MarshalSSZ(block)
    _ = data // Process data...
}
```

Use MarshalSSZTo with pre-allocated buffers:
```go
// ❌ Bad: Allocates new buffer each time
func processBlocks(ds *dynssz.DynSsz, blocks []Block) {
    for _, block := range blocks {
        data, _ := ds.MarshalSSZ(block) // New allocation each time
        _ = data // Process data...
    }
}

// ✅ Good: Reuse buffer
func processBlocks(ds *dynssz.DynSsz, blocks []Block) {
    buf := make([]byte, 0, 1024*1024) // 1MB buffer
    for _, block := range blocks {
        buf = buf[:0] // Reset length, keep capacity
        data, _ := ds.MarshalSSZTo(block, buf)
        _ = data // Process data...
    }
}
```

Pre-calculate sizes when possible:
```go
// ✅ Good: Calculate size once for batch operations
func batchProcess(ds *dynssz.DynSsz, items []Item) {
    // Calculate the expected total size for buffer allocation
    totalSize := 0
    for _, item := range items {
        size, _ := ds.SizeSSZ(item)
        totalSize += size
    }

    buf := make([]byte, 0, totalSize)
    for _, item := range items {
        buf, _ = ds.MarshalSSZTo(item, buf) // Appends each item to buf
    }
}
```

Leverage the type cache for repeated operations:
```go
// ✅ Good: Type cache is automatically used
type Service struct {
    dynSsz *dynssz.DynSsz
}

func (s *Service) processMultipleBlocks(blocks []*phase0.SignedBeaconBlock) {
    // The first block builds the type cache;
    // subsequent blocks reuse the cached type information.
    for _, block := range blocks {
        data, _ := s.dynSsz.MarshalSSZ(block)
        _ = data // Process...
    }
}
```

Choose appropriate buffer sizes:
```go
// Small objects (< 1KB)
buf := make([]byte, 0, 1024)

// Medium objects (< 100KB)
buf := make([]byte, 0, 100*1024)

// Large objects (BeaconState, etc.)
buf := make([]byte, 0, 2*1024*1024) // 2MB
```

Use sync.Pool for high-frequency operations:
```go
var bufferPool = sync.Pool{
    New: func() interface{} {
        buf := make([]byte, 0, 1024*1024)
        return &buf // Store a pointer to avoid allocating on every Put
    },
}

func processWithPool(ds *dynssz.DynSsz, block Block) {
    bufPtr := bufferPool.Get().(*[]byte)
    defer bufferPool.Put(bufPtr)
    data, _ := ds.MarshalSSZTo(block, (*bufPtr)[:0])
    _ = data // Process data before returning: it may alias the pooled buffer
}
```

Be careful with large slices:
```go
// ❌ Bad: May cause a memory leak
func process(ds *dynssz.DynSsz, largeData []byte) []byte {
    return largeData[100:200] // Still references the entire backing array
}

// ✅ Good: Copy when necessary
func process(ds *dynssz.DynSsz, largeData []byte) []byte {
    result := make([]byte, 100)
    copy(result, largeData[100:200])
    return result
}
```

Options are set through the constructor. The defaults are already optimized for performance:
```go
// Default: fastssz enabled, fast hashing enabled, verbose off
ds := dynssz.NewDynSsz(specs)

// Only disable fastssz if you need pure reflection behavior
ds := dynssz.NewDynSsz(specs, dynssz.WithNoFastSsz())

// Enable verbose logging only for debugging
ds := dynssz.NewDynSsz(specs, dynssz.WithVerbose())
```

Profile CPU usage with pprof:
```go
import (
    "log"
    "net/http"
    _ "net/http/pprof"
)

// Add to your main function
go func() {
    log.Println(http.ListenAndServe("localhost:6060", nil))
}()

// Then profile with:
// go tool pprof "http://localhost:6060/debug/pprof/profile?seconds=30"
```

Check memory usage:
```go
import (
    "fmt"
    "runtime"
)

func checkMemory() {
    var m runtime.MemStats
    runtime.ReadMemStats(&m)
    fmt.Printf("Alloc = %d KB\n", bToKb(m.Alloc))
    fmt.Printf("TotalAlloc = %d KB\n", bToKb(m.TotalAlloc))
    fmt.Printf("Sys = %d KB\n", bToKb(m.Sys))
}

func bToKb(b uint64) uint64 {
    return b / 1024
}
```

Benchmark both marshaling paths with Go's testing package:
```go
func BenchmarkMarshalSSZ(b *testing.B) {
    ds := dynssz.NewDynSsz(specs)
    block := createTestBlock()

    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        _, err := ds.MarshalSSZ(block)
        if err != nil {
            b.Fatal(err)
        }
    }
}

func BenchmarkMarshalSSZTo(b *testing.B) {
    ds := dynssz.NewDynSsz(specs)
    block := createTestBlock()
    buf := make([]byte, 0, 1024)

    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        buf = buf[:0]
        _, err := ds.MarshalSSZTo(block, buf)
        if err != nil {
            b.Fatal(err)
        }
    }
}
```

Batch processing with a reusable buffer:
```go
type BatchProcessor struct {
    dynSsz *dynssz.DynSsz
    buffer []byte
}

func (bp *BatchProcessor) ProcessBatch(items []interface{}) error {
    for _, item := range items {
        bp.buffer = bp.buffer[:0]
        data, err := bp.dynSsz.MarshalSSZTo(item, bp.buffer)
        if err != nil {
            return err
        }
        _ = data // Process data...
    }
    return nil
}
```

Worker pools with per-worker instances and buffers:
```go
type Worker struct {
    dynSsz *dynssz.DynSsz
    buffer []byte
}

func (w *Worker) Process(item interface{}) ([]byte, error) {
    w.buffer = w.buffer[:0]
    return w.dynSsz.MarshalSSZTo(item, w.buffer)
}

func CreateWorkerPool(size int, specs map[string]any) []*Worker {
    workers := make([]*Worker, size)
    for i := range workers {
        workers[i] = &Worker{
            dynSsz: dynssz.NewDynSsz(specs),
            buffer: make([]byte, 0, 1024*1024), // No mutable state shared between workers
        }
    }
    return workers
}
```

Streaming for large datasets:
```go
type StreamProcessor struct {
    dynSsz *dynssz.DynSsz
    writer io.Writer
    buffer []byte
}

func (sp *StreamProcessor) ProcessStream(items <-chan interface{}) error {
    for item := range items {
        sp.buffer = sp.buffer[:0]
        data, err := sp.dynSsz.MarshalSSZTo(item, sp.buffer)
        if err != nil {
            return err
        }
        if _, err := sp.writer.Write(data); err != nil {
            return err
        }
    }
    return nil
}
```

Common pitfalls to avoid:
```go
// ❌ Don't do this in hot paths
for _, block := range blocks {
    ds := dynssz.NewDynSsz(specs) // Creates a new type cache each time!
    data, _ := ds.MarshalSSZ(block)
    _ = data
}

// ❌ Allocates a new buffer each time
for _, block := range blocks {
    data, _ := ds.MarshalSSZ(block) // New allocation
    _ = data
}

// ❌ Buffer too small, causes reallocations
buf := make([]byte, 0, 10) // Too small for typical blocks
for _, block := range blocks {
    buf, _ = ds.MarshalSSZTo(block, buf[:0])
}
```

Key metrics to monitor:
- Throughput: Operations per second
- Latency: Time per operation
- Memory usage: Allocation rate and GC pressure
- Cache hit rate: Type cache effectiveness
A simple monitor for throughput and latency:
```go
type PerformanceMonitor struct {
    totalOps   int64
    totalNanos int64
    startTime  time.Time
}

func NewPerformanceMonitor() *PerformanceMonitor {
    return &PerformanceMonitor{startTime: time.Now()}
}

func (pm *PerformanceMonitor) StartOperation() func() {
    start := time.Now()
    return func() {
        atomic.AddInt64(&pm.totalOps, 1)
        atomic.AddInt64(&pm.totalNanos, int64(time.Since(start))) // Atomic: safe for concurrent use
    }
}

func (pm *PerformanceMonitor) Stats() (throughput float64, avgLatency time.Duration) {
    ops := atomic.LoadInt64(&pm.totalOps)
    if ops == 0 {
        return 0, 0
    }
    elapsed := time.Since(pm.startTime)
    throughput = float64(ops) / elapsed.Seconds()
    avgLatency = time.Duration(atomic.LoadInt64(&pm.totalNanos) / ops)
    return throughput, avgLatency
}
```
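A hypothetical usage sketch, wrapping each marshal call with the stop function returned by StartOperation:

```go
pm := NewPerformanceMonitor()
for _, block := range blocks {
    done := pm.StartOperation()
    data, _ := ds.MarshalSSZ(block)
    done()
    _ = data // Process data...
}
throughput, avgLatency := pm.Stats()
fmt.Printf("%.0f ops/s, avg latency %s\n", throughput, avgLatency)
```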
By following these performance guidelines, you can achieve optimal performance with dynamic-ssz while maintaining the flexibility it provides for handling different Ethereum presets and custom specifications.