Skip to content

Commit 451e7f2

Browse files
committed
modelindexer: Run active in dedicated goroutine
Remove the `i.activeMu` from the modelindexer and shared sync primitives from the indexer structure, instead, run the active indexer in its own goroutine, minimizing the amount of shared locking that needs to happen when `ProcessBatch` is called. To allow (potentially) multiple active indexers from running in parallel, a new internal `bulkItems` channel is introduced, with a variable buffer depending on how powerful the machine where APM Server is run. The bigger the machine, the bigger the channel buffer, and vice versa. Additionally, the default `FlushBytes` reduced from `5MB` to `2MB`, and to keep the same total "bufferable" size, the default `MaxRequests` is increased to 25 (25 * 2mb = 50mb). The motivation behind this change is is twofold; Allowing the modelindexer to have more background flushes if enough events are being processed by the APM Server, and, creating slightly smaller bulk requests to Elasticsearch, since, with the default settings (5MB FlushBytes), the number of bulk documents is ~`18-24K`. Initial micro-benchmarks indicate that the performance improvement is ~42% with default settings, and macro-benchmarks ~27%: ``` benchstat old.txt single-lockless.txt name old time/op new time/op delta NoCompression-8 1.11µs ± 1% 1.15µs ± 4% +4.41% (p=0.008 n=5+5) BestSpeed-8 2.03µs ± 3% 1.88µs ± 1% -7.14% (p=0.008 n=5+5) DefaultCompression-8 4.10µs ± 2% 2.36µs ± 2% -42.32% (p=0.008 n=5+5) BestCompression-8 6.95µs ± 1% 4.21µs ± 1% -39.48% (p=0.008 n=5+5) name old alloc/op new alloc/op delta NoCompression-8 2.07kB ± 0% 2.14kB ± 0% +3.44% (p=0.016 n=4+5) BestSpeed-8 2.57kB ± 0% 2.56kB ± 0% -0.31% (p=0.008 n=5+5) DefaultCompression-8 2.55kB ± 0% 2.52kB ± 0% -0.87% (p=0.008 n=5+5) BestCompression-8 2.58kB ± 0% 2.57kB ± 0% -0.52% (p=0.008 n=5+5) ``` ``` name old events/sec new events/sec delta AgentAll-512 26.8k ± 0% 34.1k ±11% +27.11% (p=0.100 n=3+3) ``` Signed-off-by: Marc Lopez Rubio <marc5.12@outlook.com>
1 parent 3384f58 commit 451e7f2

4 files changed

Lines changed: 135 additions & 93 deletions

File tree

internal/beater/beater.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -886,6 +886,8 @@ func (s *serverRunner) newFinalBatchProcessor(
886886
stats := indexer.Stats()
887887
v.OnKey("available")
888888
v.OnInt(stats.AvailableBulkRequests)
889+
v.OnKey("active")
890+
v.OnInt(stats.ActiveBulkRequests)
889891
v.OnKey("completed")
890892
v.OnInt(stats.BulkRequests)
891893
})

internal/beater/server_test.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -975,8 +975,11 @@ func TestServerElasticsearchOutput(t *testing.T) {
975975
assert.Equal(t, map[string]interface{}{
976976
"elasticsearch": map[string]interface{}{
977977
"bulk_requests": map[string]interface{}{
978-
"available": int64(9),
979-
"completed": int64(0),
978+
"active": int64(1),
979+
"available": int64(9),
980+
"completed": int64(0),
981+
"upscales": int64(0),
982+
"downscales": int64(0),
980983
},
981984
},
982985
}, snapshot)

internal/model/modelindexer/indexer.go

Lines changed: 102 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"fmt"
2525
"io"
2626
"net/http"
27+
"runtime"
2728
"strings"
2829
"sync"
2930
"sync/atomic"
@@ -76,20 +77,17 @@ type Indexer struct {
7677
tooManyRequests int64
7778
bytesTotal int64
7879
availableBulkRequests int64
80+
activeBulkRequests int64
7981

8082
config Config
8183
logger *logp.Logger
8284
available chan *bulkIndexer
85+
bulkItems chan elasticsearch.BulkIndexerItem
8386
g errgroup.Group
8487

85-
mu sync.RWMutex
86-
closing bool
87-
closed chan struct{}
88-
activeMu sync.Mutex
89-
activeCond *sync.Cond
90-
active *bulkIndexer
91-
timer *time.Timer
92-
timerStopped chan struct{}
88+
mu sync.RWMutex
89+
closing bool
90+
closed chan struct{}
9391
}
9492

9593
// Config holds configuration for Indexer.
@@ -106,7 +104,8 @@ type Config struct {
106104
// If MaxRequests is less than or equal to zero, the default of 25 will be used.
107105
MaxRequests int
108106

109-
// FlushBytes holds the flush threshold in bytes.
107+
// FlushBytes holds the flush threshold in bytes. If Compression is enabled,
108+
// the number of events that can be buffered will be greater.
110109
//
111110
// If FlushBytes is zero, the default of 2MB will be used.
112111
FlushBytes int
@@ -133,10 +132,10 @@ func New(client elasticsearch.Client, cfg Config) (*Indexer, error) {
133132
)
134133
}
135134
if cfg.MaxRequests <= 0 {
136-
cfg.MaxRequests = 10
135+
cfg.MaxRequests = 25
137136
}
138137
if cfg.FlushBytes <= 0 {
139-
cfg.FlushBytes = 5 * 1024 * 1024
138+
cfg.FlushBytes = 2 * 1024 * 1024
140139
}
141140
if cfg.FlushInterval <= 0 {
142141
cfg.FlushInterval = 30 * time.Second
@@ -151,9 +150,16 @@ func New(client elasticsearch.Client, cfg Config) (*Indexer, error) {
151150
logger: logger,
152151
available: available,
153152
closed: make(chan struct{}),
154-
timerStopped: make(chan struct{}),
153+
// NOTE(marclop) This channel size is arbitrary. It makes sense to set
154+
// the buffer channel to a bigger size when more CPUs are available, to
155+
// allow slightly more buffering to happen. It will speed up the blocking
156+
// ProcessBatch calls, when the input request rate > compression rate.
157+
bulkItems: make(
158+
chan elasticsearch.BulkIndexerItem,
159+
cfg.MaxRequests*runtime.GOMAXPROCS(0),
160+
),
155161
}
156-
indexer.activeCond = sync.NewCond(&indexer.activeMu)
162+
indexer.runActiveIndexer()
157163
return indexer, nil
158164
}
159165

@@ -167,7 +173,7 @@ func (i *Indexer) Close(ctx context.Context) error {
167173
defer i.mu.Unlock()
168174
if !i.closing {
169175
i.closing = true
170-
176+
close(i.bulkItems)
171177
// Close i.closed when ctx is cancelled,
172178
// unblocking any ongoing flush attempts.
173179
done := make(chan struct{})
@@ -179,12 +185,6 @@ func (i *Indexer) Close(ctx context.Context) error {
179185
case <-ctx.Done():
180186
}
181187
}()
182-
183-
i.activeMu.Lock()
184-
if i.active != nil && i.timer.Stop() {
185-
i.timerStopped <- struct{}{}
186-
}
187-
i.activeMu.Unlock()
188188
}
189189
return i.g.Wait()
190190
}
@@ -200,6 +200,7 @@ func (i *Indexer) Stats() Stats {
200200
TooManyRequests: atomic.LoadInt64(&i.tooManyRequests),
201201
BytesTotal: atomic.LoadInt64(&i.bytesTotal),
202202
AvailableBulkRequests: atomic.LoadInt64(&i.availableBulkRequests),
203+
ActiveBulkRequests: atomic.LoadInt64(&i.activeBulkRequests),
203204
}
204205
}
205206

@@ -234,54 +235,21 @@ func (i *Indexer) processEvent(ctx context.Context, event *model.APMEvent) error
234235
r.indexBuilder.WriteString(event.DataStream.Dataset)
235236
r.indexBuilder.WriteByte('-')
236237
r.indexBuilder.WriteString(event.DataStream.Namespace)
237-
index := r.indexBuilder.String()
238-
239-
i.activeMu.Lock()
240-
defer i.activeMu.Unlock()
241-
for i.active != nil && i.active.Len() >= i.config.FlushBytes {
242-
// The active bulk indexer is full: wait for it to be
243-
// switched out by the background flushActive goroutine.
244-
i.activeCond.Wait()
245-
}
246-
if i.active == nil {
247-
select {
248-
case <-ctx.Done():
249-
return ctx.Err()
250-
case i.active = <-i.available:
251-
atomic.AddInt64(&i.availableBulkRequests, -1)
252-
}
253-
if i.timer == nil {
254-
i.timer = time.NewTimer(i.config.FlushInterval)
255-
} else {
256-
i.timer.Reset(i.config.FlushInterval)
257-
}
258-
i.g.Go(func() error {
259-
// The timer may be stopped by i.Close or when
260-
// i.config.FlushBytes is exceeded, in which case
261-
// i.timerStopped will be signalled.
262-
select {
263-
case <-i.timerStopped:
264-
case <-i.timer.C:
265-
}
266-
return i.flushActive(context.Background())
267-
})
268-
}
269238

270-
if err := i.active.Add(elasticsearch.BulkIndexerItem{
271-
Index: index,
239+
select {
240+
// Send the BulkIndexerItem to the internal channel, allowing individual
241+
// events to be processed by an active bulk indexer in a dedicated goroutine,
242+
// which in turn speeds up event processing.
243+
case i.bulkItems <- elasticsearch.BulkIndexerItem{
244+
Index: r.indexBuilder.String(),
272245
Action: "create",
273246
Body: r,
274-
}); err != nil {
275-
return err
247+
}:
248+
case <-ctx.Done():
249+
return ctx.Err()
276250
}
277251
atomic.AddInt64(&i.eventsAdded, 1)
278252
atomic.AddInt64(&i.eventsActive, 1)
279-
280-
if i.active.Len() >= i.config.FlushBytes {
281-
if i.timer.Stop() {
282-
i.timerStopped <- struct{}{}
283-
}
284-
}
285253
return nil
286254
}
287255

@@ -332,7 +300,7 @@ func encodeMap(v map[string]interface{}, out *fastjson.Writer) error {
332300
return nil
333301
}
334302

335-
func (i *Indexer) flushActive(ctx context.Context) error {
303+
func (i *Indexer) flushIndexer(ctx context.Context, bulkIndexer *bulkIndexer) error {
336304
// Create a child context which is cancelled when the context passed to i.Close is cancelled.
337305
flushed := make(chan struct{})
338306
defer close(flushed)
@@ -345,11 +313,6 @@ func (i *Indexer) flushActive(ctx context.Context) error {
345313
}
346314
}()
347315

348-
i.activeMu.Lock()
349-
bulkIndexer := i.active
350-
i.active = nil
351-
i.activeCond.Broadcast()
352-
i.activeMu.Unlock()
353316
err := i.flush(ctx, bulkIndexer)
354317
bulkIndexer.Reset()
355318
i.available <- bulkIndexer
@@ -434,6 +397,72 @@ func (i *Indexer) flush(ctx context.Context, bulkIndexer *bulkIndexer) error {
434397
return nil
435398
}
436399

400+
// runActiveIndexer starts a new active indexer which pulls items from the
401+
// bulkItems channel. The more active indexers there are, the faster events
402+
// will be pulled out of the queue, but also the more likely it is that the
403+
// outgoing Elasticsearch bulk requests are flushed due to the idle timer,
404+
// rather than due to being full.
405+
func (i *Indexer) runActiveIndexer() {
406+
i.g.Go(func() error {
407+
atomic.AddInt64(&i.activeBulkRequests, 1)
408+
var mu sync.Mutex
409+
// `active` and `timer` must be accessed with `mu` locked.
410+
var active *bulkIndexer
411+
var timer *time.Timer
412+
flush := make(chan *bulkIndexer)
413+
defer func() {
414+
atomic.AddInt64(&i.activeBulkRequests, -1)
415+
close(flush)
416+
}()
417+
for item := range i.bulkItems {
418+
mu.Lock()
419+
if active == nil {
420+
active = <-i.available
421+
atomic.AddInt64(&i.availableBulkRequests, -1)
422+
if timer == nil {
423+
timer = time.AfterFunc(i.config.FlushInterval, func() {
424+
mu.Lock()
425+
defer mu.Unlock()
426+
flush <- active
427+
active = nil
428+
})
429+
} else {
430+
timer.Reset(i.config.FlushInterval)
431+
}
432+
i.g.Go(func() error {
433+
if indexer := <-flush; indexer != nil {
434+
return i.flushIndexer(context.Background(), indexer)
435+
}
436+
return nil
437+
})
438+
}
439+
if err := active.Add(item); err != nil {
440+
i.logger.Errorf("failed adding event to bulk indexer: %v", err)
441+
}
442+
// Flush the active bulk indexer when it's at or exceeds the
443+
// configured FlushBytes threshold.
444+
if active.Len() >= i.config.FlushBytes {
445+
// Stop the timer and only request a flush if the stop
446+
// operation succeeded, otherwise the active indexer has
447+
// already been flushed by the idle timer.
448+
if timer.Stop() {
449+
flush <- active
450+
active = nil
451+
}
452+
}
453+
mu.Unlock()
454+
}
455+
// Flush the active indexer when it hasn't yet been flushed.
456+
mu.Lock()
457+
if active != nil && timer.Stop() {
458+
flush <- active
459+
active = nil
460+
}
461+
mu.Unlock()
462+
return nil
463+
})
464+
}
465+
437466
var pool sync.Pool
438467

439468
type pooledReader struct {
@@ -495,7 +524,11 @@ type Stats struct {
495524
// which counts bytes at the transport level.
496525
BytesTotal int64
497526

498-
// AvailableBulkIndexers represents the number of bulk indexers
527+
// AvailableBulkRequests represents the number of bulk indexers
499528
// available for making bulk index requests.
500529
AvailableBulkRequests int64
530+
531+
// ActiveBulkRequests represents the number of active bulk indexers that are
532+
// concurrently processing batches.
533+
ActiveBulkRequests int64
501534
}

0 commit comments

Comments
 (0)