Skip to content

Commit 72da5a6

Browse files
ansellmarc-gr
andauthored
Add success cache minimum TTL for DNS responses (#18986)
An enhancement to add a minimum alternative cache TTL to the libbeat dns processor for successful DNS responses. This ensures that TTL=0 successful reverse DNS responses can be cached to avoid sending the same reverse DNS request again within a short period of time. The libbeat dns processor is used as a reverse DNS annotator for auditbeat events. Some of the IP addresses in those events respond to reverse DNS requests with TTL=0 in the responses. These were causing load issues for my systems when I had the reverse DNS processor enabled for auditbeat. The new setting is `success_cache.min_ttl`. Closes #18709 Signed-off-by: Peter Ansell <p_ansell@yahoo.com> Co-authored-by: Marc Guasch <marc.guasch@elastic.co>
1 parent dec042c commit 72da5a6

5 files changed

Lines changed: 58 additions & 14 deletions

File tree

CHANGELOG.next.asciidoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,10 +372,12 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
372372
- Add support to trim captured values in the dissect processor. {pull}19464[19464]
373373
- Added the `max_cached_sessions` option to the script processor. {pull}19562[19562]
374374
- Add support for DNS over TLS for the dns_processor. {pull}19321[19321]
375+
- Add minimum cache TTL for successful DNS responses. {pull}18986[18986]
375376
- Set index.max_docvalue_fields_search in index template to increase value to 200 fields. {issue}20215[20215]
376377
- Add leader election for Kubernetes autodiscover. {pull}20281[20281]
377378
- Add capability of enriching process metadata with container id also for non-privileged containers in `add_process_metadata` processor. {pull}19767[19767]
378379

380+
379381
*Auditbeat*
380382

381383
- Reference kubernetes manifests include configuration for auditd and enrichment with kubernetes metadata. {pull}17431[17431]

libbeat/processors/dns/cache.go

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,9 @@ func (r ptrRecord) IsExpired(now time.Time) bool {
3535

3636
type ptrCache struct {
3737
sync.RWMutex
38-
data map[string]ptrRecord
39-
maxSize int
38+
data map[string]ptrRecord
39+
maxSize int
40+
minSuccessTTL time.Duration
4041
}
4142

4243
func (c *ptrCache) set(now time.Time, key string, ptr *PTR) {
@@ -135,11 +136,10 @@ func (ce *cachedError) Cause() error { return ce.err }
135136
// reverse DNS queries. It caches the results of queries regardless of their
136137
// outcome (success or failure).
137138
type PTRLookupCache struct {
138-
success *ptrCache
139-
failure *failureCache
140-
failureTTL time.Duration
141-
resolver PTRResolver
142-
stats cacheStats
139+
success *ptrCache
140+
failure *failureCache
141+
resolver PTRResolver
142+
stats cacheStats
143143
}
144144

145145
type cacheStats struct {
@@ -155,8 +155,9 @@ func NewPTRLookupCache(reg *monitoring.Registry, conf CacheConfig, resolver PTRR
155155

156156
c := &PTRLookupCache{
157157
success: &ptrCache{
158-
data: make(map[string]ptrRecord, conf.SuccessCache.InitialCapacity),
159-
maxSize: conf.SuccessCache.MaxCapacity,
158+
data: make(map[string]ptrRecord, conf.SuccessCache.InitialCapacity),
159+
maxSize: conf.SuccessCache.MaxCapacity,
160+
minSuccessTTL: conf.SuccessCache.MinTTL,
160161
},
161162
failure: &failureCache{
162163
data: make(map[string]failureRecord, conf.FailureCache.InitialCapacity),
@@ -198,11 +199,14 @@ func (c PTRLookupCache) LookupPTR(ip string) (*PTR, error) {
198199
return nil, err
199200
}
200201

202+
// We set the ptr.TTL to the minimum TTL in case it is less than that.
203+
ptr.TTL = max(ptr.TTL, uint32(c.success.minSuccessTTL/time.Second))
204+
201205
c.success.set(now, ip, ptr)
202206
return ptr, nil
203207
}
204208

205-
func max(a, b int) int {
209+
func max(a, b uint32) uint32 {
206210
if a >= b {
207211
return a
208212
}

libbeat/processors/dns/cache_test.go

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ package dns
1919

2020
import (
2121
"io"
22-
"strings"
2322
"testing"
23+
"time"
2424

2525
"github.com/stretchr/testify/assert"
2626

@@ -30,12 +30,14 @@ import (
3030
type stubResolver struct{}
3131

3232
func (r *stubResolver) LookupPTR(ip string) (*PTR, error) {
33-
if ip == gatewayIP {
33+
switch ip {
34+
case gatewayIP:
3435
return &PTR{Host: gatewayName, TTL: gatewayTTL}, nil
35-
} else if strings.HasSuffix(ip, "11") {
36+
case gatewayIP + "1":
3637
return nil, io.ErrUnexpectedEOF
38+
case gatewayIP + "2":
39+
return &PTR{Host: gatewayName, TTL: 0}, nil
3740
}
38-
3941
return nil, &dnsError{"fake lookup returned NXDOMAIN"}
4042
}
4143

@@ -98,4 +100,29 @@ func TestCache(t *testing.T) {
98100
assert.EqualValues(t, 3, c.stats.Hit.Get())
99101
assert.EqualValues(t, 3, c.stats.Miss.Get()) // Cache miss.
100102
}
103+
104+
minTTL := defaultConfig.CacheConfig.SuccessCache.MinTTL
105+
// Initial success returned TTL=0 with MinTTL.
106+
ptr, err = c.LookupPTR(gatewayIP + "2")
107+
if assert.NoError(t, err) {
108+
assert.EqualValues(t, gatewayName, ptr.Host)
109+
110+
assert.EqualValues(t, minTTL/time.Second, ptr.TTL)
111+
assert.EqualValues(t, 3, c.stats.Hit.Get())
112+
assert.EqualValues(t, 4, c.stats.Miss.Get())
113+
114+
expectedExpire := time.Now().Add(minTTL).Unix()
115+
gotExpire := c.success.data[gatewayIP+"2"].expires.Unix()
116+
assert.InDelta(t, expectedExpire, gotExpire, 1)
117+
}
118+
119+
// Cached success from a previous TTL=0 response.
120+
ptr, err = c.LookupPTR(gatewayIP + "2")
121+
if assert.NoError(t, err) {
122+
assert.EqualValues(t, gatewayName, ptr.Host)
123+
// TTL counts down while in cache.
124+
assert.InDelta(t, minTTL/time.Second, ptr.TTL, 1)
125+
assert.EqualValues(t, 4, c.stats.Hit.Get())
126+
assert.EqualValues(t, 4, c.stats.Miss.Get())
127+
}
101128
}

libbeat/processors/dns/config.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,9 @@ type CacheSettings struct {
8888
// from the DNS record.
8989
TTL time.Duration `config:"ttl"`
9090

91+
// Minimum TTL value for successful DNS responses.
92+
MinTTL time.Duration `config:"min_ttl" validate:"min=1"`
93+
9194
// Initial capacity. How much space is allocated at initialization.
9295
InitialCapacity int `config:"capacity.initial" validate:"min=0"`
9396

@@ -131,6 +134,9 @@ func (c *Config) Validate() error {
131134

132135
// Validate validates the data contained in the CacheConfig.
133136
func (c *CacheConfig) Validate() error {
137+
if c.SuccessCache.MinTTL <= 0 {
138+
return errors.Errorf("success_cache.min_ttl must be > 0")
139+
}
134140
if c.FailureCache.TTL <= 0 {
135141
return errors.Errorf("failure_cache.ttl must be > 0")
136142
}
@@ -155,6 +161,7 @@ func (c *CacheConfig) Validate() error {
155161
var defaultConfig = Config{
156162
CacheConfig: CacheConfig{
157163
SuccessCache: CacheSettings{
164+
MinTTL: time.Minute,
158165
InitialCapacity: 1000,
159166
MaxCapacity: 10000,
160167
},

libbeat/processors/dns/docs/dns.asciidoc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ processors:
5252
success_cache:
5353
capacity.initial: 1000
5454
capacity.max: 10000
55+
min_ttl: 1m
5556
failure_cache:
5657
capacity.initial: 1000
5758
capacity.max: 10000
@@ -81,6 +82,9 @@ the memory for this number of items. Default value is `1000`.
8182
cache can hold. When the maximum capacity is reached a random item is evicted.
8283
Default value is `10000`.
8384

85+
`success_cache.min_ttl`:: The minimum cache TTL for successful DNS responses. When a response carries a smaller TTL, this value is used instead, which ensures that successful reverse DNS responses with `TTL=0` can still be cached.
86+
Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". Default value is `1m`.
87+
8488
`failure_cache.capacity.initial`:: The initial number of items that the failure
8589
cache will be allocated to hold. When initialized the processor will allocate
8690
the memory for this number of items. Default value is `1000`.

0 commit comments

Comments
 (0)