Skip to content

Commit caa219f

Browse files
committed
fix(injector): detect cgroupv1 unlimited sentinel
On cgroupv1, an unset memory limit is reported as the sentinel value 9223372036854771712 rather than the string "max" used by cgroupv2. Without detection, this value overflows the target byte calculation, making the disruption a silent no-op. Return a clear error when the sentinel is detected. Add unit tests covering join failure, cgroupv2 "max", the cgroupv1 unlimited sentinel, and a real limit path.
1 parent dbc0f50 commit caa219f

4 files changed

Lines changed: 165 additions & 1 deletion

File tree

examples/complete.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,9 @@ spec:
103103
delayJitter: 5 # add X % (1-100) of delay as jitter to delay (+- X% ms to original delay), defaults to 10%
104104
bandwidthLimit: 10000 # bandwidth limit in bytes
105105
cpuPressure: {} # cpu load generator
106+
memoryPressure: # memory load generator
107+
targetPercent: "50%" # percentage of available memory to consume (e.g. "50%")
108+
rampDuration: 10m # optional, duration over which to gradually reach targetPercent
106109
diskPressure: # disk pressure
107110
path: /mnt/data # mount point (in the pod) to apply throttle on
108111
throttling:

examples/memory_pressure.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
apiVersion: chaos.datadoghq.com/v1beta1
2+
kind: Disruption
3+
metadata:
4+
name: memory-pressure
5+
namespace: chaos-demo
6+
spec:
7+
level: pod
8+
selector:
9+
service: demo-curl
10+
count: 100%
11+
duration: 5m
12+
memoryPressure:
13+
targetPercent: "50%"
14+
#rampDuration: 10m

injector/memory_stress.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,19 @@ func (m *memoryStressInjector) readMemoryLimit() (int64, error) {
228228
return 0, fmt.Errorf("unable to read memory.limit_in_bytes: %w", err)
229229
}
230230

231-
return strconv.ParseInt(strings.TrimSpace(content), 10, 64)
231+
limit, err := strconv.ParseInt(strings.TrimSpace(content), 10, 64)
232+
if err != nil {
233+
return 0, err
234+
}
235+
236+
// cgroupv1 reports math.MaxInt64 rounded down to a page boundary (9223372036854771712 with 4 KiB pages) when no memory limit is set.
237+
// Use a 4 PiB threshold to detect this sentinel: no real workload has that much RAM.
238+
const cgroupV1UnlimitedThreshold = int64(4 * 1024 * 1024 * 1024 * 1024 * 1024) // 4 PiB
239+
if limit >= cgroupV1UnlimitedThreshold {
240+
return 0, fmt.Errorf("memory limit is unlimited (memory.limit_in_bytes=%d), cannot determine target bytes", limit)
241+
}
242+
243+
return limit, nil
232244
}
233245

234246
func (m *memoryStressInjector) readMemoryUsage() (int64, error) {

injector/memory_stress_test.go

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
// Unless explicitly stated otherwise all files in this repository are licensed
2+
// under the Apache License Version 2.0.
3+
// This product includes software developed at Datadog (https://www.datadoghq.com/).
4+
// Copyright 2026 Datadog, Inc.
5+
6+
package injector_test
7+
8+
import (
9+
"fmt"
10+
"time"
11+
12+
"github.com/DataDog/chaos-controller/cgroup"
13+
. "github.com/DataDog/chaos-controller/injector"
14+
"github.com/DataDog/chaos-controller/process"
15+
. "github.com/onsi/ginkgo/v2"
16+
. "github.com/onsi/gomega"
17+
)
18+
19+
var _ = Describe("Memory stress", func() {
20+
var (
21+
config Config
22+
inj Injector
23+
cgroups *cgroup.ManagerMock
24+
manager *process.ManagerMock
25+
)
26+
27+
const processID = 42
28+
29+
BeforeEach(func() {
30+
cgroups = cgroup.NewManagerMock(GinkgoT())
31+
manager = process.NewManagerMock(GinkgoT())
32+
33+
config = Config{
34+
Log: log,
35+
Cgroup: cgroups,
36+
}
37+
})
38+
39+
JustBeforeEach(func() {
40+
inj = NewMemoryStressInjector(config, 80, time.Duration(0), manager)
41+
})
42+
43+
Describe("Inject", func() {
44+
Context("when cgroup Join fails", func() {
45+
BeforeEach(func() {
46+
cgroups.EXPECT().Join(processID).Return(fmt.Errorf("join error")).Once()
47+
manager.EXPECT().ProcessID().Return(processID).Once()
48+
})
49+
50+
It("returns an error", func() {
51+
Expect(inj.Inject()).To(MatchError("unable to join cgroup for process '42': join error"))
52+
})
53+
})
54+
55+
Describe("cgroupv2", func() {
56+
BeforeEach(func() {
57+
cgroups.EXPECT().Join(processID).Return(nil).Once()
58+
manager.EXPECT().ProcessID().Return(processID).Once()
59+
cgroups.EXPECT().IsCgroupV2().Return(true)
60+
})
61+
62+
Context("when memory.max read fails", func() {
63+
BeforeEach(func() {
64+
cgroups.EXPECT().Read("", "memory.max").Return("", fmt.Errorf("read error")).Once()
65+
})
66+
67+
It("returns an error", func() {
68+
Expect(inj.Inject()).To(MatchError(ContainSubstring("unable to read memory limit")))
69+
})
70+
})
71+
72+
Context("when memory.max is 'max' (unlimited)", func() {
73+
BeforeEach(func() {
74+
cgroups.EXPECT().Read("", "memory.max").Return("max", nil).Once()
75+
})
76+
77+
It("returns an error containing 'unlimited'", func() {
78+
Expect(inj.Inject()).To(MatchError(ContainSubstring("unlimited")))
79+
})
80+
})
81+
})
82+
83+
Describe("cgroupv1", func() {
84+
BeforeEach(func() {
85+
cgroups.EXPECT().Join(processID).Return(nil).Once()
86+
manager.EXPECT().ProcessID().Return(processID).Once()
87+
cgroups.EXPECT().IsCgroupV2().Return(false)
88+
})
89+
90+
Context("when memory.limit_in_bytes read fails", func() {
91+
BeforeEach(func() {
92+
cgroups.EXPECT().Read("memory", "memory.limit_in_bytes").Return("", fmt.Errorf("read error")).Once()
93+
})
94+
95+
It("returns an error", func() {
96+
Expect(inj.Inject()).To(MatchError(ContainSubstring("unable to read memory limit")))
97+
})
98+
})
99+
100+
Context("when memory.limit_in_bytes is the unlimited sentinel (9223372036854771712)", func() {
101+
BeforeEach(func() {
102+
cgroups.EXPECT().Read("memory", "memory.limit_in_bytes").Return("9223372036854771712", nil).Once()
103+
})
104+
105+
It("returns an error containing 'unlimited'", func() {
106+
Expect(inj.Inject()).To(MatchError(ContainSubstring("unlimited")))
107+
})
108+
})
109+
110+
Context("when memory.limit_in_bytes is set to a real limit", func() {
111+
const memLimit = "536870912" // 512 MiB
112+
const memUsage = "104857600" // 100 MiB
113+
114+
BeforeEach(func() {
115+
cgroups.EXPECT().Read("memory", "memory.limit_in_bytes").Return(memLimit, nil).Once()
116+
cgroups.EXPECT().IsCgroupV2().Return(false).Maybe()
117+
cgroups.EXPECT().Read("memory", "memory.usage_in_bytes").Return(memUsage, nil).Once()
118+
})
119+
120+
It("succeeds and starts allocating memory", func() {
121+
Expect(inj.Inject()).To(Succeed())
122+
123+
// clean up background goroutine
124+
Expect(inj.Clean()).To(Succeed())
125+
})
126+
})
127+
})
128+
})
129+
130+
Describe("Clean", func() {
131+
It("succeeds when no injection has occurred", func() {
132+
Expect(inj.Clean()).To(Succeed())
133+
})
134+
})
135+
})

0 commit comments

Comments
 (0)