Skip to content

Commit ec2fc77

Browse files
[CONTINT-5186] Use in-place pod resizing in the vertical controller (#47998)
### What does this PR do?

This PR implements IPVPA in the autoscaling vertical controller according to the [RFC](https://datadoghq.atlassian.net/wiki/spaces/CONT/pages/6246498427/In-Place+Vertical+Pod+Resizing+for+Workload+Autoscaling). See the RFC for the full specification, but key components are:

- In-place resize via the pods/resize subresource, with eviction fallback (PDB-aware) and rollout fallback
- API server feature gate check (pods/resize discovery, cached 15min)
- ResizeSuccessful event emitted once

### Motivation

https://datadoghq.atlassian.net/browse/CONTINT-5126

### Describe how you validated your changes

Deployed several workloads and DPAs on an EKS cluster to dddev.

1. Happy path (i.e., in-place resize with no restarts) -> ResizeSuccessful event emitted exactly once and restartCount=0.
2. Trigger rollout (i.e., using `mode:TriggerRollout` on the DPA forces the legacy rollout path): works as expected.
3. Memory restart policy (i.e., container has resizePolicy requiring restart on memory limit/request changes): verified restartCount > 0 on pods after a memory recommendation change.
4. Sidecar (i.e., DPA with `constraints.containers: [{name: server}]`): only the server container is resized.

Cluster/workloads are still available for inspection: https://dddev.datadoghq.com/orchestration/scaling/workload?query=kube_cluster_name%3Ajrosario-ipvpa-final%20-kube_cluster_name%3Ajrosario-ipvpa3-mar18&workload_scaling_tab=optimized-workloads

### Additional Notes

This change is also related to/relies on:

- [datadog-operator](DataDog/datadog-operator#2743). For local testing I used a `go.work` entry to point to the local operator.
- helm-charts [RBAC for pods/resize](DataDog/helm-charts#2493) (patch verb on pods subresource).

Co-authored-by: cedric.lamoriniere <cedric.lamoriniere@datadoghq.com>
1 parent 77391a9 commit ec2fc77

39 files changed

Lines changed: 1222 additions & 197 deletions

File tree

.golangci.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,10 @@ linters:
351351
exclusions:
352352
generated: lax
353353
rules:
354+
- linters:
355+
- staticcheck
356+
path: pkg/clusteragent/admission/mutate/cwsinstrumentation/
357+
text: "SA1019: slices"
354358
# 'errcheck' errors in tools/dep_tree_resolver/go_deps.go
355359
- path: (.+)\.go$
356360
text: Error return value of `io.WriteString` is not checked

comp/core/workloadmeta/collectors/internal/kubelet/kubelet_test.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import (
3434
func TestPodParser(t *testing.T) {
3535
creationTimestamp := time.Date(2025, time.January, 1, 12, 0, 0, 0, time.UTC)
3636
startTime := creationTimestamp.Add(time.Minute)
37+
conditionTransitionTime := creationTimestamp.Add(30 * time.Second)
3738

3839
referencePod := []*kubelet.Pod{
3940
{
@@ -129,8 +130,9 @@ func TestPodParser(t *testing.T) {
129130
Reason: "SomeReason",
130131
Conditions: []kubelet.Conditions{
131132
{
132-
Type: string(corev1.PodReady),
133-
Status: string(corev1.ConditionTrue),
133+
Type: string(corev1.PodReady),
134+
Status: string(corev1.ConditionTrue),
135+
LastTransitionTime: conditionTransitionTime,
134136
},
135137
},
136138
PodIP: "127.0.0.1",
@@ -388,8 +390,9 @@ func TestPodParser(t *testing.T) {
388390
},
389391
Conditions: []workloadmeta.KubernetesPodCondition{
390392
{
391-
Type: "Ready",
392-
Status: "True",
393+
Type: "Ready",
394+
Status: "True",
395+
LastTransitionTime: conditionTransitionTime,
393396
},
394397
},
395398
Volumes: []workloadmeta.KubernetesPodVolume{

comp/core/workloadmeta/collectors/util/kubelet.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -586,9 +586,10 @@ func convertConditions(conditions []kubelet.Conditions) []workloadmeta.Kubernete
586586

587587
for i, condition := range conditions {
588588
result[i] = workloadmeta.KubernetesPodCondition{
589-
Type: condition.Type,
590-
Status: condition.Status,
591-
Reason: condition.Reason,
589+
Type: condition.Type,
590+
Status: condition.Status,
591+
Reason: condition.Reason,
592+
LastTransitionTime: condition.LastTransitionTime,
592593
}
593594
}
594595

comp/core/workloadmeta/def/types.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1078,9 +1078,10 @@ func (t KubernetesPodToleration) String(_ bool) string {
10781078

10791079
// KubernetesPodCondition represents a condition in a Kubernetes pod status.
10801080
type KubernetesPodCondition struct {
1081-
Type string
1082-
Status string
1083-
Reason string
1081+
Type string
1082+
Status string
1083+
Reason string
1084+
LastTransitionTime time.Time
10841085
}
10851086

10861087
// String returns a string representation of KubernetesPodCondition.

comp/otelcol/collector-contrib/impl/go.mod

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ require (
202202
github.com/edsrzf/mmap-go v1.2.0 // indirect
203203
github.com/elastic/go-grok v0.3.1 // indirect
204204
github.com/elastic/lunes v0.2.0 // indirect
205-
github.com/emicklei/go-restful/v3 v3.12.2 // indirect
205+
github.com/emicklei/go-restful/v3 v3.13.0 // indirect
206206
github.com/envoyproxy/go-control-plane/envoy v1.36.0 // indirect
207207
github.com/envoyproxy/protoc-gen-validate v1.3.0 // indirect
208208
github.com/expr-lang/expr v1.17.8 // indirect
@@ -219,8 +219,8 @@ require (
219219
github.com/go-ole/go-ole v1.3.0 // indirect
220220
github.com/go-openapi/analysis v0.24.1 // indirect
221221
github.com/go-openapi/errors v0.22.4 // indirect
222-
github.com/go-openapi/jsonpointer v0.22.1 // indirect
223-
github.com/go-openapi/jsonreference v0.21.3 // indirect
222+
github.com/go-openapi/jsonpointer v0.22.4 // indirect
223+
github.com/go-openapi/jsonreference v0.21.4 // indirect
224224
github.com/go-openapi/loads v0.23.2 // indirect
225225
github.com/go-openapi/spec v0.22.1 // indirect
226226
github.com/go-openapi/strfmt v0.25.0 // indirect
@@ -250,7 +250,7 @@ require (
250250
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
251251
github.com/golang/protobuf v1.5.4 // indirect
252252
github.com/golang/snappy v1.0.0 // indirect
253-
github.com/google/gnostic-models v0.7.0 // indirect
253+
github.com/google/gnostic-models v0.7.1 // indirect
254254
github.com/google/go-cmp v0.7.0 // indirect
255255
github.com/google/go-querystring v1.2.0 // indirect
256256
github.com/google/go-tpm v0.9.8 // indirect
@@ -325,6 +325,8 @@ require (
325325
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
326326
github.com/oklog/ulid v1.3.1 // indirect
327327
github.com/oklog/ulid/v2 v2.1.1 // indirect
328+
github.com/onsi/ginkgo/v2 v2.27.5 // indirect
329+
github.com/onsi/gomega v1.39.0 // indirect
328330
github.com/open-telemetry/opentelemetry-collector-contrib/extension/observer v0.147.0 // indirect
329331
github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/ecsutil v0.147.0 // indirect
330332
github.com/open-telemetry/opentelemetry-collector-contrib/internal/common v0.147.0 // indirect
@@ -397,8 +399,6 @@ require (
397399
github.com/stackitcloud/stackit-sdk-go/core v0.20.1 // indirect
398400
github.com/stretchr/objx v0.5.2 // indirect
399401
github.com/stretchr/testify v1.11.1 // indirect
400-
github.com/tidwall/gjson v1.18.0 // indirect
401-
github.com/tidwall/pretty v1.2.1 // indirect
402402
github.com/tilinna/clock v1.1.0 // indirect
403403
github.com/tinylib/msgp v1.6.3 // indirect
404404
github.com/tklauser/go-sysconf v0.3.16 // indirect
@@ -533,8 +533,8 @@ require (
533533
k8s.io/apimachinery v0.35.1 // indirect
534534
k8s.io/client-go v0.35.1 // indirect
535535
k8s.io/klog/v2 v2.130.1 // indirect
536-
k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect
537-
k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 // indirect
536+
k8s.io/kube-openapi v0.0.0-20251125145642-4e65d59e963e // indirect
537+
k8s.io/utils v0.0.0-20251222233032-718f0e51e6d2 // indirect
538538
sigs.k8s.io/controller-runtime v0.23.1 // indirect
539539
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect
540540
sigs.k8s.io/randfill v1.0.0 // indirect

comp/otelcol/collector-contrib/impl/go.sum

Lines changed: 16 additions & 17 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

comp/otelcol/ddflareextension/impl/go.mod

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,7 @@ require (
4747
go.yaml.in/yaml/v2 v2.4.3
4848
)
4949

50-
require (
51-
github.com/puzpuzpuz/xsync/v4 v4.4.0 // indirect
52-
github.com/tidwall/gjson v1.18.0 // indirect
53-
github.com/tidwall/pretty v1.2.1 // indirect
54-
)
50+
require github.com/puzpuzpuz/xsync/v4 v4.4.0 // indirect
5551

5652
require (
5753
github.com/DataDog/datadog-agent/comp/core/secrets/noop-impl v0.77.0-devel.0.20260213154712-e02b9359151a // indirect
@@ -91,6 +87,8 @@ require (
9187
github.com/hashicorp/go-msgpack v1.1.5 // indirect
9288
github.com/hashicorp/go-msgpack/v2 v2.1.2 // indirect
9389
github.com/klauspost/cpuid/v2 v2.3.0 // indirect
90+
github.com/onsi/ginkgo/v2 v2.27.5 // indirect
91+
github.com/onsi/gomega v1.39.0 // indirect
9492
github.com/open-telemetry/opentelemetry-collector-contrib/internal/healthcheck v0.147.0 // indirect
9593
github.com/open-telemetry/opentelemetry-collector-contrib/pkg/status v0.147.0 // indirect
9694
github.com/outcaste-io/ristretto v0.2.3 // indirect
@@ -264,7 +262,7 @@ require (
264262
github.com/edsrzf/mmap-go v1.2.0 // indirect
265263
github.com/elastic/go-grok v0.3.1 // indirect
266264
github.com/elastic/lunes v0.2.0 // indirect
267-
github.com/emicklei/go-restful/v3 v3.12.2 // indirect
265+
github.com/emicklei/go-restful/v3 v3.13.0 // indirect
268266
github.com/envoyproxy/go-control-plane/envoy v1.36.0 // indirect; indrc.1irect
269267
github.com/envoyproxy/protoc-gen-validate v1.3.0 // indirect
270268
github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb // indirect
@@ -280,8 +278,8 @@ require (
280278
github.com/go-ole/go-ole v1.3.0 // indirect
281279
github.com/go-openapi/analysis v0.24.1 // indirect
282280
github.com/go-openapi/errors v0.22.4 // indirect
283-
github.com/go-openapi/jsonpointer v0.22.1 // indirect
284-
github.com/go-openapi/jsonreference v0.21.3 // indirect
281+
github.com/go-openapi/jsonpointer v0.22.4 // indirect
282+
github.com/go-openapi/jsonreference v0.21.4 // indirect
285283
github.com/go-openapi/loads v0.23.2 // indirect
286284
github.com/go-openapi/spec v0.22.1 // indirect
287285
github.com/go-openapi/strfmt v0.25.0 // indirect
@@ -298,7 +296,7 @@ require (
298296
github.com/golang-jwt/jwt/v5 v5.3.0 // indirect
299297
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
300298
github.com/golang/snappy v1.0.0 // indirect
301-
github.com/google/gnostic-models v0.7.0 // indirect
299+
github.com/google/gnostic-models v0.7.1 // indirect
302300
github.com/google/go-querystring v1.2.0 // indirect
303301
github.com/google/go-tpm v0.9.8 // indirect
304302
github.com/google/s2a-go v0.1.9 // indirect
@@ -526,8 +524,8 @@ require (
526524
k8s.io/apimachinery v0.35.1 // indirect
527525
k8s.io/client-go v0.35.1 // indirect
528526
k8s.io/klog/v2 v2.130.1 // indirect
529-
k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect
530-
k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 // indirect
527+
k8s.io/kube-openapi v0.0.0-20251125145642-4e65d59e963e // indirect
528+
k8s.io/utils v0.0.0-20251222233032-718f0e51e6d2 // indirect
531529
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect
532530
sigs.k8s.io/yaml v1.6.0 // indirect
533531
)

0 commit comments

Comments
 (0)