Skip to content

Commit 88bcf61

Browse files
authored
Merge branch '9.1' into mergify/bp/9.1/pr-9257
2 parents ffff388 + 1f32480 commit 88bcf61

25 files changed

Lines changed: 534 additions & 87 deletions

.github/CODEOWNERS

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,12 @@
1616
/dev-tools/kubernetes @elastic/elastic-agent-control-plane
1717
/docs/release-notes @elastic/ingest-docs
1818
/docs/docset.yml @elastic/ingest-docs
19+
/.github/workflows/update-docs.yml @elastic/ingest-docs
20+
/docs/reference/edot-collector @elastic/ingest-docs
21+
/docs/scripts/update-docs @elastic/ingest-docs
22+
/internal/pkg/otel/samples @elastic/ingest-otel-data @elastic/ingest-docs
23+
/internal/pkg/otel/core-components.yaml @elastic/ingest-otel-data @elastic/ingest-docs
1924
/internal/pkg/composable/providers/kubernetes @elastic/elastic-agent-control-plane
2025
/internal/pkg/otel/samples/darwin/autoops_es.yml @elastic/opex
2126
/internal/pkg/otel/samples/linux/autoops_es.yml @elastic/opex
22-
/internal/pkg/otel/samples/windows/autoops_es.yml @elastic/opex
27+
/internal/pkg/otel/samples/windows/autoops_es.yml @elastic/opex
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Kind can be one of:
2+
# - breaking-change: a change to previously-documented behavior
3+
# - deprecation: functionality that is being removed in a later release
4+
# - bug-fix: fixes a problem in a previous version
5+
# - enhancement: extends functionality but does not break or fix existing behavior
6+
# - feature: new functionality
7+
# - known-issue: problems that we are aware of in a given version
8+
# - security: impacts on the security of a product or a user’s deployment.
9+
# - upgrade: important information for someone upgrading from a prior version
10+
# - other: does not fit into any of the other categories
11+
kind: enhancement
12+
13+
# Change summary; an 80ish-character description of the change.
14+
summary: agent cleans up downloads directory and the new versioned home if upgrade fails
15+
16+
# Long description; in case the summary is not enough to describe the change
17+
# this field accommodates a description without length limits.
18+
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
19+
#description:
20+
21+
# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
22+
component: "elastic-agent"
23+
24+
# PR URL; optional; the PR number that added the changeset.
25+
# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added.
26+
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
27+
# Please provide it if you are adding a fragment for a different PR.
28+
pr: https://github.com/elastic/elastic-agent/pull/9386
29+
30+
# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
31+
# If not present is automatically filled by the tooling with the issue linked to the PR number.
32+
issue: https://github.com/elastic/elastic-agent/issues/5235
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Kind can be one of:
2+
# - breaking-change: a change to previously-documented behavior
3+
# - deprecation: functionality that is being removed in a later release
4+
# - bug-fix: fixes a problem in a previous version
5+
# - enhancement: extends functionality but does not break or fix existing behavior
6+
# - feature: new functionality
7+
# - known-issue: problems that we are aware of in a given version
8+
# - security: impacts on the security of a product or a user’s deployment.
9+
# - upgrade: important information for someone upgrading from a prior version
10+
# - other: does not fit into any of the other categories
11+
kind: enhancement
12+
13+
# Change summary; an 80ish-character description of the change.
14+
summary: when there is a disk space error during an upgrade, agent responds with clean insufficient disk space error message
15+
16+
# Long description; in case the summary is not enough to describe the change
17+
# this field accommodates a description without length limits.
18+
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
19+
#description:
20+
21+
# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
22+
component: "elastic-agent"
23+
24+
# PR URL; optional; the PR number that added the changeset.
25+
# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added.
26+
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
27+
# Please provide it if you are adding a fragment for a different PR.
28+
pr: https://github.com/elastic/elastic-agent/pull/9392
29+
30+
# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
31+
# If not present is automatically filled by the tooling with the issue linked to the PR number.
32+
issue: https://github.com/elastic/elastic-agent/issues/5235
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# REQUIRED
2+
# Kind can be one of:
3+
# - breaking-change: a change to previously-documented behavior
4+
# - deprecation: functionality that is being removed in a later release
5+
# - bug-fix: fixes a problem in a previous version
6+
# - enhancement: extends functionality but does not break or fix existing behavior
7+
# - feature: new functionality
8+
# - known-issue: problems that we are aware of in a given version
9+
# - security: impacts on the security of a product or a user’s deployment.
10+
# - upgrade: important information for someone upgrading from a prior version
11+
# - other: does not fit into any of the other categories
12+
kind: bug-fix
13+
14+
# REQUIRED for all kinds
15+
# Change summary; an 80ish-character description of the change.
16+
summary: reduce-default-telemetry-frequency
17+
18+
# REQUIRED for breaking-change, deprecation, known-issue
19+
# Long description; in case the summary is not enough to describe the change
20+
# this field accommodates a description without length limits.
21+
description:
22+
Reduce the default telemetry frequency to 60 seconds.
23+
This change aims to lower infrastructure costs and reduce label churn in
24+
time-series storage. High-cardinality labels sampled too frequently inflate
25+
storage and index size, and increase query latency with limited added
26+
value.
27+
Environments that require higher resolution can change the `collection_interval` for `hostmetrics`, `kubeletstats` and `k8s_cluster` receivers to a lower value.
28+
29+
# REQUIRED for all kinds
30+
# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
31+
component: elastic-agent
32+
33+
# AUTOMATED
34+
# OPTIONAL to manually add other PR URLs
35+
# PR URL: A link to the PR that added the changeset.
36+
# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added.
37+
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
38+
# Please provide it if you are adding a fragment for a different PR.
39+
pr: https://github.com/elastic/elastic-agent/pull/9987
40+
41+
# AUTOMATED
42+
# OPTIONAL to manually add other issue URLs
43+
# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
44+
# If not present is automatically filled by the tooling with the issue linked to the PR number.
45+
# issue: https://github.com/owner/repo/1234

deploy/helm/edot-collector/kube-stack/managed_otlp/values.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ collectors:
139139
- "DELETED"
140140
# [K8s Cluster Receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/k8sclusterreceiver)
141141
k8s_cluster:
142+
collection_interval: 60s
142143
auth_type: serviceAccount # Determines how to authenticate to the K8s API server. This can be one of none (for no auth), serviceAccount (to use the standard service account token provided to the agent pod), or kubeConfig to use credentials from ~/.kube/config.
143144
node_conditions_to_report:
144145
- Ready
@@ -375,7 +376,7 @@ collectors:
375376
type: container
376377
# [Hostmetrics Receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/hostmetricsreceiver)
377378
hostmetrics:
378-
collection_interval: 10s
379+
collection_interval: 60s
379380
root_path: /hostfs # Mounted node's root file system
380381
scrapers:
381382
cpu:
@@ -446,7 +447,7 @@ collectors:
446447
# [Kubelet Stats Receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/kubeletstatsreceiver)
447448
kubeletstats:
448449
auth_type: serviceAccount # Authentication mechanism with the Kubelet endpoint, refer to: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/kubeletstatsreceiver#configuration
449-
collection_interval: 20s
450+
collection_interval: 60s
450451
endpoint: ${env:OTEL_K8S_NODE_NAME}:10250
451452
node: "${env:OTEL_K8S_NODE_NAME}"
452453
# Required to work for all CSPs without an issue

deploy/helm/edot-collector/kube-stack/values.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ collectors:
119119
- "DELETED"
120120
# [K8s Cluster Receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/k8sclusterreceiver)
121121
k8s_cluster:
122+
collection_interval: 60s
122123
auth_type: serviceAccount # Determines how to authenticate to the K8s API server. This can be one of none (for no auth), serviceAccount (to use the standard service account token provided to the agent pod), or kubeConfig to use credentials from ~/.kube/config.
123124
node_conditions_to_report:
124125
- Ready
@@ -335,7 +336,7 @@ collectors:
335336
type: container
336337
# [Hostmetrics Receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/hostmetricsreceiver)
337338
hostmetrics:
338-
collection_interval: 10s
339+
collection_interval: 60s
339340
root_path: /hostfs # Mounted node's root file system
340341
scrapers:
341342
cpu:
@@ -406,7 +407,7 @@ collectors:
406407
# [Kubelet Stats Receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/kubeletstatsreceiver)
407408
kubeletstats:
408409
auth_type: serviceAccount # Authentication mechanism with the Kubelet endpoint, refer to: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/kubeletstatsreceiver#configuration
409-
collection_interval: 20s
410+
collection_interval: 60s
410411
endpoint: ${env:OTEL_K8S_NODE_NAME}:10250
411412
node: '${env:OTEL_K8S_NODE_NAME}'
412413
# Required to work for all CSPs without an issue

dev-tools/packaging/testing/package_test.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -844,7 +844,13 @@ func checkFIPS(t *testing.T, agentPackageRootDir string) {
844844
case "-tags":
845845
foundTags = true
846846
require.Contains(t, setting.Value, "requirefips")
847-
require.Contains(t, setting.Value, "ms_tls13kdf")
847+
// the check on ms_tls13kdf is no longer needed for go >= 1.25
848+
// It should probably be conditioned on the output of `go version <binary>`
849+
// for example:
850+
// go version elastic-agent-9.2.0-SNAPSHOT-linux-x86_64/data/elastic-agent-7b3817/components/apm-server
851+
// elastic-agent-9.2.0-SNAPSHOT-linux-x86_64/data/elastic-agent-7b3817/components/apm-server: go1.25.1
852+
//
853+
// require.Contains(t, setting.Value, "ms_tls13kdf")
848854
continue
849855
case "GOEXPERIMENT":
850856
foundExperiment = true

internal/pkg/agent/application/coordinator/coordinator.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"github.com/elastic/elastic-agent/internal/pkg/agent/application/info"
3030
"github.com/elastic/elastic-agent/internal/pkg/agent/application/reexec"
3131
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade"
32+
upgradeErrors "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact/download/errors"
3233
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details"
3334
"github.com/elastic/elastic-agent/internal/pkg/agent/configuration"
3435
"github.com/elastic/elastic-agent/internal/pkg/agent/transpiler"
@@ -671,6 +672,15 @@ func (c *Coordinator) Upgrade(ctx context.Context, version string, sourceURI str
671672
det.SetState(details.StateCompleted)
672673
return c.upgradeMgr.AckAction(ctx, c.fleetAcker, action)
673674
}
675+
676+
c.logger.Errorw("upgrade failed", "error", logp.Error(err))
677+
// If ErrInsufficientDiskSpace is in the error chain, we want to set the
678+
// error to ErrInsufficientDiskSpace so that the error message is
679+
// more concise and clear.
680+
if errors.Is(err, upgradeErrors.ErrInsufficientDiskSpace) {
681+
err = upgradeErrors.ErrInsufficientDiskSpace
682+
}
683+
674684
det.Fail(err)
675685
return err
676686
}

internal/pkg/agent/application/coordinator/coordinator_unit_test.go

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,13 @@ import (
1515
"errors"
1616
"fmt"
1717
"net"
18+
"sync"
1819
"testing"
1920
"time"
2021

2122
"github.com/elastic/elastic-agent-client/v7/pkg/proto"
23+
"github.com/elastic/elastic-agent/internal/pkg/fleetapi"
24+
"github.com/elastic/elastic-agent/internal/pkg/fleetapi/acker"
2225
"github.com/elastic/elastic-agent/internal/pkg/otel/translate"
2326

2427
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/status"
@@ -32,8 +35,10 @@ import (
3235
"github.com/elastic/elastic-agent-libs/logp"
3336
"github.com/elastic/elastic-agent/internal/pkg/agent/application/info"
3437
"github.com/elastic/elastic-agent/internal/pkg/agent/application/monitoring/reload"
38+
"github.com/elastic/elastic-agent/internal/pkg/agent/application/reexec"
3539
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade"
3640
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact"
41+
upgradeErrors "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact/download/errors"
3742
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details"
3843
"github.com/elastic/elastic-agent/internal/pkg/agent/transpiler"
3944
"github.com/elastic/elastic-agent/internal/pkg/config"
@@ -1650,3 +1655,100 @@ func (fs *fakeMonitoringServer) Reset() {
16501655
func (fs *fakeMonitoringServer) Addr() net.Addr {
16511656
return nil
16521657
}
1658+
1659+
type mockUpgradeManager struct {
1660+
upgradeErr error
1661+
}
1662+
1663+
func (m *mockUpgradeManager) Upgradeable() bool {
1664+
return true
1665+
}
1666+
1667+
func (m *mockUpgradeManager) Reload(cfg *config.Config) error {
1668+
return nil
1669+
}
1670+
1671+
func (m *mockUpgradeManager) Upgrade(ctx context.Context, version string, sourceURI string, action *fleetapi.ActionUpgrade, details *details.Details, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) (_ reexec.ShutdownCallbackFn, err error) {
1672+
return nil, m.upgradeErr
1673+
}
1674+
1675+
func (m *mockUpgradeManager) Ack(ctx context.Context, acker acker.Acker) error {
1676+
return nil
1677+
}
1678+
1679+
func (m *mockUpgradeManager) AckAction(ctx context.Context, acker acker.Acker, action fleetapi.Action) error {
1680+
return nil
1681+
}
1682+
1683+
func (m *mockUpgradeManager) MarkerWatcher() upgrade.MarkerWatcher {
1684+
return nil
1685+
}
1686+
1687+
func TestCoordinator_Upgrade_InsufficientDiskSpaceError(t *testing.T) {
1688+
log, _ := loggertest.New("coordinator-insufficient-disk-space-test")
1689+
1690+
mockUpgradeManager := &mockUpgradeManager{
1691+
upgradeErr: fmt.Errorf("wrapped: %w", upgradeErrors.ErrInsufficientDiskSpace),
1692+
}
1693+
1694+
initialState := State{
1695+
CoordinatorState: agentclient.Healthy,
1696+
CoordinatorMessage: "Running",
1697+
}
1698+
1699+
coord := &Coordinator{
1700+
state: initialState,
1701+
logger: log,
1702+
upgradeMgr: mockUpgradeManager,
1703+
stateBroadcaster: broadcaster.New(initialState, 64, 32),
1704+
overrideStateChan: make(chan *coordinatorOverrideState),
1705+
upgradeDetailsChan: make(chan *details.Details),
1706+
}
1707+
1708+
wg := sync.WaitGroup{}
1709+
wg.Add(2)
1710+
1711+
overrideStates := []agentclient.State{}
1712+
go func() {
1713+
state1 := <-coord.overrideStateChan
1714+
overrideStates = append(overrideStates, state1.state)
1715+
1716+
state2 := <-coord.overrideStateChan
1717+
if state2 != nil {
1718+
overrideStates = append(overrideStates, state2.state)
1719+
}
1720+
1721+
wg.Done()
1722+
}()
1723+
1724+
upgradeDetails := []*details.Details{}
1725+
go func() {
1726+
upgradeDetails = append(upgradeDetails, <-coord.upgradeDetailsChan)
1727+
upgradeDetails = append(upgradeDetails, <-coord.upgradeDetailsChan)
1728+
wg.Done()
1729+
}()
1730+
1731+
err := coord.Upgrade(t.Context(), "", "", nil)
1732+
require.Error(t, err)
1733+
require.Equal(t, err, upgradeErrors.ErrInsufficientDiskSpace)
1734+
1735+
wg.Wait()
1736+
1737+
require.Equal(t, []agentclient.State{agentclient.Upgrading}, overrideStates)
1738+
1739+
require.Equal(t, []*details.Details{
1740+
{
1741+
TargetVersion: "",
1742+
State: details.StateRequested,
1743+
ActionID: "",
1744+
},
1745+
{
1746+
TargetVersion: "",
1747+
State: details.StateFailed,
1748+
Metadata: details.Metadata{
1749+
FailedState: details.StateRequested,
1750+
ErrorMsg: upgradeErrors.ErrInsufficientDiskSpace.Error(),
1751+
},
1752+
},
1753+
}, upgradeDetails)
1754+
}

internal/pkg/agent/application/upgrade/artifact/download/verifier.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -106,12 +106,12 @@ func VerifySHA512HashWithCleanup(log infoWarnLogger, filename string) error {
106106
}
107107
} else if err != nil && !errors.Is(err, os.ErrNotExist) {
108108
// it's not a simple hash mismatch, probably something is wrong with the hash file
109-
hashFileName := getHashFileName(filename)
109+
hashFileName := AddHashExtension(filename)
110110
hashFileBytes, readErr := os.ReadFile(hashFileName)
111111
if readErr != nil {
112-
log.Warnf("error verifying the package using hash file %q, unable do read contents for logging: %v", getHashFileName(filename), readErr)
112+
log.Warnf("error verifying the package using hash file %q, unable do read contents for logging: %v", AddHashExtension(filename), readErr)
113113
} else {
114-
log.Warnf("error verifying the package using hash file %q, contents: %q", getHashFileName(filename), string(hashFileBytes))
114+
log.Warnf("error verifying the package using hash file %q, contents: %q", AddHashExtension(filename), string(hashFileBytes))
115115
}
116116
}
117117

@@ -121,20 +121,20 @@ func VerifySHA512HashWithCleanup(log infoWarnLogger, filename string) error {
121121
return nil
122122
}
123123

124-
func getHashFileName(filename string) string {
124+
func AddHashExtension(file string) string {
125125
const hashFileExt = ".sha512"
126-
if strings.HasSuffix(filename, hashFileExt) {
127-
return filename
126+
if strings.HasSuffix(file, hashFileExt) {
127+
return file
128128
}
129-
return filename + hashFileExt
129+
return file + hashFileExt
130130
}
131131

132132
// VerifySHA512Hash checks that a sidecar file containing a sha512 checksum
133133
// exists and that the checksum in the sidecar file matches the checksum of
134134
// the file. It returns an error if validation fails.
135135
func VerifySHA512Hash(filename string) error {
136136
hasher := sha512.New()
137-
checksumFileName := getHashFileName(filename)
137+
checksumFileName := AddHashExtension(filename)
138138
return VerifyChecksum(hasher, filename, checksumFileName)
139139
}
140140

0 commit comments

Comments
 (0)