PrometheusRule status update after reconcillation from prometheus#8005
PrometheusRule status update after reconcillation from prometheus#8005simonpasquier merged 15 commits intoprometheus-operator:mainfrom
Conversation
…tedPrometheusRules parameters
|
@simonpasquier PTAL! |
pkg/prometheus/server/operator.go
Outdated
| type ControllerOption func(*Operator) | ||
|
|
||
| // selectedConfigResources return the configuration resources (serviceMonitors, podMonitors, probes and scrapeConfig) | ||
| // selectedConfigResources return the configuration resources (serviceMonitors, podMonitors, probes, rules and scrapeConfigs) |
There was a problem hiding this comment.
| // selectedConfigResources return the configuration resources (serviceMonitors, podMonitors, probes, rules and scrapeConfigs) | |
| // selectedConfigResources return the configuration resources (serviceMonitors, podMonitors, probes, prometheusRules and scrapeConfigs) |
pkg/operator/rules.go
Outdated
| // Select selects PrometheusRules by Prometheus or ThanosRuler. | ||
| // The second returned value is the number of rejected PrometheusRule objects. | ||
| func (prs *PrometheusRuleSelector) Select(namespaces []string) (map[string]string, int, error) { | ||
| func (prs *PrometheusRuleSelector) Select(namespaces []string) (TypedResourcesSelection[*monitoringv1.PrometheusRule], int, error) { |
There was a problem hiding this comment.
we don't need to return the rejected number anymore.
pkg/operator/rules.go
Outdated
| rules[ruleName] = TypedConfigurationResource[*monitoringv1.PrometheusRule]{ | ||
| resource: promRule, | ||
| err: err, | ||
| reason: InvalidConfiguration, |
There was a problem hiding this comment.
hmm? it doesn't look right, reason should be empty if no error.
pkg/prometheus/server/rules.go
Outdated
| func (c *Operator) createOrUpdateRuleConfigMaps(ctx context.Context, p *monitoringv1.Prometheus) ([]string, error) { | ||
| logger := c.logger.With("prometheus", p.Name, "namespace", p.Namespace) | ||
|
|
||
| func (c *Operator) getSelectedPrometheusRules(p *monitoringv1.Prometheus, logger *slog.Logger) (operator.TypedResourcesSelection[*monitoringv1.PrometheusRule], error) { |
There was a problem hiding this comment.
| func (c *Operator) getSelectedPrometheusRules(p *monitoringv1.Prometheus, logger *slog.Logger) (operator.TypedResourcesSelection[*monitoringv1.PrometheusRule], error) { | |
| func (c *Operator) selectPrometheusRules(p *monitoringv1.Prometheus, logger *slog.Logger) (operator.TypedResourcesSelection[*monitoringv1.PrometheusRule], error) { |
pkg/operator/config_resource.go
Outdated
| err error // Error encountered during selection or validation (nil if valid). | ||
| reason string // Reason for rejection; empty if accepted. | ||
| generation int64 // Generation of the desired state (spec). | ||
| content string // Marshalled content of the resource (only for PrometheusRules). |
There was a problem hiding this comment.
rather than extending TypedConfigurationResource with a field which is only useful for PrometheusRule what if we create a specific struct for PrometheusRule wrapping a TypedResourcesSelection + a map[string]string?
…s and update related logic
…unt and rename methods
|
@simonpasquier PTAL |
pkg/operator/config_resource.go
Outdated
| // TypedResourcesSelection represents a map of configuration resources selected by Prometheus or PrometheusAgent. | ||
| type TypedResourcesSelection[T ConfigurationResource] map[string]TypedConfigurationResource[T] | ||
|
|
||
| type SelectedPrometheusRules struct { |
There was a problem hiding this comment.
(nit)
| type SelectedPrometheusRules struct { | |
| type PrometheusRuleSelection struct { |
pkg/operator/config_resource.go
Outdated
| type TypedResourcesSelection[T ConfigurationResource] map[string]TypedConfigurationResource[T] | ||
|
|
||
| type SelectedPrometheusRules struct { | ||
| Rules TypedResourcesSelection[*monitoringv1.PrometheusRule] // PrometheusRules selected. |
There was a problem hiding this comment.
Let's avoid public fields.
| Rules TypedResourcesSelection[*monitoringv1.PrometheusRule] // PrometheusRules selected. | |
| selection TypedResourcesSelection[*monitoringv1.PrometheusRule] // PrometheusRules selected. |
pkg/operator/config_resource.go
Outdated
|
|
||
| type SelectedPrometheusRules struct { | ||
| Rules TypedResourcesSelection[*monitoringv1.PrometheusRule] // PrometheusRules selected. | ||
| MarshalRules map[string]string // Map of valid marshalled rules. |
There was a problem hiding this comment.
| MarshalRules map[string]string // Map of valid marshalled rules. | |
| // Map of rule configuration files serialized to the Prometheus format (key=filename). | |
| ruleFiles map[string]string |
pkg/thanos/rules.go
Outdated
| if tKey, ok := o.accessor.MetaNamespaceKey(t); ok { | ||
| o.metrics.SetSelectedResources(tKey, monitoringv1.PrometheusRuleKind, len(rules)) | ||
| o.metrics.SetRejectedResources(tKey, monitoringv1.PrometheusRuleKind, rejected) | ||
| o.metrics.SetSelectedResources(tKey, monitoringv1.PrometheusRuleKind, len(rules.MarshalRules)) |
There was a problem hiding this comment.
For other resources, we report the number of selected resources, irrespective of their validation state?
I think that we should the same here.
pkg/prometheus/server/rules.go
Outdated
| if pKey, ok := c.accessor.MetaNamespaceKey(p); ok { | ||
| c.metrics.SetSelectedResources(pKey, monitoringv1.PrometheusRuleKind, len(rules)) | ||
| c.metrics.SetRejectedResources(pKey, monitoringv1.PrometheusRuleKind, rejected) | ||
| c.metrics.SetSelectedResources(pKey, monitoringv1.PrometheusRuleKind, len(rules.MarshalRules)) |
There was a problem hiding this comment.
same remark as below for ThanosRuler
pkg/prometheus/server/operator.go
Outdated
|
|
||
| rules, err := c.selectPrometheusRules(p, logger) | ||
| if err != nil { | ||
| return nil, err |
There was a problem hiding this comment.
(nit) to be consistent
| return nil, err | |
| return nil, fmt.Errorf("selecting PrometheusRule failed: %w", err) |
pkg/prometheus/server/rules.go
Outdated
| return rules, nil | ||
| } | ||
|
|
||
| func (c *Operator) createOrUpdateRuleConfigMaps(ctx context.Context, p *monitoringv1.Prometheus, rules map[string]string, logger *slog.Logger) ([]string, error) { |
There was a problem hiding this comment.
(nit) it can receive a operator.SelectedPrometheusRules argument instead of an anonymous map[string]string
…n and update related logic
|
@simonpasquier merge request |
pkg/operator/rules.go
Outdated
| // Select selects PrometheusRules and translates them into native | ||
| // Prometheus/Thanos configurations. | ||
| // Select selects PrometheusRules by Prometheus or ThanosRuler. | ||
| // The second returned value is the number of rejected PrometheusRule objects. |
There was a problem hiding this comment.
| // The second returned value is the number of rejected PrometheusRule objects. |
pkg/operator/config_resource.go
Outdated
| return validRes | ||
| } | ||
|
|
||
| type PrometheusRuleSelection struct { |
There was a problem hiding this comment.
(nit) can we move this new code to rules.go instead?
pkg/operator/config_resource.go
Outdated
|
|
||
| type PrometheusRuleSelection struct { | ||
| selection TypedResourcesSelection[*monitoringv1.PrometheusRule] // PrometheusRules selected. | ||
| ruleFiles map[string]string // Map of rule configuration files serialized to the Prometheus format (key=filename). |
There was a problem hiding this comment.
(suggestion) add a field to store the number of rejected resources so callers don't have to compute it?
the number of selected resources is len(prs.Selected()).
| ruleFiles map[string]string // Map of rule configuration files serialized to the Prometheus format (key=filename). | |
| rejected int |
There was a problem hiding this comment.
added a function to get Rejected numbers
pkg/prometheus/server/rules.go
Outdated
| if pKey, ok := c.accessor.MetaNamespaceKey(p); ok { | ||
| c.metrics.SetSelectedResources(pKey, monitoringv1.PrometheusRuleKind, len(rules)) | ||
| c.metrics.SetRejectedResources(pKey, monitoringv1.PrometheusRuleKind, rejected) | ||
| c.metrics.SetSelectedResources(pKey, monitoringv1.PrometheusRuleKind, len(rules.RuleFiles())) |
There was a problem hiding this comment.
| c.metrics.SetSelectedResources(pKey, monitoringv1.PrometheusRuleKind, len(rules.RuleFiles())) | |
| c.metrics.SetSelectedResources(pKey, monitoringv1.PrometheusRuleKind, len(rules.Selected())) |
…les.go, update selection logic
@simonpasquier this test is failing i dont think this is related to my changes, can u rerun the test, as it failed last to last time also, but last time it passed |
4ca908c
into
prometheus-operator:main
Description
PrometheusRule status update after reconcillation from prometheus
Closes: #7923
Type of change
What type of changes does your code introduce to the Prometheus operator? Put an
xin the box that apply.CHANGE(fix or feature that would cause existing functionality to not work as expected)FEATURE(non-breaking change which adds functionality)BUGFIX(non-breaking change which fixes an issue)ENHANCEMENT(non-breaking change which improves existing functionality)NONE(if none of the other choices apply. Example, tooling, build system, CI, docs, etc.)Verification
Please check the Prometheus-Operator testing guidelines for recommendations about automated tests.
Changelog entry
Please put a one-line changelog entry below. This will be copied to the changelog file during the release process.