Skip to content

Commit c704a07

Browse files
authored
OpenTelemetry Support for Tracing in ToolHive Operator (#1720)
Adds OpenTelemetry (OTel) tracing support to the ToolHive operator.

Signed-off-by: ChrisJBurns <29541485+ChrisJBurns@users.noreply.github.com>
1 parent b5332a3 commit c704a07

25 files changed

Lines changed: 360 additions & 82 deletions

cmd/thv-operator/api/v1alpha1/mcpserver_types.go

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,10 @@ type OpenTelemetryConfig struct {
482482
// Metrics defines OpenTelemetry metrics-specific configuration
483483
// +optional
484484
Metrics *OpenTelemetryMetricsConfig `json:"metrics,omitempty"`
485+
486+
// Tracing defines OpenTelemetry tracing configuration
487+
// +optional
488+
Tracing *OpenTelemetryTracingConfig `json:"tracing,omitempty"`
485489
}
486490

487491
// PrometheusConfig defines Prometheus-specific configuration
@@ -492,10 +496,23 @@ type PrometheusConfig struct {
492496
Enabled bool `json:"enabled,omitempty"`
493497
}
494498

499+
// OpenTelemetryTracingConfig defines OpenTelemetry tracing configuration
500+
type OpenTelemetryTracingConfig struct {
501+
// Enabled controls whether OTLP tracing is sent
502+
// +kubebuilder:default=false
503+
// +optional
504+
Enabled bool `json:"enabled,omitempty"`
505+
506+
// SamplingRate is the trace sampling rate (0.0-1.0)
507+
// +kubebuilder:default="0.05"
508+
// +optional
509+
SamplingRate string `json:"samplingRate,omitempty"`
510+
}
511+
495512
// OpenTelemetryMetricsConfig defines OpenTelemetry metrics configuration
496513
type OpenTelemetryMetricsConfig struct {
497514
// Enabled controls whether OTLP metrics are sent
498-
// +kubebuilder:default=true
515+
// +kubebuilder:default=false
499516
// +optional
500517
Enabled bool `json:"enabled,omitempty"`
501518
}

cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 20 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cmd/thv-operator/controllers/mcpserver_controller.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1664,6 +1664,25 @@ func (*MCPServerReconciler) generateOpenTelemetryArgs(m *mcpv1alpha1.MCPServer)
16641664
args = append(args, "--otel-insecure")
16651665
}
16661666

1667+
// Handle tracing configuration
1668+
if otel.Tracing != nil {
1669+
if otel.Tracing.Enabled {
1670+
args = append(args, "--otel-tracing-enabled=true")
1671+
args = append(args, fmt.Sprintf("--otel-tracing-sampling-rate=%s", otel.Tracing.SamplingRate))
1672+
} else {
1673+
args = append(args, "--otel-tracing-enabled=false")
1674+
}
1675+
}
1676+
1677+
// Handle metrics configuration
1678+
if otel.Metrics != nil {
1679+
if otel.Metrics.Enabled {
1680+
args = append(args, "--otel-metrics-enabled=true")
1681+
} else {
1682+
args = append(args, "--otel-metrics-enabled=false")
1683+
}
1684+
}
1685+
16671686
return args
16681687
}
16691688

cmd/thv-operator/controllers/mcpserver_opentelemetry_test.go

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ func TestTelemetryArgs(t *testing.T) {
7676
},
7777
},
7878
{
79-
name: "OpenTelemetry with Prometheus enabled",
79+
name: "OpenTelemetry metrics with Prometheus enabled",
8080
telemetryConfig: &mcpv1alpha1.TelemetryConfig{
8181
OpenTelemetry: &mcpv1alpha1.OpenTelemetryConfig{
8282
Metrics: &mcpv1alpha1.OpenTelemetryMetricsConfig{
@@ -89,6 +89,7 @@ func TestTelemetryArgs(t *testing.T) {
8989
},
9090
prometheusEnabled: true,
9191
expectedArgs: []string{
92+
"--otel-metrics-enabled=true",
9293
"--enable-prometheus-metrics-path",
9394
},
9495
},
@@ -105,7 +106,7 @@ func TestTelemetryArgs(t *testing.T) {
105106
},
106107
},
107108
{
108-
name: "complete OpenTelemetry config",
109+
name: "complete OpenTelemetry config and prometheus enabled",
109110
telemetryConfig: &mcpv1alpha1.TelemetryConfig{
110111
OpenTelemetry: &mcpv1alpha1.OpenTelemetryConfig{
111112
ServiceName: "complete-service",
@@ -114,6 +115,10 @@ func TestTelemetryArgs(t *testing.T) {
114115
Metrics: &mcpv1alpha1.OpenTelemetryMetricsConfig{
115116
Enabled: true,
116117
},
118+
Tracing: &mcpv1alpha1.OpenTelemetryTracingConfig{
119+
Enabled: true,
120+
SamplingRate: "0.1",
121+
},
117122
},
118123
Prometheus: &mcpv1alpha1.PrometheusConfig{
119124
Enabled: true,
@@ -123,6 +128,9 @@ func TestTelemetryArgs(t *testing.T) {
123128
expectedArgs: []string{
124129
"--otel-service-name=complete-service",
125130
"--otel-headers=authorization=bearer token123",
131+
"--otel-tracing-enabled=true",
132+
"--otel-tracing-sampling-rate=0.1",
133+
"--otel-metrics-enabled=true",
126134
"--enable-prometheus-metrics-path",
127135
},
128136
},
@@ -164,7 +172,9 @@ func TestTelemetryArgs(t *testing.T) {
164172

165173
args := r.generateOpenTelemetryArgs(mcpServer)
166174
args = append(args, r.generatePrometheusArgs(mcpServer)...)
167-
assert.Equal(t, tt.expectedArgs, args)
175+
176+
// Check that all expected arguments are present, regardless of order
177+
assert.ElementsMatch(t, tt.expectedArgs, args)
168178
})
169179
}
170180
}

cmd/thv-proxyrunner/app/run.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,10 @@ var (
7373
runOtelEndpoint string
7474
runOtelServiceName string
7575
runOtelHeaders []string
76+
runOtelTracingEnabled bool
77+
runOtelMetricsEnabled bool
7678
runOtelInsecure bool
79+
runOtelTracingSamplingRate float64
7780
enablePrometheusMetricsPath bool
7881

7982
// Network isolation flag
@@ -188,9 +191,14 @@ func init() {
188191
"OpenTelemetry OTLP headers in key=value format (e.g., x-honeycomb-team=your-api-key)")
189192
runCmd.Flags().BoolVar(&runOtelInsecure, "otel-insecure", false,
190193
"Connect to the OpenTelemetry endpoint using HTTP instead of HTTPS")
194+
runCmd.Flags().BoolVar(&runOtelTracingEnabled, "otel-tracing-enabled", false,
195+
"Enable distributed tracing (when OTLP endpoint is configured)")
196+
runCmd.Flags().BoolVar(&runOtelMetricsEnabled, "otel-metrics-enabled", false,
197+
"Enable OTLP metrics export (when OTLP endpoint is configured)")
198+
runCmd.Flags().Float64Var(&runOtelTracingSamplingRate, "otel-tracing-sampling-rate", 0.0,
199+
"OpenTelemetry trace sampling rate (0.0-1.0)")
191200
runCmd.Flags().BoolVar(&enablePrometheusMetricsPath, "enable-prometheus-metrics-path", false,
192201
"Enable Prometheus-style /metrics endpoint on the main transport port")
193-
194202
runCmd.Flags().BoolVar(&runIsolateNetwork, "isolate-network", false,
195203
"Isolate the container network from the host (default: false)")
196204
runCmd.Flags().StringArrayVar(
@@ -236,7 +244,7 @@ func runCmdFunc(cmd *cobra.Command, args []string) error {
236244
// Get debug mode flag
237245
debugMode, _ := cmd.Flags().GetBool("debug")
238246

239-
finalOtelSamplingRate, finalOtelEnvironmentVariables := 0.0, []string{}
247+
finalOtelEnvironmentVariables := []string{}
240248

241249
// Create container runtime
242250
rt, err := container.NewFactory().Create(ctx)
@@ -279,8 +287,9 @@ func runCmdFunc(cmd *cobra.Command, args []string) error {
279287
WithAuditEnabled(runEnableAudit, runAuditConfig).
280288
WithOIDCConfig(oidcIssuer, oidcAudience, oidcJwksURL, oidcIntrospectionURL, oidcClientID, oidcClientSecret,
281289
runThvCABundle, runJWKSAuthTokenFile, runResourceURL, runJWKSAllowPrivateIP).
282-
WithTelemetryConfig(runOtelEndpoint, enablePrometheusMetricsPath, runOtelServiceName,
283-
finalOtelSamplingRate, runOtelHeaders, runOtelInsecure, finalOtelEnvironmentVariables).
290+
WithTelemetryConfig(runOtelEndpoint, enablePrometheusMetricsPath, runOtelTracingEnabled,
291+
runOtelMetricsEnabled, runOtelServiceName, runOtelTracingSamplingRate,
292+
runOtelHeaders, runOtelInsecure, finalOtelEnvironmentVariables).
284293
WithToolsFilter(runToolsFilter)
285294

286295
// Process environment files

cmd/thv/app/run_flags.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ type RunFlags struct {
7070
// Telemetry configuration
7171
OtelEndpoint string
7272
OtelServiceName string
73+
OtelTracingEnabled bool
74+
OtelMetricsEnabled bool
7375
OtelSamplingRate float64
7476
OtelHeaders []string
7577
OtelInsecure bool
@@ -174,6 +176,10 @@ func AddRunFlags(cmd *cobra.Command, config *RunFlags) {
174176
"OpenTelemetry OTLP endpoint URL (e.g., https://api.honeycomb.io)")
175177
cmd.Flags().StringVar(&config.OtelServiceName, "otel-service-name", "",
176178
"OpenTelemetry service name (defaults to toolhive-mcp-proxy)")
179+
cmd.Flags().BoolVar(&config.OtelTracingEnabled, "otel-tracing-enabled", false,
180+
"Enable distributed tracing (when OTLP endpoint is configured)")
181+
cmd.Flags().BoolVar(&config.OtelMetricsEnabled, "otel-metrics-enabled", false,
182+
"Enable OTLP metrics export (when OTLP endpoint is configured)")
177183
cmd.Flags().Float64Var(&config.OtelSamplingRate, "otel-sampling-rate", 0.1, "OpenTelemetry trace sampling rate (0.0-1.0)")
178184
cmd.Flags().StringArrayVar(&config.OtelHeaders, "otel-headers", nil,
179185
"OpenTelemetry OTLP headers in key=value format (e.g., x-honeycomb-team=your-api-key)")
@@ -292,8 +298,8 @@ func setupTelemetryConfiguration(cmd *cobra.Command, runFlags *RunFlags) *teleme
292298
runFlags.OtelEndpoint, runFlags.OtelSamplingRate, runFlags.OtelEnvironmentVariables)
293299

294300
return createTelemetryConfig(finalOtelEndpoint, runFlags.OtelEnablePrometheusMetricsPath,
295-
runFlags.OtelServiceName, finalOtelSamplingRate, runFlags.OtelHeaders, runFlags.OtelInsecure,
296-
finalOtelEnvironmentVariables)
301+
runFlags.OtelServiceName, runFlags.OtelTracingEnabled, runFlags.OtelMetricsEnabled, finalOtelSamplingRate,
302+
runFlags.OtelHeaders, runFlags.OtelInsecure, finalOtelEnvironmentVariables)
297303
}
298304

299305
// setupRuntimeAndValidation creates container runtime and selects environment variable validator
@@ -450,7 +456,8 @@ func buildRunnerConfig(
450456
// Set additional configurations that are still needed in old format for other parts of the system
451457
builder = builder.WithOIDCConfig(oidcIssuer, oidcAudience, oidcJwksURL, oidcIntrospectionURL, oidcClientID, oidcClientSecret,
452458
runFlags.ThvCABundle, runFlags.JWKSAuthTokenFile, runFlags.ResourceURL, runFlags.JWKSAllowPrivateIP).
453-
WithTelemetryConfig(finalOtelEndpoint, runFlags.OtelEnablePrometheusMetricsPath, runFlags.OtelServiceName,
459+
WithTelemetryConfig(finalOtelEndpoint, runFlags.OtelEnablePrometheusMetricsPath,
460+
runFlags.OtelTracingEnabled, runFlags.OtelMetricsEnabled, runFlags.OtelServiceName,
454461
finalOtelSamplingRate, runFlags.OtelHeaders, runFlags.OtelInsecure, finalOtelEnvironmentVariables).
455462
WithToolsFilter(runFlags.ToolsFilter)
456463

@@ -585,7 +592,7 @@ func createOIDCConfig(oidcIssuer, oidcAudience, oidcJwksURL, oidcIntrospectionUR
585592

586593
// createTelemetryConfig creates a telemetry configuration if any telemetry parameters are provided
587594
func createTelemetryConfig(otelEndpoint string, otelEnablePrometheusMetricsPath bool,
588-
otelServiceName string, otelSamplingRate float64, otelHeaders []string,
595+
otelServiceName string, otelTracingEnabled bool, otelMetricsEnabled bool, otelSamplingRate float64, otelHeaders []string,
589596
otelInsecure bool, otelEnvironmentVariables []string) *telemetry.Config {
590597
if otelEndpoint == "" && !otelEnablePrometheusMetricsPath {
591598
return nil
@@ -623,6 +630,8 @@ func createTelemetryConfig(otelEndpoint string, otelEnablePrometheusMetricsPath
623630
Endpoint: otelEndpoint,
624631
ServiceName: serviceName,
625632
ServiceVersion: telemetry.DefaultConfig().ServiceVersion,
633+
TracingEnabled: otelTracingEnabled,
634+
MetricsEnabled: otelMetricsEnabled,
626635
SamplingRate: otelSamplingRate,
627636
Headers: headers,
628637
Insecure: otelInsecure,

deploy/charts/operator-crds/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ apiVersion: v2
22
name: toolhive-operator-crds
33
description: A Helm chart for installing the ToolHive Operator CRDs into Kubernetes.
44
type: application
5-
version: 0.0.19
5+
version: 0.0.20
66
appVersion: "0.0.1"

deploy/charts/operator-crds/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11

22
# ToolHive Operator CRDs Helm Chart
33

4-
![Version: 0.0.19](https://img.shields.io/badge/Version-0.0.19-informational?style=flat-square)
4+
![Version: 0.0.20](https://img.shields.io/badge/Version-0.0.20-informational?style=flat-square)
55
![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
66

77
A Helm chart for installing the ToolHive Operator CRDs into Kubernetes.

deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8858,7 +8858,7 @@ spec:
88588858
configuration
88598859
properties:
88608860
enabled:
8861-
default: true
8861+
default: false
88628862
description: Enabled controls whether OTLP metrics are
88638863
sent
88648864
type: boolean
@@ -8868,6 +8868,19 @@ spec:
88688868
ServiceName is the service name for telemetry
88698869
If not specified, defaults to the MCPServer name
88708870
type: string
8871+
tracing:
8872+
description: Tracing defines OpenTelemetry tracing configuration
8873+
properties:
8874+
enabled:
8875+
default: false
8876+
description: Enabled controls whether OTLP tracing is
8877+
sent
8878+
type: boolean
8879+
samplingRate:
8880+
default: "0.05"
8881+
description: SamplingRate is the trace sampling rate (0.0-1.0)
8882+
type: string
8883+
type: object
88718884
type: object
88728885
prometheus:
88738886
description: Prometheus defines Prometheus-specific configuration

deploy/otel/README.md

Lines changed: 0 additions & 37 deletions
This file was deleted.

0 commit comments

Comments
 (0)