Summary
Add OpenTelemetry tracing integration to enable distributed tracing across network operations, providing visibility into request flows and performance bottlenecks.
Current State
- Basic metrics reporting via `metric_reporter`
- No distributed tracing support
- No correlation between requests across services
OpenTelemetry Benefits
- Standardized tracing API
- Vendor-neutral (export to Jaeger, Zipkin, OTLP, etc.)
- Context propagation for distributed systems
- Integration with existing observability platforms
Proposed Implementation
1. Tracing Context
namespace kcenon::network::tracing {

// Forward declaration: the span factories below return `span` by value, but the
// span class is only defined after trace_context.
// NOTE(review): assumes span lives in kcenon::network::tracing — confirm.
class span;

/// Identifiers that locate one span inside a distributed trace, plus the
/// sampling decision. Holds a trace id, the span id, the parent span id and a
/// `sampled` flag that defaults to true.
class trace_context {
public:
    /// Get current trace context from thread-local storage.
    [[nodiscard]] static auto current() -> trace_context;

    /// Create new root span.
    /// The result is [[nodiscard]]: span ends itself on destruction, so a
    /// discarded return value would end immediately.
    /// @param name Name given to the new span.
    [[nodiscard]] static auto create_span(std::string_view name) -> span;

    /// Create child span.
    /// @param name Name given to the new span.
    [[nodiscard]] auto create_child_span(std::string_view name) const -> span;

    // Propagation

    /// Serialise this context as header name/value pairs.
    [[nodiscard]] auto to_headers() const
        -> std::vector<std::pair<std::string, std::string>>;

    /// Rebuild a context from received headers.
    /// @param headers Headers of the incoming message.
    [[nodiscard]] static auto from_headers(const std::vector<http_header>& headers)
        -> trace_context;

private:
    std::string trace_id_;
    std::string span_id_;
    std::string parent_span_id_;
    bool sampled_{true};
};

} // namespace kcenon::network::tracing
2. Span Class
/// One traced operation. Attributes, events and a status can be attached while
/// the span is alive; the span is ended either explicitly via end() or by the
/// destructor.
///
/// The type is move-only. Implementation state is hidden behind a pimpl, so
/// the special member functions are declared here and must be defined where
/// `impl` is a complete type.
///
/// Requires <type_traits> for the integral set_attribute overload.
class span {
public:
    /// @param name Name of the span.
    /// @param ctx  Trace context the span is created with.
    span(std::string_view name, trace_context ctx);
    ~span(); // Auto-end on destruction

    // The user-declared destructor suppresses the implicit move operations and
    // the unique_ptr member deletes the copy operations, so without these
    // declarations a span could be neither copied nor moved (it could only be
    // returned as a prvalue). Make the intended move-only semantics explicit.
    span(span&& other) noexcept;
    auto operator=(span&& other) noexcept -> span&;
    span(const span&) = delete;
    auto operator=(const span&) -> span& = delete;

    // Set attributes
    auto set_attribute(std::string_view key, std::string_view value) -> span&;
    auto set_attribute(std::string_view key, int64_t value) -> span&;
    auto set_attribute(std::string_view key, double value) -> span&;

    /// Integral convenience overload. Without it, passing any integer type
    /// other than int64_t itself (int, std::size_t, std::uint16_t, ...) is
    /// ambiguous: the conversions to int64_t and to double have the same rank.
    /// bool is excluded on purpose so it is not silently recorded as 0/1.
    /// Unsigned values above the int64_t range wrap when converted.
    template <typename Integer,
              std::enable_if_t<std::is_integral_v<Integer> &&
                                   !std::is_same_v<Integer, bool> &&
                                   !std::is_same_v<Integer, int64_t>,
                               int> = 0>
    auto set_attribute(std::string_view key, Integer value) -> span& {
        return set_attribute(key, static_cast<int64_t>(value));
    }

    // Record events
    auto add_event(std::string_view name) -> span&;
    auto add_event(std::string_view name,
                   const std::map<std::string, std::string>& attrs) -> span&;

    // Status
    auto set_status(span_status status) -> span&;
    auto set_error(std::string_view message) -> span&;

    // Manual end
    void end();

    // Get context for propagation
    [[nodiscard]] auto context() const -> trace_context;

private:
    struct impl;
    std::unique_ptr<impl> impl_;
};
// Scoped-span helper: declares a local variable named `_span`, initialised
// from trace_context::create_span(name). The variable name is fixed, so the
// macro can be used at most once per scope.
#define NETWORK_TRACE_SPAN(name) \
    auto _span = kcenon::network::tracing::trace_context::create_span((name))
3. Integration Points
TCP Client
// Connects to host:port inside a "tcp.client.connect" span. The peer name and
// port are recorded as span attributes, and a failed connect marks the span
// with the error message. The result of do_connect is returned unchanged.
auto messaging_client::start_client(host, port) -> VoidResult {
    NETWORK_TRACE_SPAN("tcp.client.connect");
    _span.set_attribute("net.peer.name", host);
    // Explicit cast: an integral argument that is not exactly int64_t converts
    // equally well to the int64_t and double overloads, which makes the call
    // ambiguous.
    _span.set_attribute("net.peer.port", static_cast<int64_t>(port));

    auto result = do_connect(host, port);
    if (result.is_err()) {
        _span.set_error(result.error().message);
    }
    return result;
}
// Sends `data` inside a "tcp.client.send" span and records the payload size as
// the "message.size" attribute. Returns whatever do_send returns.
auto messaging_client::send_packet(data) -> VoidResult {
    NETWORK_TRACE_SPAN("tcp.client.send");
    // size() is read before data is moved from. The explicit cast avoids an
    // ambiguous call: an unsigned size converts equally well to the int64_t
    // and double overloads of set_attribute.
    _span.set_attribute("message.size", static_cast<int64_t>(data.size()));
    return do_send(std::move(data));
}
HTTP/2 Client
// Issues a GET request inside an "http2.request" span. The method and URL are
// recorded as attributes, trace-propagation headers are appended to `headers`
// before the request is sent, and on success the response status code is
// recorded. Returns the result of do_request unchanged.
auto http2_client::get(path, headers) -> Result<http2_response> {
    NETWORK_TRACE_SPAN("http2.request");
    _span.set_attribute("http.method", "GET");
    _span.set_attribute("http.url", build_url(path));

    // Inject trace context into request headers.
    // Use the request span's own context (span::context() is the accessor
    // documented for propagation) so the outgoing headers identify this span.
    // trace_context::current() would only do so if create_span also installed
    // the new span in thread-local storage, which the API does not state.
    const auto trace_headers = _span.context().to_headers();
    for (const auto& [k, v] : trace_headers) {
        headers.push_back({k, v});
    }

    auto result = do_request("GET", path, headers, {});
    if (result.is_ok()) {
        // Explicit cast: an integral argument that is not exactly int64_t is
        // ambiguous between the int64_t and double overloads.
        _span.set_attribute("http.status_code",
                            static_cast<int64_t>(result.value().status_code));
    }
    return result;
}
QUIC Connection
// Sends `data` on the given stream inside a "quic.stream.send" span, recording
// the stream id and payload length as attributes. Returns whatever
// do_send_stream returns.
auto connection::send_stream_data(stream_id, data) -> VoidResult {
    NETWORK_TRACE_SPAN("quic.stream.send");
    // Explicit casts: integral arguments that are not exactly int64_t convert
    // equally well to the int64_t and double overloads of set_attribute, which
    // makes the call ambiguous.
    _span.set_attribute("quic.stream_id", static_cast<int64_t>(stream_id));
    _span.set_attribute("quic.data_length", static_cast<int64_t>(data.size()));
    return do_send_stream(stream_id, data);
}
4. Exporter Configuration
/// Plain settings aggregate for the tracing subsystem. A default-constructed
/// value has the exporter set to `none`.
struct tracing_config {
    /// Back-end that finished spans are exported to.
    enum class exporter_type {
        none,       ///< Disabled
        console,    ///< Console output
        otlp_grpc,  ///< OTLP gRPC
        otlp_http,  ///< OTLP HTTP
        jaeger,     ///< Jaeger
        zipkin,     ///< Zipkin
    };

    /// Selected exporter.
    exporter_type exporter{exporter_type::none};
    /// Collector endpoint.
    std::string endpoint{};
    /// Sampling rate, 0.0 to 1.0.
    double sample_rate{1.0};
    /// Service name attached to the configuration.
    std::string service_name{"network_system"};
    /// Additional resource attributes as key/value pairs.
    std::map<std::string, std::string> resource_attributes{};
};
// Entry point that receives the tracing settings.
// NOTE(review): presumably installs the exporter selected in `config`; the
// implementation is not shown here — confirm, including whether it may be
// called more than once.
void configure_tracing(const tracing_config& config);
// Counterpart to configure_tracing.
// NOTE(review): presumably flushes pending spans and releases the exporter —
// confirm against the implementation.
void shutdown_tracing();
5. Semantic Conventions
Follow OpenTelemetry semantic conventions:
| Attribute | Description |
|---|---|
| net.peer.name | Remote hostname |
| net.peer.port | Remote port |
| net.transport | Transport protocol (tcp, udp, quic) |
| http.method | HTTP method |
| http.url | Full URL |
| http.status_code | HTTP status code |
| rpc.system | RPC system (grpc) |
| rpc.service | RPC service name |
| rpc.method | RPC method name |
Tasks
Acceptance Criteria
Dependencies
- OpenTelemetry C++ SDK (opentelemetry-cpp)
Files to Create
include/kcenon/network/tracing/trace_context.h
include/kcenon/network/tracing/span.h
include/kcenon/network/tracing/tracing_config.h
src/tracing/trace_context.cpp
src/tracing/span.cpp
src/tracing/exporters.cpp
tests/test_tracing.cpp
Related
Summary
Add OpenTelemetry tracing integration to enable distributed tracing across network operations, providing visibility into request flows and performance bottlenecks.
Current State
metric_reporter
OpenTelemetry Benefits
Proposed Implementation
1. Tracing Context
2. Span Class
3. Integration Points
TCP Client
HTTP/2 Client
QUIC Connection
4. Exporter Configuration
5. Semantic Conventions
Follow OpenTelemetry semantic conventions:
net.peer.name
net.peer.port
net.transport
http.method
http.url
http.status_code
rpc.system
rpc.service
rpc.method
Tasks
trace_context class
span class with RAII
Acceptance Criteria
Dependencies
Files to Create
include/kcenon/network/tracing/trace_context.h
include/kcenon/network/tracing/span.h
include/kcenon/network/tracing/tracing_config.h
src/tracing/trace_context.cpp
src/tracing/span.cpp
src/tracing/exporters.cpp
tests/test_tracing.cpp
Related