Skip to content

Commit e307e2c

Browse files
authored
fix(llmobs): openai-java payload mapping for responses, tool metadata, and prompt tracking (#10644)
llmobs: set model tag even when llmobs disabled; Set metadata.stream tag whether it is true or false; Set chat/completion CACHE_READ_INPUT_TOKENS tag; Set error and error_type tags; Use "" instead of null for the role in CompletionDecorator to comply with TestOpenAiLlmInteractions::test_completion; Use "" instead of null for the content to comply with TestOpenAiLlmInteractions::test_chat_completion_tool_call; Add missing metadata.tool_choice; Add missing tool_definitions; Add source:integration tag; Add missing _dd attribute to the llmobs span event; Add missing error tags; Remove error from the llmobs span event. It must be part of meta block; Add missing meta.text.verbosity; Add summaryText and encrypted_content; Add missing tool_calls and tool_results for responses; Always set stream param to produce the same request body to be aligned with python openai instrumentation and system-tests; Add OpenAI prompt-tracking reconstruction for responses (input.prompt with variables + chat_template, longest-first overlap handling) and support map-based LLM input serialization (messages + prompt) in LLMObs mapper. Also filter empty instruction messages to match system-test expectations. Fix OpenAI Responses prompt tracking to use response instructions first and return [image] (not empty) when stripped input_image URLs are missing, aligning mixed-input chat_template output with expected behavior. Set LLMObs error-path defaults in Java to always emit model_name and output.messages from request params so existing error-span tests pass. Add OpenAI Responses tool definition extraction to populate LLMObs tool_definitions tags; Fix ChatCompletionServiceTest; Extract JsonValueUtils; Refactor OpenAI responses instrumentation to reuse ToolCallExtractor JSON argument parsing and remove duplicate manual parsing logic from ResponseDecorator.
Fix test assertions; Add integration tag; Add ddtrace.version; Improve test assertions; Merge branch 'master' into ygree/llmobs-systest-fixes; Fix format; Include input messages when instructions are present in prompt tracking; Fix instructions role to system in prompt tracking; Merge branch 'master' into ygree/llmobs-systest-fixes; fix LLMObsSpanMapperTest; Catch exception not throwable; Add JsonValueUtilsTest; Test that on HTTP error, the OpenAI response span retains model_name and placeholder output set by withResponseCreateParams. Add "create response with prompt tracking" test to improve coverage of enrichInputWithPromptTracking(), extractChatTemplate(), extractPromptFromParams(), and normalizePromptVariable(); Add "create response with custom tool call" test to improve coverage of getToolCall; Prevent NPE when tag value is null; Replace catch Throwable with catch Exception; responseCreateParamsWithPromptTracking supports both known and unknown formats. Tests cover extractPromptFromParams and related methods; Use safe accessors throughout OpenAI Java decorators; Simplify OpenAI decorator fallbacks without dropping raw JSON support; clean up dead code; improve coverage for chat/completion test with raw tool definition; improve coverage for extractFunctionToolDefinition; improve coverage for extractFunctionToolDefinition; improve coverage for mcp tool call; improve coverage for tool choice; Fix latestDepTest; Co-authored-by: yury.gribkov <yury.gribkov@datadoghq.com>
1 parent 65b8d53 commit e307e2c

32 files changed

Lines changed: 3235 additions & 370 deletions

File tree

dd-java-agent/agent-llmobs/src/main/java/datadog/trace/llmobs/domain/DDLLMObsSpan.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import datadog.context.ContextScope;
44
import datadog.trace.api.DDSpanTypes;
5+
import datadog.trace.api.DDTraceApiInfo;
56
import datadog.trace.api.DDTraceId;
67
import datadog.trace.api.WellKnownTags;
78
import datadog.trace.api.llmobs.LLMObs;
@@ -39,6 +40,7 @@ public class DDLLMObsSpan implements LLMObsSpan {
3940

4041
private static final String SERVICE = LLMOBS_TAG_PREFIX + "service";
4142
private static final String VERSION = LLMOBS_TAG_PREFIX + "version";
43+
private static final String DDTRACE_VERSION = LLMOBS_TAG_PREFIX + "ddtrace.version";
4244
private static final String ENV = LLMOBS_TAG_PREFIX + "env";
4345

4446
private static final String LLM_OBS_INSTRUMENTATION_NAME = "llmobs";
@@ -76,6 +78,7 @@ public DDLLMObsSpan(
7678
span.setTag(ENV, wellKnownTags.getEnv());
7779
span.setTag(SERVICE, wellKnownTags.getService());
7880
span.setTag(VERSION, wellKnownTags.getVersion());
81+
span.setTag(DDTRACE_VERSION, DDTraceApiInfo.VERSION);
7982

8083
span.setTag(SPAN_KIND, kind);
8184
spanKind = kind;

dd-java-agent/agent-llmobs/src/test/groovy/datadog/trace/llmobs/domain/DDLLMObsSpanTest.groovy

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import static datadog.trace.agent.test.utils.TraceUtils.runUnderTrace
44

55
import datadog.trace.agent.tooling.TracerInstaller
66
import datadog.trace.api.DDTags
7+
import datadog.trace.api.DDTraceApiInfo
78
import datadog.trace.api.IdGenerationStrategy
89
import datadog.trace.api.WellKnownTags
910
import datadog.trace.api.telemetry.LLMObsMetricCollector
@@ -134,6 +135,8 @@ class DDLLMObsSpanTest extends DDSpecification{
134135
def tagVersion = innerSpan.getTag(LLMOBS_TAG_PREFIX + "version")
135136
tagVersion instanceof UTF8BytesString
136137
"v1" == tagVersion.toString()
138+
139+
DDTraceApiInfo.VERSION == innerSpan.getTag(LLMOBS_TAG_PREFIX + "ddtrace.version")
137140
}
138141

139142
def "test span with overwrites"() {
@@ -219,6 +222,8 @@ class DDLLMObsSpanTest extends DDSpecification{
219222
def tagVersion = innerSpan.getTag(LLMOBS_TAG_PREFIX + "version")
220223
tagVersion instanceof UTF8BytesString
221224
"v1" == tagVersion.toString()
225+
226+
DDTraceApiInfo.VERSION == innerSpan.getTag(LLMOBS_TAG_PREFIX + "ddtrace.version")
222227
}
223228

224229
def "test llm span string input formatted to messages"() {
@@ -270,6 +275,8 @@ class DDLLMObsSpanTest extends DDSpecification{
270275
def tagVersion = innerSpan.getTag(LLMOBS_TAG_PREFIX + "version")
271276
tagVersion instanceof UTF8BytesString
272277
"v1" == tagVersion.toString()
278+
279+
DDTraceApiInfo.VERSION == innerSpan.getTag(LLMOBS_TAG_PREFIX + "ddtrace.version")
273280
}
274281

275282
def "test llm span with messages"() {
@@ -326,6 +333,8 @@ class DDLLMObsSpanTest extends DDSpecification{
326333
def tagVersion = innerSpan.getTag(LLMOBS_TAG_PREFIX + "version")
327334
tagVersion instanceof UTF8BytesString
328335
"v1" == tagVersion.toString()
336+
337+
DDTraceApiInfo.VERSION == innerSpan.getTag(LLMOBS_TAG_PREFIX + "ddtrace.version")
329338
}
330339

331340
def "finish records span.finished telemetry when LLMObs enabled"() {

dd-java-agent/instrumentation/openai-java/openai-java-3.0/build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
apply from: "$rootDir/gradle/java.gradle"
22

3-
def minVer = '3.0.0'
3+
def minVer = '3.0.1'
44

55
muzzle {
66
pass {

dd-java-agent/instrumentation/openai-java/openai-java-3.0/src/main/java/datadog/trace/instrumentation/openai_java/ChatCompletionDecorator.java

Lines changed: 149 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,19 @@
11
package datadog.trace.instrumentation.openai_java;
22

3+
import static datadog.trace.instrumentation.openai_java.JsonValueUtils.jsonValueMapToObject;
4+
import static datadog.trace.instrumentation.openai_java.JsonValueUtils.jsonValueToObject;
5+
6+
import com.openai.core.JsonValue;
37
import com.openai.helpers.ChatCompletionAccumulator;
8+
import com.openai.models.FunctionDefinition;
49
import com.openai.models.chat.completions.ChatCompletion;
510
import com.openai.models.chat.completions.ChatCompletionChunk;
611
import com.openai.models.chat.completions.ChatCompletionCreateParams;
12+
import com.openai.models.chat.completions.ChatCompletionFunctionTool;
713
import com.openai.models.chat.completions.ChatCompletionMessage;
814
import com.openai.models.chat.completions.ChatCompletionMessageParam;
915
import com.openai.models.chat.completions.ChatCompletionMessageToolCall;
16+
import com.openai.models.chat.completions.ChatCompletionTool;
1017
import datadog.trace.api.Config;
1118
import datadog.trace.api.llmobs.LLMObs;
1219
import datadog.trace.bootstrap.instrumentation.api.AgentSpan;
@@ -17,6 +24,7 @@
1724
import java.util.HashMap;
1825
import java.util.List;
1926
import java.util.Map;
27+
import java.util.Objects;
2028
import java.util.Optional;
2129
import java.util.stream.Collectors;
2230

@@ -31,33 +39,37 @@ public void withChatCompletionCreateParams(
3139
AgentSpan span, ChatCompletionCreateParams params, boolean stream) {
3240
span.setResourceName(CHAT_COMPLETIONS_CREATE);
3341
span.setTag(CommonTags.OPENAI_REQUEST_ENDPOINT, "/v1/chat/completions");
34-
if (!llmObsEnabled) {
42+
if (params == null) {
3543
return;
3644
}
45+
Optional<String> modelName = extractChatModelName(params);
46+
modelName.ifPresent(str -> span.setTag(CommonTags.OPENAI_REQUEST_MODEL, str));
3747

38-
span.setTag(CommonTags.SPAN_KIND, Tags.LLMOBS_LLM_SPAN_KIND);
39-
if (params == null) {
48+
if (!llmObsEnabled) {
4049
return;
4150
}
42-
params
43-
.model()
44-
._value()
45-
.asString()
46-
.ifPresent(str -> span.setTag(CommonTags.OPENAI_REQUEST_MODEL, str));
51+
52+
// Keep model_name and output shape stable on error paths where no response is available.
53+
modelName.ifPresent(
54+
str -> {
55+
span.setTag(CommonTags.MODEL_NAME, str);
56+
span.setTag(CommonTags.OUTPUT, Collections.singletonList(LLMObs.LLMMessage.from("", "")));
57+
});
58+
59+
span.setTag(CommonTags.SPAN_KIND, Tags.LLMOBS_LLM_SPAN_KIND);
4760

4861
span.setTag(
4962
CommonTags.INPUT,
5063
params.messages().stream()
5164
.map(ChatCompletionDecorator::llmMessage)
65+
.filter(Objects::nonNull)
5266
.collect(Collectors.toList()));
5367

5468
Map<String, Object> metadata = new HashMap<>();
5569
// maxTokens is deprecated but integration tests missing to provide maxCompletionTokens
5670
params.maxTokens().ifPresent(v -> metadata.put("max_tokens", v));
5771
params.temperature().ifPresent(v -> metadata.put("temperature", v));
58-
if (stream) {
59-
metadata.put("stream", true);
60-
}
72+
metadata.put("stream", stream);
6173
params
6274
.streamOptions()
6375
.ifPresent(
@@ -72,67 +84,166 @@ public void withChatCompletionCreateParams(
7284
params.n().ifPresent(v -> metadata.put("n", v));
7385
params.seed().ifPresent(v -> metadata.put("seed", v));
7486
span.setTag(CommonTags.METADATA, metadata);
87+
params
88+
.toolChoice()
89+
.ifPresent(
90+
toolChoice -> {
91+
String choice = null;
92+
if (toolChoice.isAuto()) {
93+
choice = "auto";
94+
} else if (toolChoice.isAllowedToolChoice()) {
95+
choice = "allowed_tools";
96+
} else if (toolChoice.isNamedToolChoice()) {
97+
choice = "function";
98+
} else if (toolChoice.isNamedToolChoiceCustom()) {
99+
choice = "custom";
100+
}
101+
if (choice != null) {
102+
metadata.put("tool_choice", choice);
103+
}
104+
});
105+
106+
List<ChatCompletionTool> tools = params._tools().asKnown().orElse(Collections.emptyList());
107+
if (!tools.isEmpty()) {
108+
span.setTag(CommonTags.TOOL_DEFINITIONS, extractToolDefinitions(tools));
109+
}
110+
}
111+
112+
private Optional<String> extractChatModelName(ChatCompletionCreateParams params) {
113+
Optional<String> modelName =
114+
params._model().asKnown().flatMap(model -> model._value().asString());
115+
return modelName.isPresent() ? modelName : params._model().asString();
116+
}
117+
118+
private List<Map<String, Object>> extractToolDefinitions(List<ChatCompletionTool> tools) {
119+
List<Map<String, Object>> toolDefinitions = new ArrayList<>();
120+
for (ChatCompletionTool tool : tools) {
121+
if (tool.isFunction()) {
122+
Map<String, Object> toolDef = extractFunctionToolDef(tool.asFunction());
123+
if (toolDef != null) {
124+
toolDefinitions.add(toolDef);
125+
}
126+
}
127+
}
128+
return toolDefinitions;
129+
}
130+
131+
private static Map<String, Object> extractFunctionToolDef(ChatCompletionFunctionTool funcTool) {
132+
// Try typed access first (works when built programmatically)
133+
Optional<FunctionDefinition> funcDefOpt = funcTool._function().asKnown();
134+
if (funcDefOpt.isPresent()) {
135+
FunctionDefinition funcDef = funcDefOpt.get();
136+
Map<String, Object> toolDef = new HashMap<>();
137+
toolDef.put("name", funcDef.name());
138+
funcDef.description().ifPresent(desc -> toolDef.put("description", desc));
139+
funcDef
140+
.parameters()
141+
.ifPresent(
142+
params ->
143+
toolDef.put("schema", jsonValueMapToObject(params._additionalProperties())));
144+
return toolDef;
145+
}
146+
147+
// Fall back to raw JSON extraction (when deserialized from HTTP request)
148+
Optional<JsonValue> rawOpt = funcTool._function().asUnknown();
149+
if (!rawOpt.isPresent()) {
150+
return null;
151+
}
152+
Optional<Map<String, JsonValue>> objOpt = rawOpt.get().asObject();
153+
if (!objOpt.isPresent()) {
154+
return null;
155+
}
156+
Map<String, JsonValue> obj = objOpt.get();
157+
JsonValue nameValue = obj.get("name");
158+
if (nameValue == null) {
159+
return null;
160+
}
161+
Optional<String> nameOpt = nameValue.asString();
162+
if (!nameOpt.isPresent()) {
163+
return null;
164+
}
165+
Map<String, Object> toolDef = new HashMap<>();
166+
toolDef.put("name", nameOpt.get());
167+
JsonValue descValue = obj.get("description");
168+
if (descValue != null) {
169+
descValue.asString().ifPresent(desc -> toolDef.put("description", desc));
170+
}
171+
JsonValue paramsValue = obj.get("parameters");
172+
if (paramsValue != null) {
173+
Object schema = jsonValueToObject(paramsValue);
174+
if (schema != null) {
175+
toolDef.put("schema", schema);
176+
}
177+
}
178+
return toolDef;
75179
}
76180

77181
private static LLMObs.LLMMessage llmMessage(ChatCompletionMessageParam m) {
78-
String role = "unknown";
79-
String content = null;
80182
if (m.isAssistant()) {
81-
role = "assistant";
82-
content = m.asAssistant().content().map(v -> v.text().orElse(null)).orElse(null);
183+
return LLMObs.LLMMessage.from(
184+
"assistant", m.asAssistant().content().map(v -> v.text().orElse(null)).orElse(null));
83185
} else if (m.isDeveloper()) {
84-
role = "developer";
85-
content = m.asDeveloper().content().text().orElse(null);
186+
return LLMObs.LLMMessage.from("developer", m.asDeveloper().content().text().orElse(null));
86187
} else if (m.isSystem()) {
87-
role = "system";
88-
content = m.asSystem().content().text().orElse(null);
188+
return LLMObs.LLMMessage.from("system", m.asSystem().content().text().orElse(null));
89189
} else if (m.isTool()) {
90-
role = "tool";
91-
content = m.asTool().content().text().orElse(null);
190+
return LLMObs.LLMMessage.from("tool", m.asTool().content().text().orElse(null));
92191
} else if (m.isUser()) {
93-
role = "user";
94-
content = m.asUser().content().text().orElse(null);
192+
return LLMObs.LLMMessage.from("user", m.asUser().content().text().orElse(null));
95193
}
96-
return LLMObs.LLMMessage.from(role, content);
194+
return null;
97195
}
98196

99197
public void withChatCompletion(AgentSpan span, ChatCompletion completion) {
198+
String modelName = completion._model().asString().orElse(null);
199+
span.setTag(CommonTags.OPENAI_RESPONSE_MODEL, modelName);
200+
span.setTag(CommonTags.MODEL_NAME, modelName);
201+
100202
if (!llmObsEnabled) {
101203
return;
102204
}
103-
String modelName = completion.model();
104-
span.setTag(CommonTags.OPENAI_RESPONSE_MODEL, modelName);
105-
span.setTag(CommonTags.MODEL_NAME, modelName);
106205

107206
List<LLMObs.LLMMessage> output =
108-
completion.choices().stream()
207+
completion._choices().asKnown().orElse(Collections.emptyList()).stream()
109208
.map(ChatCompletionDecorator::llmMessage)
209+
.filter(Objects::nonNull)
110210
.collect(Collectors.toList());
111211
span.setTag(CommonTags.OUTPUT, output);
112212

113213
completion
114-
.usage()
214+
._usage()
215+
.asKnown()
115216
.ifPresent(
116217
usage -> {
117218
span.setTag(CommonTags.INPUT_TOKENS, usage.promptTokens());
118219
span.setTag(CommonTags.OUTPUT_TOKENS, usage.completionTokens());
119220
span.setTag(CommonTags.TOTAL_TOKENS, usage.totalTokens());
221+
usage
222+
.promptTokensDetails()
223+
.flatMap(details -> details.cachedTokens())
224+
.ifPresent(v -> span.setTag(CommonTags.CACHE_READ_INPUT_TOKENS, v));
120225
});
121226
}
122227

123228
private static LLMObs.LLMMessage llmMessage(ChatCompletion.Choice choice) {
124-
ChatCompletionMessage msg = choice.message();
125-
Optional<?> roleOpt = msg._role().asString();
126-
String role = "unknown";
127-
if (roleOpt.isPresent()) {
128-
role = String.valueOf(roleOpt.get());
229+
Optional<ChatCompletionMessage> msgOpt = choice._message().asKnown();
230+
if (!msgOpt.isPresent()) {
231+
return null;
232+
}
233+
234+
ChatCompletionMessage msg = msgOpt.get();
235+
Optional<String> roleOpt = msg._role().asString();
236+
if (!roleOpt.isPresent()) {
237+
return null;
129238
}
130-
String content = msg.content().orElse(null);
239+
String role = roleOpt.get();
240+
String content = msg._content().asString().orElse("");
131241

132-
Optional<List<ChatCompletionMessageToolCall>> toolCallsOpt = msg.toolCalls();
133-
if (toolCallsOpt.isPresent() && !toolCallsOpt.get().isEmpty()) {
242+
List<ChatCompletionMessageToolCall> toolCallsOpt =
243+
msg._toolCalls().asKnown().orElse(Collections.emptyList());
244+
if (!toolCallsOpt.isEmpty()) {
134245
List<LLMObs.ToolCall> toolCalls = new ArrayList<>();
135-
for (ChatCompletionMessageToolCall toolCall : toolCallsOpt.get()) {
246+
for (ChatCompletionMessageToolCall toolCall : toolCallsOpt) {
136247
LLMObs.ToolCall llmObsToolCall = ToolCallExtractor.getToolCall(toolCall);
137248
if (llmObsToolCall != null) {
138249
toolCalls.add(llmObsToolCall);

dd-java-agent/instrumentation/openai-java/openai-java-3.0/src/main/java/datadog/trace/instrumentation/openai_java/ChatCompletionModule.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ public String[] helperClassNames() {
1818
packageName + ".CommonTags",
1919
packageName + ".ChatCompletionDecorator",
2020
packageName + ".OpenAiDecorator",
21+
packageName + ".JsonValueUtils",
2122
packageName + ".HttpResponseWrapper",
2223
packageName + ".HttpStreamResponseWrapper",
2324
packageName + ".HttpStreamResponseStreamWrapper",

dd-java-agent/instrumentation/openai-java/openai-java-3.0/src/main/java/datadog/trace/instrumentation/openai_java/CommonTags.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,20 @@ interface CommonTags {
1919
String MODEL_PROVIDER = TAG_PREFIX + LLMObsTags.MODEL_PROVIDER;
2020

2121
String ML_APP = TAG_PREFIX + LLMObsTags.ML_APP;
22+
String INTEGRATION = TAG_PREFIX + "integration";
2223
String VERSION = TAG_PREFIX + "version";
24+
String DDTRACE_VERSION = TAG_PREFIX + "ddtrace.version";
25+
String SOURCE = TAG_PREFIX + "source";
26+
27+
String ERROR = TAG_PREFIX + "error";
28+
String ERROR_TYPE = TAG_PREFIX + "error_type";
2329

2430
String ENV = TAG_PREFIX + "env";
2531
String SERVICE = TAG_PREFIX + "service";
2632
String PARENT_ID = TAG_PREFIX + "parent_id";
2733

34+
String TOOL_DEFINITIONS = TAG_PREFIX + "tool_definitions";
35+
2836
String METRIC_PREFIX = "_ml_obs_metric.";
2937
String INPUT_TOKENS = METRIC_PREFIX + "input_tokens";
3038
String OUTPUT_TOKENS = METRIC_PREFIX + "output_tokens";
@@ -33,4 +41,5 @@ interface CommonTags {
3341
String CACHE_READ_INPUT_TOKENS = METRIC_PREFIX + "cache_read_input_tokens";
3442

3543
String REQUEST_REASONING = "_ml_obs_request.reasoning";
44+
String REQUEST_PROMPT = "_ml_obs_request.prompt";
3645
}

0 commit comments

Comments
 (0)