Skip to content

Commit d83ef11

Browse files
jbachorikclaude
andcommitted
refactor: drop Java-side value cache; reject oversized values up front
Java readers of custom attrs are test-only — the profiler signal handler uses sidecar encodings and OTEL eBPF reads attrs_data directly. Drop the per-slot String cache and scan attrs_data on the test-only read path. Reject values whose UTF-8 encoding exceeds 255 bytes at setContextAttribute to keep the OTEP view consistent and avoid orphan Dictionary entries. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 3514d25 commit d83ef11

4 files changed

Lines changed: 172 additions & 94 deletions

File tree

ddprof-lib/src/main/java/com/datadoghq/profiler/ScopeStack.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
* Per-thread stack of {@link ThreadContext} snapshots for nested scopes.
1616
*
1717
* <p>Provides bulk save/restore of the full OTEP record + sidecar state via one memcpy per
18-
* transition, avoiding per-attribute TLS resolution on scope enter/exit. Not thread-safe:
19-
* a single stack instance must be accessed only from its owning thread.
18+
* transition. Not thread-safe: a single stack instance must be accessed only from its
19+
* owning thread.
2020
*
2121
* <p>Storage is tiered to keep shallow nesting allocation-free:
2222
* <ul>

ddprof-lib/src/main/java/com/datadoghq/profiler/ThreadContext.java

Lines changed: 28 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import java.nio.ByteBuffer;
1919
import java.nio.ByteOrder;
2020
import java.nio.charset.StandardCharsets;
21-
import java.util.Arrays;
2221

2322
/**
2423
* Thread-local context for trace/span identification.
@@ -29,13 +28,15 @@
2928
*/
3029
public final class ThreadContext {
3130
private static final int MAX_CUSTOM_SLOTS = 10;
31+
// Max UTF-8 byte length for a custom attribute value. Matches the 1-byte length
32+
// field in the OTEP attrs_data entry header; values above this would be truncated
33+
// silently by replaceOtepAttribute, so reject at the entry point instead — keeps
34+
// the attrs_data view and any future diagnostic readers consistent.
35+
private static final int MAX_VALUE_BYTES = 255;
3236
private static final int OTEL_MAX_RECORD_SIZE = 640;
3337
private static final int SIDECAR_SIZE = MAX_CUSTOM_SLOTS * Integer.BYTES + Long.BYTES; // 48
34-
/**
35-
* Total bytes covered by the combined snapshot buffer: OTEP record + tag encodings + LRS.
36-
* Used by {@link #snapshot(byte[], int)} / {@link #restore(byte[], int)}.
37-
*/
38-
public static final int SNAPSHOT_SIZE = OTEL_MAX_RECORD_SIZE + SIDECAR_SIZE; // 688
38+
// Package-private so ScopeStack can size its byte[] scratch.
39+
static final int SNAPSHOT_SIZE = OTEL_MAX_RECORD_SIZE + SIDECAR_SIZE; // 688
3940
private static final int LRS_OTEP_KEY_INDEX = 0;
4041
// LRS is always a fixed 16-hex-char value in attrs_data (zero-padded u64).
4142
// The entry header is 2 bytes (key_index + length), giving 18 bytes total.
@@ -66,18 +67,6 @@ public final class ThreadContext {
6667
private final int[] attrCacheEncodings = new int[CACHE_SIZE];
6768
private final byte[][] attrCacheBytes = new byte[CACHE_SIZE][];
6869

69-
// Per-slot read-back cache: indexedValueCache[keyIndex] = the String last
70-
// successfully written to attrs_data at that slot. Kept in sync by every
71-
// write (setContextAttributeDirect) and clear (clearContextAttribute,
72-
// setContextDirect). Allows readContextAttribute to return without scanning
73-
// attrs_data or allocating on the warm path.
74-
// null = not yet scanned; ABSENT = scanned/cleared, known absent; other = cached value.
75-
// ABSENT uses new String("") (not "") so it has a unique identity distinct from the
76-
// interned empty-string literal. The == check in readContextAttribute relies on that
77-
// unique sentinel identity to distinguish "known absent" from actual cached values.
78-
private static final String ABSENT = new String("");
79-
private final String[] indexedValueCache = new String[MAX_CUSTOM_SLOTS];
80-
8170
// OTEP record field offsets (from packed struct)
8271
private final int validOffset;
8372
private final int traceIdOffset;
@@ -198,7 +187,6 @@ public void clearContextAttribute(int keyIndex) {
198187
sidecarBuffer.putInt(keyIndex * Integer.BYTES, 0);
199188
removeOtepAttribute(otepKeyIndex);
200189
attach();
201-
indexedValueCache[keyIndex] = ABSENT;
202190
}
203191

204192
public void copyCustoms(int[] value) {
@@ -209,8 +197,8 @@ public void copyCustoms(int[] value) {
209197
}
210198

211199
/**
212-
* Captures the full record + sidecar state into {@code scratch[offset .. offset+SNAPSHOT_SIZE)}.
213-
* Pair with {@link #restore(byte[], int)} for nested-scope propagation.
200+
* Captures the full record + sidecar state into {@code scratch[offset..offset+SNAPSHOT_SIZE)}.
201+
* Pair with {@link #restore} for nested-scope propagation.
214202
*
215203
* <p>The detach/attach pair is required so the captured {@code valid} byte is always 0. When
216204
* {@link #restore} later memcpys the scratch back, the valid byte is overwritten mid-memcpy;
@@ -229,14 +217,11 @@ public void snapshot(byte[] scratch, int offset) {
229217
* going through {@link #recordBuffer}'s valid flag ({@code ContextApi::get} in native code),
230218
* which is the sole gate for sidecar reads (see {@code thread.h}). The scratch's own valid
231219
* byte is always 0 (enforced in {@link #snapshot}), so the memcpy never transiently republishes.
232-
* {@link #indexedValueCache} is invalidated so the next read re-scans the restored attrs_data;
233-
* {@code null} denotes "not yet scanned", distinct from {@link #ABSENT}.
234220
*/
235221
public void restore(byte[] scratch, int offset) {
236222
detach();
237223
combinedBuffer.position(0);
238224
combinedBuffer.put(scratch, offset, SNAPSHOT_SIZE);
239-
Arrays.fill(indexedValueCache, null);
240225
attach();
241226
}
242227

@@ -255,21 +240,14 @@ public void restore(byte[] scratch, int offset) {
255240
* request IDs, and other per-request-unique strings will exhaust the
256241
* Dictionary and cause attributes to be silently dropped.
257242
*
258-
* <p><b>Overflow and orphan encodings:</b> When {@code attrs_data} overflows
259-
* (buffer full), the old entry for {@code keyIndex} is compacted out and the
260-
* new value cannot be written. Both the sidecar and {@code indexedValueCache}
261-
* are cleared so {@link #readContextAttribute} returns {@code null} for this
262-
* slot. However, if the value was not already cached in the per-thread encoding
263-
* cache, {@code registerConstant0} may have already registered it in the native
264-
* Dictionary before the overflow was detected. That Dictionary entry cannot be
265-
* removed — the native Dictionary is write-only for the JVM lifetime. The orphan
266-
* encoding is harmless: it will not appear in JFR events because the sidecar is
267-
* zeroed.
243+
* <p><b>Value size limit.</b> The UTF-8 encoding of {@code value} must fit in
244+
* {@value #MAX_VALUE_BYTES} bytes (the OTEP attrs_data entry length field is one byte).
245+
* Oversized values are rejected up front — they never reach the Dictionary or attrs_data.
268246
*
269247
* @param keyIndex Index into the registered attribute key map (0-based)
270248
* @param value The string value for this attribute
271-
* @return true if the attribute was set successfully, false if the
272-
* Dictionary is full, attrs_data overflows, or keyIndex is out of range
249+
* @return true if the attribute was set successfully, false if the value is too long,
250+
* the Dictionary is full, attrs_data overflows, or keyIndex is out of range
273251
*/
274252
public boolean setContextAttribute(int keyIndex, String value) {
275253
if (keyIndex < 0 || keyIndex >= MAX_CUSTOM_SLOTS || value == null) {
@@ -289,19 +267,24 @@ private boolean setContextAttributeDirect(int keyIndex, String value) {
289267
int encoding;
290268
byte[] utf8;
291269
if (value.equals(attrCacheKeys[slot])) {
270+
// Cache hit — the value was previously validated and cached; no re-check needed.
292271
encoding = attrCacheEncodings[slot];
293272
utf8 = attrCacheBytes[slot];
294273
} else {
295-
// Cache miss: register in Dictionary, encode UTF-8, cache both.
296-
// Allocates byte[] once per unique value; cached for reuse.
274+
// Cache miss: encode UTF-8 and validate size BEFORE touching the Dictionary.
275+
// Rejecting here avoids an orphan Dictionary entry (the native Dictionary is
276+
// write-only for the JVM lifetime and cannot be undone).
277+
utf8 = value.getBytes(StandardCharsets.UTF_8);
278+
if (utf8.length > MAX_VALUE_BYTES) {
279+
return false;
280+
}
297281
encoding = registerConstant0(value);
298282
if (encoding < 0) {
299283
// Dictionary full: clear sidecar AND remove the OTEP attrs_data entry
300284
// so both views stay consistent (both report no value for this key).
301285
clearContextAttribute(keyIndex);
302286
return false;
303287
}
304-
utf8 = value.getBytes(StandardCharsets.UTF_8);
305288
attrCacheEncodings[slot] = encoding;
306289
attrCacheBytes[slot] = utf8;
307290
attrCacheKeys[slot] = value;
@@ -319,7 +302,6 @@ private boolean setContextAttributeDirect(int keyIndex, String value) {
319302
sidecarBuffer.putInt(keyIndex * Integer.BYTES, 0);
320303
}
321304
attach();
322-
indexedValueCache[keyIndex] = written ? value : ABSENT;
323305
return written;
324306
}
325307

@@ -346,7 +328,6 @@ private void setContextDirect(long localRootSpanId, long spanId, long trHi, long
346328
for (int i = 0; i < MAX_CUSTOM_SLOTS; i++) {
347329
// i * Integer.BYTES: byte offset into sidecar buffer for int slot i
348330
sidecarBuffer.putInt(i * Integer.BYTES, 0);
349-
indexedValueCache[i] = ABSENT;
350331
}
351332
// Reset attrs_data_size to contain only the fixed LRS entry, discarding
352333
// any custom attribute entries written during the previous span.
@@ -382,7 +363,6 @@ private void clearContextDirect() {
382363
writeLrsHex(0);
383364
for (int i = 0; i < MAX_CUSTOM_SLOTS; i++) {
384365
sidecarBuffer.putInt(i * Integer.BYTES, 0);
385-
indexedValueCache[i] = ABSENT;
386366
}
387367
sidecarBuffer.putLong(lrsSidecarOffset, 0);
388368
}
@@ -486,19 +466,10 @@ private int compactOtepAttribute(int otepKeyIndex) {
486466
}
487467

488468
/**
489-
* Reads a custom attribute value by key index.
490-
*
491-
* <p>Warm path: O(1), zero-allocation — returns the cached value from
492-
* {@code indexedValueCache[keyIndex]} when the slot was populated by a prior write
493-
* on this thread. The cache is kept in sync by every write ({@link #setContextAttributeDirect})
494-
* and clear ({@link #clearContextAttribute}, {@link #setContextDirect}).
495-
*
496-
* <p>Cold path: scans {@code attrs_data} on first read after profiler restart
497-
* (when the Java cache is unpopulated but the native buffer has pre-existing data).
498-
* Populates the cache slot on success to make subsequent reads warm.
499-
*
500-
* <p>Used by {@code ContextSetter.readContextValue()} to support snapshot/restore of
501-
* nested profiling scopes.
469+
* Reads a custom attribute value by key index by scanning {@code attrs_data}.
470+
* Test-only path: in production, the profiler signal handler reads via sidecar
471+
* encoding IDs and the OTEL eBPF profiler reads attrs_data directly — no Java
472+
* reader is on any hot path. Allocates a new String from the UTF-8 bytes on each call.
502473
*
503474
* @param keyIndex 0-based user key index (same as passed to setContextAttribute)
504475
* @return the attribute value string, or null if not set
@@ -507,18 +478,7 @@ public String readContextAttribute(int keyIndex) {
507478
if (keyIndex < 0 || keyIndex >= MAX_CUSTOM_SLOTS) {
508479
return null;
509480
}
510-
String cached = indexedValueCache[keyIndex];
511-
if (cached == ABSENT) {
512-
return null;
513-
}
514-
if (cached != null) {
515-
return cached;
516-
}
517-
// Cold path: scan attrs_data (only on first read after session restart).
518-
// valid=0 means the record has not been published yet by attach(), or was cleared
519-
// by clearContextDirect() without resetting attrs_data_size. Either way there are
520-
// no valid user attribute entries — only the LRS prefix may exist. Any future path
521-
// that writes user attributes must set valid=1 via attach() first.
481+
// valid=0 → record was detached or never published. No attrs_data to trust.
522482
if (recordBuffer.get(validOffset) == 0) {
523483
return null;
524484
}
@@ -536,13 +496,10 @@ public String readContextAttribute(int keyIndex) {
536496
for (int i = 0; i < len; i++) {
537497
bytes[i] = recordBuffer.get(attrsDataOffset + pos + 2 + i);
538498
}
539-
String value = new String(bytes, StandardCharsets.UTF_8);
540-
indexedValueCache[keyIndex] = value;
541-
return value;
499+
return new String(bytes, StandardCharsets.UTF_8);
542500
}
543501
pos += 2 + len;
544502
}
545-
indexedValueCache[keyIndex] = ABSENT;
546503
return null;
547504
}
548505

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
package com.datadoghq.profiler.context;
2+
3+
import static org.junit.jupiter.api.Assertions.assertEquals;
4+
import static org.junit.jupiter.api.Assertions.assertThrows;
5+
6+
import java.nio.ByteBuffer;
7+
import java.nio.ByteOrder;
8+
9+
import org.junit.jupiter.api.Assumptions;
10+
import org.junit.jupiter.api.Test;
11+
12+
import com.datadoghq.profiler.ScopeStack;
13+
import com.datadoghq.profiler.ThreadContext;
14+
15+
/**
16+
* Pure-Java unit test for {@link ScopeStack}. Uses heap-backed {@link ByteBuffer}s so
17+
* no native library is required. Exercises depth accounting, underflow, and round-trip
18+
* preservation of trace/span IDs across fast-path and chunked-path depths.
19+
*/
20+
public class ScopeStackTest {
21+
22+
private static final int OTEL_MAX_RECORD_SIZE = 640;
23+
private static final int SIDECAR_SIZE = 48;
24+
private static final int COMBINED_SIZE = OTEL_MAX_RECORD_SIZE + SIDECAR_SIZE;
25+
// Offsets mirror OtelThreadContextRecord in otel_context.h.
26+
private static final int TRACE_ID_OFFSET = 0;
27+
private static final int SPAN_ID_OFFSET = 16;
28+
private static final int VALID_OFFSET = 24;
29+
private static final int ATTRS_DATA_SIZE_OFFSET = 26;
30+
private static final int ATTRS_DATA_OFFSET = 28;
31+
private static final int LRS_SIDECAR_OFFSET = 40; // 10 * sizeof(u32)
32+
33+
private static ThreadContext newContext() {
34+
// Combined buffer aliases record + sidecar via slices sharing the same backing array.
35+
ByteBuffer combined = ByteBuffer.allocate(COMBINED_SIZE).order(ByteOrder.nativeOrder());
36+
combined.position(0).limit(OTEL_MAX_RECORD_SIZE);
37+
ByteBuffer record = combined.slice().order(ByteOrder.nativeOrder());
38+
combined.position(OTEL_MAX_RECORD_SIZE).limit(COMBINED_SIZE);
39+
ByteBuffer sidecar = combined.slice().order(ByteOrder.nativeOrder());
40+
combined.position(0).limit(COMBINED_SIZE);
41+
long[] metadata = {
42+
VALID_OFFSET, TRACE_ID_OFFSET, SPAN_ID_OFFSET,
43+
ATTRS_DATA_SIZE_OFFSET, ATTRS_DATA_OFFSET, LRS_SIDECAR_OFFSET
44+
};
45+
return new ThreadContext(record, sidecar, combined, metadata);
46+
}
47+
48+
private static void assumeLittleEndian() {
49+
Assumptions.assumeTrue(
50+
ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN,
51+
"ThreadContext only supports little-endian platforms");
52+
}
53+
54+
@Test
55+
public void depthBalance() {
56+
assumeLittleEndian();
57+
ThreadContext ctx = newContext();
58+
ScopeStack stack = new ScopeStack();
59+
assertEquals(0, stack.depth());
60+
stack.enter(ctx);
61+
assertEquals(1, stack.depth());
62+
stack.enter(ctx);
63+
assertEquals(2, stack.depth());
64+
stack.exit(ctx);
65+
assertEquals(1, stack.depth());
66+
stack.exit(ctx);
67+
assertEquals(0, stack.depth());
68+
}
69+
70+
@Test
71+
public void exitUnderflowThrows() {
72+
assumeLittleEndian();
73+
ThreadContext ctx = newContext();
74+
ScopeStack stack = new ScopeStack();
75+
assertThrows(IllegalStateException.class, () -> stack.exit(ctx));
76+
}
77+
78+
@Test
79+
public void fastPathRoundTrip() {
80+
assumeLittleEndian();
81+
ThreadContext ctx = newContext();
82+
ScopeStack stack = new ScopeStack();
83+
84+
ctx.put(/*lrs*/ 100L, /*span*/ 200L, /*trHi*/ 0L, /*trLo*/ 300L);
85+
assertEquals(200L, ctx.getSpanId());
86+
assertEquals(100L, ctx.getRootSpanId());
87+
88+
stack.enter(ctx);
89+
ctx.put(500L, 600L, 0L, 700L);
90+
assertEquals(600L, ctx.getSpanId());
91+
assertEquals(500L, ctx.getRootSpanId());
92+
93+
stack.exit(ctx);
94+
assertEquals(200L, ctx.getSpanId(), "span must be restored");
95+
assertEquals(100L, ctx.getRootSpanId(), "root span must be restored");
96+
}
97+
98+
@Test
99+
public void chunkedPathRoundTrip() {
100+
// Push past FAST_DEPTH (6) to exercise the lazy-chunk path and Arrays.copyOf growth.
101+
assumeLittleEndian();
102+
ThreadContext ctx = newContext();
103+
ScopeStack stack = new ScopeStack();
104+
105+
final int depth = 20; // FAST_DEPTH + one full 12-slot chunk + 2 into the next
106+
for (int i = 0; i < depth; i++) {
107+
ctx.put(1000L + i, 2000L + i, 0L, 3000L + i);
108+
stack.enter(ctx);
109+
}
110+
assertEquals(depth, stack.depth());
111+
112+
// Scramble state so restore has something to correct.
113+
ctx.put(99L, 99L, 0L, 99L);
114+
115+
for (int i = depth - 1; i >= 0; i--) {
116+
stack.exit(ctx);
117+
assertEquals(2000L + i, ctx.getSpanId(), "span mismatch at depth " + i);
118+
assertEquals(1000L + i, ctx.getRootSpanId(), "root mismatch at depth " + i);
119+
}
120+
assertEquals(0, stack.depth());
121+
}
122+
123+
@Test
124+
public void reusesStackAfterFullUnwind() {
125+
// After the stack returns to depth 0, re-entering must not leak state from the prior run.
126+
assumeLittleEndian();
127+
ThreadContext ctx = newContext();
128+
ScopeStack stack = new ScopeStack();
129+
130+
ctx.put(1L, 2L, 0L, 3L);
131+
stack.enter(ctx);
132+
ctx.put(10L, 20L, 0L, 30L);
133+
stack.exit(ctx);
134+
assertEquals(2L, ctx.getSpanId());
135+
136+
ctx.put(4L, 5L, 0L, 6L);
137+
stack.enter(ctx);
138+
ctx.put(40L, 50L, 0L, 60L);
139+
stack.exit(ctx);
140+
assertEquals(5L, ctx.getSpanId());
141+
}
142+
}

0 commit comments

Comments
 (0)