Skip to content

Commit 5e9d85c

Browse files
fix: prevent level regression when compacting mixed-level TSM files (#27233)
compact() derives the output filename from maxGeneration and maxSequence+1, but maxSequence was scoped to the max generation only. When cold or forced compaction groups L1 files (high gen, seq=1) with L4 files (low gen, seq>=4), the output gets seq=2 — regressing L4 data to L2. Track the minimum sequence per generation to determine each generation's true level, then bump maxSequence so the output level never regresses below the highest input level. Per-generation max sequence is preserved for collision avoidance, and min-sequence level detection avoids inflating the output when a generation has split files from the 2GB size limit. (cherry picked from commit 142fcaa) Fixes #27230 --------- Co-authored-by: Geoffrey Wossum <gwossum@influxdata.com> (cherry picked from commit 381fd2d) Fixes #27232
1 parent ba705ec commit 5e9d85c

File tree

2 files changed

+108
-1
lines changed

2 files changed

+108
-1
lines changed

tsdb/engine/tsm1/compact.go

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -973,6 +973,7 @@ func (c *Compactor) compact(fast bool, tsmFiles []string, logger *zap.Logger, po
973973
// set. We need to find that max generation as well as the max sequence
974974
// number to ensure we write to the next unique location.
975975
var maxGeneration, maxSequence int
976+
minSeqByGen := make(map[int]int)
976977

977978
if c.FileStore == nil {
978979
return nil, fmt.Errorf("compactor for %s has no file store: %w", c.Dir, errCompactionsDisabled)
@@ -987,10 +988,26 @@ func (c *Compactor) compact(fast bool, tsmFiles []string, logger *zap.Logger, po
987988
maxGeneration = gen
988989
maxSequence = seq
989990
}
990-
991991
if gen == maxGeneration && seq > maxSequence {
992992
maxSequence = seq
993993
}
994+
if s, ok := minSeqByGen[gen]; !ok || seq < s {
995+
minSeqByGen[gen] = seq
996+
}
997+
}
998+
999+
// Compute the highest compaction level among all input generations.
1000+
// A generation's level = min(its minimum sequence number, 4).
1001+
var maxInputLevel int
1002+
for _, minSeq := range minSeqByGen {
1003+
maxInputLevel = max(maxInputLevel, min(minSeq, 4))
1004+
}
1005+
1006+
// Ensure the output level (min(maxSequence+1, 4)) does not regress
1007+
// below any input level. This matters when cold/forced compaction
1008+
// groups L1 files (high gen, seq=1) with L4 files (low gen, seq>=4).
1009+
if maxSequence+1 < maxInputLevel {
1010+
maxSequence = maxInputLevel - 1
9941011
}
9951012

9961013
// For each TSM file, create a TSM reader

tsdb/engine/tsm1/compact_test.go

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,96 @@ func TestCompactor_CompactFull(t *testing.T) {
259259
}
260260
}
261261

262+
// mustWriteTSMWithSeq creates a TSM file with the given generation and sequence number.
263+
func mustWriteTSMWithSeq(t *testing.T, dir string, gen, seq int, values map[string][]tsm1.Value) string {
264+
t.Helper()
265+
tempGen := gen*1000 + seq
266+
f := MustWriteTSM(dir, tempGen, values)
267+
newName := filepath.Join(dir, tsm1.DefaultFormatFileName(gen, seq)+".tsm")
268+
require.NoError(t, os.Rename(f, newName), "rename TSM file to gen=%d seq=%d", gen, seq)
269+
return newName
270+
}
271+
272+
// TestCompactor_CompactFull_MixedLevelNoRegression verifies that compacting
273+
// L1 files (seq=1) with L4 files (seq>=4) produces output at L4, not L2.
274+
// This is the cold/forced compaction scenario where Plan() collects all files.
275+
func TestCompactor_CompactFull_MixedLevelNoRegression(t *testing.T) {
276+
dir := MustTempDir()
277+
defer os.RemoveAll(dir)
278+
279+
// L4 files: low generation numbers, high sequence (level 4)
280+
f1 := mustWriteTSMWithSeq(t, dir, 1, 4, map[string][]tsm1.Value{
281+
"cpu,host=A#!~#value": {tsm1.NewValue(1, 1.1)},
282+
})
283+
f2 := mustWriteTSMWithSeq(t, dir, 2, 5, map[string][]tsm1.Value{
284+
"cpu,host=B#!~#value": {tsm1.NewValue(2, 2.1)},
285+
})
286+
// L1 files: high generation numbers, sequence 1 (level 1)
287+
f3 := mustWriteTSMWithSeq(t, dir, 3, 1, map[string][]tsm1.Value{
288+
"cpu,host=C#!~#value": {tsm1.NewValue(3, 3.1)},
289+
})
290+
f4 := mustWriteTSMWithSeq(t, dir, 4, 1, map[string][]tsm1.Value{
291+
"cpu,host=D#!~#value": {tsm1.NewValue(4, 4.1)},
292+
})
293+
294+
ffs := &fakeFileStore{}
295+
defer ffs.Close()
296+
compactor := tsm1.NewCompactor()
297+
compactor.Dir = dir
298+
compactor.FileStore = ffs
299+
compactor.Open()
300+
301+
files, err := compactor.CompactFull([]string{f1, f2, f3, f4}, zap.NewNop(), tsdb.DefaultMaxPointsPerBlock)
302+
require.NoError(t, err)
303+
require.Equal(t, 1, len(files))
304+
305+
gotGen, gotSeq, err := tsm1.DefaultParseFileName(files[0])
306+
require.NoError(t, err)
307+
require.Equal(t, 4, gotGen, "output generation should be max input generation")
308+
// Output must be at level 4 (seq >= 4). Without the fix, maxSequence
309+
// was scoped to the max generation (gen=4, seq=1), producing seq=2 (L2).
310+
require.GreaterOrEqual(t, gotSeq, 4, "output level must not regress below highest input level (L4)")
311+
}
312+
313+
// TestCompactor_CompactFull_SplitFilesNoLevelInflation verifies that when a
314+
// generation has multiple files from a 2GB size split (e.g. seq=2,3), the
315+
// output level is based on the generation's true level, not inflated by the
316+
// split artifact sequences.
317+
func TestCompactor_CompactFull_SplitFilesNoLevelInflation(t *testing.T) {
318+
dir := MustTempDir()
319+
defer os.RemoveAll(dir)
320+
321+
// L2 generation split across two files (seq=2 is the level, seq=3 is a split artifact)
322+
f1 := mustWriteTSMWithSeq(t, dir, 1, 2, map[string][]tsm1.Value{
323+
"cpu,host=A#!~#value": {tsm1.NewValue(1, 1.1)},
324+
})
325+
f2 := mustWriteTSMWithSeq(t, dir, 1, 3, map[string][]tsm1.Value{
326+
"cpu,host=B#!~#value": {tsm1.NewValue(2, 2.1)},
327+
})
328+
// Normal L2 generation (single file)
329+
f3 := mustWriteTSMWithSeq(t, dir, 8, 2, map[string][]tsm1.Value{
330+
"cpu,host=C#!~#value": {tsm1.NewValue(3, 3.1)},
331+
})
332+
333+
ffs := &fakeFileStore{}
334+
defer ffs.Close()
335+
compactor := tsm1.NewCompactor()
336+
compactor.Dir = dir
337+
compactor.FileStore = ffs
338+
compactor.Open()
339+
340+
files, err := compactor.CompactFull([]string{f1, f2, f3}, zap.NewNop(), tsdb.DefaultMaxPointsPerBlock)
341+
require.NoError(t, err)
342+
require.Equal(t, 1, len(files))
343+
344+
gotGen, gotSeq, err := tsm1.DefaultParseFileName(files[0])
345+
require.NoError(t, err)
346+
require.Equal(t, 8, gotGen, "output generation should be max input generation")
347+
// Output should be seq=3 (L3): one level above the L2 inputs.
348+
// The split file at gen=1,seq=3 must not inflate the output to L4.
349+
require.Equal(t, 3, gotSeq, "output level should be L3, not inflated by split artifact")
350+
}
351+
262352
// Ensures that a compaction will properly merge multiple TSM files
263353
func TestCompactor_DecodeError(t *testing.T) {
264354
dir := MustTempDir()

0 commit comments

Comments
 (0)