Skip to content

Commit f0d5c73

Browse files
feat: cache generation and sequence to reduce TSM filename parsing (#26905) (#26913)
Parse TSM file names only on creation or rename and cache the generation and level, instead of repeatedly parsing them for those values. Fixes #26794. Fixes #26907. (cherry picked from commit 8b1696a) Co-authored-by: Geoffrey Wossum <gwossum@influxdata.com>
1 parent 7f42c16 commit f0d5c73

File tree

7 files changed

+180
-82
lines changed

7 files changed

+180
-82
lines changed

cmd/influx_tools/compact/command.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -150,6 +150,10 @@ func (sc *shardCompactor) ParseFileName(path string) (int, int, error) {
150150
return 0, 0, errors.New("not implemented")
151151
}
152152

153+
func (sc *shardCompactor) SupportsCompactionPlanning() bool {
154+
return false
155+
}
156+
153157
func newShardCompactor(path string, logger *zap.Logger) (sc *shardCompactor, err error) {
154158
sc = &shardCompactor{
155159
logger: logger,

tsdb/engine/tsm1/compact.go

Lines changed: 22 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -170,12 +170,17 @@ type DefaultPlanner struct {
170170
type fileStore interface {
171171
Stats() []ExtFileStat
172172
LastModified() time.Time
173-
ParseFileName(path string) (int, int, error)
173+
ParseFileName(path string) (generation int, sequence int, err error)
174174
NextGeneration() int
175175
TSMReader(path string) (*TSMReader, error)
176+
SupportsCompactionPlanning() bool
176177
}
177178

178179
func NewDefaultPlanner(fs fileStore, writeColdDuration time.Duration) *DefaultPlanner {
180+
if !fs.SupportsCompactionPlanning() {
181+
// This should only happen due to developer mistakes.
182+
panic("fileStore must support compaction planning")
183+
}
179184
return &DefaultPlanner{
180185
FileStore: fs,
181186
compactFullWriteColdDuration: writeColdDuration,
@@ -188,15 +193,13 @@ func NewDefaultPlanner(fs fileStore, writeColdDuration time.Duration) *DefaultPl
188193
// 000001-01.tsm, 000001-02.tsm would be in the same generation
189194
// 000001 each with different sequence numbers.
190195
type tsmGeneration struct {
191-
id int
192-
files []ExtFileStat
193-
parseFileName ParseFileNameFunc
196+
id int
197+
files []ExtFileStat
194198
}
195199

196-
func newTsmGeneration(id int, parseFileNameFunc ParseFileNameFunc) *tsmGeneration {
200+
func newTsmGeneration(id int) *tsmGeneration {
197201
return &tsmGeneration{
198-
id: id,
199-
parseFileName: parseFileNameFunc,
202+
id: id,
200203
}
201204
}
202205

@@ -213,13 +216,12 @@ func (t *tsmGeneration) size() uint64 {
213216
func (t *tsmGeneration) level() int {
214217
// Level 0 is always created from the result of a cache compaction. It generates
215218
// 1 file with a sequence num of 1. Level 2 is generated by compacting multiple
216-
// level 1 files. Level 3 is generate by compacting multiple level 2 files. Level
219+
// level 1 files. Level 3 is generated by compacting multiple level 2 files. Level
217220
// 4 is for anything else.
218-
_, seq, _ := t.parseFileName(t.files[0].Path)
219-
if seq < 4 {
220-
return seq
221-
}
222221

222+
if t.files[0].Sequence < 4 {
223+
return t.files[0].Sequence
224+
}
223225
return 4
224226
}
225227

@@ -250,10 +252,6 @@ func (c *DefaultPlanner) SetFileStore(fs *FileStore) {
250252
c.FileStore = fs
251253
}
252254

253-
func (c *DefaultPlanner) ParseFileName(path string) (int, int, error) {
254-
return c.FileStore.ParseFileName(path)
255-
}
256-
257255
func (c *DefaultPlanner) generationsFullyCompacted(gens tsmGenerations) (bool, string) {
258256
if len(gens) > 1 {
259257
return false, "not fully compacted and not idle because of more than one generation"
@@ -704,17 +702,15 @@ func (c *DefaultPlanner) findGenerations(skipInUse bool) tsmGenerations {
704702
tsmStats := c.FileStore.Stats()
705703
generations := make(map[int]*tsmGeneration, len(tsmStats))
706704
for _, f := range tsmStats {
707-
gen, _, _ := c.ParseFileName(f.Path)
708-
709705
// Skip any files that are assigned to a current compaction plan
710706
if _, ok := c.filesInUse[f.Path]; skipInUse && ok {
711707
continue
712708
}
713709

714-
group := generations[gen]
710+
group := generations[f.Generation]
715711
if group == nil {
716-
group = newTsmGeneration(gen, c.ParseFileName)
717-
generations[gen] = group
712+
group = newTsmGeneration(f.Generation)
713+
generations[f.Generation] = group
718714
}
719715
group.files = append(group.files, f)
720716
}
@@ -786,7 +782,6 @@ type Compactor struct {
786782
RateLimit limiter.Rate
787783

788784
formatFileName FormatFileNameFunc
789-
parseFileName ParseFileNameFunc
790785

791786
mu sync.RWMutex
792787
snapshotsEnabled bool
@@ -809,18 +804,13 @@ type Compactor struct {
809804
func NewCompactor() *Compactor {
810805
return &Compactor{
811806
formatFileName: DefaultFormatFileName,
812-
parseFileName: DefaultParseFileName,
813807
}
814808
}
815809

816810
func (c *Compactor) WithFormatFileNameFunc(formatFileNameFunc FormatFileNameFunc) {
817811
c.formatFileName = formatFileNameFunc
818812
}
819813

820-
func (c *Compactor) WithParseFileNameFunc(parseFileNameFunc ParseFileNameFunc) {
821-
c.parseFileName = parseFileNameFunc
822-
}
823-
824814
// Open initializes the Compactor.
825815
func (c *Compactor) Open() {
826816
c.mu.Lock()
@@ -986,8 +976,12 @@ func (c *Compactor) compact(fast bool, tsmFiles []string, logger *zap.Logger, po
986976
// set. We need to find that max generation as well as the max sequence
987977
// number to ensure we write to the next unique location.
988978
var maxGeneration, maxSequence int
979+
980+
if c.FileStore == nil {
981+
return nil, fmt.Errorf("compactor for %s has no file store: %w", c.Dir, errCompactionsDisabled)
982+
}
989983
for _, f := range tsmFiles {
990-
gen, seq, err := c.parseFileName(f)
984+
gen, seq, err := c.FileStore.ParseFileName(f)
991985
if err != nil {
992986
return nil, err
993987
}

0 commit comments

Comments
 (0)