Skip to content

Commit 764fe4f

Browse files
authored
index: enable shard merging by default (#798)
This enables shard merging by default for zoekt-sourcegraph-indexserver. Sourcegraph has been using shard merging in production for several years. We have recently confirmed significant performance improvements for queries which are bound by matchTree construction. I also remove -merge_max_priority because we have stopped using it. Use SRC_DISABLE_SHARD_MERGING to disable shard merging. Test plan: mostly CI, I did some manual testing to confirm that shard merging is enabled by default for zoekt-sourcegraph-indexserver.
1 parent bbd1fed commit 764fe4f

5 files changed

Lines changed: 37 additions & 25 deletions

File tree

build/builder.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,10 @@ type Options struct {
116116
changedOrRemovedFiles []string
117117

118118
LanguageMap ctags.LanguageMap
119+
120+
// ShardMerging is true if builder should respect compound shards. This is a
121+
// Sourcegraph specific option.
122+
ShardMerging bool
119123
}
120124

121125
// HashOptions contains only the options in Options that upon modification leads to IndexState of IndexStateMismatch during the next index building.
@@ -194,6 +198,7 @@ func (o *Options) Flags(fs *flag.FlagSet) {
194198

195199
// Sourcegraph specific
196200
fs.BoolVar(&o.DisableCTags, "disable_ctags", x.DisableCTags, "If set, ctags will not be called.")
201+
fs.BoolVar(&o.ShardMerging, "shard_merging", x.ShardMerging, "If set, builder will respect compound shards.")
197202
}
198203

199204
// Args generates command line arguments for o. It is the "inverse" of Flags.
@@ -233,6 +238,10 @@ func (o *Options) Args() []string {
233238
args = append(args, "-disable_ctags")
234239
}
235240

241+
if o.ShardMerging {
242+
args = append(args, "-shard_merging")
243+
}
244+
236245
return args
237246
}
238247

@@ -774,7 +783,7 @@ func (b *Builder) Finish() error {
774783

775784
for p := range toDelete {
776785
// Don't delete compound shards, set tombstones instead.
777-
if zoekt.ShardMergingEnabled() && strings.HasPrefix(filepath.Base(p), "compound-") {
786+
if b.opts.ShardMerging && strings.HasPrefix(filepath.Base(p), "compound-") {
778787
if !strings.HasSuffix(p, ".zoekt") {
779788
continue
780789
}

cmd/zoekt-sourcegraph-indexserver/index.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,9 @@ type indexArgs struct {
9898
// DeltaShardNumberFallbackThreshold is an upper limit on the number of preexisting shards that can exist
9999
// before attempting a delta build.
100100
DeltaShardNumberFallbackThreshold uint64
101+
102+
// ShardMerging is true if we want zoekt-git-index to respect compound shards.
103+
ShardMerging bool
101104
}
102105

103106
// BuildOptions returns a build.Options represented by indexArgs. Note: it
@@ -131,6 +134,8 @@ func (o *indexArgs) BuildOptions() *build.Options {
131134
DocumentRanksVersion: o.DocumentRanksVersion,
132135

133136
LanguageMap: o.LanguageMap,
137+
138+
ShardMerging: o.ShardMerging,
134139
}
135140
}
136141

cmd/zoekt-sourcegraph-indexserver/main.go

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,8 @@ func (s *Server) indexArgs(opts IndexOptions) *indexArgs {
645645

646646
// 1 MB; match https://sourcegraph.sgdev.org/github.com/sourcegraph/sourcegraph/-/blob/cmd/symbols/internal/symbols/search.go#L22
647647
FileLimit: 1 << 20,
648+
649+
ShardMerging: s.shardMerging,
648650
}
649651
}
650652

@@ -1065,6 +1067,18 @@ func srcLogLevelIsDebug() bool {
10651067
return strings.EqualFold(lvl, "dbug") || strings.EqualFold(lvl, "debug")
10661068
}
10671069

1070+
func getEnvWithDefaultBool(k string, defaultVal bool) bool {
1071+
v := os.Getenv(k)
1072+
if v == "" {
1073+
return defaultVal
1074+
}
1075+
b, err := strconv.ParseBool(v)
1076+
if err != nil {
1077+
log.Fatalf("error parsing ENV %s to int64: %s", k, err)
1078+
}
1079+
return b
1080+
}
1081+
10681082
func getEnvWithDefaultInt64(k string, defaultVal int64) int64 {
10691083
v := os.Getenv(k)
10701084
if v == "" {
@@ -1196,12 +1210,12 @@ type rootConfig struct {
11961210
blockProfileRate int
11971211

11981212
// config values related to shard merging
1199-
vacuumInterval time.Duration
1200-
mergeInterval time.Duration
1201-
targetSize int64
1202-
minSize int64
1203-
minAgeDays int
1204-
maxPriority float64
1213+
disableShardMerging bool
1214+
vacuumInterval time.Duration
1215+
mergeInterval time.Duration
1216+
targetSize int64
1217+
minSize int64
1218+
minAgeDays int
12051219

12061220
// config values related to backoff indexing repos with one or more consecutive failures
12071221
backoffDuration time.Duration
@@ -1221,12 +1235,12 @@ func (rc *rootConfig) registerRootFlags(fs *flag.FlagSet) {
12211235
fs.DurationVar(&rc.maxBackoffDuration, "max_backoff_duration", getEnvWithDefaultDuration("MAX_BACKOFF_DURATION", 120*time.Minute), "the maximum duration to backoff from enqueueing a repo for indexing. A negative value disables indexing backoff.")
12221236

12231237
// flags related to shard merging
1238+
fs.BoolVar(&rc.disableShardMerging, "shard_merging", getEnvWithDefaultBool("SRC_DISABLE_SHARD_MERGING", false), "disable shard merging")
12241239
fs.DurationVar(&rc.vacuumInterval, "vacuum_interval", getEnvWithDefaultDuration("SRC_VACUUM_INTERVAL", 24*time.Hour), "run vacuum this often")
12251240
fs.DurationVar(&rc.mergeInterval, "merge_interval", getEnvWithDefaultDuration("SRC_MERGE_INTERVAL", 8*time.Hour), "run merge this often")
12261241
fs.Int64Var(&rc.targetSize, "merge_target_size", getEnvWithDefaultInt64("SRC_MERGE_TARGET_SIZE", 2000), "the target size of compound shards in MiB")
12271242
fs.Int64Var(&rc.minSize, "merge_min_size", getEnvWithDefaultInt64("SRC_MERGE_MIN_SIZE", 1800), "the minimum size of a compound shard in MiB")
12281243
fs.IntVar(&rc.minAgeDays, "merge_min_age", getEnvWithDefaultInt("SRC_MERGE_MIN_AGE", 7), "the time since the last commit in days. Shards with newer commits are excluded from merging.")
1229-
fs.Float64Var(&rc.maxPriority, "merge_max_priority", getEnvWithDefaultFloat64("SRC_MERGE_MAX_PRIORITY", 100), "the maximum priority a shard can have to be considered for merging.")
12301244
}
12311245

12321246
func startServer(conf rootConfig) error {
@@ -1428,7 +1442,7 @@ func newServer(conf rootConfig) (*Server, error) {
14281442
Interval: conf.interval,
14291443
CPUCount: cpuCount,
14301444
queue: *q,
1431-
shardMerging: zoekt.ShardMergingEnabled(),
1445+
shardMerging: !conf.disableShardMerging,
14321446
deltaBuildRepositoriesAllowList: deltaBuildRepositoriesAllowList,
14331447
deltaShardNumberFallbackThreshold: deltaShardNumberFallbackThreshold,
14341448
repositoriesSkipSymbolsCalculationAllowList: reposShouldSkipSymbolsCalculation,
@@ -1439,7 +1453,6 @@ func newServer(conf rootConfig) (*Server, error) {
14391453
targetSizeBytes: conf.targetSize * 1024 * 1024,
14401454
minSizeBytes: conf.minSize * 1024 * 1024,
14411455
minAgeDays: conf.minAgeDays,
1442-
maxPriority: conf.maxPriority,
14431456
},
14441457
timeout: indexingTimeout,
14451458
}, err

cmd/zoekt-sourcegraph-indexserver/merge.go

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -180,9 +180,6 @@ type mergeOpts struct {
180180
// merging. For example, a value of 7 means that only repos that have been
181181
// inactive for 7 days will be considered for merging.
182182
minAgeDays int
183-
184-
// the MAX priority a shard can have to be considered for merging.
185-
maxPriority float64
186183
}
187184

188185
// isExcluded returns true if a shard should not be merged, false otherwise.
@@ -213,10 +210,6 @@ func isExcluded(path string, fi os.FileInfo, opts mergeOpts) bool {
213210
return true
214211
}
215212

216-
if priority, err := strconv.ParseFloat(repos[0].RawConfig["priority"], 64); err == nil && priority > opts.maxPriority {
217-
return true
218-
}
219-
220213
return false
221214
}
222215

tombstones.go

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,8 @@ import (
55
"fmt"
66
"os"
77
"path/filepath"
8-
"strconv"
98
)
109

11-
// ShardMergingEnabled returns true if SRC_ENABLE_SHARD_MERGING is set to true.
12-
func ShardMergingEnabled() bool {
13-
t := os.Getenv("SRC_ENABLE_SHARD_MERGING")
14-
enabled, _ := strconv.ParseBool(t)
15-
return enabled
16-
}
17-
1810
var mockRepos []*Repository
1911

2012
// SetTombstone idempotently sets a tombstone for repoName in .meta.

0 commit comments

Comments
 (0)