Skip to content

Commit 640102a

Browse files
authored
Increase filename boost (#785)
When we introduced filename boosting in BM25, we set it to a very conservative weight. This PR increases the weight from 2.0 to 5.0, which improves results on relevant evals. Relates to SPLF-88.
1 parent df7a7e7 commit 640102a

2 files changed

Lines changed: 4 additions & 4 deletions

File tree

build/scoring_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ func TestBM25(t *testing.T) {
7878
content: exampleJava,
7979
language: "Java",
8080
// bm25-score:1.82 (sum-tf: 10.00, length-ratio: 2.00)
81-
wantScore: 1.69,
81+
wantScore: 1.82,
8282
}, {
8383
// Matches only on content
8484
fileName: "example.java",
@@ -99,15 +99,15 @@ func TestBM25(t *testing.T) {
9999
content: exampleJava,
100100
language: "Java",
101101
// bm25-score:1.55 (sum-tf: 5.00, length-ratio: 2.00)
102-
wantScore: 1.07,
102+
wantScore: 1.55,
103103
},
104104
{
105105
// Matches only on filename, and content is missing
106106
fileName: "a/b/c/config.go",
107107
query: &query.Substring{Pattern: "config.go"},
108108
language: "Go",
109109
// bm25-score:2.08 (sum-tf: 5.00, length-ratio: 0.00)
110-
wantScore: 1.91,
110+
wantScore: 2.08,
111111
},
112112
}
113113

score.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ func (d *indexData) scoreFileUsingBM25(fileMatch *FileMatch, doc uint32, cands [
132132
term := string(cand.substrLowered)
133133

134134
if cand.fileName {
135-
termFreqs[term] += 2
135+
termFreqs[term] += 5
136136
} else {
137137
termFreqs[term]++
138138
}

0 commit comments

Comments
 (0)