Skip to content

Commit efda03d

Browse files
authored
MOD-6786 Fix search on terms longer than 128 characters (#5524)
* Move length slicing to NOMODIFY if * add py test * fix slicing * fix test * fix text skip cluster * Adding comments * Update test_issues - skip cluster
1 parent fdd48ae commit efda03d

2 files changed

Lines changed: 20 additions & 4 deletions

File tree

src/tokenize.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,12 +82,14 @@ uint32_t simpleTokenizer_Next(RSTokenizer *base, Token *t) {
8282
char *tok = toksep(&self->pos, &origLen);
8383
// normalize the token
8484
size_t normLen = origLen;
85-
if (normLen > MAX_NORMALIZE_SIZE) {
86-
normLen = MAX_NORMALIZE_SIZE;
87-
}
8885
char normalized_s[MAX_NORMALIZE_SIZE];
8986
char *normBuf;
90-
if (ctx->options & TOKENIZE_NOMODIFY) {
87+
88+
if (ctx->options & TOKENIZE_NOMODIFY) { // This is dead code
89+
// The stack buffer of MAX_NORMALIZE_SIZE bytes is used only when the token must not be modified in place, so the length is capped here for stack allocation safety
90+
if (normLen > MAX_NORMALIZE_SIZE) {
91+
normLen = MAX_NORMALIZE_SIZE;
92+
}
9193
normBuf = normalized_s;
9294
} else {
9395
normBuf = tok;

tests/pytests/test_issues.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1313,3 +1313,17 @@ def test_mod_8568(env:Env):
13131313
env.expect('FT.SEARCH', 'idx', '*', 'GEOFILTER', 'g', '1.1', '1.1', '1', 'km').equal(expected)
13141314
env.expect('FT.SEARCH', 'idx', '*', 'GEOFILTER', 'g', '1.1', '1.1', '1', 'km',
13151315
'GEOFILTER', 'g', '1.1', '1.1', '1000', 'km').equal(expected)
1316+
1317+
@skip(cluster=True)
1318+
def test_mod_6786(env:Env):
1319+
# Test search of long term (>128) inside text field
1320+
MAX_NORMALIZE_SIZE = 128
1321+
env.expect('FT.CREATE', 'idx', 'SCHEMA', 't', 'TEXT').ok()
1322+
1323+
long_term = 'A'*(MAX_NORMALIZE_SIZE+1)
1324+
text_with_long_term = ' '.join([long_term, long_term[:MAX_NORMALIZE_SIZE//2]])
1325+
env.cmd('HSET', 'doc1', 't', text_with_long_term)
1326+
1327+
# Searching for the long term should return the document
1328+
# Before the fix, the long term was only partially normalized and the document was not found
1329+
env.expect('FT.SEARCH', 'idx', long_term).equal([1, 'doc1', ['t', text_with_long_term]])

0 commit comments

Comments
 (0)