Skip to content

Commit 4df508d

Browse files
[2.10] MOD-7634: Fix NOSTEM (#5392)
MOD-7634: Fix NOSTEM (#4956) * Fix NOSTEM * Fix testNoStem * Check spec is not NULL * Add comments to testNoStem * MOD-7596: Add mirroring support (#4937) * * initial commit * * Use RediSearch branch mirroring fork * * Code Review - Round #1 * Change maxPending requests when changing the number of connections (#4972) change the number of maxPending requests when changing the number of connections * MOD-7695: Decide Ubuntu Runner Using Variable (#4975) * * initial commit * * code review * Enable MT in comunity eddition by default - [MOD-7160] (#4974) * remove compilation flag (wip) * finish removing compilation flag * remove from tests * MOD-7706 fix ramp-packer version (#4980) ramp-packer 2.5.10 * Fix expansion for modifier lists * Delete event-mirror.yml it is not part of this PR * Simplify bit masking * Update src/query.c Co-authored-by: Omer Shadmi <76992134+oshadmi@users.noreply.github.com> * Update src/ext/default.c Co-authored-by: Omer Shadmi <76992134+oshadmi@users.noreply.github.com> --------- Co-authored-by: kei-nan <jonathan.keinan@redis.com> Co-authored-by: GuyAv46 <47632673+GuyAv46@users.noreply.github.com> Co-authored-by: DvirDukhan <dvir@redis.com> Co-authored-by: Omer Shadmi <76992134+oshadmi@users.noreply.github.com> (cherry picked from commit 54af5c8) Co-authored-by: nafraf <nafraf@users.noreply.github.com>
1 parent bc003c1 commit 4df508d

5 files changed

Lines changed: 196 additions & 61 deletions

File tree

src/ext/default.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,10 +479,46 @@ int StemmerExpander(RSQueryExpanderCtx *ctx, RSToken *token) {
479479
char *dup = rm_malloc(sl + 2);
480480
dup[0] = STEM_PREFIX;
481481
memcpy(dup + 1, stemmed, sl + 1);
482+
483+
// Get fieldMask which includes only expandable fields
484+
QueryNode *qn = *ctx->currentNode;
485+
t_fieldMask orig_fm = qn->opts.fieldMask;
486+
t_fieldMask expandable_fm = qn->opts.fieldMask;
487+
if (orig_fm != RS_FIELDMASK_ALL) {
488+
t_fieldMask fm = qn->opts.fieldMask;
489+
t_fieldMask bit_mask = 1;
490+
while (fm) {
491+
if (fm & bit_mask) {
492+
const FieldSpec *fs = IndexSpec_GetFieldByBit(ctx->handle->spec, bit_mask);
493+
if (fs && FieldSpec_IsNoStem(fs)) {
494+
expandable_fm &= ~bit_mask;
495+
}
496+
}
497+
fm &= ~bit_mask;
498+
bit_mask <<= 1;
499+
}
500+
}
501+
502+
/* Replace current node with a new union node if needed */
503+
if (qn->type != QN_UNION) {
504+
QueryNode *un = NewUnionNode();
505+
506+
un->opts.fieldMask = qn->opts.fieldMask;
507+
508+
/* Append current node to the new union node as a child */
509+
QueryNode_AddChild(un, qn);
510+
*ctx->currentNode = un;
511+
}
512+
513+
// Add expanded nodes with corresponding field mask
514+
qn = *ctx->currentNode;
515+
qn->opts.fieldMask = expandable_fm;
482516
ctx->ExpandToken(ctx, dup, sl + 1, 0x0); // TODO: Set proper flags here
483517
if (sl != token->len || strncmp((const char *)stemmed, token->str, token->len)) {
484518
ctx->ExpandToken(ctx, rm_strndup((const char *)stemmed, sl), sl, 0x0);
485519
}
520+
// Restore field mask of UNION node
521+
qn->opts.fieldMask = orig_fm;
486522
}
487523
return REDISMODULE_OK;
488524
}

src/query.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,30 @@ static void QueryNode_Expand(RSQueryTokenExpander expander, RSQueryExpanderCtx *
468468
return;
469469
}
470470

471+
// Check that there is at least one stemmable field in the query
472+
if (expCtx->handle && expCtx->handle->spec) {
473+
const IndexSpec *spec = expCtx->handle->spec;
474+
t_fieldMask fm = qn->opts.fieldMask;
475+
if ( fm != RS_FIELDMASK_ALL) {
476+
int expand = 0;
477+
t_fieldMask bit_mask = 1;
478+
while (fm) {
479+
if (fm & bit_mask) {
480+
const FieldSpec *fs = IndexSpec_GetFieldByBit(spec, bit_mask);
481+
if (fs && !FieldSpec_IsNoStem(fs)) {
482+
expand = 1;
483+
break;
484+
}
485+
}
486+
fm &= ~bit_mask;
487+
bit_mask <<= 1;
488+
}
489+
if (!expand) {
490+
return;
491+
}
492+
}
493+
}
494+
471495
if (qn->type == QN_TOKEN && qn->tn.len > 0) {
472496
expCtx->currentNode = pqn;
473497
expander(expCtx, &qn->tn);

tests/pytests/test.py

Lines changed: 124 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1993,29 +1993,139 @@ def testInfoCommandImplied(env):
19931993
env.assertNotEqual(-1, d['index_options'].index('NOHL'))
19941994

19951995
def testNoStem(env):
1996-
env.cmd('ft.create', 'idx', 'ON', 'HASH',
1997-
'schema', 'body', 'text', 'name', 'text', 'nostem')
1998-
if not env.isCluster():
1999-
# todo: change it to be more generic to pass on isCluster
2000-
res = env.cmd('ft.info', 'idx')
2001-
env.assertEqual(res[7][1][8], 'NOSTEM')
1996+
conn = getConnectionByEnv(env)
1997+
1998+
env.expect('ft.create', 'idx', 'ON', 'HASH',
1999+
'schema', 'body', 'text', 'name', 'text', 'nostem',
2000+
'body2', 'text', 'name2', 'text', 'nostem').ok()
2001+
d = index_info(env, 'idx')
2002+
env.assertEqual(d['attributes'][1][8],'NOSTEM')
2003+
env.assertEqual(d['attributes'][3][8],'NOSTEM')
20022004
for _ in env.reloadingIterator():
20032005
waitForIndex(env, 'idx')
20042006
try:
2005-
env.cmd('ft.del', 'idx', 'doc')
2007+
conn.execute_command('ft.del', 'idx', 'doc')
20062008
except redis.ResponseError:
20072009
pass
20082010

2009-
# Insert a document
2010-
env.assertCmdOk('ft.add', 'idx', 'doc', 1.0, 'fields',
2011-
'body', "located",
2012-
'name', "located")
2011+
# Insert documents
2012+
conn.execute_command('HSET', 'doc1', 'body', "located", 'name', "located")
2013+
conn.execute_command('HSET', 'doc2', 'body', "smith", 'name', "smith")
2014+
conn.execute_command('HSET', 'doc3', 'body', "smiths", 'name', "smiths")
2015+
conn.execute_command('HSET', 'doc4', 'body', "cherry")
2016+
conn.execute_command('HSET', 'doc5', 'body', "cherries")
2017+
conn.execute_command('HSET', 'doc6', 'name', "candy")
2018+
conn.execute_command('HSET', 'doc7', 'name', "candies")
2019+
conn.execute_command('HSET', 'doc8', 'body2', "cherries")
2020+
conn.execute_command('HSET', 'doc9', 'name2', "candies")
20132021

20142022
# Now search for the fields
2015-
res_body = env.cmd('ft.search', 'idx', '@body:location')
2016-
res_name = env.cmd('ft.search', 'idx', '@name:location')
2017-
env.assertEqual(0, res_name[0])
2023+
res_body = conn.execute_command('ft.search', 'idx', '@body:location')
20182024
env.assertEqual(1, res_body[0])
2025+
res_name = conn.execute_command('ft.search', 'idx', '@name:location')
2026+
env.assertEqual(0, res_name[0])
2027+
2028+
res_body = conn.execute_command('ft.search', 'idx', '@body:smith')
2029+
env.assertEqual(2, res_body[0])
2030+
res_name = conn.execute_command('ft.search', 'idx', '@name:smith')
2031+
env.assertEqual(1, res_name[0])
2032+
2033+
res_body = conn.execute_command('ft.search', 'idx', '@body:smiths')
2034+
env.assertEqual(2, res_body[0])
2035+
res_name = conn.execute_command('ft.search', 'idx', '@name:smiths')
2036+
env.assertEqual(1, res_name[0])
2037+
2038+
# Test modifier list
2039+
# 2 results are returned because 'body' field is stemming 'cherry'
2040+
res = conn.execute_command('ft.search', 'idx', '@body|name:cherry')
2041+
env.assertEqual(2, res[0])
2042+
res = conn.execute_command('ft.search', 'idx', '@body|name:cherries')
2043+
env.assertEqual(2, res[0])
2044+
2045+
# only 1 result is returned because 'name' field is not stemming
2046+
res = conn.execute_command('ft.search', 'idx', '@body|name:candy')
2047+
env.assertEqual(1, res[0])
2048+
res = conn.execute_command('ft.search', 'idx', '@body|name:candies')
2049+
env.assertEqual(1, res[0])
2050+
2051+
# 3 results are returned because 'body' field is stemming 'candy'
2052+
# but 'name' field is not stemming
2053+
res = conn.execute_command(
2054+
'ft.search', 'idx','@body|name:(candy|cherry)', 'dialect', 2)
2055+
env.assertEqual(3, res[0])
2056+
res2 = conn.execute_command(
2057+
'ft.search', 'idx','@body:(candy|cherry) | @name:(candy|cherry)',
2058+
'dialect', 2)
2059+
env.assertEqual(res, res2)
2060+
2061+
res = conn.execute_command(
2062+
'ft.search', 'idx', '@body|name:(candies|cherries)', 'dialect', 2)
2063+
env.assertEqual(3, res[0])
2064+
res2 = conn.execute_command(
2065+
'ft.search', 'idx', '@body:(candies|cherries) | @name:(candies|cherries)',
2066+
'dialect', 2)
2067+
env.assertEqual(res, res2)
2068+
2069+
# Test explaincli single field stemming
2070+
env.expect('ft.explain', 'idx', '@body:candy').equal(r'''
2071+
@body:UNION {
2072+
@body:candy
2073+
@body:+candi(expanded)
2074+
@body:candi(expanded)
2075+
}
2076+
'''[1:])
2077+
2078+
# Test explaincli with modifier list fields, all fields expanded
2079+
env.expect('ft.explain', 'idx', '@body|body2:candy').equal(r'''
2080+
@body|body2:UNION {
2081+
@body|body2:candy
2082+
@body|body2:+candi(expanded)
2083+
@body|body2:candi(expanded)
2084+
}
2085+
'''[1:])
2086+
2087+
# Test explaincli single field with NOSTEM
2088+
env.expect('ft.explain', 'idx', '@name:candy').equal(r'''
2089+
@name:candy
2090+
'''[1:])
2091+
2092+
# Test explaincli with modifier list NOSTEM fields
2093+
env.expect('ft.explain', 'idx', '@name|name2:candy').equal(r'''
2094+
@name|name2:candy
2095+
'''[1:])
2096+
2097+
# Mixing NOSTEM and stemming fields in the same modifier list
2098+
env.expect('ft.explain', 'idx', '@body|name:candy').equal(r'''
2099+
@body|name:UNION {
2100+
@body|name:candy
2101+
@body:+candi(expanded)
2102+
@body:candi(expanded)
2103+
}
2104+
'''[1:])
2105+
2106+
env.expect('ft.explain', 'idx', '@name2|body|name:candy').equal(r'''
2107+
@body|name|name2:UNION {
2108+
@body|name|name2:candy
2109+
@body:+candi(expanded)
2110+
@body:candi(expanded)
2111+
}
2112+
'''[1:])
2113+
2114+
env.expect('ft.explain', 'idx', '@body2|body|name:candy').equal(r'''
2115+
@body|name|body2:UNION {
2116+
@body|name|body2:candy
2117+
@body|body2:+candi(expanded)
2118+
@body|body2:candi(expanded)
2119+
}
2120+
'''[1:])
2121+
2122+
env.expect('ft.explain', 'idx', '@body2|body|name|name2:candy').equal(r'''
2123+
@body|name|body2|name2:UNION {
2124+
@body|name|body2|name2:candy
2125+
@body|body2:+candi(expanded)
2126+
@body|body2:candi(expanded)
2127+
}
2128+
'''[1:])
20192129

20202130
def testSortbyMissingField(env):
20212131
# GH Issue 131

tests/pytests/test_dialect.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def test_v1_vs_v2(env):
5656
env.expect('FT.EXPLAIN', 'idx', '@num:[0 .1]', 'DIALECT', 1).contains('NUMERIC {0.000000 <= @num <= 1.000000}\n')
5757
env.expect('FT.EXPLAIN', 'idx', '@num:[0 .1]', 'DIALECT', 2).contains('NUMERIC {0.000000 <= @num <= 1.000000}\n')
5858

59-
env.expect('FT.EXPLAIN', 'idx', '@t1:@t2:@t3:hello', 'DIALECT', 1).contains('@NULL:UNION {\n @NULL:hello\n @NULL:+hello(expanded)\n}\n')
59+
env.expect('FT.EXPLAIN', 'idx', '@t1:@t2:@t3:hello', 'DIALECT', 1).contains('@NULL:hello\n')
6060
env.expect('FT.EXPLAIN', 'idx', '@t1:@t2:@t3:hello', 'DIALECT', 2).error().contains('Syntax error')
6161

6262
env.expect('FT.EXPLAIN', 'idx', '@title:{foo}}}}}', 'DIALECT', 1).contains('TAG:@title {\n foo\n}\n')

tests/pytests/test_parser.py

Lines changed: 11 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -268,14 +268,8 @@ def test_modifier_v1():
268268
env.expect('FT.EXPLAIN', 'idx', '@t1:hello world @t2:howdy').equal(r'''
269269
INTERSECT {
270270
@t1:INTERSECT {
271-
@t1:UNION {
272-
@t1:hello
273-
@t1:+hello(expanded)
274-
}
275-
@t1:UNION {
276-
@t1:world
277-
@t1:+world(expanded)
278-
}
271+
@t1:hello
272+
@t1:world
279273
}
280274
@t2:UNION {
281275
@t2:howdy
@@ -287,19 +281,9 @@ def test_modifier_v1():
287281

288282
env.expect('FT.EXPLAIN', 'idx', '@t1:(hello|world|mars)').equal(r'''
289283
@t1:UNION {
290-
@t1:UNION {
291-
@t1:hello
292-
@t1:+hello(expanded)
293-
}
294-
@t1:UNION {
295-
@t1:world
296-
@t1:+world(expanded)
297-
}
298-
@t1:UNION {
299-
@t1:mars
300-
@t1:+mar(expanded)
301-
@t1:mar(expanded)
302-
}
284+
@t1:hello
285+
@t1:world
286+
@t1:mars
303287
}
304288
'''[1:])
305289

@@ -317,10 +301,7 @@ def test_modifier_v2(env):
317301

318302
env.expect('FT.EXPLAIN', 'idx', '@t1:hello world @t2:howdy').equal(r'''
319303
INTERSECT {
320-
@t1:UNION {
321-
@t1:hello
322-
@t1:+hello(expanded)
323-
}
304+
@t1:hello
324305
UNION {
325306
world
326307
+world(expanded)
@@ -335,19 +316,9 @@ def test_modifier_v2(env):
335316

336317
env.expect('FT.EXPLAIN', 'idx', '@t1:(hello|world|mars)').equal('''
337318
@t1:UNION {
338-
@t1:UNION {
339-
@t1:hello
340-
@t1:+hello(expanded)
341-
}
342-
@t1:UNION {
343-
@t1:world
344-
@t1:+world(expanded)
345-
}
346-
@t1:UNION {
347-
@t1:mars
348-
@t1:+mar(expanded)
349-
@t1:mar(expanded)
350-
}
319+
@t1:hello
320+
@t1:world
321+
@t1:mars
351322
}
352323
'''[1:])
353324

@@ -358,14 +329,8 @@ def test_modifier_v2(env):
358329
env.expect('FT.EXPLAIN', 'idx', '@t1:(hello world)=>[KNN 10 @v $B]', 'PARAMS', 2, 'B', '#blob#').equal(r'''
359330
VECTOR {
360331
@t1:INTERSECT {
361-
@t1:UNION {
362-
@t1:hello
363-
@t1:+hello(expanded)
364-
}
365-
@t1:UNION {
366-
@t1:world
367-
@t1:+world(expanded)
368-
}
332+
@t1:hello
333+
@t1:world
369334
}
370335
} => {K=10 nearest vectors to `$B` in vector index associated with field @v, yields distance as `__v_score`}
371336
'''[1:])

0 commit comments

Comments
 (0)