Skip to content

Commit 9b0eeb9

Browse files
authored
Merge pull request #67 from Bit-Quill/dev-match_bool_prefix-ppl#187
match_bool_prefix Add PPL Syntax
2 parents 3f6ac10 + b625276 commit 9b0eeb9

5 files changed

Lines changed: 167 additions & 1 deletion

File tree

docs/user/ppl/functions/relevance.rst

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,4 +99,45 @@ Another example to show how to set custom values for the optional parameters::
9999
Limitations
100100
>>>>>>>>>>>
101101

102-
The relevance functions are available to execute only in OpenSearch DSL but not in memory as of now, so the relevance search might fail for queries that are too complex to translate into DSL if the relevance function is following after a complex PPL query. To make your queries always work-able, it is recommended to place the relevance commands as close to the search command as possible, to ensure the relevance functions are eligible to push down. For example, a complex query like ``search source = people | rename firstname as name | dedup account_number | fields name, account_number, balance, employer | where match(employer, 'Open Search') | stats count() by city`` could fail because it is difficult to translate to DSL, but it would be better if we rewrite it to an equivalent query as ``search source = people | where match(employer, 'Open Search') | rename firstname as name | dedup account_number | fields name, account_number, balance, employer | stats count() by city`` by moving the where command with relevance function to the second command right after the search command, and the relevance would be optimized and executed smoothly in OpenSearch DSL. See `Optimization <../../optimization/optimization.rst>`_ to get more details about the query engine optimization.
102+
The relevance functions are available to execute only in OpenSearch DSL but not in memory as of now, so the relevance search might fail for queries that are too complex to translate into DSL if the relevance function is following after a complex PPL query. To make your queries always work-able, it is recommended to place the relevance commands as close to the search command as possible, to ensure the relevance functions are eligible to push down. For example, a complex query like ``search source = people | rename firstname as name | dedup account_number | fields name, account_number, balance, employer | where match(employer, 'Open Search') | stats count() by city`` could fail because it is difficult to translate to DSL, but it would be better if we rewrite it to an equivalent query as ``search source = people | where match(employer, 'Open Search') | rename firstname as name | dedup account_number | fields name, account_number, balance, employer | stats count() by city`` by moving the where command with relevance function to the second command right after the search command, and the relevance would be optimized and executed smoothly in OpenSearch DSL. See `Optimization <../../optimization/optimization.rst>`_ to get more details about the query engine optimization.
103+
104+
105+
MATCH_BOOL_PREFIX
106+
-----------------
107+
108+
Description
109+
>>>>>>>>>>>
110+
111+
``match_bool_prefix(field_expression, query_expression)``
112+
113+
The match_bool_prefix function maps to the match_bool_prefix query in the search engine. match_bool_prefix creates a match query from all but the last term in the query string. The last term is used to create a prefix query. The following optional parameters can be passed after the ``field`` and ``query`` expressions:
114+
115+
- analyzer
116+
- fuzziness
117+
- max_expansions
118+
- prefix_length
119+
- fuzzy_transpositions
120+
- fuzzy_rewrite
121+
- minimum_should_match
122+
- boost
123+
124+
Example with only ``field`` and ``query`` expressions, with all other parameters set to their default values::
125+
126+
os> source=accounts | where match_bool_prefix(address, 'Bristol Stre') | fields firstname, address
127+
fetched rows / total rows = 2/2
128+
+-------------+--------------------+
129+
| firstname | address |
130+
|-------------+--------------------|
131+
| Hattie | 671 Bristol Street |
132+
| Nanette | 789 Madison Street |
133+
+-------------+--------------------+
134+
135+
Another example to show how to set custom values for the optional parameters::
136+
137+
os> source=accounts | where match_bool_prefix(address, 'Bristol Stre', minimum_should_match = 2) | fields firstname, address
138+
fetched rows / total rows = 1/1
139+
+-------------+--------------------+
140+
| firstname | address |
141+
|-------------+--------------------|
142+
| Hattie | 671 Bristol Street |
143+
+-------------+--------------------+
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.ppl;
7+
8+
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_PHRASE;
9+
import static org.opensearch.sql.util.MatcherUtils.rows;
10+
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
11+
12+
import java.io.IOException;
13+
import org.json.JSONObject;
14+
import org.junit.Test;
15+
16+
/**
 * Integration tests that run PPL queries using the {@code match_bool_prefix}
 * relevance function against the phrase test index and check the returned rows.
 */
public class MatchBoolPrefixIT extends PPLIntegTestCase {
17+
18+
/** Loads the phrase index that every query in this class reads from. */
@Override
19+
public void init() throws IOException {
20+
loadIndex(Index.PHRASE);
21+
}
22+
23+
/** A single-term query ('qui') is expected to return both "quick fox" phrases. */
@Test
24+
public void valid_query_match_test() throws IOException {
25+
JSONObject result =
26+
executeQuery(
27+
String.format(
28+
"source=%s | where match_bool_prefix(phrase, 'qui') | fields phrase",
29+
TEST_INDEX_PHRASE));
30+
31+
verifyDataRows(result,
32+
rows("quick fox"),
33+
rows("quick fox here"));
34+
}
35+
36+
/**
 * Passes the optional {@code minimum_should_match} and {@code fuzziness} arguments
 * alongside the query '2 tes' and expects the two "my test" phrases back.
 */
@Test
37+
public void optional_parameter_match_test() throws IOException {
38+
JSONObject result =
39+
executeQuery(
40+
String.format(
41+
"source=%s | where match_bool_prefix(phrase, '2 tes', minimum_should_match=1, fuzziness=2) | fields phrase",
42+
TEST_INDEX_PHRASE));
43+
44+
verifyDataRows(result,
45+
rows("my test"),
46+
rows("my test 2"));
47+
}
48+
49+
/** A query term ('rice') that is expected to match nothing must report a total of 0. */
@Test
50+
public void no_matches_test() throws IOException {
51+
JSONObject result =
52+
executeQuery(
53+
String.format(
54+
"source=%s | where match_bool_prefix(phrase, 'rice') | fields phrase",
55+
TEST_INDEX_PHRASE));
56+
57+
// The assertion reads the "total" field of the JSON response rather than the data rows.
assertEquals(0, result.getInt("total"));
58+
}
59+
}

ppl/src/main/antlr/OpenSearchPPLLexer.g4

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ IF: 'IF';
265265
// RELEVANCE FUNCTIONS AND PARAMETERS
266266
MATCH: 'MATCH';
267267
MATCH_PHRASE: 'MATCH_PHRASE';
268+
// keyword for the match_bool_prefix relevance function
MATCH_BOOL_PREFIX: 'MATCH_BOOL_PREFIX';
268269
ANALYZER: 'ANALYZER';
269270
FUZZINESS: 'FUZZINESS';
270271
AUTO_GENERATE_SYNONYMS_PHRASE_QUERY:'AUTO_GENERATE_SYNONYMS_PHRASE_QUERY';

ppl/src/main/antlr/OpenSearchPPLParser.g4

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,7 @@ binaryOperator
352352
/** names of the relevance functions: one of the MATCH, MATCH_PHRASE or MATCH_BOOL_PREFIX tokens */
relevanceFunctionName
353353
: MATCH
354354
| MATCH_PHRASE
355+
| MATCH_BOOL_PREFIX
355356
;
356357

357358
/** literals and values*/
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.ppl.antlr;
7+
8+
import static org.junit.Assert.assertNotEquals;
9+
10+
import java.util.List;
11+
import org.antlr.v4.runtime.tree.ParseTree;
12+
import org.junit.Test;
13+
import org.junit.runner.RunWith;
14+
import org.junit.runners.Parameterized;
15+
16+
17+
/**
 * Parameterized syntax test: each sample {@code match_bool_prefix} query returned by
 * {@link #sampleQueries()} is fed to {@code PPLSyntaxParser} in its own test run.
 */
@RunWith(Parameterized.class)
18+
public class PPLSyntaxParserMatchBoolPrefixSamplesTests {
19+
20+
21+
/** Returns sample queries that the PPLSyntaxParser is expected to parse successfully.
22+
* @return an Iterable of sample queries.
23+
*/
24+
@Parameterized.Parameters(name = "{0}")
25+
public static Iterable<Object> sampleQueries() {
26+
return List.of(
27+
"source=t a= 1 | where match_bool_prefix(a, 'hello world')",
28+
"source=t a = 1 | where match_bool_prefix(a, 'hello world',"
29+
+ " minimum_should_match = 3)",
30+
"source=t a = 1 | where match_bool_prefix(a, 'hello world', fuzziness='AUTO')",
31+
"source=t a = 1 | where match_bool_prefix(a, 'hello world', fuzziness='AUTO:4,6')",
32+
"source=t a= 1 | where match_bool_prefix(a, 'hello world', prefix_length=0)",
33+
"source=t a= 1 | where match_bool_prefix(a, 'hello world', max_expansions=1)",
34+
"source=t a= 1 | where match_bool_prefix(a, 'hello world',"
35+
+ " fuzzy_transpositions=true)",
36+
"source=t a= 1 | where match_bool_prefix(a, 'hello world',"
37+
+ " fuzzy_rewrite=constant_score)",
38+
"source=t a= 1 | where match_bool_prefix(a, 'hello world',"
39+
+ " fuzzy_rewrite=constant_score_boolean)",
40+
"source=t a= 1 | where match_bool_prefix(a, 'hello world',"
41+
+ " fuzzy_rewrite=scoring_boolean)",
42+
"source=t a= 1 | where match_bool_prefix(a, 'hello world',"
43+
+ " fuzzy_rewrite=top_terms_blended_freqs_1)",
44+
"source=t a= 1 | where match_bool_prefix(a, 'hello world',"
45+
+ " fuzzy_rewrite=top_terms_boost_1)",
46+
"source=t a= 1 | where match_bool_prefix(a, 'hello world',"
47+
+ " fuzzy_rewrite=top_terms_1)",
48+
"source=t a= 1 | where match_bool_prefix(a, 'hello world', boost=1)",
49+
"source=t a = 1 | where match_bool_prefix(a, 'hello world', analyzer = 'standard',"
50+
+ "prefix_length = '0', boost = 1)");
51+
}
52+
53+
/** The sample query under test for this parameterized instance. */
private final String query;
54+
55+
/**
 * Stores one sample query for the test run.
 *
 * @param query one element of {@link #sampleQueries()}
 */
public PPLSyntaxParserMatchBoolPrefixSamplesTests(String query) {
56+
this.query = query;
57+
}
58+
59+
/** Parses the sample query and checks that the resulting parse tree is not null. */
@Test
60+
public void test() {
61+
// presumably parse() throws on a syntax error, which would also fail the test -- TODO confirm
ParseTree tree = new PPLSyntaxParser().parse(query);
62+
// NOTE(review): assertNotNull(tree) would express this check more directly.
assertNotEquals(null, tree);
63+
}
64+
}

0 commit comments

Comments
 (0)