Skip to content

Commit 9c60367

Browse files
committed
SQL: Enhance checks for inexact fields (#39427)
For functions: move checks for `text` fields without underlying `keyword` fields or with many of them (ambiguity) to the type resolution stage. For Order By/Group By: move checks to the `Verifier` to catch early before `QueryTranslator` or execution. Closes: #38501 Fixes: #35203
1 parent 2fae792 commit 9c60367

44 files changed

Lines changed: 495 additions & 248 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2147,25 +2147,25 @@ SELECT NOW() AS result;
21472147
////////////
21482148
limitationSubSelect
21492149
// tag::limitationSubSelect
2150-
SELECT * FROM (SELECT first_name, last_name FROM emp WHERE last_name NOT LIKE '%a%') WHERE first_name LIKE 'A%';
2150+
SELECT * FROM (SELECT first_name, last_name FROM emp WHERE last_name NOT LIKE '%a%') WHERE first_name LIKE 'A%' ORDER BY 1;
21512151

21522152
first_name | last_name
21532153
---------------+---------------
2154-
Anneke |Preusig
2155-
Alejandro |McAlpine
2156-
Anoosh |Peyn
2157-
Arumugam |Ossenbruggen
2154+
Alejandro |McAlpine
2155+
Anneke |Preusig
2156+
Anoosh |Peyn
2157+
Arumugam |Ossenbruggen
21582158
// end::limitationSubSelect
21592159
;
21602160

2161-
limitationSubSelect
2161+
limitationSubSelectRewritten
21622162
// tag::limitationSubSelectRewritten
2163-
SELECT first_name, last_name FROM emp WHERE last_name NOT LIKE '%a%' AND first_name LIKE 'A%';
2163+
SELECT first_name, last_name FROM emp WHERE last_name NOT LIKE '%a%' AND first_name LIKE 'A%' ORDER BY 1;
21642164
// end::limitationSubSelectRewritten
21652165
first_name | last_name
21662166
---------------+---------------
2167-
Anneke |Preusig
2168-
Alejandro |McAlpine
2169-
Anoosh |Peyn
2170-
Arumugam |Ossenbruggen
2167+
Alejandro |McAlpine
2168+
Anneke |Preusig
2169+
Anoosh |Peyn
2170+
Arumugam |Ossenbruggen
21712171
;

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.elasticsearch.xpack.sql.stats.Metrics;
3838
import org.elasticsearch.xpack.sql.tree.Node;
3939
import org.elasticsearch.xpack.sql.type.DataType;
40+
import org.elasticsearch.xpack.sql.type.EsField;
4041
import org.elasticsearch.xpack.sql.util.StringUtils;
4142

4243
import java.util.ArrayList;
@@ -290,7 +291,8 @@ Collection<Failure> verify(LogicalPlan plan) {
290291
*/
291292
private static boolean checkGroupBy(LogicalPlan p, Set<Failure> localFailures,
292293
Map<String, Function> resolvedFunctions, Set<LogicalPlan> groupingFailures) {
293-
return checkGroupByAgg(p, localFailures, resolvedFunctions)
294+
return checkGroupByInexactField(p, localFailures)
295+
&& checkGroupByAgg(p, localFailures, resolvedFunctions)
294296
&& checkGroupByOrder(p, localFailures, groupingFailures)
295297
&& checkGroupByHaving(p, localFailures, groupingFailures, resolvedFunctions);
296298
}
@@ -437,6 +439,21 @@ private static boolean checkGroupByHavingHasOnlyAggs(Expression e, Node<?> sourc
437439
return false;
438440
}
439441

442+
private static boolean checkGroupByInexactField(LogicalPlan p, Set<Failure> localFailures) {
443+
if (p instanceof Aggregate) {
444+
Aggregate a = (Aggregate) p;
445+
446+
// The grouping can not be an aggregate function or an inexact field (e.g. text without a keyword)
447+
a.groupings().forEach(e -> e.forEachUp(c -> {
448+
EsField.Exact exact = c.getExactInfo();
449+
if (exact.hasExact() == false) {
450+
localFailures.add(fail(c, "Field of data type [" + c.dataType().esType+ "] cannot be used for grouping; " +
451+
exact.errorMsg()));
452+
}
453+
}, FieldAttribute.class));
454+
}
455+
return true;
456+
}
440457

441458
// check whether plain columns specified in an agg are mentioned in the group-by
442459
private static boolean checkGroupByAgg(LogicalPlan p, Set<Failure> localFailures, Map<String, Function> functions) {
@@ -696,4 +713,4 @@ private static boolean areTypesCompatible(DataType left, DataType right) {
696713
(left.isNumeric() && right.isNumeric());
697714
}
698715
}
699-
}
716+
}

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/SourceGenerator.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333

3434
public abstract class SourceGenerator {
3535

36+
private SourceGenerator() {}
37+
3638
private static final List<String> NO_STORED_FIELD = singletonList(StoredFieldsContext._NONE_);
3739

3840
public static SearchSourceBuilder sourceBuilder(QueryContainer container, QueryBuilder filter, Integer size) {
@@ -107,8 +109,7 @@ private static void sorting(QueryContainer container, SearchSourceBuilder source
107109

108110
// sorting only works on not-analyzed fields - look for a multi-field replacement
109111
if (attr instanceof FieldAttribute) {
110-
FieldAttribute fa = (FieldAttribute) attr;
111-
fa = fa.isInexact() ? fa.exactAttribute() : fa;
112+
FieldAttribute fa = ((FieldAttribute) attr).exactAttribute();
112113

113114
sortBuilder = fieldSort(fa.name())
114115
.missing(as.missing().position())
@@ -125,7 +126,8 @@ private static void sorting(QueryContainer container, SearchSourceBuilder source
125126
if (nestedSort == null) {
126127
fieldSort.setNestedSort(newSort);
127128
} else {
128-
for (; nestedSort.getNestedSort() != null; nestedSort = nestedSort.getNestedSort()) {
129+
while (nestedSort.getNestedSort() != null) {
130+
nestedSort = nestedSort.getNestedSort();
129131
}
130132
nestedSort.setNestedSort(newSort);
131133
}

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/Expressions.java

Lines changed: 0 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,15 @@
55
*/
66
package org.elasticsearch.xpack.sql.expression;
77

8-
import org.elasticsearch.common.Strings;
98
import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
10-
import org.elasticsearch.xpack.sql.expression.Expression.TypeResolution;
119
import org.elasticsearch.xpack.sql.expression.gen.pipeline.Pipe;
1210
import org.elasticsearch.xpack.sql.type.DataType;
13-
import org.elasticsearch.xpack.sql.type.DataTypes;
1411

1512
import java.util.ArrayList;
1613
import java.util.Collection;
1714
import java.util.List;
18-
import java.util.Locale;
1915
import java.util.function.Predicate;
2016

21-
import static java.lang.String.format;
2217
import static java.util.Collections.emptyList;
2318
import static java.util.Collections.emptyMap;
2419

@@ -153,43 +148,4 @@ public static List<Pipe> pipe(List<Expression> expressions) {
153148
}
154149
return pipes;
155150
}
156-
157-
public static TypeResolution typeMustBeBoolean(Expression e, String operationName, ParamOrdinal paramOrd) {
158-
return typeMustBe(e, dt -> dt == DataType.BOOLEAN, operationName, paramOrd, "boolean");
159-
}
160-
161-
public static TypeResolution typeMustBeInteger(Expression e, String operationName, ParamOrdinal paramOrd) {
162-
return typeMustBe(e, DataType::isInteger, operationName, paramOrd, "integer");
163-
}
164-
165-
public static TypeResolution typeMustBeNumeric(Expression e, String operationName, ParamOrdinal paramOrd) {
166-
return typeMustBe(e, DataType::isNumeric, operationName, paramOrd, "numeric");
167-
}
168-
169-
public static TypeResolution typeMustBeString(Expression e, String operationName, ParamOrdinal paramOrd) {
170-
return typeMustBe(e, DataType::isString, operationName, paramOrd, "string");
171-
}
172-
173-
public static TypeResolution typeMustBeDate(Expression e, String operationName, ParamOrdinal paramOrd) {
174-
return typeMustBe(e, dt -> dt == DataType.DATE, operationName, paramOrd, "date");
175-
}
176-
177-
public static TypeResolution typeMustBeNumericOrDate(Expression e, String operationName, ParamOrdinal paramOrd) {
178-
return typeMustBe(e, dt -> dt.isNumeric() || dt == DataType.DATE, operationName, paramOrd, "numeric", "date");
179-
}
180-
181-
public static TypeResolution typeMustBe(Expression e,
182-
Predicate<DataType> predicate,
183-
String operationName,
184-
ParamOrdinal paramOrd,
185-
String... acceptedTypes) {
186-
return predicate.test(e.dataType()) || DataTypes.isNull(e.dataType())?
187-
TypeResolution.TYPE_RESOLVED :
188-
new TypeResolution(format(Locale.ROOT, "[%s]%s argument must be [%s], found value [%s] type [%s]",
189-
operationName,
190-
paramOrd == null || paramOrd == ParamOrdinal.DEFAULT ? "" : " " + paramOrd.name().toLowerCase(Locale.ROOT),
191-
Strings.arrayToDelimitedString(acceptedTypes, " or "),
192-
Expressions.name(e),
193-
e.dataType().esType));
194-
}
195151
}

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/FieldAttribute.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,12 +81,13 @@ public FieldAttribute nestedParent() {
8181
return nestedParent;
8282
}
8383

84-
public boolean isInexact() {
85-
return field.isExact() == false;
84+
public EsField.Exact getExactInfo() {
85+
return field.getExactInfo();
8686
}
8787

8888
public FieldAttribute exactAttribute() {
89-
if (field.isExact() == false) {
89+
EsField exactField = field.getExactField();
90+
if (exactField.equals(field) == false) {
9091
return innerField(field.getExactField());
9192
}
9293
return this;

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/Order.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import java.util.Objects;
1414

1515
import static java.util.Collections.singletonList;
16+
import static org.elasticsearch.xpack.sql.expression.TypeResolutions.isExact;
1617

1718
public class Order extends Expression {
1819

@@ -45,6 +46,11 @@ public Nullability nullable() {
4546
return Nullability.FALSE;
4647
}
4748

49+
@Override
50+
protected TypeResolution resolveType() {
51+
return isExact(child, "ORDER BY cannot be applied to field of data type [{}]: {}");
52+
}
53+
4854
@Override
4955
public DataType dataType() {
5056
return child.dataType();
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
package org.elasticsearch.xpack.sql.expression;
7+
8+
import org.elasticsearch.xpack.sql.type.DataType;
9+
import org.elasticsearch.xpack.sql.type.DataTypes;
10+
import org.elasticsearch.xpack.sql.type.EsField;
11+
12+
import java.util.Locale;
13+
import java.util.StringJoiner;
14+
import java.util.function.Predicate;
15+
16+
import static org.elasticsearch.common.logging.LoggerMessageFormat.format;
17+
import static org.elasticsearch.xpack.sql.expression.Expression.TypeResolution;
18+
import static org.elasticsearch.xpack.sql.expression.Expressions.ParamOrdinal;
19+
import static org.elasticsearch.xpack.sql.expression.Expressions.name;
20+
import static org.elasticsearch.xpack.sql.type.DataType.BOOLEAN;
21+
22+
public final class TypeResolutions {
23+
24+
private TypeResolutions() {}
25+
26+
public static TypeResolution isBoolean(Expression e, String operationName, ParamOrdinal paramOrd) {
27+
return isType(e, dt -> dt == BOOLEAN, operationName, paramOrd, "boolean");
28+
}
29+
30+
public static TypeResolution isInteger(Expression e, String operationName, ParamOrdinal paramOrd) {
31+
return isType(e, DataType::isInteger, operationName, paramOrd, "integer");
32+
}
33+
34+
public static TypeResolution isNumeric(Expression e, String operationName, ParamOrdinal paramOrd) {
35+
return isType(e, DataType::isNumeric, operationName, paramOrd, "numeric");
36+
}
37+
38+
public static TypeResolution isString(Expression e, String operationName, ParamOrdinal paramOrd) {
39+
return isType(e, DataType::isString, operationName, paramOrd, "string");
40+
}
41+
42+
public static TypeResolution isDate(Expression e, String operationName, ParamOrdinal paramOrd) {
43+
return isType(e, DataType::isDateBased, operationName, paramOrd, "date");
44+
}
45+
46+
public static TypeResolution isNumericOrDate(Expression e, String operationName, ParamOrdinal paramOrd) {
47+
return isType(e, dt -> dt.isNumeric() || dt.isDateBased(), operationName, paramOrd, "date", "numeric");
48+
}
49+
50+
public static TypeResolution isExact(Expression e, String message) {
51+
if (e instanceof FieldAttribute) {
52+
EsField.Exact exact = ((FieldAttribute) e).getExactInfo();
53+
if (exact.hasExact() == false) {
54+
return new TypeResolution(format(null, message, e.dataType().esType, exact.errorMsg()));
55+
}
56+
}
57+
return TypeResolution.TYPE_RESOLVED;
58+
}
59+
60+
public static TypeResolution isExact(Expression e, String operationName, ParamOrdinal paramOrd) {
61+
if (e instanceof FieldAttribute) {
62+
EsField.Exact exact = ((FieldAttribute) e).getExactInfo();
63+
if (exact.hasExact() == false) {
64+
return new TypeResolution(format(null, "[{}] cannot operate on {}field of data type [{}]: {}",
65+
operationName,
66+
paramOrd == null || paramOrd == ParamOrdinal.DEFAULT ?
67+
"" : paramOrd.name().toLowerCase(Locale.ROOT) + " argument ",
68+
e.dataType().esType, exact.errorMsg()));
69+
}
70+
}
71+
return TypeResolution.TYPE_RESOLVED;
72+
}
73+
74+
public static TypeResolution isStringAndExact(Expression e, String operationName, ParamOrdinal paramOrd) {
75+
TypeResolution resolution = isString(e, operationName, paramOrd);
76+
if (resolution.unresolved()) {
77+
return resolution;
78+
}
79+
80+
return isExact(e, operationName, paramOrd);
81+
}
82+
83+
public static TypeResolution isFoldable(Expression e, String operationName, ParamOrdinal paramOrd) {
84+
if (!e.foldable()) {
85+
return new TypeResolution(format(null, "{}argument of [{}] must be a constant, received [{}]",
86+
paramOrd == null || paramOrd == ParamOrdinal.DEFAULT ? "" : paramOrd.name().toLowerCase(Locale.ROOT) + " ",
87+
operationName,
88+
Expressions.name(e)));
89+
}
90+
return TypeResolution.TYPE_RESOLVED;
91+
}
92+
93+
public static TypeResolution isNotFoldable(Expression e, String operationName, ParamOrdinal paramOrd) {
94+
if (e.foldable()) {
95+
return new TypeResolution(format(null, "{}argument of [{}] must be a table column, found constant [{}]",
96+
paramOrd == null || paramOrd == ParamOrdinal.DEFAULT ? "" : paramOrd.name().toLowerCase(Locale.ROOT) + " ",
97+
operationName,
98+
Expressions.name(e)));
99+
}
100+
return TypeResolution.TYPE_RESOLVED;
101+
}
102+
103+
public static TypeResolution isType(Expression e,
104+
Predicate<DataType> predicate,
105+
String operationName,
106+
ParamOrdinal paramOrd,
107+
String... acceptedTypes) {
108+
return predicate.test(e.dataType()) || DataTypes.isNull(e.dataType())?
109+
TypeResolution.TYPE_RESOLVED :
110+
new TypeResolution(format(null, "{}argument of [{}] must be [{}], found value [{}] type [{}]",
111+
paramOrd == null || paramOrd == ParamOrdinal.DEFAULT ? "" : paramOrd.name().toLowerCase(Locale.ROOT) + " ",
112+
operationName,
113+
acceptedTypesForErrorMsg(acceptedTypes),
114+
name(e),
115+
e.dataType().esType));
116+
}
117+
118+
private static String acceptedTypesForErrorMsg(String... acceptedTypes) {
119+
StringJoiner sj = new StringJoiner(", ");
120+
for (int i = 0; i < acceptedTypes.length - 1; i++) {
121+
sj.add(acceptedTypes[i]);
122+
}
123+
if (acceptedTypes.length > 1) {
124+
return sj.toString() + " or " + acceptedTypes[acceptedTypes.length - 1];
125+
} else {
126+
return acceptedTypes[0];
127+
}
128+
}
129+
}

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/AggregateFunction.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
99
import org.elasticsearch.xpack.sql.expression.Expression;
10+
import org.elasticsearch.xpack.sql.expression.Expressions;
11+
import org.elasticsearch.xpack.sql.expression.TypeResolutions;
1012
import org.elasticsearch.xpack.sql.expression.function.Function;
1113
import org.elasticsearch.xpack.sql.expression.gen.pipeline.AggNameInput;
1214
import org.elasticsearch.xpack.sql.expression.gen.pipeline.Pipe;
@@ -78,8 +80,13 @@ public boolean equals(Object obj) {
7880
&& Objects.equals(other.parameters(), parameters());
7981
}
8082

83+
@Override
84+
protected TypeResolution resolveType() {
85+
return TypeResolutions.isExact(field, functionName(), Expressions.ParamOrdinal.DEFAULT);
86+
}
87+
8188
@Override
8289
public int hashCode() {
8390
return Objects.hash(field(), parameters());
8491
}
85-
}
92+
}

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Max.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,15 @@
66
package org.elasticsearch.xpack.sql.expression.function.aggregate;
77

88
import org.elasticsearch.xpack.sql.expression.Expression;
9-
import org.elasticsearch.xpack.sql.expression.Expressions;
109
import org.elasticsearch.xpack.sql.expression.Expressions.ParamOrdinal;
1110
import org.elasticsearch.xpack.sql.tree.Location;
1211
import org.elasticsearch.xpack.sql.tree.NodeInfo;
1312
import org.elasticsearch.xpack.sql.type.DataType;
1413

1514
import java.util.List;
1615

16+
import static org.elasticsearch.xpack.sql.expression.TypeResolutions.isNumericOrDate;
17+
1718
/**
1819
* Find the maximum value in matching documents.
1920
*/
@@ -45,6 +46,6 @@ public String innerName() {
4546

4647
@Override
4748
protected TypeResolution resolveType() {
48-
return Expressions.typeMustBeNumericOrDate(field(), functionName(), ParamOrdinal.DEFAULT);
49+
return isNumericOrDate(field(), functionName(), ParamOrdinal.DEFAULT);
4950
}
5051
}

0 commit comments

Comments
 (0)