Skip to content

Commit 5fb7136

Browse files
authored
ESQL: Fix incorrectly optimized fork with nullify unmapped_fields (elastic#143030)
This PR fixes a bug where `Fork.withSubPlans()` incorrectly reassigned new `NameId`s to its output attributes, breaking references in the upper plan. This issue specifically manifests when using `FORK` alongside the `SET unmapped_fields="nullify"` mode. By design, a `FORK` assigns new `NameId`s to its output attributes via `refreshOutput()` to decouple them from the internal branches. This isolation is necessary to prevent unintended side effects during plan optimizations, such as aggressive constant folding leaking across branches. However, the previous implementation unconditionally re-minted these `NameId`s every time `withSubPlans()` was called. Because of this, any node sitting above the `FORK` (like `EVAL` or `STATS`) that already held a reference to the initial `NameId`s would suddenly point to a nonexistent ID. Downstream analysis rules would then fail to resolve these orphaned references, causing the plan execution to fail with an *"optimized incorrectly due to missing references"* error. Fixes elastic#142762
1 parent 4a1f52c commit 5fb7136

10 files changed

Lines changed: 507 additions & 34 deletions

File tree

docs/changelog/143030.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 143030
2+
summary: "ESQL: Fix incorrectly optimized fork with nullify unmapped_fields"
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 142762

muted-tests.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -354,9 +354,6 @@ tests:
354354
- class: org.elasticsearch.xpack.sql.qa.security.CliApiKeyIT
355355
method: testCliConnectionWithInvalidApiKey
356356
issue: https://github.com/elastic/elasticsearch/issues/143646
357-
- class: org.elasticsearch.xpack.esql.qa.single_node.GenerativeForkIT
358-
method: test {csv-spec:unmapped-nullify.TsWithAggsByMissing}
359-
issue: https://github.com/elastic/elasticsearch/issues/142762
360357
- class: org.elasticsearch.compute.lucene.query.LuceneTopNSourceOperatorScoringTests
361358
method: testAccumulateSearchLoad
362359
issue: https://github.com/elastic/elasticsearch/issues/143750

x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,6 @@ public abstract class GenerativeRestTest extends ESRestTestCase implements Query
9797
"found value \\[.*\\] type \\[unsupported\\]", // https://github.com/elastic/elasticsearch/issues/142761
9898
"change point value \\[.*\\] must be numeric", // https://github.com/elastic/elasticsearch/issues/142858
9999
"illegal query_string option \\[boost\\]", // https://github.com/elastic/elasticsearch/issues/142758
100-
// https://github.com/elastic/elasticsearch/issues/142543
101-
"Column \\[.*\\] has conflicting data types in FORK branches: \\[NULL\\] and \\[.*\\]",
102-
"Column \\[.*\\] has conflicting data types in FORK branches: \\[.*\\] and \\[NULL\\]",
103100
"Field \\[.*\\] of type \\[.*\\] does not support match.* queries",
104101
"JOIN left field \\[.*\\] of type \\[NULL\\] is incompatible with right", // https://github.com/elastic/elasticsearch/issues/141827
105102
// https://github.com/elastic/elasticsearch/issues/141827
@@ -312,7 +309,6 @@ private record FailureContext(
312309
},
313310
ctx -> isUnmappedFieldError(ctx.errorMessage, ctx.query),
314311
ctx -> isScalarTypeMismatchError(ctx.errorMessage),
315-
ctx -> isForkOptimizationBugWithUnmappedFields(ctx.errorMessage, ctx.query),
316312
ctx -> isFieldFullTextError(ctx.errorMessage, ctx.query, ctx.previousCommands, ctx.currentSchema),
317313
ctx -> isFullTextAfterSampleBug(ctx.errorMessage, ctx.query),
318314
ctx -> isFullTextAfterWhereBugs(ctx.errorMessage),
@@ -447,20 +443,6 @@ private static boolean isScalarTypeMismatchError(String errorMessage) {
447443
return SCALAR_TYPE_MISMATCH_PATTERN.matcher(errorWithoutLineBreaks).matches();
448444
}
449445

450-
private static final Pattern FORK_OPTIMIZED_INCORRECTLY_PATTERN = Pattern.compile(
451-
".*Plan \\[.*\\] optimized incorrectly due to missing references \\[_fork.*",
452-
Pattern.DOTALL
453-
);
454-
455-
/**
456-
* When {@code SET unmapped_fields="nullify"} is used, the _fork reference can go missing during plan optimization.
457-
* https://github.com/elastic/elasticsearch/issues/142762
458-
*/
459-
static boolean isForkOptimizationBugWithUnmappedFields(String errorMessage, String query) {
460-
String errorWithoutLineBreaks = normalizeErrorMessage(errorMessage);
461-
return query.startsWith(SET_UNMAPPED_FIELDS_PREFIX) && FORK_OPTIMIZED_INCORRECTLY_PATTERN.matcher(errorWithoutLineBreaks).matches();
462-
}
463-
464446
private static final Pattern NOT_A_FIELD_FROM_INDEX_PATTERN = Pattern.compile(
465447
".*cannot operate on \\[([^]]+)\\], which is not a field from an index mapping.*",
466448
Pattern.DOTALL

x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-nullify.csv-spec

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -698,3 +698,88 @@ FROM languages
698698
host.entity.id:null | entity.id:keyword
699699
null | foo
700700
;
701+
702+
703+
forkWithRowAndUnmappedKeep
704+
required_capability: fork_v9
705+
required_capability: fix_fork_unmapped_nullify
706+
707+
SET unmapped_fields="nullify"\;
708+
ROW a = 1
709+
| FORK (where true)
710+
| WHERE a == 1
711+
| KEEP bar
712+
;
713+
714+
bar:null
715+
null
716+
;
717+
718+
719+
forkWithFromAndUnmappedCoalesce
720+
required_capability: fork_v9
721+
required_capability: fix_fork_unmapped_nullify
722+
723+
SET unmapped_fields="nullify"\;
724+
FROM employees
725+
| FORK (where foo != 84) (where true)
726+
| WHERE _fork == "fork1"
727+
| DROP _fork
728+
| EVAL y = coalesce(bar, baz)
729+
;
730+
731+
avg_worked_seconds:long | birth_date:date | emp_no:integer | first_name:keyword | gender:keyword | height:double | height.float:double | height.half_float:double | height.scaled_float:double | hire_date:date | is_rehired:boolean | job_positions:keyword | languages:integer | languages.byte:integer | languages.long:long | languages.short:integer | last_name:keyword | salary:integer | salary_change:double | salary_change.int:integer | salary_change.keyword:keyword | salary_change.long:long | still_hired:boolean | foo:null | bar:null | baz:null | y:null
732+
;
733+
734+
735+
forkWithUnmappedStatsEvalKeep
736+
required_capability: fork_v9
737+
required_capability: fix_fork_unmapped_nullify
738+
739+
SET unmapped_fields="nullify"\;
740+
FROM alerts
741+
| KEEP kibana.alert.risk_score
742+
| FORK (WHERE true | MV_EXPAND kibana.alert.risk_score)
743+
| STATS kibana.alert.risk_score = COUNT(*)
744+
| EVAL x = LEAST(kibana.alert.risk_score, 52, 60)
745+
| KEEP kibana.alert.risk_score
746+
;
747+
748+
kibana.alert.risk_score:long
749+
10
750+
;
751+
752+
753+
forkWithUnmappedStatsEvalKeepTwoBranches
754+
required_capability: fork_v9
755+
required_capability: fix_fork_unmapped_nullify
756+
required_capability: sample_v3
757+
758+
SET unmapped_fields="nullify"\;
759+
FROM alerts
760+
| KEEP kibana.alert.risk_score
761+
| FORK (WHERE true | MV_EXPAND kibana.alert.risk_score) (WHERE true | SAMPLE 0.5)
762+
| STATS kibana.alert.risk_score = COUNT(*)
763+
| EVAL x = LEAST(kibana.alert.risk_score, 52, 60)
764+
| KEEP kibana.alert.risk_score
765+
;
766+
767+
kibana.alert.risk_score:long
768+
10..20
769+
;
770+
771+
772+
forkWithRowCoalesceAndDrop
773+
required_capability: fork_v9
774+
required_capability: fix_fork_unmapped_nullify
775+
776+
SET unmapped_fields="nullify"\;
777+
ROW a = 12::long
778+
| FORK (WHERE true)
779+
| EVAL x = COALESCE(a, 5)
780+
| DROP a
781+
;
782+
783+
_fork:keyword | x:long
784+
fork1 | 12
785+
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2273,6 +2273,15 @@ public enum Cap {
22732273
*/
22742274
TOP_SNIPPETS_FOLDABLE_QUERY_CHECK,
22752275

2276+
/**
2277+
* Fixes an analysis bug in {@code FORK} with {@code unmapped_fields="nullify"}.
2278+
* Preserve existing attribute {@code NameId}s so that references from upper plan nodes remain valid after
2279+
* sub-plans are updated. Only genuinely new attributes get fresh NameIds.
2280+
* Keeping the same attributes can have unintended side effects when applying optimizations like constant folding.
2281+
* https://github.com/elastic/elasticsearch/issues/142762
2282+
*/
2283+
FIX_FORK_UNMAPPED_NULLIFY,
2284+
22762285
// Last capability should still have a comma for fewer merge conflicts when adding new ones :)
22772286
// This comment prevents the semicolon from being on the previous capability when Spotless formats the file.
22782287
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@
188188
import static java.util.Collections.singletonList;
189189
import static org.elasticsearch.xpack.core.enrich.EnrichPolicy.GEO_MATCH_TYPE;
190190
import static org.elasticsearch.xpack.esql.capabilities.TranslationAware.translatable;
191-
import static org.elasticsearch.xpack.esql.core.expression.Expressions.toReferenceAttributes;
191+
import static org.elasticsearch.xpack.esql.core.expression.Expressions.toReferenceAttributesPreservingIds;
192192
import static org.elasticsearch.xpack.esql.core.type.DataType.AGGREGATE_METRIC_DOUBLE;
193193
import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN;
194194
import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME;
@@ -1096,7 +1096,7 @@ && subqueryReferencingIndexWithEmptyMapping(fork, logicalPlan, forkColumns) == f
10961096
return fork;
10971097
}
10981098

1099-
return fork.replaceSubPlansAndOutput(newSubPlans, toReferenceAttributes(outputUnion));
1099+
return fork.replaceSubPlansAndOutput(newSubPlans, toReferenceAttributesPreservingIds(outputUnion, fork.output()));
11001100
}
11011101

11021102
/*

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/rules/ResolveUnmapped.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ private static Alias nullAlias(NamedExpression attribute) {
335335
* excluding the {@link UnresolvedPattern} and {@link UnresolvedTimestamp} subtypes.
336336
*/
337337
private static LinkedHashMap<String, List<UnresolvedAttribute>> collectUnresolved(LogicalPlan plan) {
338-
Set<String> childOutputNames = new java.util.HashSet<>();
338+
Set<String> childOutputNames = new HashSet<>();
339339
for (LogicalPlan child : plan.children()) {
340340
for (Attribute attr : child.output()) {
341341
childOutputNames.add(attr.name());

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/expression/Expressions.java

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111

1212
import java.util.ArrayList;
1313
import java.util.Collection;
14+
import java.util.HashMap;
1415
import java.util.List;
16+
import java.util.Map;
1517
import java.util.function.Predicate;
1618

1719
import static java.util.Collections.emptyList;
@@ -32,19 +34,27 @@ public static List<Attribute> asAttributes(List<? extends NamedExpression> named
3234
}
3335

3436
/**
35-
* @return a list of {@link ReferenceAttribute}s corresponding to the given named expressions.
36-
* <p>
37-
* The returned ReferenceAttributes will have new {@link NameId}s, also in the case the input contains ReferenceAttributes.
37+
* Converts named expressions to {@link ReferenceAttribute}s, preserving {@link NameId}s for attributes whose name
38+
* matches one in {@code existingOutput}. Genuinely new attributes get fresh NameIds.
3839
*/
39-
public static List<Attribute> toReferenceAttributes(List<? extends NamedExpression> named) {
40+
public static List<Attribute> toReferenceAttributesPreservingIds(
41+
List<? extends NamedExpression> named,
42+
List<Attribute> existingOutput
43+
) {
4044
if (named.isEmpty()) {
4145
return emptyList();
4246
}
47+
Map<String, Attribute> existingByName = HashMap.newHashMap(existingOutput.size());
48+
for (Attribute attr : existingOutput) {
49+
existingByName.put(attr.name(), attr);
50+
}
4351
List<Attribute> list = new ArrayList<>(named.size());
4452
for (NamedExpression exp : named) {
53+
Attribute existing = existingByName.get(exp.name());
54+
NameId id = existing != null ? existing.id() : new NameId();
4555
ReferenceAttribute refAttr = exp instanceof ReferenceAttribute ra
46-
? (ReferenceAttribute) ra.withId(new NameId())
47-
: new ReferenceAttribute(exp.source(), null, exp.name(), exp.dataType(), exp.nullable(), null, exp.synthetic());
56+
? (ReferenceAttribute) ra.withId(id)
57+
: new ReferenceAttribute(exp.source(), null, exp.name(), exp.dataType(), exp.nullable(), id, exp.synthetic());
4858
list.add(refAttr);
4959
}
5060
return list;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Fork.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
import java.util.stream.Collectors;
3131

3232
import static org.elasticsearch.xpack.esql.analysis.Analyzer.NO_FIELDS;
33-
import static org.elasticsearch.xpack.esql.core.expression.Expressions.toReferenceAttributes;
33+
import static org.elasticsearch.xpack.esql.core.expression.Expressions.toReferenceAttributesPreservingIds;
3434

3535
/**
3636
* A Fork is a n-ary {@code Plan} where each child is a sub plan, e.g.
@@ -105,13 +105,11 @@ public Fork replaceSubPlansAndOutput(List<LogicalPlan> subPlans, List<Attribute>
105105
}
106106

107107
public Fork refreshOutput() {
108-
// We don't want to keep the same attributes that are outputted by the FORK branches.
109-
// Keeping the same attributes can have unintended side effects when applying optimizations like constant folding.
110108
return new Fork(source(), children(), refreshedOutput());
111109
}
112110

113111
protected List<Attribute> refreshedOutput() {
114-
return toReferenceAttributes(outputUnion(children()));
112+
return toReferenceAttributesPreservingIds(outputUnion(children()), this.output());
115113
}
116114

117115
@Override

0 commit comments

Comments
 (0)