Skip to content

Commit 2289e17

Browse files
authored
ESQL: Prevent circular alias references in DeduplicateAggs (elastic#139175)
## Summary Fixes circular reference detection in alias chains during `DeduplicateAggs` optimizations. ## Problem The `PushDownEnrich` optimization can create circular references when pushing `Enrich` past `Project` operations, causing `ql_illegal_argument_exception: Potential cycle detected` errors when `DeduplicateAggs`. Example query: ```esql FROM languages_lookup | RENAME language_name AS message | ENRICH languages_policy ON language_code | STATS a = count(to_lower(language_name)), b = count(language_name) ``` ## Changes Skip aliases that would create circular references by checking if `alias.toAttribute()` equals the resolved `alias.child()`. Closes elastic#138346 Closes elastic#139541
1 parent b80a5db commit 2289e17

6 files changed

Lines changed: 162 additions & 2 deletions

File tree

docs/changelog/139175.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
pr: 139175
2+
summary: "ESQL: Prevent circular alias references in `DeduplicateAggs`"
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 138346
7+
- 139541

x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ public abstract class GenerativeRestTest extends ESRestTestCase implements Query
7171
"can't find input for", // https://github.com/elastic/elasticsearch/issues/136596
7272
"out of bounds for length", // https://github.com/elastic/elasticsearch/issues/136851
7373
"optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/138231
74-
"Potential cycle detected", // https://github.com/elastic/elasticsearch/issues/138346
7574

7675
// Awaiting fixes for correctness
7776
"Expecting at most \\[.*\\] columns, got \\[.*\\]", // https://github.com/elastic/elasticsearch/issues/129561

x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -781,3 +781,72 @@ ROW a = 1, b = 2
781781
values(language_name_a):keyword | values(language_name_b):keyword
782782
English | French
783783
;
784+
785+
786+
circularRefEnrichStats
787+
required_capability: enrich_load
788+
required_capability: fix_enrich_alias_cycle_in_deduplicate_aggs
789+
FROM languages_lookup
790+
| RENAME language_name AS message
791+
| ENRICH languages_policy ON language_code
792+
| STATS a = count(to_lower(language_name)), b = count(language_name)
793+
;
794+
795+
a:long | b:long
796+
4 | 4
797+
;
798+
799+
800+
circularRefEnrichStats2
801+
required_capability: enrich_load
802+
required_capability: fix_enrich_alias_cycle_in_deduplicate_aggs
803+
FROM languages_lookup
804+
| RENAME language_name AS message
805+
| ENRICH languages_policy ON language_code
806+
| STATS a = count(to_lower(language_name)), b = count(message)
807+
;
808+
809+
a:long | b:long
810+
4 | 4
811+
;
812+
813+
814+
circularRefEnrichStats3
815+
required_capability: enrich_load
816+
required_capability: fix_enrich_alias_cycle_in_deduplicate_aggs
817+
FROM languages_lookup
818+
| ENRICH languages_policy ON language_code
819+
| RENAME language_name AS message
820+
| STATS a = count(to_lower(message)), b = count(message)
821+
;
822+
823+
a:long | b:long
824+
4 | 4
825+
;
826+
827+
828+
circularRefEnrichStats4
829+
required_capability: enrich_load
830+
required_capability: fix_enrich_alias_cycle_in_deduplicate_aggs
831+
FROM languages_lookup
832+
| ENRICH languages_policy ON language_code WITH message=language_name
833+
| RENAME language_name AS message
834+
| STATS a = count(to_lower(message)), b = count(message)
835+
;
836+
837+
a:long | b:long
838+
4 | 4
839+
;
840+
841+
842+
circularRefEnrichStats5
843+
required_capability: enrich_load
844+
required_capability: fix_enrich_alias_cycle_in_deduplicate_aggs
845+
FROM languages_lookup
846+
| ENRICH languages_policy ON language_code WITH message=language_name
847+
| STATS a = count(to_lower(language_name)), b = count(message)
848+
;
849+
850+
a:long | b:long
851+
4 | 4
852+
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1746,6 +1746,13 @@ public enum Cap {
17461746
*/
17471747
METRICS_GROUP_BY_ALL_WITH_TS_DIMENSIONS,
17481748

1749+
/**
1750+
* Fix for circular reference in alias chains during PushDownEnrich and aggregate deduplication.
1751+
* Prevents "Potential cycle detected" errors when aliases reference each other.
1752+
* https://github.com/elastic/elasticsearch/issues/138346
1753+
*/
1754+
FIX_ENRICH_ALIAS_CYCLE_IN_DEDUPLICATE_AGGS,
1755+
17491756
/**
17501757
* Returns the top snippets for given text content and associated query.
17511758
*/

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,9 @@ public Enrich withGeneratedNames(List<String> newNames) {
234234
if (enrichField.name().equals(newName)) {
235235
newEnrichFields.add(enrichField);
236236
} else if (enrichField instanceof ReferenceAttribute ra) {
237-
newEnrichFields.add(new Alias(ra.source(), newName, ra, new NameId(), ra.synthetic()));
237+
// It's safe to generate new name ids when assigning new names - the original attribute representing
238+
// the field in the enrich index cannot be directly referenced in downstream plans, anyway
239+
newEnrichFields.add(new Alias(ra.source(), newName, ra.withId(new NameId()), new NameId(), ra.synthetic()));
238240
} else if (enrichField instanceof Alias a) {
239241
newEnrichFields.add(new Alias(a.source(), newName, a.child(), new NameId(), a.synthetic()));
240242
} else {

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888
import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvSum;
8989
import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce;
9090
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat;
91+
import org.elasticsearch.xpack.esql.expression.function.scalar.string.ToLower;
9192
import org.elasticsearch.xpack.esql.expression.function.scalar.string.regex.WildcardLike;
9293
import org.elasticsearch.xpack.esql.expression.function.vector.Knn;
9394
import org.elasticsearch.xpack.esql.expression.predicate.logical.And;
@@ -10364,6 +10365,81 @@ public void testPruneColumnsInForkBranchesKeepOnlyNeededAggs() {
1036410365
}
1036510366
}
1036610367

10368+
/**
10369+
* <pre>{@code
10370+
* Limit[1000[INTEGER],false,false]
10371+
* \_Aggregate[[],[COUNT($$TO_LOWER(langua>$COUNT$0{r$}#22,true[BOOLEAN],PT0S[TIME_DURATION]) AS a#12, COUNT($$language_name$t
10372+
* emp_name$23{r}#25,true[BOOLEAN],PT0S[TIME_DURATION]) AS b#15]]
10373+
* \_Eval[[TOLOWER($$language_name$temp_name$23{r}#25) AS $$TO_LOWER(langua>$COUNT$0#22]]
10374+
* \_Enrich[ANY,languages_idx[KEYWORD],language_name{f}#17,{"match":{"indices":[],"match_field":"id","enrich_fields":["langua
10375+
* ge_code","language_name"]}},{=languages_idx},[language_code{r}#20, language_name{r}#24 AS $$language_name$temp_n
10376+
* ame$23#25]]
10377+
* \_Join[LEFT,[language_code{r}#4],[language_code{f}#16],null]
10378+
* |_LocalRelation[[language_code{r}#4],Page{blocks=[IntVectorBlock[vector=ConstantIntVector[positions=1, value=1]]]}]
10379+
* \_EsRelation[languages_lookup][LOOKUP][language_code{f}#16, language_name{f}#17]
10380+
* }</pre>
10381+
*/
10382+
public void testCircularRefEnrichStats() {
10383+
var query = """
10384+
ROW language_code = 1
10385+
| LOOKUP JOIN languages_lookup ON language_code
10386+
| RENAME language_name AS message
10387+
| ENRICH languages_idx ON message
10388+
| STATS a = COUNT(TO_LOWER(language_name)), b = COUNT(language_name)
10389+
""";
10390+
var plan = optimizedPlan(query);
10391+
10392+
// Limit[1000[INTEGER],false,false]
10393+
var limit = as(plan, Limit.class);
10394+
assertThat(limit.limit(), is(new Literal(Source.EMPTY, 1000, DataType.INTEGER)));
10395+
10396+
// Aggregate[[],[COUNT(...) AS a, COUNT(...) AS b]]
10397+
var aggregate = as(limit.child(), Aggregate.class);
10398+
assertThat(aggregate.groupings(), hasSize(0));
10399+
assertThat(aggregate.aggregates(), hasSize(2));
10400+
10401+
var agg1 = as(aggregate.aggregates().get(0), Alias.class);
10402+
assertThat(agg1.name(), is("a"));
10403+
var count1 = as(agg1.child(), Count.class);
10404+
var field1 = as(count1.field(), ReferenceAttribute.class);
10405+
assertThat(field1.name(), is("$$TO_LOWER(langua>$COUNT$0"));
10406+
10407+
var agg2 = as(aggregate.aggregates().get(1), Alias.class);
10408+
assertThat(agg2.name(), is("b"));
10409+
var count2 = as(agg2.child(), Count.class);
10410+
var field2 = as(count2.field(), ReferenceAttribute.class);
10411+
assertThat(field2.name(), startsWith("$$language_name$temp_name$"));
10412+
10413+
// Eval[[TOLOWER($$language_name$temp_name$23{r}#25) AS $$TO_LOWER(langua>$COUNT$0#22]]
10414+
var eval = as(aggregate.child(), Eval.class);
10415+
assertThat(eval.fields(), hasSize(1));
10416+
var evalFieldAlias = as(eval.fields().get(0), Alias.class);
10417+
var evalField = as(evalFieldAlias.child(), ToLower.class);
10418+
10419+
assertThat(evalFieldAlias.name(), is("$$TO_LOWER(langua>$COUNT$0"));
10420+
assertThat(count1.field(), is(evalFieldAlias.toAttribute()));
10421+
assertThat(evalField.source().text(), is("TO_LOWER(language_name)"));
10422+
10423+
// Enrich[ANY,languages_idx[KEYWORD],language_name{f}#17,...]
10424+
var enrich = as(eval.child(), Enrich.class);
10425+
assertThat(enrich.policyName().fold(FoldContext.small()), is(BytesRefs.toBytesRef("languages_idx")));
10426+
assertThat(enrich.enrichFields(), hasSize(2));
10427+
10428+
// Join[LEFT,[language_code{r}#4],[language_code{f}#16],null]
10429+
var join = as(enrich.child(), Join.class);
10430+
assertThat(join.config().type(), is(JoinTypes.LEFT));
10431+
10432+
// LocalRelation[[language_code{r}#4],Page{blocks=[IntVectorBlock[...]]}]
10433+
var localRelation = as(join.left(), LocalRelation.class);
10434+
assertThat(localRelation.output(), hasSize(1));
10435+
assertThat(localRelation.output().get(0).name(), is("language_code"));
10436+
10437+
// EsRelation[languages_lookup][LOOKUP][language_code{f}#16, language_name{f}#17]
10438+
var esRelation = as(join.right(), EsRelation.class);
10439+
assertThat(esRelation.indexPattern(), is("languages_lookup"));
10440+
assertThat(esRelation.indexMode(), is(IndexMode.LOOKUP));
10441+
}
10442+
1036710443
/**
1036810444
* <pre>{@code
1036910445
* Limit[10[INTEGER],false,false]

0 commit comments

Comments
 (0)