Skip to content

Commit ff0117b

Browse files
author
Alexandros Batsakis
committed
replace regex
1 parent a0aacec commit ff0117b

7 files changed

Lines changed: 87 additions & 32 deletions

File tree

docs/reference/esql/esql-functions.asciidoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ these functions:
4646
* <<esql-now>>
4747
* <<esql-pi>>
4848
* <<esql-pow>>
49+
* <<esql-replace>>
4950
* <<esql-round>>
5051
* <<esql-sin>>
5152
* <<esql-sinh>>
@@ -106,6 +107,7 @@ include::functions/mv_sum.asciidoc[]
106107
include::functions/now.asciidoc[]
107108
include::functions/pi.asciidoc[]
108109
include::functions/pow.asciidoc[]
110+
include::functions/replace.asciidoc[]
109111
include::functions/round.asciidoc[]
110112
include::functions/sin.asciidoc[]
111113
include::functions/sinh.asciidoc[]
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
[[esql-replace]]
2+
=== `REPLACE`
3+
This function substitutes the replacement string (3rd argument) for every match of the regular expression (2nd argument) in the string (1st argument).
4+
5+
If any of the arguments are `NULL`, the result is `NULL`.
6+
7+
. This example replaces every occurrence of the word "World" with the word "Universe":
8+
9+
[source.merge.styled,esql]
10+
----
11+
include::{esql-specs}/docs.csv-spec[tag=replaceString]
12+
----
13+
[%header.monospaced.styled,format=dsv,separator=|]
14+
|===
15+
include::{esql-specs}/docs.csv-spec[tag=replaceString-result]
16+
|===

x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,3 +409,14 @@ ROW date_string = "2022-05-06"
409409
date_string:keyword | date:date
410410
2022-05-06 | 2022-05-06T00:00:00.000Z
411411
;
412+
413+
docsReplace
414+
//tag::replaceString[]
415+
ROW str = "Hello World" | EVAL str = REPLACE(str, "World", "Universe") | KEEP str;
416+
// end::replaceString[]
417+
418+
//tag::replaceString-result[]
419+
str:keyword
420+
Hello Universe
421+
// end::replaceString-result[]
422+
;

x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -746,3 +746,12 @@ emp_no:integer | last_name:keyword | f_l:keyword
746746
10009 | Peac | P
747747
10010 | Piveteau | P
748748
;
749+
750+
replace regex
751+
from hosts | where host == "epsilon" | eval l1=replace(host_group, "\\s+", "") | sort l1 | keep l1;
752+
753+
l1:keyword
754+
Gatewayinstances
755+
Gatewayinstances
756+
null
757+
;

x-pack/plugin/esql/src/main/java/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceEvaluator.java

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,14 @@
2020
public final class ReplaceEvaluator implements EvalOperator.ExpressionEvaluator {
2121
private final EvalOperator.ExpressionEvaluator str;
2222

23-
private final EvalOperator.ExpressionEvaluator oldStr;
23+
private final EvalOperator.ExpressionEvaluator regex;
2424

2525
private final EvalOperator.ExpressionEvaluator newStr;
2626

2727
public ReplaceEvaluator(EvalOperator.ExpressionEvaluator str,
28-
EvalOperator.ExpressionEvaluator oldStr, EvalOperator.ExpressionEvaluator newStr) {
28+
EvalOperator.ExpressionEvaluator regex, EvalOperator.ExpressionEvaluator newStr) {
2929
this.str = str;
30-
this.oldStr = oldStr;
30+
this.regex = regex;
3131
this.newStr = newStr;
3232
}
3333

@@ -38,69 +38,69 @@ public Block eval(Page page) {
3838
return Block.constantNullBlock(page.getPositionCount());
3939
}
4040
BytesRefBlock strBlock = (BytesRefBlock) strUncastBlock;
41-
Block oldStrUncastBlock = oldStr.eval(page);
42-
if (oldStrUncastBlock.areAllValuesNull()) {
41+
Block regexUncastBlock = regex.eval(page);
42+
if (regexUncastBlock.areAllValuesNull()) {
4343
return Block.constantNullBlock(page.getPositionCount());
4444
}
45-
BytesRefBlock oldStrBlock = (BytesRefBlock) oldStrUncastBlock;
45+
BytesRefBlock regexBlock = (BytesRefBlock) regexUncastBlock;
4646
Block newStrUncastBlock = newStr.eval(page);
4747
if (newStrUncastBlock.areAllValuesNull()) {
4848
return Block.constantNullBlock(page.getPositionCount());
4949
}
5050
BytesRefBlock newStrBlock = (BytesRefBlock) newStrUncastBlock;
5151
BytesRefVector strVector = strBlock.asVector();
5252
if (strVector == null) {
53-
return eval(page.getPositionCount(), strBlock, oldStrBlock, newStrBlock);
53+
return eval(page.getPositionCount(), strBlock, regexBlock, newStrBlock);
5454
}
55-
BytesRefVector oldStrVector = oldStrBlock.asVector();
56-
if (oldStrVector == null) {
57-
return eval(page.getPositionCount(), strBlock, oldStrBlock, newStrBlock);
55+
BytesRefVector regexVector = regexBlock.asVector();
56+
if (regexVector == null) {
57+
return eval(page.getPositionCount(), strBlock, regexBlock, newStrBlock);
5858
}
5959
BytesRefVector newStrVector = newStrBlock.asVector();
6060
if (newStrVector == null) {
61-
return eval(page.getPositionCount(), strBlock, oldStrBlock, newStrBlock);
61+
return eval(page.getPositionCount(), strBlock, regexBlock, newStrBlock);
6262
}
63-
return eval(page.getPositionCount(), strVector, oldStrVector, newStrVector).asBlock();
63+
return eval(page.getPositionCount(), strVector, regexVector, newStrVector).asBlock();
6464
}
6565

66-
public BytesRefBlock eval(int positionCount, BytesRefBlock strBlock, BytesRefBlock oldStrBlock,
66+
public BytesRefBlock eval(int positionCount, BytesRefBlock strBlock, BytesRefBlock regexBlock,
6767
BytesRefBlock newStrBlock) {
6868
BytesRefBlock.Builder result = BytesRefBlock.newBlockBuilder(positionCount);
6969
BytesRef strScratch = new BytesRef();
70-
BytesRef oldStrScratch = new BytesRef();
70+
BytesRef regexScratch = new BytesRef();
7171
BytesRef newStrScratch = new BytesRef();
7272
position: for (int p = 0; p < positionCount; p++) {
7373
if (strBlock.isNull(p) || strBlock.getValueCount(p) != 1) {
7474
result.appendNull();
7575
continue position;
7676
}
77-
if (oldStrBlock.isNull(p) || oldStrBlock.getValueCount(p) != 1) {
77+
if (regexBlock.isNull(p) || regexBlock.getValueCount(p) != 1) {
7878
result.appendNull();
7979
continue position;
8080
}
8181
if (newStrBlock.isNull(p) || newStrBlock.getValueCount(p) != 1) {
8282
result.appendNull();
8383
continue position;
8484
}
85-
result.appendBytesRef(Replace.process(strBlock.getBytesRef(strBlock.getFirstValueIndex(p), strScratch), oldStrBlock.getBytesRef(oldStrBlock.getFirstValueIndex(p), oldStrScratch), newStrBlock.getBytesRef(newStrBlock.getFirstValueIndex(p), newStrScratch)));
85+
result.appendBytesRef(Replace.process(strBlock.getBytesRef(strBlock.getFirstValueIndex(p), strScratch), regexBlock.getBytesRef(regexBlock.getFirstValueIndex(p), regexScratch), newStrBlock.getBytesRef(newStrBlock.getFirstValueIndex(p), newStrScratch)));
8686
}
8787
return result.build();
8888
}
8989

9090
public BytesRefVector eval(int positionCount, BytesRefVector strVector,
91-
BytesRefVector oldStrVector, BytesRefVector newStrVector) {
91+
BytesRefVector regexVector, BytesRefVector newStrVector) {
9292
BytesRefVector.Builder result = BytesRefVector.newVectorBuilder(positionCount);
9393
BytesRef strScratch = new BytesRef();
94-
BytesRef oldStrScratch = new BytesRef();
94+
BytesRef regexScratch = new BytesRef();
9595
BytesRef newStrScratch = new BytesRef();
9696
position: for (int p = 0; p < positionCount; p++) {
97-
result.appendBytesRef(Replace.process(strVector.getBytesRef(p, strScratch), oldStrVector.getBytesRef(p, oldStrScratch), newStrVector.getBytesRef(p, newStrScratch)));
97+
result.appendBytesRef(Replace.process(strVector.getBytesRef(p, strScratch), regexVector.getBytesRef(p, regexScratch), newStrVector.getBytesRef(p, newStrScratch)));
9898
}
9999
return result.build();
100100
}
101101

102102
@Override
103103
public String toString() {
104-
return "ReplaceEvaluator[" + "str=" + str + ", oldStr=" + oldStr + ", newStr=" + newStr + "]";
104+
return "ReplaceEvaluator[" + "str=" + str + ", regex=" + regex + ", newStr=" + newStr + "]";
105105
}
106106
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Replace.java

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import java.util.List;
2525
import java.util.function.Function;
2626
import java.util.function.Supplier;
27+
import java.util.regex.PatternSyntaxException;
2728

2829
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.FIRST;
2930
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.SECOND;
@@ -34,12 +35,12 @@ public class Replace extends ScalarFunction implements EvaluatorMapper {
3435

3536
private final Expression str;
3637
private final Expression newStr;
37-
private final Expression oldStr;
38+
private final Expression regex;
3839

39-
public Replace(Source source, Expression str, Expression oldStr, Expression newStr) {
40-
super(source, Arrays.asList(str, oldStr, newStr));
40+
public Replace(Source source, Expression str, Expression regex, Expression newStr) {
41+
super(source, Arrays.asList(str, regex, newStr));
4142
this.str = str;
42-
this.oldStr = oldStr;
43+
this.regex = regex;
4344
this.newStr = newStr;
4445
}
4546

@@ -59,7 +60,7 @@ protected TypeResolution resolveType() {
5960
return resolution;
6061
}
6162

62-
resolution = isString(oldStr, sourceText(), SECOND);
63+
resolution = isString(regex, sourceText(), SECOND);
6364
if (resolution.unresolved()) {
6465
return resolution;
6566
}
@@ -69,7 +70,7 @@ protected TypeResolution resolveType() {
6970

7071
@Override
7172
public boolean foldable() {
72-
return str.foldable() && oldStr.foldable() && newStr.foldable();
73+
return str.foldable() && regex.foldable() && newStr.foldable();
7374
}
7475

7576
@Override
@@ -78,12 +79,16 @@ public Object fold() {
7879
}
7980

8081
@Evaluator
81-
static BytesRef process(BytesRef str, BytesRef oldStr, BytesRef newStr) {
82-
if (str == null || oldStr == null || newStr == null) {
82+
static BytesRef process(BytesRef str, BytesRef regex, BytesRef newStr) {
83+
if (str == null || regex == null || newStr == null) {
8384
return null;
8485
}
8586

86-
return new BytesRef(str.utf8ToString().replace(oldStr.utf8ToString(), newStr.utf8ToString()));
87+
try {
88+
return new BytesRef(str.utf8ToString().replaceAll(regex.utf8ToString(), newStr.utf8ToString()));
89+
} catch (PatternSyntaxException ex) {
90+
throw new IllegalArgumentException("The provided regex was invalid", ex);
91+
}
8792
}
8893

8994
@Override
@@ -93,7 +98,7 @@ public Expression replaceChildren(List<Expression> newChildren) {
9398

9499
@Override
95100
protected NodeInfo<? extends Expression> info() {
96-
return NodeInfo.create(this, Replace::new, str, oldStr, newStr);
101+
return NodeInfo.create(this, Replace::new, str, regex, newStr);
97102
}
98103

99104
@Override
@@ -106,9 +111,9 @@ public Supplier<EvalOperator.ExpressionEvaluator> toEvaluator(
106111
Function<Expression, Supplier<EvalOperator.ExpressionEvaluator>> toEvaluator
107112
) {
108113
Supplier<EvalOperator.ExpressionEvaluator> strEval = toEvaluator.apply(str);
109-
Supplier<EvalOperator.ExpressionEvaluator> oldStrEval = toEvaluator.apply(oldStr);
114+
Supplier<EvalOperator.ExpressionEvaluator> regexEval = toEvaluator.apply(regex);
110115
Supplier<EvalOperator.ExpressionEvaluator> newStrEval = toEvaluator.apply(newStr);
111116

112-
return () -> new ReplaceEvaluator(strEval.get(), oldStrEval.get(), newStrEval.get());
117+
return () -> new ReplaceEvaluator(strEval.get(), regexEval.get(), newStrEval.get());
113118
}
114119
}

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceTests.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import java.util.function.Supplier;
2525

2626
import static org.elasticsearch.compute.data.BlockUtils.toJavaObject;
27+
import static org.hamcrest.Matchers.containsString;
2728
import static org.hamcrest.Matchers.equalTo;
2829

2930
public class ReplaceTests extends AbstractScalarFunctionTestCase {
@@ -79,6 +80,17 @@ public void testReplaceString() {
7980
assertThat(process("a tiger", " ti", ""), equalTo("ager"));
8081
}
8182

83+
public void testReplaceRegex() {
84+
assertThat(process("what a nice day", "\\s+", "-"), equalTo("what-a-nice-day"));
85+
assertThat(process("I love cats and cats are amazing.", "\\bcats\\b", "dogs"),
86+
equalTo("I love dogs and dogs are amazing."));
87+
}
88+
89+
public void testInvalidRegex() {
90+
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> process("a tiger", "\\", "any"));
91+
assertThat(ex.getMessage(), containsString("regex was invalid"));
92+
}
93+
8294
public void testUnicode() {
8395
final String s = "a\ud83c\udf09tiger";
8496
assertThat(process(s, "a\ud83c\udf09t", "pp"), equalTo("ppiger"));

0 commit comments

Comments
 (0)