Skip to content

Commit c121fa8

Browse files
authored
change Anonymizer to mask PPL (#4352)
* change Anonymizer

  Signed-off-by: xinyual <xinyual@amazon.com>

* fix case

  Signed-off-by: xinyual <xinyual@amazon.com>

---------

Signed-off-by: xinyual <xinyual@amazon.com>
1 parent 69a718b commit c121fa8

2 files changed

Lines changed: 209 additions & 159 deletions

File tree

ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java

Lines changed: 25 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,10 @@ public class PPLQueryDataAnonymizer extends AbstractNodeVisitor<String, String>
105105

106106
private static final String MASK_LITERAL = "***";
107107

108+
private static final String MASK_COLUMN = "identifier";
109+
110+
private static final String MASK_TABLE = "table";
111+
108112
private final AnonymizerExpressionAnalyzer expressionAnalyzer;
109113
private final Settings settings;
110114

@@ -143,12 +147,9 @@ public String visitExplain(Explain node, String context) {
143147
@Override
144148
public String visitRelation(Relation node, String context) {
145149
if (node instanceof DescribeRelation) {
146-
// remove the system table suffix
147-
String systemTable = node.getTableQualifiedName().toString();
148-
return StringUtils.format(
149-
"describe %s", systemTable.substring(0, systemTable.lastIndexOf('.')));
150+
return StringUtils.format("describe %s", MASK_TABLE);
150151
}
151-
return StringUtils.format("source=%s", node.getTableQualifiedName().toString());
152+
return StringUtils.format("source=%s", MASK_TABLE);
152153
}
153154

154155
@Override
@@ -183,31 +184,30 @@ public String visitJoin(Join node, String context) {
183184
.toList());
184185
return StringUtils.format(
185186
"%s | join type=%s overwrite=%s max=%s %s %s",
186-
left, joinType, overwrite, max, fieldList, right);
187+
left, joinType, MASK_LITERAL, MASK_LITERAL, fieldList, right);
187188
} else {
188189
String joinType = node.getJoinType().name().toLowerCase(Locale.ROOT);
189-
String leftAlias = node.getLeftAlias().map(l -> " left = " + l).orElse("");
190-
String rightAlias = node.getRightAlias().map(r -> " right = " + r).orElse("");
190+
String leftAlias = node.getLeftAlias().map(l -> " left = " + MASK_COLUMN).orElse("");
191+
String rightAlias = node.getRightAlias().map(r -> " right = " + MASK_COLUMN).orElse("");
191192
String condition =
192193
node.getJoinCondition().map(c -> expressionAnalyzer.analyze(c, context)).orElse("true");
193194
return StringUtils.format(
194195
"%s | %s join max=%s%s%s on %s %s",
195-
left, joinType, max, leftAlias, rightAlias, condition, right);
196+
left, joinType, MASK_LITERAL, leftAlias, rightAlias, condition, right);
196197
}
197198
}
198199

199200
@Override
200201
public String visitLookup(Lookup node, String context) {
201202
String child = node.getChild().get(0).accept(this, context);
202-
String lookupTable = ((Relation) node.getLookupRelation()).getTableQualifiedName().toString();
203203
String mappingFields = formatFieldAlias(node.getMappingAliasMap());
204204
String strategy =
205205
node.getOutputAliasMap().isEmpty()
206206
? ""
207207
: String.format(" %s ", node.getOutputStrategy().toString().toLowerCase());
208208
String outputFields = formatFieldAlias(node.getOutputAliasMap());
209209
return StringUtils.format(
210-
"%s | lookup %s %s%s%s", child, lookupTable, mappingFields, strategy, outputFields);
210+
"%s | lookup %s %s%s%s", child, MASK_TABLE, mappingFields, strategy, outputFields);
211211
}
212212

213213
private String formatFieldAlias(java.util.Map<String, String> fieldMap) {
@@ -230,7 +230,7 @@ public String visitSubqueryAlias(SubqueryAlias node, String context) {
230230
}
231231
// add "[]" only if its child is not a root
232232
String format = childNode.getChild().isEmpty() ? "%s as %s" : "[ %s ] as %s";
233-
return StringUtils.format(format, child, node.getAlias());
233+
return StringUtils.format(format, child, MASK_COLUMN);
234234
}
235235

236236
@Override
@@ -270,8 +270,8 @@ public String visitRename(Rename node, String context) {
270270
((Field) renameMap.getTarget()).getField().toString());
271271
}
272272
String renames =
273-
renameMapBuilder.build().entrySet().stream()
274-
.map(entry -> StringUtils.format("%s as %s", entry.getKey(), entry.getValue()))
273+
node.getRenameList().stream()
274+
.map(entry -> StringUtils.format("%s as %s", MASK_COLUMN, MASK_COLUMN))
275275
.collect(Collectors.joining(","));
276276
return StringUtils.format("%s | rename %s", child, renames);
277277
}
@@ -336,7 +336,7 @@ public String visitBin(Bin node, String context) {
336336
}
337337

338338
if (node.getAlias() != null) {
339-
binCommand.append(" as ").append(node.getAlias());
339+
binCommand.append(" as ").append(MASK_COLUMN);
340340
}
341341

342342
return StringUtils.format("%s%s", child, binCommand.toString());
@@ -406,7 +406,7 @@ public String visitEval(Eval node, String context) {
406406
}
407407
String expressions =
408408
expressionsBuilder.build().stream()
409-
.map(pair -> StringUtils.format("%s" + "=%s", pair.getLeft(), pair.getRight()))
409+
.map(pair -> StringUtils.format("%s" + "=%s", MASK_COLUMN, pair.getRight()))
410410
.collect(Collectors.joining(" "));
411411
return StringUtils.format("%s | eval %s", child, expressions);
412412
}
@@ -496,7 +496,7 @@ public String visitTimechart(Timechart node, String context) {
496496
public String visitRex(Rex node, String context) {
497497
String child = node.getChild().get(0).accept(this, context);
498498
String field = visitExpression(node.getField());
499-
String pattern = "\"" + node.getPattern().toString() + "\"";
499+
String pattern = "\"" + MASK_LITERAL + "\"";
500500
StringBuilder command = new StringBuilder();
501501

502502
command.append(
@@ -505,11 +505,11 @@ public String visitRex(Rex node, String context) {
505505
child, field, node.getMode().toString().toLowerCase(), pattern));
506506

507507
if (node.getMaxMatch().isPresent()) {
508-
command.append(" max_match=").append(node.getMaxMatch().get());
508+
command.append(" max_match=").append(MASK_LITERAL);
509509
}
510510

511511
if (node.getOffsetField().isPresent()) {
512-
command.append(" offset_field=").append(node.getOffsetField().get());
512+
command.append(" offset_field=").append(MASK_COLUMN);
513513
}
514514

515515
return command.toString();
@@ -535,7 +535,7 @@ public String visitParse(Parse node, String context) {
535535
}
536536
return ParseMethod.PATTERNS.equals(node.getParseMethod()) && regex.isEmpty()
537537
? StringUtils.format("%s | %s %s", child, commandName, source)
538-
: StringUtils.format("%s | %s %s '%s'", child, commandName, source, regex);
538+
: StringUtils.format("%s | %s %s '%s'", child, commandName, source, MASK_LITERAL);
539539
}
540540

541541
@Override
@@ -646,7 +646,7 @@ public String visitPatterns(Patterns node, String context) {
646646
builder.append(" mode=").append(node.getPatternMode().toString());
647647
builder.append(" max_sample_count=").append(visitExpression(node.getPatternMaxSampleCount()));
648648
builder.append(" buffer_limit=").append(visitExpression(node.getPatternBufferLimit()));
649-
builder.append(" new_field=").append(node.getAlias());
649+
builder.append(" new_field=").append(MASK_COLUMN);
650650
if (!node.getArguments().isEmpty()) {
651651
for (java.util.Map.Entry<String, Literal> entry : node.getArguments().entrySet()) {
652652
builder.append(
@@ -780,7 +780,7 @@ public String visitIn(In node, String context) {
780780

781781
@Override
782782
public String visitField(Field node, String context) {
783-
return node.getField().toString();
783+
return MASK_COLUMN;
784784
}
785785

786786
@Override
@@ -802,7 +802,7 @@ public String visitAlias(Alias node, String context) {
802802
@Override
803803
public String visitTrendlineComputation(Trendline.TrendlineComputation node, String context) {
804804
final String dataField = node.getDataField().accept(this, context);
805-
final String aliasClause = " as " + node.getAlias();
805+
final String aliasClause = " as " + MASK_COLUMN;
806806
final String computationType = node.getComputationType().name().toLowerCase(Locale.ROOT);
807807
return StringUtils.format(
808808
"%s(%d, %s)%s", computationType, node.getNumberOfDataPoints(), dataField, aliasClause);
@@ -831,7 +831,7 @@ public String visitExistsSubquery(ExistsSubquery node, String context) {
831831
@Override
832832
public String visitCase(Case node, String context) {
833833
StringBuilder builder = new StringBuilder();
834-
builder.append("cast(");
834+
builder.append("case(");
835835
for (When when : node.getWhenClauses()) {
836836
builder.append(analyze(when.getCondition(), context));
837837
builder.append(",");
@@ -858,7 +858,7 @@ public String visitCast(Cast node, String context) {
858858
@Override
859859
public String visitQualifiedName(
860860
org.opensearch.sql.ast.expression.QualifiedName node, String context) {
861-
return String.join(".", node.getParts());
861+
return MASK_COLUMN;
862862
}
863863
}
864864
}

0 commit comments

Comments
 (0)