Skip to content

Commit e258a5d

Browse files
martijnvg authored and Kubik42 committed
Introduce a new ignored source format that uses binary doc values
1 parent a056736 commit e258a5d

17 files changed

Lines changed: 771 additions & 173 deletions

File tree

server/src/main/java/org/elasticsearch/index/IndexVersions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ private static Version parseUnchecked(String version) {
225225
public static final IndexVersion TIME_SERIES_DOC_VALUES_FORMAT_VERSION_3 = def(9_072_0_00, Version.LUCENE_10_3_2);
226226
public static final IndexVersion STORE_IGNORED_MALFORMED_IN_BINARY_DOC_VALUES = def(9_073_0_00, Version.LUCENE_10_3_2);
227227
public static final IndexVersion DISABLE_SEQUENCE_NUMBERS = def(9_074_0_00, Version.LUCENE_10_3_2);
228+
public static final IndexVersion IGNORED_SOURCE_AS_DOC_VALUES = def(9_075_0_00, Version.LUCENE_10_3_2);
228229

229230
/*
230231
* STOP! READ THIS FIRST! No, really,

server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.elasticsearch.index.codec.vectors.es93.ES93HnswVectorsFormat;
2727
import org.elasticsearch.index.mapper.CompletionFieldMapper;
2828
import org.elasticsearch.index.mapper.IdFieldMapper;
29+
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;
2930
import org.elasticsearch.index.mapper.Mapper;
3031
import org.elasticsearch.index.mapper.MapperService;
3132
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
@@ -59,6 +60,7 @@ public class PerFieldFormatSupplier {
5960
includeMetaField.add(TimeSeriesIdFieldMapper.NAME);
6061
includeMetaField.add(TimeSeriesRoutingHashFieldMapper.NAME);
6162
includeMetaField.add(SeqNoFieldMapper.NAME);
63+
includeMetaField.add(IgnoredSourceFieldMapper.NAME);
6264
// Don't include the _recovery_source_size and _recovery_source fields, since their values can be trimmed away in
6365
// RecoverySourcePruneMergePolicy, which leads to inconsistencies between merge stats and actual values.
6466
INCLUDE_META_FIELDS = Collections.unmodifiableSet(includeMetaField);

server/src/main/java/org/elasticsearch/index/fieldvisitor/StoredFieldLoader.java

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,6 @@ public static StoredFieldLoader create(boolean loadSource, Set<String> fields) {
8686
* otherwise, uses the heuristic defined in {@link StoredFieldLoader#reader(LeafReaderContext, int[])}.
8787
*/
8888
public static StoredFieldLoader create(boolean loadSource, Set<String> fields, boolean forceSequentialReader) {
89-
if (loadSource == false && fields.isEmpty()) {
90-
return StoredFieldLoader.empty();
91-
}
9289
List<String> fieldsToLoad = fieldsToLoad(loadSource, fields);
9390
return new StoredFieldLoader() {
9491
@Override
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.mapper;
11+
12+
import org.apache.lucene.util.BytesRef;
13+
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper.CoalescedIgnoredSourceEncoding;
14+
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper.IgnoredSourceLeafLoader;
15+
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper.NameValue;
16+
import org.elasticsearch.search.lookup.SourceFilter;
17+
18+
import java.util.ArrayList;
19+
import java.util.HashMap;
20+
import java.util.List;
21+
import java.util.Map;
22+
import java.util.Set;
23+
24+
/**
25+
* {@link IgnoredSourceLeafLoader} for the {@link IgnoredSourceFieldMapper.IgnoredSourceFormat#COALESCED_SINGLE_IGNORED_SOURCE} format.
26+
* All values for a given field path are grouped into a single stored field entry, encoded via {@link CoalescedIgnoredSourceEncoding}.
27+
* Each entry is a binary blob with the format: {@code [count][field_name][parentOffset1][value1][parentOffset2][value2]...}
28+
*
29+
* <p>For example, a document with:
30+
* <pre>{@code
31+
* { "obj": { "foo": "a", "foo": "b" }, "bar": 42 }
32+
* }</pre>
33+
* where {@code obj.foo} and {@code bar} are ignored, would produce two stored field entries under {@code _ignored_source}:
34+
* <ul>
35+
* <li>{@code [2]["obj.foo"][4]["a"][4]["b"]} — count=2, field name, then two (parentOffset, value) pairs</li>
36+
* <li>{@code [1]["bar"][0][42]} — count=1, field name, then one (parentOffset, value) pair</li>
37+
* </ul>
38+
*/
39+
class CoalescedIgnoredSourceLeafLoader implements IgnoredSourceLeafLoader {
40+
41+
@Override
42+
public Map<String, List<NameValue>> loadAllIgnoredFields(SourceFilter filter, Map<String, List<Object>> storedFields, int docId) {
43+
var ignoredStoredValues = storedFields.get(IgnoredSourceFieldMapper.NAME);
44+
if (ignoredStoredValues == null) {
45+
return null;
46+
}
47+
48+
Map<String, List<NameValue>> objectsWithIgnoredFields = new HashMap<>();
49+
for (var ignoredSourceEntry : ignoredStoredValues) {
50+
List<NameValue> nameValues = decodeEntry(ignoredSourceEntry);
51+
52+
// Filter out entries excluded by the source filter and group the rest by parent field name.
53+
for (var nameValue : nameValues) {
54+
if (filter != null && filter.isPathFiltered(nameValue.name(), XContentDataHelper.isEncodedObject(nameValue.value()))) {
55+
continue;
56+
}
57+
objectsWithIgnoredFields.computeIfAbsent(nameValue.getParentFieldName(), k -> new ArrayList<>()).add(nameValue);
58+
}
59+
}
60+
return objectsWithIgnoredFields;
61+
}
62+
63+
@Override
64+
public Map<String, List<NameValue>> loadSingleIgnoredField(Set<String> fieldPaths, Map<String, List<Object>> storedFields, int docId) {
65+
var ignoredStoredValues = storedFields.get(IgnoredSourceFieldMapper.NAME);
66+
if (ignoredStoredValues == null) {
67+
return Map.of();
68+
}
69+
70+
Map<String, List<NameValue>> valuesForFieldAndParents = new HashMap<>();
71+
for (var ignoredSourceEntry : ignoredStoredValues) {
72+
List<NameValue> nameValues = decodeEntry(ignoredSourceEntry);
73+
74+
// All values in a coalesced entry share the same field name; check if it matches one of the requested paths.
75+
String fieldPath = nameValues.getFirst().name();
76+
if (fieldPaths.contains(fieldPath)) {
77+
// Each field path appears in at most one coalesced entry.
78+
assert valuesForFieldAndParents.containsKey(fieldPath) == false;
79+
valuesForFieldAndParents.put(fieldPath, nameValues);
80+
}
81+
}
82+
return valuesForFieldAndParents;
83+
}
84+
85+
/**
86+
* Decodes a single coalesced entry.
87+
*/
88+
@SuppressWarnings("unchecked")
89+
private static List<NameValue> decodeEntry(Object ignoredSourceEntry) {
90+
// The entry is either already decoded (NameValue list) or a BytesRef that needs to be decoded
91+
List<NameValue> nameValues = (ignoredSourceEntry instanceof List<?>)
92+
? (List<NameValue>) ignoredSourceEntry
93+
: CoalescedIgnoredSourceEncoding.decode((BytesRef) ignoredSourceEntry);
94+
assert nameValues.isEmpty() == false;
95+
return nameValues;
96+
}
97+
}
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.mapper;
11+
12+
import org.apache.lucene.index.LeafReader;
13+
import org.elasticsearch.index.fielddata.MultiValuedSortedBinaryDocValues;
14+
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper.IgnoredSourceLeafLoader;
15+
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper.LegacyIgnoredSourceEncoding;
16+
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper.NameValue;
17+
import org.elasticsearch.search.lookup.SourceFilter;
18+
19+
import java.io.IOException;
20+
import java.util.ArrayList;
21+
import java.util.HashMap;
22+
import java.util.List;
23+
import java.util.Map;
24+
import java.util.Set;
25+
26+
/**
27+
* {@link IgnoredSourceLeafLoader} for the {@link IgnoredSourceFieldMapper.IgnoredSourceFormat#DOC_VALUES_IGNORED_SOURCE} format.
28+
* All ignored-source entries for a document are stored in a single binary doc value using the
29+
* {@link MultiValuedBinaryDocValuesField.IntegratedCount} format, where each individual entry is encoded via
30+
* {@link LegacyIgnoredSourceEncoding}.
31+
*
32+
* <p>For example, a document with:
33+
* <pre>{@code
34+
* { "obj": { "foo": "a" }, "bar": 42 }
35+
* }</pre>
36+
* where {@code obj.foo} and {@code bar} are ignored, would produce a single binary doc value containing both entries.
37+
*/
38+
class DocValuesIgnoredSourceLeafLoader implements IgnoredSourceLeafLoader {
39+
40+
private final MultiValuedSortedBinaryDocValues docValues;
41+
42+
DocValuesIgnoredSourceLeafLoader(LeafReader leafReader) throws IOException {
43+
this.docValues = MultiValuedSortedBinaryDocValues.from(leafReader, IgnoredSourceFieldMapper.NAME);
44+
}
45+
46+
@Override
47+
public Map<String, List<NameValue>> loadAllIgnoredFields(SourceFilter filter, Map<String, List<Object>> storedFields, int docId)
48+
throws IOException {
49+
// Advance doc values to the target document
50+
if (docValues == null || docValues.advanceExact(docId) == false) {
51+
return null;
52+
}
53+
54+
Map<String, List<NameValue>> objectsWithIgnoredFields = new HashMap<>();
55+
int count = docValues.docValueCount();
56+
57+
for (int i = 0; i < count; i++) {
58+
NameValue nv = LegacyIgnoredSourceEncoding.decode(docValues.nextValue());
59+
60+
// Skip entries excluded by the source filter.
61+
if (filter != null && filter.isPathFiltered(nv.name(), XContentDataHelper.isEncodedObject(nv.value()))) {
62+
continue;
63+
}
64+
65+
// Group by parent field name so the caller can reconstruct each object.
66+
objectsWithIgnoredFields.computeIfAbsent(nv.getParentFieldName(), k -> new ArrayList<>()).add(nv);
67+
}
68+
return objectsWithIgnoredFields;
69+
}
70+
71+
@Override
72+
public Map<String, List<NameValue>> loadSingleIgnoredField(Set<String> fieldPaths, Map<String, List<Object>> storedFields, int docId)
73+
throws IOException {
74+
// Advance doc values to the target document
75+
if (docValues == null || docValues.advanceExact(docId) == false) {
76+
return Map.of();
77+
}
78+
79+
Map<String, List<NameValue>> valuesForFieldAndParents = new HashMap<>();
80+
int count = docValues.docValueCount();
81+
82+
for (int i = 0; i < count; i++) {
83+
NameValue nv = LegacyIgnoredSourceEncoding.decode(docValues.nextValue());
84+
85+
// Collect only entries matching one of the requested field paths.
86+
if (fieldPaths.contains(nv.name())) {
87+
valuesForFieldAndParents.computeIfAbsent(nv.name(), k -> new ArrayList<>()).add(nv);
88+
}
89+
}
90+
return valuesForFieldAndParents;
91+
}
92+
}

server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,27 @@ protected FallbackSyntheticSourceBlockLoader(
5757
this.fieldPaths = splitIntoFieldPaths(fieldName);
5858
}
5959

60+
/**
61+
* Returns the ignored source format used by this loader.
62+
*/
63+
public IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat() {
64+
return ignoredSourceFormat;
65+
}
66+
6067
@Override
6168
public IOFunction<CircuitBreaker, ColumnAtATimeReader> columnAtATimeReader(LeafReaderContext context) {
6269
return null;
6370
}
6471

6572
@Override
6673
public RowStrideReader rowStrideReader(CircuitBreaker breaker, LeafReaderContext context) throws IOException {
67-
return new IgnoredSourceRowStrideReader<>(breaker, fieldName, fieldPaths, reader, ignoredSourceFormat);
74+
return new IgnoredSourceRowStrideReader<>(
75+
breaker,
76+
fieldName,
77+
fieldPaths,
78+
reader,
79+
ignoredSourceFormat.createLeafLoader(context.reader())
80+
);
6881
}
6982

7083
@Override
@@ -107,28 +120,29 @@ private static class IgnoredSourceRowStrideReader<T> implements RowStrideReader
107120
// Contains name of the field and all its parents
108121
private final Set<String> fieldPaths;
109122
private final Reader<T> reader;
110-
private final IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat;
123+
private final IgnoredSourceFieldMapper.IgnoredSourceLeafLoader ignoredSourceLeafLoader;
111124

112125
IgnoredSourceRowStrideReader(
113126
CircuitBreaker breaker,
114127
String fieldName,
115128
Set<String> fieldPaths,
116129
Reader<T> reader,
117-
IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat
130+
IgnoredSourceFieldMapper.IgnoredSourceLeafLoader ignoredSourceLeafLoader
118131
) {
119132
breaker.addEstimateBytesAndMaybeBreak(ESTIMATED_SIZE, "load blocks");
120133
this.breaker = breaker;
121134
this.fieldName = fieldName;
122135
this.fieldPaths = fieldPaths;
123136
this.reader = reader;
124-
this.ignoredSourceFormat = ignoredSourceFormat;
137+
this.ignoredSourceLeafLoader = ignoredSourceLeafLoader;
125138
}
126139

127140
@Override
128141
public void read(int docId, StoredFields storedFields, Builder builder) throws IOException {
129-
Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents = ignoredSourceFormat.loadSingleIgnoredField(
142+
Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents = ignoredSourceLeafLoader.loadSingleIgnoredField(
130143
fieldPaths,
131-
storedFields.storedFields()
144+
storedFields.storedFields(),
145+
docId
132146
);
133147

134148
if (valuesForFieldAndParents.isEmpty()) {

0 commit comments

Comments
 (0)