-
Notifications
You must be signed in to change notification settings - Fork 3.1k
Expand file tree
/
Copy pathDataFile.java
More file actions
167 lines (159 loc) · 6 KB
/
DataFile.java
File metadata and controls
167 lines (159 loc) · 6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;
import static org.apache.iceberg.types.Types.NestedField.optional;
import static org.apache.iceberg.types.Types.NestedField.required;
import java.util.List;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.BinaryType;
import org.apache.iceberg.types.Types.IntegerType;
import org.apache.iceberg.types.Types.ListType;
import org.apache.iceberg.types.Types.LongType;
import org.apache.iceberg.types.Types.MapType;
import org.apache.iceberg.types.Types.StringType;
import org.apache.iceberg.types.Types.StructType;
/** Interface for data files listed in a table manifest. */
public interface DataFile extends ContentFile<DataFile> {
// fields for adding delete data files
Types.NestedField CONTENT =
optional(
134,
"content",
IntegerType.get(),
"Contents of the file: 0=data, 1=position deletes, 2=equality deletes");
Types.NestedField FILE_PATH =
required(100, "file_path", StringType.get(), "Location URI with FS scheme");
Types.NestedField FILE_FORMAT =
required(101, "file_format", StringType.get(), "File format name: avro, orc, or parquet");
Types.NestedField RECORD_COUNT =
required(103, "record_count", LongType.get(), "Number of records in the file");
Types.NestedField FILE_SIZE =
required(104, "file_size_in_bytes", LongType.get(), "Total file size in bytes");
Types.NestedField COLUMN_SIZES =
optional(
108,
"column_sizes",
MapType.ofRequired(117, 118, IntegerType.get(), LongType.get()),
"Map of column id to total size on disk");
Types.NestedField VALUE_COUNTS =
optional(
109,
"value_counts",
MapType.ofRequired(119, 120, IntegerType.get(), LongType.get()),
"Map of column id to total count, including null and NaN");
Types.NestedField NULL_VALUE_COUNTS =
optional(
110,
"null_value_counts",
MapType.ofRequired(121, 122, IntegerType.get(), LongType.get()),
"Map of column id to null value count");
Types.NestedField NAN_VALUE_COUNTS =
optional(
137,
"nan_value_counts",
MapType.ofRequired(138, 139, IntegerType.get(), LongType.get()),
"Map of column id to number of NaN values in the column");
Types.NestedField LOWER_BOUNDS =
optional(
125,
"lower_bounds",
MapType.ofRequired(126, 127, IntegerType.get(), BinaryType.get()),
"Map of column id to lower bound");
Types.NestedField UPPER_BOUNDS =
optional(
128,
"upper_bounds",
MapType.ofRequired(129, 130, IntegerType.get(), BinaryType.get()),
"Map of column id to upper bound");
Types.NestedField KEY_METADATA =
optional(131, "key_metadata", BinaryType.get(), "Encryption key metadata blob");
Types.NestedField SPLIT_OFFSETS =
optional(
132, "split_offsets", ListType.ofRequired(133, LongType.get()), "Splittable offsets");
Types.NestedField EQUALITY_IDS =
optional(
135,
"equality_ids",
ListType.ofRequired(136, IntegerType.get()),
"Equality comparison field IDs");
Types.NestedField SORT_ORDER_ID =
optional(140, "sort_order_id", IntegerType.get(), "Sort order ID");
Types.NestedField SPEC_ID = optional(141, "spec_id", IntegerType.get(), "Partition spec ID");
Types.NestedField FIRST_ROW_ID =
optional(
142,
"first_row_id",
LongType.get(),
"The first row ID assigned to the first row in the data file");
Types.NestedField REFERENCED_DATA_FILE =
optional(
143,
"referenced_data_file",
StringType.get(),
"Fully qualified location (URI with FS scheme) of a data file that all deletes reference");
Types.NestedField CONTENT_OFFSET =
optional(
144, "content_offset", LongType.get(), "The offset in the file where the content starts");
Types.NestedField CONTENT_SIZE =
optional(
145,
"content_size_in_bytes",
LongType.get(),
"The length of referenced content stored in the file");
int PARTITION_ID = 102;
String PARTITION_NAME = "partition";
String PARTITION_DOC = "Partition data tuple, schema based on the partition spec";
// NEXT ID TO ASSIGN: 146
static StructType getType(StructType partitionType) {
// IDs start at 100 to leave room for changes to ManifestEntry
return StructType.of(
CONTENT,
FILE_PATH,
FILE_FORMAT,
SPEC_ID,
required(PARTITION_ID, PARTITION_NAME, partitionType, PARTITION_DOC),
RECORD_COUNT,
FILE_SIZE,
COLUMN_SIZES,
VALUE_COUNTS,
NULL_VALUE_COUNTS,
NAN_VALUE_COUNTS,
LOWER_BOUNDS,
UPPER_BOUNDS,
KEY_METADATA,
SPLIT_OFFSETS,
EQUALITY_IDS,
SORT_ORDER_ID,
FIRST_ROW_ID,
REFERENCED_DATA_FILE,
CONTENT_OFFSET,
CONTENT_SIZE);
}
/**
* @return the content stored in the file; one of DATA, POSITION_DELETES, or EQUALITY_DELETES
*/
@Override
default FileContent content() {
return FileContent.DATA;
}
@Override
default List<Integer> equalityFieldIds() {
return null;
}
}