-
Notifications
You must be signed in to change notification settings - Fork 8.4k
Expand file tree
/
Copy pathMergeTreeIndexMinMax.h
More file actions
145 lines (109 loc) · 5.04 KB
/
MergeTreeIndexMinMax.h
File metadata and controls
145 lines (109 loc) · 5.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#pragma once
#include <Storages/MergeTree/MergeTreeIndices.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/KeyCondition.h>
namespace DB
{
/// Granule type of the MinMax index.
/// Its state is a vector of Range objects (`hyperrectangle`) plus the members used for
/// (de)serialization. The (de)serialization routines and constructors are defined outside this header.
struct MergeTreeIndexGranuleMinMax final : public IMergeTreeIndexGranule
{
    /// Construct from the index name and the index sample block.
    MergeTreeIndexGranuleMinMax(const String & index_name_, const Block & index_sample_block_);

    /// Same as above, additionally taking a hyperrectangle by rvalue reference.
    MergeTreeIndexGranuleMinMax(const String & index_name_, const Block & index_sample_block_, std::vector<Range> && hyperrectangle_);

    ~MergeTreeIndexGranuleMinMax() override = default;

    void serializeBinary(WriteBuffer & ostr) const override;
    void deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) override;

    /// The granule is empty exactly when it holds no ranges.
    bool empty() const override
    {
        return hyperrectangle.empty();
    }

    /// Only the storage reserved by the vector itself is counted (capacity times sizeof(Range)).
    size_t memoryUsageBytes() const override
    {
        return sizeof(Range) * hyperrectangle.capacity();
    }

    /// NOTE: these two members are references, so the referenced objects must outlive the granule.
    const String & index_name;
    const Block & index_sample_block;

    std::vector<Range> hyperrectangle;
    Serializations serializations;
    DataTypes datatypes;
    FormatSettings format_settings;
};
/// Aggregator type of the MinMax index.
/// Unlike the granule, it stores the index name and the sample block by value.
/// Its accumulated state is the `hyperrectangle` vector; update() and getGranuleAndReset()
/// are defined outside this header.
struct MergeTreeIndexAggregatorMinMax final : public IMergeTreeIndexAggregator
{
    /// Constructor arguments carry a trailing underscore, like the other constructors in this
    /// file, so that they do not share a name with the data members they correspond to.
    MergeTreeIndexAggregatorMinMax(const String & index_name_, const Block & index_sample_block_);
    ~MergeTreeIndexAggregatorMinMax() override = default;

    /// The aggregator is empty exactly when no ranges have been accumulated.
    bool empty() const override { return hyperrectangle.empty(); }

    MergeTreeIndexGranulePtr getGranuleAndReset() override;

    /// NOTE(review): `pos` is an in/out pointer and `limit` bounds the number of rows consumed -
    /// presumably; confirm against IMergeTreeIndexAggregator::update.
    void update(const Block & block, size_t * pos, size_t limit) override;

    String index_name;
    Block index_sample_block;
    std::vector<Range> hyperrectangle;
};
/// Condition type of the MinMax index.
/// Holds the data types of the index and a KeyCondition; all member functions are defined
/// outside this header.
class MergeTreeIndexConditionMinMax final : public IMergeTreeIndexCondition
{
public:
    MergeTreeIndexConditionMinMax(const IndexDescription & index, const ActionsDAGWithInversionPushDown & filter_dag, ContextPtr context);

    ~MergeTreeIndexConditionMinMax() override = default;

    bool alwaysUnknownOrTrue() const override;

    bool mayBeTrueOnGranule(
        MergeTreeIndexGranulePtr idx_granule,
        const UpdatePartialDisjunctionResultFn & update_partial_disjunction_result_fn) const override;

    std::string getDescription() const override;

private:
    DataTypes index_data_types;
    KeyCondition condition;
};
/// The MinMax index itself: creates the granule, aggregator and condition objects
/// through the IMergeTreeIndex interface. The factory methods are defined outside this header.
class MergeTreeIndexMinMax : public IMergeTreeIndex
{
public:
    explicit MergeTreeIndexMinMax(const IndexDescription & index_) : IMergeTreeIndex(index_)
    {
    }

    ~MergeTreeIndexMinMax() override = default;

    MergeTreeIndexGranulePtr createIndexGranule() const override;
    MergeTreeIndexAggregatorPtr createIndexAggregator() const override;
    MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAG::Node * predicate, ContextPtr context) const override;

    /// Returns a single substream entry: {Type::Regular, "", ".idx2"}.
    /// NOTE(review): the two strings are presumably (suffix, extension) - confirm against MergeTreeIndexSubstream.
    MergeTreeIndexSubstreams getSubstreams() const override
    {
        return {{MergeTreeIndexSubstream::Type::Regular, "", ".idx2"}};
    }

    MergeTreeIndexFormat getDeserializedFormat(const MergeTreeDataPartChecksums & checksums, const std::string & path_prefix) const override; /// NOLINT
};
/// Bulk granule container of the MinMax index.
/// Keeps one (granule number, value) entry per granule in `granules` and exposes getTopKMarks()
/// overloads; all non-inline members are defined outside this header.
struct MergeTreeIndexBulkGranulesMinMax final : public IMergeTreeIndexBulkGranules
{
    /// One entry per granule: its number and a single Field, which is either the min or the max value.
    struct MinMaxGranule
    {
        size_t granule_num;
        Field min_or_max_value;
    };

    /// Entry that additionally carries the part index and the direction, and is ordered by value.
    struct MinMaxGranuleItem
    {
        int direction;
        size_t part_index;
        size_t granule_num;
        Field min_or_max_value;

        /// Ordering intended for a heap:
        /// if sort by ASC, then max-heap of min values, if sort by DESC, min-heap of max values.
        /// `direction == 1` selects the `<` comparison, any other value selects `>`.
        bool operator < (const MinMaxGranuleItem & b) const
        {
            if (direction == 1)
                return min_or_max_value < b.min_or_max_value;

            return min_or_max_value > b.min_or_max_value;
        }
    };

    /// `store_map_` defaults to false.
    /// NOTE(review): it presumably controls whether `granules_map` is filled - confirm in the implementation file.
    explicit MergeTreeIndexBulkGranulesMinMax(
        const String & index_name_,
        const Block & index_sample_block_,
        size_t index_granularity_,
        int direction_,
        size_t size_hint_,
        size_t last_part_granule_,
        bool store_map_ = false);

    void deserializeBinary(size_t granule_num, ReadBuffer & istr, MergeTreeIndexVersion version) override;

    /// Per-object variant: writes the selected granules into `result`.
    void getTopKMarks(size_t n, bool handle_ties, std::vector<MinMaxGranule> & result);

    /// Static variant working on the granules of several parts: writes mark ranges into `result`.
    static void getTopKMarks(
        int direction,
        size_t n,
        size_t index_granularity,
        bool handle_ties,
        const std::vector<std::vector<MinMaxGranule>> & parts,
        std::vector<MarkRanges> & result);

    std::vector<MinMaxGranule> granules;
    std::unordered_map<size_t, size_t> granules_map;

private:
    /// Compile-time `handle_ties` counterparts of the public overloads above.
    template <bool handle_ties>
    void getTopKMarks(size_t n, std::vector<MinMaxGranule> & result);

    template <bool handle_ties>
    static void getTopKMarks(
        int direction,
        size_t n,
        size_t index_granularity,
        const std::vector<std::vector<MinMaxGranule>> & parts,
        std::vector<MarkRanges> & result);

    SerializationPtr serialization;

    /// NOTE: reference members - the referenced objects must outlive this object.
    [[maybe_unused]] const String & index_name;
    const Block & index_sample_block;

    FormatSettings format_settings;
    size_t index_granularity;
    int direction;
    size_t last_part_granule;
    bool empty = true;
    bool store_map = false;
};
/// Shared-ownership pointer to a MergeTreeIndexBulkGranulesMinMax.
using MergeTreeIndexBulkGranulesMinMaxPtr = std::shared_ptr<MergeTreeIndexBulkGranulesMinMax>;
}