Skip to content

Commit 5f8f2fd

Browse files
pdillingerfacebook-github-bot
authored andcommitted
Refactor / clean up / optimize FullFilterBitsReader (#5941)
Summary: FullFilterBitsReader, after creating in BloomFilterPolicy, was responsible for decoding metadata bits. This meant that FullFilterBitsReader::MayMatch had some metadata checks in order to implement "always true" or "always false" functionality in the case of inconsistent or trivial metadata. This made for ugly mixing-of-concerns code and probably had some runtime cost. It also didn't really support plugging in alternative filter implementations with extensions to the existing metadata schema. BloomFilterPolicy::GetFilterBitsReader is now (exclusively) responsible for decoding filter metadata bits and constructing appropriate instances deriving from FilterBitsReader. "Always false" and "always true" derived classes allow FullFilterBitsReader not to be concerned with handling of trivial or inconsistent metadata. This also makes for easy expansion to alternative filter implementations in new, alternative derived classes. This change makes calls to FilterBitsReader::MayMatch *necessarily* virtual because there's now more than one built-in implementation. Compared with the previous implementation's extra 'if' checks in MayMatch, there's no consistent performance difference, measured by (an older revision of) filter_bench (differences here seem to be within noise): Inside queries... - Dry run (407) ns/op: 35.9996 + Dry run (407) ns/op: 35.2034 - Single filter ns/op: 47.5483 + Single filter ns/op: 47.4034 - Batched, prepared ns/op: 43.1559 + Batched, prepared ns/op: 42.2923 ... - Random filter ns/op: 150.697 + Random filter ns/op: 149.403 ---------------------------- Outside queries... - Dry run (980) ns/op: 34.6114 + Dry run (980) ns/op: 34.0405 - Single filter ns/op: 56.8326 + Single filter ns/op: 55.8414 - Batched, prepared ns/op: 48.2346 + Batched, prepared ns/op: 47.5667 - Random filter ns/op: 155.377 + Random filter ns/op: 153.942 Average FP rate %: 1.1386 Also, the FullFilterBitsReader ctor was responsible for a surprising amount of CPU in production, due in part to inefficient determination of the CACHE_LINE_SIZE used to construct the filter being read. The overwhelming common case (same as my CACHE_LINE_SIZE) is now substantially optimized, as shown with filter_bench with -new_reader_every=1 (old option - see below) (repeatable result): Inside queries... - Dry run (453) ns/op: 118.799 + Dry run (453) ns/op: 105.869 - Single filter ns/op: 82.5831 + Single filter ns/op: 74.2509 ... - Random filter ns/op: 224.936 + Random filter ns/op: 194.833 ---------------------------- Outside queries... - Dry run (aa1) ns/op: 118.503 + Dry run (aa1) ns/op: 104.925 - Single filter ns/op: 90.3023 + Single filter ns/op: 83.425 ... - Random filter ns/op: 220.455 + Random filter ns/op: 175.7 Average FP rate %: 1.13886 However PR#5936 has/will reclaim most of this cost. After that PR, the optimization of this code path is likely negligible, but nonetheless it's clear we aren't making performance any worse. Also fixed inadequate check of consistency between filter data size and num_lines. (Unit test updated.) Pull Request resolved: #5941 Test Plan: previously added unit tests FullBloomTest.CorruptFilters and FullBloomTest.RawSchema Differential Revision: D18018353 Pulled By: pdillinger fbshipit-source-id: 8e04c2b4a7d93223f49a237fd52ef2483929ed9c
1 parent fe464bc commit 5f8f2fd

2 files changed

Lines changed: 81 additions & 82 deletions

File tree

util/bloom.cc

Lines changed: 78 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -132,39 +132,27 @@ inline void FullFilterBitsBuilder::AddHash(uint32_t h, char* data,
132132
}
133133

134134
namespace {
135+
class AlwaysTrueFilter : public FilterBitsReader {
136+
public:
137+
bool MayMatch(const Slice&) override { return true; }
138+
using FilterBitsReader::MayMatch; // inherit overload
139+
};
140+
141+
class AlwaysFalseFilter : public FilterBitsReader {
142+
public:
143+
bool MayMatch(const Slice&) override { return false; }
144+
using FilterBitsReader::MayMatch; // inherit overload
145+
};
146+
135147
class FullFilterBitsReader : public FilterBitsReader {
136148
public:
137-
explicit FullFilterBitsReader(const Slice& contents)
138-
: data_(contents.data()),
139-
data_len_(static_cast<uint32_t>(contents.size())),
140-
num_probes_(0),
141-
num_lines_(0),
142-
log2_cache_line_size_(0) {
143-
assert(data_);
144-
GetFilterMeta(contents, &num_probes_, &num_lines_);
145-
// Sanitize broken parameter
146-
if (num_lines_ != 0 && (data_len_-5) % num_lines_ != 0) {
147-
num_lines_ = 0;
148-
num_probes_ = 0;
149-
} else if (num_lines_ != 0) {
150-
while (true) {
151-
uint32_t num_lines_at_curr_cache_size =
152-
(data_len_ - 5) >> log2_cache_line_size_;
153-
if (num_lines_at_curr_cache_size == 0) {
154-
// The cache line size seems not a power of two. It's not supported
155-
// and indicates a corruption so disable using this filter.
156-
// Removed for unit testing corruption: assert(false);
157-
num_lines_ = 0;
158-
num_probes_ = 0;
159-
break;
160-
}
161-
if (num_lines_at_curr_cache_size == num_lines_) {
162-
break;
163-
}
164-
++log2_cache_line_size_;
165-
}
166-
}
167-
}
149+
FullFilterBitsReader(const char* data, int num_probes, uint32_t num_lines,
150+
uint32_t log2_cache_line_size)
151+
: data_(data),
152+
num_probes_(num_probes),
153+
num_lines_(num_lines),
154+
log2_cache_line_size_(log2_cache_line_size) {}
155+
168156
// No Copy allowed
169157
FullFilterBitsReader(const FullFilterBitsReader&) = delete;
170158
void operator=(const FullFilterBitsReader&) = delete;
@@ -177,11 +165,6 @@ class FullFilterBitsReader : public FilterBitsReader {
177165
// if the key was not on the list, but it should aim to return false with a
178166
// high probability.
179167
bool MayMatch(const Slice& key) override {
180-
if (data_len_ <= 5) { // remain same with original filter
181-
return false;
182-
}
183-
// Other Error params, including a broken filter, regarded as match
184-
if (num_probes_ == 0 || num_lines_ == 0) return true;
185168
uint32_t hash = BloomHash(key);
186169
uint32_t byte_offset;
187170
LegacyFullFilterImpl::PrepareHashMayMatch(
@@ -191,17 +174,6 @@ class FullFilterBitsReader : public FilterBitsReader {
191174
}
192175

193176
virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) override {
194-
if (data_len_ <= 5) { // remain same with original filter
195-
for (int i = 0; i < num_keys; ++i) {
196-
may_match[i] = false;
197-
}
198-
return;
199-
}
200-
for (int i = 0; i < num_keys; ++i) {
201-
may_match[i] = true;
202-
}
203-
// Other Error params, including a broken filter, regarded as match
204-
if (num_probes_ == 0 || num_lines_ == 0) return;
205177
uint32_t hashes[MultiGetContext::MAX_BATCH_SIZE];
206178
uint32_t byte_offsets[MultiGetContext::MAX_BATCH_SIZE];
207179
for (int i = 0; i < num_keys; ++i) {
@@ -210,42 +182,20 @@ class FullFilterBitsReader : public FilterBitsReader {
210182
/*out*/ &byte_offsets[i],
211183
log2_cache_line_size_);
212184
}
213-
214185
for (int i = 0; i < num_keys; ++i) {
215-
if (!LegacyFullFilterImpl::HashMayMatchPrepared(hashes[i], num_probes_,
216-
data_ + byte_offsets[i],
217-
log2_cache_line_size_)) {
218-
may_match[i] = false;
219-
}
186+
may_match[i] = LegacyFullFilterImpl::HashMayMatchPrepared(
187+
hashes[i], num_probes_, data_ + byte_offsets[i],
188+
log2_cache_line_size_);
220189
}
221190
}
222191

223192
private:
224-
// Filter meta data
225193
const char* data_;
226-
uint32_t data_len_;
227-
int num_probes_;
228-
uint32_t num_lines_;
229-
uint32_t log2_cache_line_size_;
230-
231-
// Get num_probes, and num_lines from filter
232-
// If filter format broken, set both to 0.
233-
void GetFilterMeta(const Slice& filter, int* num_probes, uint32_t* num_lines);
194+
const int num_probes_;
195+
const uint32_t num_lines_;
196+
const uint32_t log2_cache_line_size_;
234197
};
235198

236-
void FullFilterBitsReader::GetFilterMeta(const Slice& filter, int* num_probes,
237-
uint32_t* num_lines) {
238-
uint32_t len = static_cast<uint32_t>(filter.size());
239-
if (len <= 5) {
240-
// filter is empty or broken
241-
*num_probes = 0;
242-
*num_lines = 0;
243-
return;
244-
}
245-
246-
*num_probes = filter.data()[len - 5];
247-
*num_lines = DecodeFixed32(filter.data() + len - 4);
248-
}
249199

250200
// An implementation of filter policy
251201
class BloomFilterPolicy : public FilterPolicy {
@@ -311,8 +261,60 @@ class BloomFilterPolicy : public FilterPolicy {
311261
return new FullFilterBitsBuilder(bits_per_key_, num_probes_);
312262
}
313263

264+
// Read metadata to determine what kind of FilterBitsReader is needed
265+
// and return a new one.
314266
FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override {
315-
return new FullFilterBitsReader(contents);
267+
uint32_t len_with_meta = static_cast<uint32_t>(contents.size());
268+
if (len_with_meta <= 5) {
269+
// filter is empty or broken. Treat like zero keys added.
270+
return new AlwaysFalseFilter();
271+
}
272+
273+
char raw_num_probes = contents.data()[len_with_meta - 5];
274+
// NB: *num_probes > 30 and < 128 probably have not been used, because of
275+
// BloomFilterPolicy::initialize, unless directly calling
276+
// FullFilterBitsBuilder as an API, but we are leaving those cases in
277+
// limbo with FullFilterBitsReader for now.
278+
279+
if (raw_num_probes < 1) {
280+
// Treat as zero probes (always FP) for now.
281+
// NB: < 0 (or unsigned > 127) effectively reserved for future use.
282+
return new AlwaysTrueFilter();
283+
}
284+
// else attempt decode for FullFilterBitsReader
285+
286+
int num_probes = raw_num_probes;
287+
assert(num_probes >= 1);
288+
assert(num_probes <= 127);
289+
290+
uint32_t len = len_with_meta - 5;
291+
assert(len > 0);
292+
293+
uint32_t num_lines = DecodeFixed32(contents.data() + len_with_meta - 4);
294+
uint32_t log2_cache_line_size;
295+
296+
if (num_lines * CACHE_LINE_SIZE == len) {
297+
// Common case
298+
log2_cache_line_size = folly::constexpr_log2(CACHE_LINE_SIZE);
299+
} else if (num_lines == 0 || len % num_lines != 0) {
300+
// Invalid (no solution to num_lines * x == len)
301+
// Treat as zero probes (always FP) for now.
302+
return new AlwaysTrueFilter();
303+
} else {
304+
// Determine the non-native cache line size (from another system)
305+
log2_cache_line_size = 0;
306+
while ((num_lines << log2_cache_line_size) < len) {
307+
++log2_cache_line_size;
308+
}
309+
if ((num_lines << log2_cache_line_size) != len) {
310+
// Invalid (block size not a power of two)
311+
// Treat as zero probes (always FP) for now.
312+
return new AlwaysTrueFilter();
313+
}
314+
}
315+
// if not early return
316+
return new FullFilterBitsReader(contents.data(), num_probes, num_lines,
317+
log2_cache_line_size);
316318
}
317319

318320
// If choose to use block based builder

util/bloom_test.cc

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -589,14 +589,11 @@ TEST_F(FullBloomTest, CorruptFilters) {
589589
ASSERT_TRUE(Matches("hello"));
590590
ASSERT_TRUE(Matches("world"));
591591

592-
// Bad filter bits
592+
// Bad filter bits - returns true for safety
593593
// 65 bytes is not a power of two, so not a legal cache line size
594594
OpenRaw(cft.Reset(65 * 3, 3, 6, fill));
595-
// ASSERT_TRUE(Matches("hello"));
596-
// ASSERT_TRUE(Matches("world"));
597-
// NB: NOT PROPERLY CHECKED in implementation
598-
ASSERT_EQ(fill, Matches("hello"));
599-
ASSERT_EQ(fill, Matches("world"));
595+
ASSERT_TRUE(Matches("hello"));
596+
ASSERT_TRUE(Matches("world"));
600597

601598
// Bad filter bits - returns false as if built from zero keys
602599
// < 5 bytes overall means missing even metadata

0 commit comments

Comments
 (0)