Skip to content

Commit 27f7f09

Browse files
Merge pull request #4913 from yandex/fix_additional_seek
Fix bug with additional file processing in cache compressed buffer
2 parents 1c030e4 + 0b9b784 commit 27f7f09

4 files changed

Lines changed: 36 additions & 8 deletions

File tree

dbms/src/Compression/CachedCompressedReadBuffer.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ bool CachedCompressedReadBuffer::nextImpl()
3535
UInt128 key = cache->hash(path, file_pos);
3636
owned_cell = cache->get(key);
3737

38-
if (!owned_cell || !codec)
38+
if (!owned_cell)
3939
{
4040
/// If not, read it from the file.
4141
initInput();
@@ -49,21 +49,22 @@ bool CachedCompressedReadBuffer::nextImpl()
4949

5050
if (owned_cell->compressed_size)
5151
{
52-
owned_cell->data.resize(size_decompressed + codec->getAdditionalSizeAtTheEndOfBuffer());
52+
owned_cell->additional_bytes = codec->getAdditionalSizeAtTheEndOfBuffer();
53+
owned_cell->data.resize(size_decompressed + owned_cell->additional_bytes);
5354
decompress(owned_cell->data.data(), size_decompressed, size_compressed_without_checksum);
5455

55-
/// Put data into cache.
56-
cache->set(key, owned_cell);
5756
}
57+
58+
/// Put data into cache.
59+
/// NOTE: Cache the cell even if nothing was read (compressed_size == 0),
60+
/// because the cached entry can be reused later without reopening the file.
61+
cache->set(key, owned_cell);
5862
}
5963

6064
if (owned_cell->data.size() == 0)
61-
{
62-
owned_cell = nullptr;
6365
return false;
64-
}
6566

66-
working_buffer = Buffer(owned_cell->data.data(), owned_cell->data.data() + owned_cell->data.size() - codec->getAdditionalSizeAtTheEndOfBuffer());
67+
working_buffer = Buffer(owned_cell->data.data(), owned_cell->data.data() + owned_cell->data.size() - owned_cell->additional_bytes);
6768

6869
file_pos += owned_cell->compressed_size;
6970

dbms/src/IO/UncompressedCache.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ struct UncompressedCacheCell
2222
{
2323
Memory<> data;
2424
size_t compressed_size;
25+
UInt32 additional_bytes;
2526
};
2627

2728
struct UncompressedSizeWeightFunction
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
0 36 14
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#!/usr/bin/env bash
2+
3+
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
4+
. $CURDIR/../shell_config.sh
5+
6+
7+
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.small_table"
8+
9+
$CLICKHOUSE_CLIENT --query="CREATE TABLE test.small_table (a UInt64 default 0, n UInt64) ENGINE = MergeTree() PARTITION BY tuple() ORDER BY (a);"
10+
11+
$CLICKHOUSE_CLIENT --query="INSERT INTO test.small_table(n) SELECT * from system.numbers limit 100000;"
12+
13+
cached_query="SELECT count() FROM test.small_table where n > 0;"
14+
15+
$CLICKHOUSE_CLIENT --use_uncompressed_cache=1 --query="$cached_query" &> /dev/null
16+
17+
$CLICKHOUSE_CLIENT --use_uncompressed_cache=1 --query_id="test-query-uncompressed-cache" --query="$cached_query" &> /dev/null
18+
19+
sleep 1
20+
$CLICKHOUSE_CLIENT --query="SYSTEM FLUSH LOGS"
21+
22+
$CLICKHOUSE_CLIENT --query="SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'Seek')], ProfileEvents.Values[indexOf(ProfileEvents.Names, 'ReadCompressedBytes')], ProfileEvents.Values[indexOf(ProfileEvents.Names, 'UncompressedCacheHits')] AS hit FROM system.query_log WHERE (query_id = 'test-query-uncompressed-cache') AND (type = 2) ORDER BY event_time DESC LIMIT 1"
23+
24+
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.small_table"
25+

0 commit comments

Comments
 (0)