Skip to content

Commit 5d576bb

Browse files
committed
Merge branch 'master' into lower-contention-on-stacktrace-cache
2 parents 0bb5410 + ffc4046 commit 5d576bb

121 files changed

Lines changed: 2032 additions & 446 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.clang-tidy

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,6 @@ Checks: [
119119
'-readability-named-parameter',
120120
'-readability-redundant-declaration',
121121
'-readability-simplify-boolean-expr',
122-
'-readability-static-accessed-through-instance',
123122
'-readability-suspicious-call-argument',
124123
'-readability-uppercase-literal-suffix',
125124
'-readability-use-anyofallof',

base/poco/Foundation/src/pcre_compile.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4835,7 +4835,7 @@ for (;; ptr++)
48354835
48364836
If the class contains characters outside the 0-255 range, a different
48374837
opcode is compiled. It may optionally have a bit map for characters < 256,
4838-
but those above are are explicitly listed afterwards. A flag byte tells
4838+
but those above are explicitly listed afterwards. A flag byte tells
48394839
whether the bitmap is present, and whether this is a negated class or not.
48404840
48414841
In JavaScript compatibility mode, an isolated ']' causes an error. In

contrib/avro-cmake/CMakeLists.txt

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,3 @@ target_link_libraries (_avrocpp PRIVATE boost::headers_only boost::iostreams)
5959
target_compile_definitions (_avrocpp PUBLIC SNAPPY_CODEC_AVAILABLE)
6060
target_include_directories (_avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR})
6161
target_link_libraries (_avrocpp PRIVATE ch_contrib::snappy)
62-
63-
# create a symlink to include headers with <avro/...>
64-
set(AVRO_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include")
65-
ADD_CUSTOM_TARGET(avro_symlink_headers ALL
66-
COMMAND ${CMAKE_COMMAND} -E make_directory "${AVRO_INCLUDE_DIR}"
67-
COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVRO_INCLUDE_DIR}/avro"
68-
)
69-
add_dependencies(_avrocpp avro_symlink_headers)
70-
target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVRO_INCLUDE_DIR}")

contrib/libssh-cmake/CMakeLists.txt

Lines changed: 24 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,40 @@
1-
option (ENABLE_SSH "Enable support for SSH keys and protocol" ${ENABLE_LIBRARIES})
1+
option (ENABLE_SSH "Enable support for libssh" ${ENABLE_LIBRARIES})
22

33
if (NOT ENABLE_SSH)
4-
message(STATUS "Not using SSH")
4+
message(STATUS "Not using libssh")
55
return()
66
endif()
77

8+
# CMake variables needed by libssh_version.h.cmake, update them when you update libssh
9+
set(libssh_VERSION_MAJOR 0)
10+
set(libssh_VERSION_MINOR 9)
11+
set(libssh_VERSION_PATCH 8)
12+
813
set(LIB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libssh")
914
set(LIB_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/libssh")
1015

11-
# Set CMake variables which are used in libssh_version.h.cmake
12-
project(libssh VERSION 0.9.8 LANGUAGES C)
13-
14-
set(LIBRARY_VERSION "4.8.8")
15-
set(LIBRARY_SOVERSION "4")
16-
17-
set(CMAKE_THREAD_PREFER_PTHREADS ON)
18-
set(THREADS_PREFER_PTHREAD_FLAG ON)
19-
20-
set(WITH_ZLIB OFF)
21-
set(WITH_SYMBOL_VERSIONING OFF)
22-
set(WITH_SERVER ON)
23-
2416
set(libssh_SRCS
2517
${LIB_SOURCE_DIR}/src/agent.c
2618
${LIB_SOURCE_DIR}/src/auth.c
2719
${LIB_SOURCE_DIR}/src/base64.c
2820
${LIB_SOURCE_DIR}/src/bignum.c
2921
${LIB_SOURCE_DIR}/src/buffer.c
3022
${LIB_SOURCE_DIR}/src/callbacks.c
23+
${LIB_SOURCE_DIR}/src/chachapoly.c
3124
${LIB_SOURCE_DIR}/src/channels.c
3225
${LIB_SOURCE_DIR}/src/client.c
3326
${LIB_SOURCE_DIR}/src/config.c
27+
${LIB_SOURCE_DIR}/src/config_parser.c
3428
${LIB_SOURCE_DIR}/src/connect.c
3529
${LIB_SOURCE_DIR}/src/connector.c
3630
${LIB_SOURCE_DIR}/src/curve25519.c
3731
${LIB_SOURCE_DIR}/src/dh.c
3832
${LIB_SOURCE_DIR}/src/ecdh.c
3933
${LIB_SOURCE_DIR}/src/error.c
34+
${LIB_SOURCE_DIR}/src/external/bcrypt_pbkdf.c
35+
${LIB_SOURCE_DIR}/src/external/blowfish.c
36+
${LIB_SOURCE_DIR}/src/external/chacha.c
37+
${LIB_SOURCE_DIR}/src/external/poly1305.c
4038
${LIB_SOURCE_DIR}/src/getpass.c
4139
${LIB_SOURCE_DIR}/src/init.c
4240
${LIB_SOURCE_DIR}/src/kdf.c
@@ -55,37 +53,32 @@ set(libssh_SRCS
5553
${LIB_SOURCE_DIR}/src/pcap.c
5654
${LIB_SOURCE_DIR}/src/pki.c
5755
${LIB_SOURCE_DIR}/src/pki_container_openssh.c
56+
${LIB_SOURCE_DIR}/src/pki_ed25519_common.c
5857
${LIB_SOURCE_DIR}/src/poll.c
59-
${LIB_SOURCE_DIR}/src/session.c
6058
${LIB_SOURCE_DIR}/src/scp.c
59+
${LIB_SOURCE_DIR}/src/session.c
6160
${LIB_SOURCE_DIR}/src/socket.c
6261
${LIB_SOURCE_DIR}/src/string.c
6362
${LIB_SOURCE_DIR}/src/threads.c
64-
${LIB_SOURCE_DIR}/src/wrapper.c
65-
${LIB_SOURCE_DIR}/src/external/bcrypt_pbkdf.c
66-
${LIB_SOURCE_DIR}/src/external/blowfish.c
67-
${LIB_SOURCE_DIR}/src/external/chacha.c
68-
${LIB_SOURCE_DIR}/src/external/poly1305.c
69-
${LIB_SOURCE_DIR}/src/chachapoly.c
70-
${LIB_SOURCE_DIR}/src/config_parser.c
7163
${LIB_SOURCE_DIR}/src/token.c
72-
${LIB_SOURCE_DIR}/src/pki_ed25519_common.c
64+
${LIB_SOURCE_DIR}/src/wrapper.c
65+
# some files of libssh/src/ are missing - why?
7366

7467
${LIB_SOURCE_DIR}/src/threads/noop.c
7568
${LIB_SOURCE_DIR}/src/threads/pthread.c
69+
# files missing - why?
7670

7771
# LIBCRYPT specific
78-
${libssh_SRCS}
79-
${LIB_SOURCE_DIR}/src/threads/libcrypto.c
80-
${LIB_SOURCE_DIR}/src/pki_crypto.c
72+
${LIB_SOURCE_DIR}/src/dh_crypto.c
8173
${LIB_SOURCE_DIR}/src/ecdh_crypto.c
8274
${LIB_SOURCE_DIR}/src/libcrypto.c
83-
${LIB_SOURCE_DIR}/src/dh_crypto.c
75+
${LIB_SOURCE_DIR}/src/pki_crypto.c
76+
${LIB_SOURCE_DIR}/src/threads/libcrypto.c
8477

85-
${LIB_SOURCE_DIR}/src/options.c
86-
${LIB_SOURCE_DIR}/src/server.c
8778
${LIB_SOURCE_DIR}/src/bind.c
8879
${LIB_SOURCE_DIR}/src/bind_config.c
80+
${LIB_SOURCE_DIR}/src/options.c
81+
${LIB_SOURCE_DIR}/src/server.c
8982
)
9083

9184
if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC))
@@ -94,7 +87,7 @@ endif()
9487

9588
configure_file(${LIB_SOURCE_DIR}/include/libssh/libssh_version.h.cmake ${LIB_BINARY_DIR}/include/libssh/libssh_version.h @ONLY)
9689

97-
add_library(_ssh STATIC ${libssh_SRCS})
90+
add_library(_ssh ${libssh_SRCS})
9891
add_library(ch_contrib::ssh ALIAS _ssh)
9992

10093
target_link_libraries(_ssh PRIVATE OpenSSL::Crypto)

docs/en/engines/table-engines/special/memory.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ Upper and lower bounds can be specified to limit Memory engine table size, effec
4545
CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 100, max_rows_to_keep = 1000;
4646
```
4747

48+
**Modify settings**
49+
```sql
50+
ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000;
51+
```
52+
4853
**Note:** Both `bytes` and `rows` capping parameters can be set at the same time, however, the lower bounds of `max` and `min` will be adhered to.
4954

5055
## Examples {#examples}
@@ -97,3 +102,4 @@ SELECT total_bytes, total_rows FROM system.tables WHERE name = 'memory' and data
97102
│ 65536 │ 10000 │
98103
└─────────────┴────────────┘
99104
```
105+

docs/en/getting-started/example-datasets/opensky.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ title: "Crowdsourced air traffic data from The OpenSky Network 2020"
77

88
The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic. It spans all flights seen by the network's more than 2500 members since 1 January 2019. More data will be periodically included in the dataset until the end of the COVID-19 pandemic.
99

10-
Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd
10+
Source: https://zenodo.org/records/5092942
1111

1212
Martin Strohmeier, Xavier Olive, Jannis Luebbe, Matthias Schaefer, and Vincent Lenders
1313
"Crowdsourced air traffic data from the OpenSky Network 2019–2020"
@@ -19,7 +19,7 @@ https://doi.org/10.5194/essd-13-357-2021
1919
Run the command:
2020

2121
```bash
22-
wget -O- https://zenodo.org/record/5092942 | grep -oP 'https://zenodo.org/record/5092942/files/flightlist_\d+_\d+\.csv\.gz' | xargs wget
22+
wget -O- https://zenodo.org/records/5092942 | grep -oE 'https://zenodo.org/records/5092942/files/flightlist_[0-9]+_[0-9]+\.csv\.gz' | xargs wget
2323
```
2424

2525
Download will take about 2 minutes with a good internet connection. There are 30 files with a total size of 4.3 GB.
@@ -127,15 +127,15 @@ Average flight distance is around 1000 km.
127127
Query:
128128

129129
```sql
130-
SELECT avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) FROM opensky;
130+
SELECT round(avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 2) FROM opensky;
131131
```
132132

133133
Result:
134134

135135
```text
136-
┌─avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))─┐
137-
│ 1041090.6465708319 │
138-
────────────────────────────────────────────────────────────────────┘
136+
┌─round(avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 2)─┐
137+
1. │ 1041090.67 │ -- 1.04 million
138+
└──────────────────────────────────────────────────────────────────────────────┘
139139
```
140140

141141
### Most busy origin airports and the average distance seen {#busy-airports-average-distance}

docs/en/operations/query-cache.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,7 @@ SETTINGS use_query_cache = true, enable_writes_to_query_cache = false;
6767

6868
For maximum control, it is generally recommended to provide settings `use_query_cache`, `enable_writes_to_query_cache` and
6969
`enable_reads_from_query_cache` only with specific queries. It is also possible to enable caching at user or profile level (e.g. via `SET
70-
use_query_cache = true`) but one should keep in mind that all `SELECT` queries including monitoring or debugging queries to system tables
71-
may return cached results then.
70+
use_query_cache = true`) but one should keep in mind that all `SELECT` queries may return cached results then.
7271

7372
The query cache can be cleared using statement `SYSTEM DROP QUERY CACHE`. The content of the query cache is displayed in system table
7473
[system.query_cache](system-tables/query_cache.md). The number of query cache hits and misses since database start are shown as events
@@ -175,6 +174,10 @@ Also, results of queries with non-deterministic functions are not cached by defa
175174
To force caching of results of queries with non-deterministic functions regardless, use setting
176175
[query_cache_nondeterministic_function_handling](settings/settings.md#query-cache-nondeterministic-function-handling).
177176

177+
Results of queries that involve system tables, e.g. `system.processes` or `information_schema.tables`, are not cached by default. To force
178+
caching of results of queries with system tables regardless, use setting
179+
[query_cache_system_table_handling](settings/settings.md#query-cache-system-table-handling).
180+
178181
:::note
179182
Prior to ClickHouse v23.11, setting 'query_cache_store_results_of_queries_with_nondeterministic_functions = 0 / 1' controlled whether
180183
results of queries with non-deterministic results were cached. In newer ClickHouse versions, this setting is obsolete and has no effect.

docs/en/operations/settings/merge-tree-settings.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ Default value: 0 (seconds)
287287

288288
## remote_fs_execute_merges_on_single_replica_time_threshold
289289

290-
When this setting has a value greater than than zero only a single replica starts the merge immediately if merged part on shared storage and `allow_remote_fs_zero_copy_replication` is enabled.
290+
When this setting has a value greater than zero, only a single replica starts the merge immediately if the merged part is on shared storage and `allow_remote_fs_zero_copy_replication` is enabled.
291291

292292
:::note Zero-copy replication is not ready for production
293293
Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.

docs/en/operations/settings/settings.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1689,6 +1689,18 @@ Possible values:
16891689

16901690
Default value: `throw`.
16911691

1692+
## query_cache_system_table_handling {#query-cache-system-table-handling}
1693+
1694+
Controls how the [query cache](../query-cache.md) handles `SELECT` queries against system tables, i.e. tables in databases `system.*` and `information_schema.*`.
1695+
1696+
Possible values:
1697+
1698+
- `'throw'` - Throw an exception and don't cache the query result.
1699+
- `'save'` - Cache the query result.
1700+
- `'ignore'` - Don't cache the query result and don't throw an exception.
1701+
1702+
Default value: `throw`.
1703+
16921704
## query_cache_min_query_runs {#query-cache-min-query-runs}
16931705

16941706
Minimum number of times a `SELECT` query must run before its result is stored in the [query cache](../query-cache.md).
@@ -5302,7 +5314,7 @@ SETTINGS(dictionary_use_async_executor=1, max_threads=8);
53025314
## storage_metadata_write_full_object_key {#storage_metadata_write_full_object_key}
53035315

53045316
When set to `true` the metadata files are written with `VERSION_FULL_OBJECT_KEY` format version. With that format full object storage key names are written to the metadata files.
5305-
When set to `false` the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format only suffixes of object storage key names are are written to the metadata files. The prefix for all of object storage key names is set in configurations files at `storage_configuration.disks` section.
5317+
When set to `false` the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format only suffixes of object storage key names are written to the metadata files. The prefix for all object storage key names is set in the configuration files, in the `storage_configuration.disks` section.
53065318

53075319
Default value: `false`.
53085320

docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,36 +15,53 @@ The `uniqCombined` function is a good choice for calculating the number of diffe
1515

1616
**Arguments**
1717

18-
The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
18+
- `HLL_precision`: The base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). Optional, you can use the function as `uniqCombined(x[, ...])`. The default value for `HLL_precision` is 17, which is effectively 96 KiB of space (2^17 cells, 6 bits each).
19+
- `X`: A variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
1920

20-
`HLL_precision` is the base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). Optional, you can use the function as `uniqCombined(x[, ...])`. The default value for `HLL_precision` is 17, which is effectively 96 KiB of space (2^17 cells, 6 bits each).
2121

2222
**Returned value**
2323

2424
- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number.
2525

2626
**Implementation details**
2727

28-
Function:
28+
The `uniqCombined` function:
2929

3030
- Calculates a hash (64-bit hash for `String` and 32-bit otherwise) for all parameters in the aggregate, then uses it in calculations.
31-
3231
- Uses a combination of three algorithms: array, hash table, and HyperLogLog with an error correction table.
33-
34-
For a small number of distinct elements, an array is used. When the set size is larger, a hash table is used. For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory.
35-
32+
- For a small number of distinct elements, an array is used.
33+
- When the set size is larger, a hash table is used.
34+
- For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory.
3635
- Provides the result deterministically (it does not depend on the query processing order).
3736

3837
:::note
39-
Since it uses 32-bit hash for non-`String` type, the result will have very high error for cardinalities significantly larger than `UINT_MAX` (error will raise quickly after a few tens of billions of distinct values), hence in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64)
38+
Since it uses a 32-bit hash for non-`String` types, the result will have very high error for cardinalities significantly larger than `UINT_MAX` (error will rise quickly after a few tens of billions of distinct values), hence in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64).
4039
:::
4140

42-
Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined`:
41+
Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined` function:
4342

4443
- Consumes several times less memory.
4544
- Calculates with several times higher accuracy.
4645
- Usually has slightly lower performance. In some scenarios, `uniqCombined` can perform better than `uniq`, for example, with distributed queries that transmit a large number of aggregation states over the network.
4746

47+
**Example**
48+
49+
Query:
50+
51+
```sql
52+
SELECT uniqCombined(number) FROM numbers(1e6);
53+
```
54+
55+
Result:
56+
57+
```response
58+
┌─uniqCombined(number)─┐
59+
│ 1001148 │ -- 1.00 million
60+
└──────────────────────┘
61+
```
62+
63+
See the example section of [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) for an example of the difference between `uniqCombined` and `uniqCombined64` for much larger inputs.
64+
4865
**See Also**
4966

5067
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq)

0 commit comments

Comments
 (0)