Skip to content

Commit ef133bf

Browse files
committed
Merge remote-tracking branch 'upstream/master' into lower-memory-usage
2 parents 9d3b4e5 + 29d54da commit ef133bf

18 files changed

Lines changed: 382 additions & 28 deletions

contrib/corrosion-cmake/CMakeLists.txt

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
if (NOT ENABLE_LIBRARIES)
22
set(DEFAULT_ENABLE_RUST FALSE)
3-
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "aarch64"))
4-
message(STATUS "Rust is not available on aarch64-apple-darwin")
5-
set(DEFAULT_ENABLE_RUST FALSE)
63
else()
74
list (APPEND CMAKE_MODULE_PATH "${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake")
85
find_package(Rust)
@@ -19,7 +16,9 @@ message(STATUS "Checking Rust toolchain for current target")
1916

2017
# See https://doc.rust-lang.org/nightly/rustc/platform-support.html
2118

22-
if((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl"))
19+
if(CMAKE_TOOLCHAIN_FILE MATCHES "ppc64le")
20+
set(Rust_CARGO_TARGET "powerpc64le-unknown-linux-gnu")
21+
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl"))
2322
set(Rust_CARGO_TARGET "x86_64-unknown-linux-musl")
2423
elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64")
2524
set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu")
@@ -29,14 +28,14 @@ elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64")
2928
set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu")
3029
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64"))
3130
set(Rust_CARGO_TARGET "x86_64-apple-darwin")
31+
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "aarch64"))
32+
set(Rust_CARGO_TARGET "aarch64-apple-darwin")
3233
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64"))
3334
set(Rust_CARGO_TARGET "x86_64-unknown-freebsd")
3435
elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-riscv64")
3536
set(Rust_CARGO_TARGET "riscv64gc-unknown-linux-gnu")
36-
endif()
37-
38-
if(CMAKE_TOOLCHAIN_FILE MATCHES "ppc64le")
39-
set(Rust_CARGO_TARGET "powerpc64le-unknown-linux-gnu")
37+
else()
38+
message(FATAL_ERROR "Unsupported rust target")
4039
endif()
4140

4241
message(STATUS "Switched Rust target to ${Rust_CARGO_TARGET}")

docs/en/sql-reference/dictionaries/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1805,6 +1805,7 @@ Example of settings:
18051805
``` xml
18061806
<source>
18071807
<postgresql>
1808+
<host>postgresql-hostname</host>
18081809
<port>5432</port>
18091810
<user>clickhouse</user>
18101811
<password>qwerty</password>

docs/en/sql-reference/statements/alter/view.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@ sidebar_label: VIEW
88

99
You can modify the `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE … MODIFY QUERY` statement without interrupting the ingestion process.
1010

11-
The `allow_experimental_alter_materialized_view_structure` setting must be enabled.
12-
1311
This command is created to change materialized view created with `TO [db.]name` clause. It does not change the structure of the underlying storage table and it does not change the columns' definition of the materialized view; because of this, the application of this command is very limited for materialized views created without the `TO [db.]name` clause.
1412

1513
**Example with TO table**

docs/en/sql-reference/statements/create/view.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ This feature is deprecated and will be removed in the future.
9797

9898
For your convenience, the old documentation is located [here](https://pastila.nl/?00f32652/fdf07272a7b54bda7e13b919264e449f.md)
9999

100-
## Refreshable Materialized View {#refreshable-materialized-view}
100+
## Refreshable Materialized View [Experimental] {#refreshable-materialized-view}
101101

102102
```sql
103103
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name
@@ -120,7 +120,8 @@ Differences from regular non-refreshable materialized views:
120120

121121
:::note
122122
Refreshable materialized views are a work in progress. Setting `allow_experimental_refreshable_materialized_view = 1` is required for creating one. Current limitations:
123-
* not compatible with Replicated database or table engines,
123+
* not compatible with Replicated database or table engines,
124+
* not supported in ClickHouse Cloud,
124125
* require [Atomic database engine](../../../engines/database-engines/atomic.md),
125126
* no retries for failed refresh - we just skip to the next scheduled refresh time,
126127
* no limit on number of concurrent refreshes.
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
#include <Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.h>
2+
#include <Analyzer/ConstantNode.h>
3+
#include <Analyzer/FunctionNode.h>
4+
#include <Analyzer/InDepthQueryTreeVisitor.h>
5+
#include <Analyzer/IQueryTreeNode.h>
6+
#include <DataTypes/IDataType.h>
7+
#include <Interpreters/ExternalDictionariesLoader.h>
8+
9+
namespace DB
10+
{
11+
12+
namespace
13+
{
14+
15+
const std::unordered_set<String> possibly_injective_function_names
16+
{
17+
"dictGet",
18+
"dictGetString",
19+
"dictGetUInt8",
20+
"dictGetUInt16",
21+
"dictGetUInt32",
22+
"dictGetUInt64",
23+
"dictGetInt8",
24+
"dictGetInt16",
25+
"dictGetInt32",
26+
"dictGetInt64",
27+
"dictGetFloat32",
28+
"dictGetFloat64",
29+
"dictGetDate",
30+
"dictGetDateTime"
31+
};
32+
33+
class OptimizeGroupByInjectiveFunctionsVisitor : public InDepthQueryTreeVisitorWithContext<OptimizeGroupByInjectiveFunctionsVisitor>
34+
{
35+
using Base = InDepthQueryTreeVisitorWithContext<OptimizeGroupByInjectiveFunctionsVisitor>;
36+
public:
37+
explicit OptimizeGroupByInjectiveFunctionsVisitor(ContextPtr context)
38+
: Base(std::move(context))
39+
{}
40+
41+
void enterImpl(QueryTreeNodePtr & node)
42+
{
43+
if (!getSettings().optimize_injective_functions_in_group_by)
44+
return;
45+
46+
auto * query = node->as<QueryNode>();
47+
if (!query)
48+
return;
49+
50+
if (!query->hasGroupBy())
51+
return;
52+
53+
if (query->isGroupByWithCube() || query->isGroupByWithRollup())
54+
return;
55+
56+
auto & group_by = query->getGroupBy().getNodes();
57+
if (query->isGroupByWithGroupingSets())
58+
{
59+
for (auto & set : group_by)
60+
{
61+
auto & grouping_set = set->as<ListNode>()->getNodes();
62+
optimizeGroupingSet(grouping_set);
63+
}
64+
}
65+
else
66+
optimizeGroupingSet(group_by);
67+
}
68+
69+
private:
70+
void optimizeGroupingSet(QueryTreeNodes & grouping_set)
71+
{
72+
auto context = getContext();
73+
74+
QueryTreeNodes new_group_by_keys;
75+
new_group_by_keys.reserve(grouping_set.size());
76+
for (auto & group_by_elem : grouping_set)
77+
{
78+
std::queue<QueryTreeNodePtr> nodes_to_process;
79+
nodes_to_process.push(group_by_elem);
80+
81+
while (!nodes_to_process.empty())
82+
{
83+
auto node_to_process = nodes_to_process.front();
84+
nodes_to_process.pop();
85+
86+
auto const * function_node = node_to_process->as<FunctionNode>();
87+
if (!function_node)
88+
{
89+
// Constant aggregation keys are removed in PlannerExpressionAnalysis.cpp
90+
new_group_by_keys.push_back(node_to_process);
91+
continue;
92+
}
93+
94+
// Aggregate functions are not allowed in GROUP BY clause
95+
auto function = function_node->getFunctionOrThrow();
96+
bool can_be_eliminated = function->isInjective(function_node->getArgumentColumns());
97+
98+
if (can_be_eliminated)
99+
{
100+
for (auto const & argument : function_node->getArguments())
101+
{
102+
// We can skip constants here because aggregation key is already not a constant.
103+
if (argument->getNodeType() != QueryTreeNodeType::CONSTANT)
104+
nodes_to_process.push(argument);
105+
}
106+
}
107+
else
108+
new_group_by_keys.push_back(node_to_process);
109+
}
110+
}
111+
112+
grouping_set = std::move(new_group_by_keys);
113+
}
114+
};
115+
116+
}
117+
118+
void OptimizeGroupByInjectiveFunctionsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
119+
{
120+
OptimizeGroupByInjectiveFunctionsVisitor visitor(std::move(context));
121+
visitor.visit(query_tree_node);
122+
}
123+
124+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#pragma once
2+
3+
#include <Analyzer/IQueryTreePass.h>
4+
5+
namespace DB
6+
{
7+
8+
/* Eliminates injective functions in GROUP BY section.
9+
*/
10+
class OptimizeGroupByInjectiveFunctionsPass final : public IQueryTreePass
11+
{
12+
public:
13+
String getName() override { return "OptimizeGroupByInjectiveFunctionsPass"; }
14+
15+
String getDescription() override { return "Replaces injective functions by it's arguments in GROUP BY section."; }
16+
17+
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
18+
};
19+
20+
}

src/Analyzer/Passes/QueryAnalysisPass.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2321,11 +2321,15 @@ std::pair<bool, UInt64> QueryAnalyzer::recursivelyCollectMaxOrdinaryExpressions(
23212321
*/
23222322
void QueryAnalyzer::expandGroupByAll(QueryNode & query_tree_node_typed)
23232323
{
2324+
if (!query_tree_node_typed.isGroupByAll())
2325+
return;
2326+
23242327
auto & group_by_nodes = query_tree_node_typed.getGroupBy().getNodes();
23252328
auto & projection_list = query_tree_node_typed.getProjection();
23262329

23272330
for (auto & node : projection_list.getNodes())
23282331
recursivelyCollectMaxOrdinaryExpressions(node, group_by_nodes);
2332+
query_tree_node_typed.setIsGroupByAll(false);
23292333
}
23302334

23312335
void QueryAnalyzer::expandOrderByAll(QueryNode & query_tree_node_typed)
@@ -7422,8 +7426,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
74227426
node->removeAlias();
74237427
}
74247428

7425-
if (query_node_typed.isGroupByAll())
7426-
expandGroupByAll(query_node_typed);
7429+
expandGroupByAll(query_node_typed);
74277430

74287431
validateFilters(query_node);
74297432
validateAggregates(query_node, { .group_by_use_nulls = scope.group_by_use_nulls });

src/Analyzer/QueryTreePassManager.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <memory>
44

55
#include <Common/Exception.h>
6+
#include "Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.h"
67

78
#include <IO/WriteHelpers.h>
89
#include <IO/Operators.h>
@@ -163,8 +164,6 @@ class ValidationChecker : public InDepthQueryTreeVisitor<ValidationChecker>
163164

164165
/** ClickHouse query tree pass manager.
165166
*
166-
* TODO: Support setting optimize_substitute_columns.
167-
* TODO: Support GROUP BY injective function elimination.
168167
* TODO: Support setting optimize_aggregators_of_group_by_keys.
169168
* TODO: Support setting optimize_monotonous_functions_in_order_by.
170169
* TODO: Add optimizations based on function semantics. Example: SELECT * FROM test_table WHERE id != id. (id is not nullable column).
@@ -268,6 +267,7 @@ void addQueryTreePasses(QueryTreePassManager & manager)
268267
manager.addPass(std::make_unique<AggregateFunctionsArithmericOperationsPass>());
269268
manager.addPass(std::make_unique<UniqInjectiveFunctionsEliminationPass>());
270269
manager.addPass(std::make_unique<OptimizeGroupByFunctionKeysPass>());
270+
manager.addPass(std::make_unique<OptimizeGroupByInjectiveFunctionsPass>());
271271

272272
manager.addPass(std::make_unique<MultiIfToIfPass>());
273273
manager.addPass(std::make_unique<IfConstantConditionPass>());

src/Core/Settings.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,7 @@ class IColumn;
699699
M(SetOperationMode, intersect_default_mode, SetOperationMode::ALL, "Set default mode in INTERSECT query. Possible values: empty string, 'ALL', 'DISTINCT'. If empty, query without mode will throw exception.", 0) \
700700
M(SetOperationMode, except_default_mode, SetOperationMode::ALL, "Set default mode in EXCEPT query. Possible values: empty string, 'ALL', 'DISTINCT'. If empty, query without mode will throw exception.", 0) \
701701
M(Bool, optimize_aggregators_of_group_by_keys, true, "Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section", 0) \
702+
M(Bool, optimize_injective_functions_in_group_by, true, "Replaces injective functions by it's arguments in GROUP BY section", 0) \
702703
M(Bool, optimize_group_by_function_keys, true, "Eliminates functions of other keys in GROUP BY section", 0) \
703704
M(Bool, optimize_group_by_constant_keys, true, "Optimize GROUP BY when all keys in block are constant", 0) \
704705
M(Bool, legacy_column_name_of_tuple_literal, false, "List all names of element of large tuple literals in their column names instead of hash. This settings exists only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher.", 0) \

src/Core/SettingsChangesHistory.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
9999
{"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"},
100100
{"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"},
101101
{"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"},
102-
{"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}}},
102+
{"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"},
103+
{"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}}},
103104
{"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."},
104105
{"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"},
105106
{"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"},

0 commit comments

Comments
 (0)