Skip to content

Commit 2b9889f

Browse files
Merge pull request #60082 from amosbird/non-trivial-count-optimization
Refactor prewhere and primary key optimization
2 parents 048f7ec + 3167dfd commit 2b9889f

104 files changed

Lines changed: 1511 additions & 1348 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

src/Access/AccessControl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ class AccessControl : public MultipleAccessStorage
164164
int getBcryptWorkfactor() const;
165165

166166
/// Enables logic that users without permissive row policies can still read rows using a SELECT query.
167-
/// For example, if there two users A, B and a row policy is defined only for A, then
167+
/// For example, if there are two users A, B and a row policy is defined only for A, then
168168
/// if this setting is true the user B will see all rows, and if this setting is false the user B will see no rows.
169169
void setEnabledUsersWithoutRowPoliciesCanReadRows(bool enable) { users_without_row_policies_can_read_rows = enable; }
170170
bool isEnabledUsersWithoutRowPoliciesCanReadRows() const { return users_without_row_policies_can_read_rows; }

src/Analyzer/Utils.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,7 @@ QueryTreeNodes extractAllTableReferences(const QueryTreeNodePtr & tree)
388388
return result;
389389
}
390390

391-
QueryTreeNodes extractTableExpressions(const QueryTreeNodePtr & join_tree_node, bool add_array_join)
391+
QueryTreeNodes extractTableExpressions(const QueryTreeNodePtr & join_tree_node, bool add_array_join, bool recursive)
392392
{
393393
QueryTreeNodes result;
394394

@@ -406,12 +406,25 @@ QueryTreeNodes extractTableExpressions(const QueryTreeNodePtr & join_tree_node,
406406
{
407407
case QueryTreeNodeType::TABLE:
408408
[[fallthrough]];
409+
case QueryTreeNodeType::TABLE_FUNCTION:
410+
{
411+
result.push_back(std::move(node_to_process));
412+
break;
413+
}
409414
case QueryTreeNodeType::QUERY:
410-
[[fallthrough]];
415+
{
416+
if (recursive)
417+
nodes_to_process.push_back(node_to_process->as<QueryNode>()->getJoinTree());
418+
result.push_back(std::move(node_to_process));
419+
break;
420+
}
411421
case QueryTreeNodeType::UNION:
412-
[[fallthrough]];
413-
case QueryTreeNodeType::TABLE_FUNCTION:
414422
{
423+
if (recursive)
424+
{
425+
for (const auto & union_node : node_to_process->as<UnionNode>()->getQueries().getNodes())
426+
nodes_to_process.push_back(union_node);
427+
}
415428
result.push_back(std::move(node_to_process));
416429
break;
417430
}

src/Analyzer/Utils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_q
5454
QueryTreeNodes extractAllTableReferences(const QueryTreeNodePtr & tree);
5555

5656
/// Extract table, table function, query, union from join tree.
57-
QueryTreeNodes extractTableExpressions(const QueryTreeNodePtr & join_tree_node, bool add_array_join = false);
57+
QueryTreeNodes extractTableExpressions(const QueryTreeNodePtr & join_tree_node, bool add_array_join = false, bool recursive = false);
5858

5959
/// Extract left table expression from join tree.
6060
QueryTreeNodePtr extractLeftTableExpression(const QueryTreeNodePtr & join_tree_node);

src/Interpreters/InterpreterSelectQuery.cpp

Lines changed: 73 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@
7676
#include <Storages/IStorage.h>
7777
#include <Storages/MergeTree/MergeTreeWhereOptimizer.h>
7878
#include <Storages/StorageDistributed.h>
79+
#include <Storages/StorageDummy.h>
80+
#include <Storages/StorageMerge.h>
7981
#include <Storages/StorageValues.h>
8082
#include <Storages/StorageView.h>
8183

@@ -224,8 +226,10 @@ InterpreterSelectQuery::InterpreterSelectQuery(
224226
const StoragePtr & storage_,
225227
const StorageMetadataPtr & metadata_snapshot_,
226228
const SelectQueryOptions & options_)
227-
: InterpreterSelectQuery(query_ptr_, context_, std::nullopt, storage_, options_.copy().noSubquery(), {}, metadata_snapshot_)
228-
{}
229+
: InterpreterSelectQuery(
230+
query_ptr_, context_, std::nullopt, storage_, options_.copy().noSubquery(), {}, metadata_snapshot_)
231+
{
232+
}
229233

230234
InterpreterSelectQuery::InterpreterSelectQuery(
231235
const ASTPtr & query_ptr_,
@@ -600,7 +604,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
600604
query.setFinal();
601605
}
602606

603-
auto analyze = [&] (bool try_move_to_prewhere)
607+
auto analyze = [&] ()
604608
{
605609
/// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it.
606610
ASTPtr view_table;
@@ -618,7 +622,6 @@ InterpreterSelectQuery::InterpreterSelectQuery(
618622
required_result_column_names,
619623
table_join);
620624

621-
622625
query_info.syntax_analyzer_result = syntax_analyzer_result;
623626
context->setDistributed(syntax_analyzer_result->is_remote_storage);
624627

@@ -632,38 +635,6 @@ InterpreterSelectQuery::InterpreterSelectQuery(
632635
view = nullptr;
633636
}
634637

635-
if (try_move_to_prewhere
636-
&& storage && storage->canMoveConditionsToPrewhere()
637-
&& query.where() && !query.prewhere()
638-
&& !query.hasJoin()) /// Join may produce rows with nulls or default values, it's difficult to analyze if they affected or not.
639-
{
640-
/// PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable
641-
if (const auto & column_sizes = storage->getColumnSizes(); !column_sizes.empty())
642-
{
643-
/// Extract column compressed sizes.
644-
std::unordered_map<std::string, UInt64> column_compressed_sizes;
645-
for (const auto & [name, sizes] : column_sizes)
646-
column_compressed_sizes[name] = sizes.data_compressed;
647-
648-
SelectQueryInfo current_info;
649-
current_info.query = query_ptr;
650-
current_info.syntax_analyzer_result = syntax_analyzer_result;
651-
652-
Names queried_columns = syntax_analyzer_result->requiredSourceColumns();
653-
const auto & supported_prewhere_columns = storage->supportedPrewhereColumns();
654-
655-
MergeTreeWhereOptimizer where_optimizer{
656-
std::move(column_compressed_sizes),
657-
metadata_snapshot,
658-
storage->getConditionEstimatorByPredicate(query_info, storage_snapshot, context),
659-
queried_columns,
660-
supported_prewhere_columns,
661-
log};
662-
663-
where_optimizer.optimize(current_info, context);
664-
}
665-
}
666-
667638
if (query.prewhere() && query.where())
668639
{
669640
/// Filter block in WHERE instead to get better performance
@@ -777,7 +748,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
777748
result_header = getSampleBlockImpl();
778749
};
779750

780-
analyze(shouldMoveToPrewhere());
751+
analyze();
781752

782753
bool need_analyze_again = false;
783754
bool can_analyze_again = false;
@@ -821,9 +792,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
821792
/// Reuse already built sets for multiple passes of analysis
822793
prepared_sets = query_analyzer->getPreparedSets();
823794

824-
/// Do not try move conditions to PREWHERE for the second time.
825-
/// Otherwise, we won't be able to fallback from inefficient PREWHERE to WHERE later.
826-
analyze(/* try_move_to_prewhere = */ false);
795+
analyze();
827796
}
828797

829798
/// If there is no WHERE, filter blocks as usual
@@ -901,7 +870,24 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis()
901870
}
902871

903872
ActionDAGNodes added_filter_nodes = MergeTreeData::getFiltersForPrimaryKeyAnalysis(*this);
904-
UInt64 rows_to_read = storage_merge_tree->estimateNumberOfRowsToRead(context, storage_snapshot, query_info_copy, added_filter_nodes);
873+
if (query_info_copy.prewhere_info)
874+
{
875+
{
876+
const auto & node
877+
= query_info_copy.prewhere_info->prewhere_actions->findInOutputs(query_info_copy.prewhere_info->prewhere_column_name);
878+
added_filter_nodes.nodes.push_back(&node);
879+
}
880+
881+
if (query_info_copy.prewhere_info->row_level_filter)
882+
{
883+
const auto & node
884+
= query_info_copy.prewhere_info->row_level_filter->findInOutputs(query_info_copy.prewhere_info->row_level_column_name);
885+
added_filter_nodes.nodes.push_back(&node);
886+
}
887+
}
888+
889+
query_info_copy.filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes);
890+
UInt64 rows_to_read = storage_merge_tree->estimateNumberOfRowsToRead(context, storage_snapshot, query_info_copy);
905891
/// Note that we treat an estimation of 0 rows as a real estimation
906892
size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica;
907893
LOG_TRACE(log, "Estimated {} rows to read. It is enough work for {} parallel replicas", rows_to_read, number_of_replicas_to_use);
@@ -2336,6 +2322,49 @@ UInt64 InterpreterSelectQuery::maxBlockSizeByLimit() const
23362322
return 0;
23372323
}
23382324

2325+
/** Storages can rely that filters that for storage will be available for analysis before
2326+
* plan is fully constructed and optimized.
2327+
*
2328+
* StorageMerge common header calculation and prewhere push-down relies on this.
2329+
*
2330+
* This is similar to Planner::collectFiltersForAnalysis
2331+
*/
2332+
void collectFiltersForAnalysis(
2333+
const ASTPtr & query_ptr,
2334+
const ContextPtr & query_context,
2335+
const StorageSnapshotPtr & storage_snapshot,
2336+
const SelectQueryOptions & options,
2337+
SelectQueryInfo & query_info)
2338+
{
2339+
auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals();
2340+
2341+
auto dummy = std::make_shared<StorageDummy>(
2342+
storage_snapshot->storage.getStorageID(), ColumnsDescription(storage_snapshot->getColumns(get_column_options)), storage_snapshot);
2343+
2344+
QueryPlan query_plan;
2345+
InterpreterSelectQuery(query_ptr, query_context, dummy, dummy->getInMemoryMetadataPtr(), options).buildQueryPlan(query_plan);
2346+
2347+
auto optimization_settings = QueryPlanOptimizationSettings::fromContext(query_context);
2348+
query_plan.optimize(optimization_settings);
2349+
2350+
std::vector<QueryPlan::Node *> nodes_to_process;
2351+
nodes_to_process.push_back(query_plan.getRootNode());
2352+
2353+
while (!nodes_to_process.empty())
2354+
{
2355+
const auto * node_to_process = nodes_to_process.back();
2356+
nodes_to_process.pop_back();
2357+
nodes_to_process.insert(nodes_to_process.end(), node_to_process->children.begin(), node_to_process->children.end());
2358+
2359+
auto * read_from_dummy = typeid_cast<ReadFromDummy *>(node_to_process->step.get());
2360+
if (!read_from_dummy)
2361+
continue;
2362+
2363+
query_info.filter_actions_dag = read_from_dummy->getFilterActionsDAG();
2364+
query_info.optimized_prewhere_info = read_from_dummy->getPrewhereInfo();
2365+
}
2366+
}
2367+
23392368
void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum processing_stage, QueryPlan & query_plan)
23402369
{
23412370
auto & query = getSelectQuery();
@@ -2462,6 +2491,9 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
24622491
}
24632492
else if (storage)
24642493
{
2494+
if (typeid_cast<const StorageMerge *>(storage.get()))
2495+
collectFiltersForAnalysis(query_ptr, context, storage_snapshot, options, query_info);
2496+
24652497
/// Table.
24662498
if (max_streams == 0)
24672499
max_streams = 1;

src/Interpreters/SubstituteColumnOptimizer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ struct StorageInMemoryMetadata;
1515
using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
1616

1717
/// Optimizer that tries to replace columns to equal columns (according to constraints)
18-
/// with lower size (accorsing to compressed and uncomressed size).
18+
/// with lower size (according to compressed and uncomressed size).
1919
class SubstituteColumnOptimizer
2020
{
2121
public:

src/Interpreters/executeQuery.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,9 @@ void logExceptionBeforeStart(
603603
if (auto txn = context->getCurrentTransaction())
604604
elem.tid = txn->tid;
605605

606+
if (settings.log_query_settings)
607+
elem.query_settings = std::make_shared<Settings>(context->getSettingsRef());
608+
606609
if (settings.calculate_text_stack_trace)
607610
setExceptionStackTrace(elem);
608611
logException(context, elem);

src/Interpreters/getHeaderForProcessingStage.cpp

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -98,23 +98,7 @@ Block getHeaderForProcessingStage(
9898
case QueryProcessingStage::FetchColumns:
9999
{
100100
Block header = storage_snapshot->getSampleBlockForColumns(column_names);
101-
102-
if (query_info.prewhere_info)
103-
{
104-
auto & prewhere_info = *query_info.prewhere_info;
105-
106-
if (prewhere_info.row_level_filter)
107-
{
108-
header = prewhere_info.row_level_filter->updateHeader(std::move(header));
109-
header.erase(prewhere_info.row_level_column_name);
110-
}
111-
112-
if (prewhere_info.prewhere_actions)
113-
header = prewhere_info.prewhere_actions->updateHeader(std::move(header));
114-
115-
if (prewhere_info.remove_prewhere_column)
116-
header.erase(prewhere_info.prewhere_column_name);
117-
}
101+
header = SourceStepWithFilter::applyPrewhereActions(header, query_info.prewhere_info);
118102
return header;
119103
}
120104
case QueryProcessingStage::WithMergeableState:
@@ -153,7 +137,8 @@ Block getHeaderForProcessingStage(
153137

154138
if (context->getSettingsRef().allow_experimental_analyzer)
155139
{
156-
auto storage = std::make_shared<StorageDummy>(storage_snapshot->storage.getStorageID(), storage_snapshot->metadata->getColumns());
140+
auto storage = std::make_shared<StorageDummy>(
141+
storage_snapshot->storage.getStorageID(), storage_snapshot->metadata->getColumns(), storage_snapshot);
157142
InterpreterSelectQueryAnalyzer interpreter(query, context, storage, SelectQueryOptions(processed_stage).analyze());
158143
result = interpreter.getSampleBlock();
159144
}

0 commit comments

Comments
 (0)