7676#include < Storages/IStorage.h>
7777#include < Storages/MergeTree/MergeTreeWhereOptimizer.h>
7878#include < Storages/StorageDistributed.h>
79+ #include < Storages/StorageDummy.h>
80+ #include < Storages/StorageMerge.h>
7981#include < Storages/StorageValues.h>
8082#include < Storages/StorageView.h>
8183
@@ -224,8 +226,10 @@ InterpreterSelectQuery::InterpreterSelectQuery(
224226 const StoragePtr & storage_,
225227 const StorageMetadataPtr & metadata_snapshot_,
226228 const SelectQueryOptions & options_)
227- : InterpreterSelectQuery(query_ptr_, context_, std::nullopt , storage_, options_.copy().noSubquery(), {}, metadata_snapshot_)
228- {}
229+ : InterpreterSelectQuery(
230+ query_ptr_, context_, std::nullopt , storage_, options_.copy().noSubquery(), {}, metadata_snapshot_)
231+ {
232+ }
229233
230234InterpreterSelectQuery::InterpreterSelectQuery (
231235 const ASTPtr & query_ptr_,
@@ -600,7 +604,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
600604 query.setFinal ();
601605 }
602606
603- auto analyze = [&] (bool try_move_to_prewhere )
607+ auto analyze = [&] ()
604608 {
605609 // / Allow push down and other optimizations for VIEW: replace with subquery and rewrite it.
606610 ASTPtr view_table;
@@ -618,7 +622,6 @@ InterpreterSelectQuery::InterpreterSelectQuery(
618622 required_result_column_names,
619623 table_join);
620624
621-
622625 query_info.syntax_analyzer_result = syntax_analyzer_result;
623626 context->setDistributed (syntax_analyzer_result->is_remote_storage );
624627
@@ -632,38 +635,6 @@ InterpreterSelectQuery::InterpreterSelectQuery(
632635 view = nullptr ;
633636 }
634637
635- if (try_move_to_prewhere
636- && storage && storage->canMoveConditionsToPrewhere ()
637- && query.where () && !query.prewhere ()
638- && !query.hasJoin ()) // / Join may produce rows with nulls or default values, it's difficult to analyze if they affected or not.
639- {
640- // / PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable
641- if (const auto & column_sizes = storage->getColumnSizes (); !column_sizes.empty ())
642- {
643- // / Extract column compressed sizes.
644- std::unordered_map<std::string, UInt64> column_compressed_sizes;
645- for (const auto & [name, sizes] : column_sizes)
646- column_compressed_sizes[name] = sizes.data_compressed ;
647-
648- SelectQueryInfo current_info;
649- current_info.query = query_ptr;
650- current_info.syntax_analyzer_result = syntax_analyzer_result;
651-
652- Names queried_columns = syntax_analyzer_result->requiredSourceColumns ();
653- const auto & supported_prewhere_columns = storage->supportedPrewhereColumns ();
654-
655- MergeTreeWhereOptimizer where_optimizer{
656- std::move (column_compressed_sizes),
657- metadata_snapshot,
658- storage->getConditionEstimatorByPredicate (query_info, storage_snapshot, context),
659- queried_columns,
660- supported_prewhere_columns,
661- log};
662-
663- where_optimizer.optimize (current_info, context);
664- }
665- }
666-
667638 if (query.prewhere () && query.where ())
668639 {
669640 // / Filter block in WHERE instead to get better performance
@@ -777,7 +748,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
777748 result_header = getSampleBlockImpl ();
778749 };
779750
780- analyze (shouldMoveToPrewhere () );
751+ analyze ();
781752
782753 bool need_analyze_again = false ;
783754 bool can_analyze_again = false ;
@@ -821,9 +792,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
821792 // / Reuse already built sets for multiple passes of analysis
822793 prepared_sets = query_analyzer->getPreparedSets ();
823794
824- // / Do not try move conditions to PREWHERE for the second time.
825- // / Otherwise, we won't be able to fallback from inefficient PREWHERE to WHERE later.
826- analyze (/* try_move_to_prewhere = */ false );
795+ analyze ();
827796 }
828797
829798 // / If there is no WHERE, filter blocks as usual
@@ -901,7 +870,24 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis()
901870 }
902871
903872 ActionDAGNodes added_filter_nodes = MergeTreeData::getFiltersForPrimaryKeyAnalysis (*this );
904- UInt64 rows_to_read = storage_merge_tree->estimateNumberOfRowsToRead (context, storage_snapshot, query_info_copy, added_filter_nodes);
873+ if (query_info_copy.prewhere_info )
874+ {
875+ {
876+ const auto & node
877+ = query_info_copy.prewhere_info ->prewhere_actions ->findInOutputs (query_info_copy.prewhere_info ->prewhere_column_name );
878+ added_filter_nodes.nodes .push_back (&node);
879+ }
880+
881+ if (query_info_copy.prewhere_info ->row_level_filter )
882+ {
883+ const auto & node
884+ = query_info_copy.prewhere_info ->row_level_filter ->findInOutputs (query_info_copy.prewhere_info ->row_level_column_name );
885+ added_filter_nodes.nodes .push_back (&node);
886+ }
887+ }
888+
889+ query_info_copy.filter_actions_dag = ActionsDAG::buildFilterActionsDAG (added_filter_nodes.nodes );
890+ UInt64 rows_to_read = storage_merge_tree->estimateNumberOfRowsToRead (context, storage_snapshot, query_info_copy);
905891 // / Note that we treat an estimation of 0 rows as a real estimation
906892 size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica ;
907893 LOG_TRACE (log, " Estimated {} rows to read. It is enough work for {} parallel replicas" , rows_to_read, number_of_replicas_to_use);
@@ -2336,6 +2322,49 @@ UInt64 InterpreterSelectQuery::maxBlockSizeByLimit() const
23362322 return 0 ;
23372323}
23382324
2325+ /* * Storages can rely that filters that for storage will be available for analysis before
2326+ * plan is fully constructed and optimized.
2327+ *
2328+ * StorageMerge common header calculation and prewhere push-down relies on this.
2329+ *
2330+ * This is similar to Planner::collectFiltersForAnalysis
2331+ */
2332+ void collectFiltersForAnalysis (
2333+ const ASTPtr & query_ptr,
2334+ const ContextPtr & query_context,
2335+ const StorageSnapshotPtr & storage_snapshot,
2336+ const SelectQueryOptions & options,
2337+ SelectQueryInfo & query_info)
2338+ {
2339+ auto get_column_options = GetColumnsOptions (GetColumnsOptions::All).withExtendedObjects ().withVirtuals ();
2340+
2341+ auto dummy = std::make_shared<StorageDummy>(
2342+ storage_snapshot->storage .getStorageID (), ColumnsDescription (storage_snapshot->getColumns (get_column_options)), storage_snapshot);
2343+
2344+ QueryPlan query_plan;
2345+ InterpreterSelectQuery (query_ptr, query_context, dummy, dummy->getInMemoryMetadataPtr (), options).buildQueryPlan (query_plan);
2346+
2347+ auto optimization_settings = QueryPlanOptimizationSettings::fromContext (query_context);
2348+ query_plan.optimize (optimization_settings);
2349+
2350+ std::vector<QueryPlan::Node *> nodes_to_process;
2351+ nodes_to_process.push_back (query_plan.getRootNode ());
2352+
2353+ while (!nodes_to_process.empty ())
2354+ {
2355+ const auto * node_to_process = nodes_to_process.back ();
2356+ nodes_to_process.pop_back ();
2357+ nodes_to_process.insert (nodes_to_process.end (), node_to_process->children .begin (), node_to_process->children .end ());
2358+
2359+ auto * read_from_dummy = typeid_cast<ReadFromDummy *>(node_to_process->step .get ());
2360+ if (!read_from_dummy)
2361+ continue ;
2362+
2363+ query_info.filter_actions_dag = read_from_dummy->getFilterActionsDAG ();
2364+ query_info.optimized_prewhere_info = read_from_dummy->getPrewhereInfo ();
2365+ }
2366+ }
2367+
23392368void InterpreterSelectQuery::executeFetchColumns (QueryProcessingStage::Enum processing_stage, QueryPlan & query_plan)
23402369{
23412370 auto & query = getSelectQuery ();
@@ -2462,6 +2491,9 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
24622491 }
24632492 else if (storage)
24642493 {
2494+ if (typeid_cast<const StorageMerge *>(storage.get ()))
2495+ collectFiltersForAnalysis (query_ptr, context, storage_snapshot, options, query_info);
2496+
24652497 // / Table.
24662498 if (max_streams == 0 )
24672499 max_streams = 1 ;
0 commit comments