Skip to content

Indecently slow INSERT ... PARTITION BY #34348

@alexey-milovidov

Description

@alexey-milovidov

Describe the situation

std::__1::__hash_table<std::__1::__hash_value_type<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, unsigned long>, std::__1::__unordered_map_hasher<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::__hash_value_type<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, unsigned long>, std::__1::hash<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::equal_to<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, true>, std::__1::__unordered_map_equal<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::__hash_value_type<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, unsigned long>, std::__1::equal_to<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::hash<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, true>, std::__1::allocator<std::__1::__hash_value_type<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, unsigned long> > >::__node_insert_unique_prepare(unsigned long, std::__1::__hash_value_type<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, unsigned long>&)
    ./build/./contrib/libcxx/include/__hash_table:116
std::__1::__hash_table<std::__1::__hash_value_type<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, unsigned long>, std::__1::__unordered_map_hasher<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::__hash_value_type<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, unsigned long>, std::__1::hash<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::equal_to<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, true>, std::__1::__unordered_map_equal<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::__hash_value_type<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, unsigned long>, std::__1::equal_to<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::hash<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, true>, std::__1::allocator<std::__1::__hash_value_type<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, unsigned long> > >::__node_insert_unique(std::__1::__hash_node<std::__1::__hash_value_type<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, unsigned long>, void*>*)
    ./build/./contrib/libcxx/include/__hash_table:1916
std::__1::pair<std::__1::__hash_iterator<std::__1::__hash_node<std::__1::__hash_value_type<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, unsigned long>, void*>*>, bool> std::__1::__hash_table<std::__1::__hash_value_type<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, unsigned long>, std::__1::__unordered_map_hasher<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::__hash_value_type<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, unsigned long>, std::__1::hash<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::equal_to<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, true>, std::__1::__unordered_map_equal<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::__hash_value_type<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, unsigned long>, std::__1::equal_to<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::hash<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, true>, std::__1::allocator<std::__1::__hash_value_type<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, unsigned long> > >::__emplace_unique_impl<StringRef&, unsigned long>(StringRef&, unsigned long&&)
    ./build/./contrib/libcxx/include/__hash_table:0
DB::PartitionedSink::consume(DB::Chunk)
    ./build/./src/Storages/PartitionedSink.cpp:0
DB::SinkToStorage::onConsume(DB::Chunk)
    ./build/./contrib/libcxx/include/memory:3211
void std::__1::__function::__policy_invoker<void ()>::__call_impl<std::__1::__function::__default_alloc_func<DB::ExceptionKeepingTransform::work()::$_1, void ()> >(std::__1::__function::__policy_storage const*)
    ./build/./contrib/libcxx/include/memory:3211
DB::runStep(std::__1::function<void ()>, DB::ThreadStatus*, std::__1::atomic<unsigned long>*)
    ./build/./src/Processors/Transforms/ExceptionKeepingTransform.cpp:103
DB::ExceptionKeepingTransform::work()
    ./build/./contrib/libcxx/include/functional:2191
DB::ExecutionThreadContext::executeTask()
    ./build/./src/Processors/Executors/ExecutionThreadContext.cpp:65
DB::PipelineExecutor::executeStepImpl(unsigned long, std::__1::atomic<bool>*)
    ./build/./src/Processors/Executors/PipelineExecutor.cpp:213
DB::PipelineExecutor::executeImpl(unsigned long)
    ./build/./src/Processors/Executors/PipelineExecutor.cpp:0
DB::PipelineExecutor::execute(unsigned long)
    ./build/./contrib/libcxx/include/memory:1627
void std::__1::__function::__policy_invoker<void ()>::__call_impl<std::__1::__function::__default_alloc_func<ThreadFromGlobalPool::ThreadFromGlobalPool<DB::CompletedPipelineExecutor::execute()::$_0>(DB::CompletedPipelineExecutor::execute()::$_0&&)::'lambda'(), void ()> >(std::__1::__function::__policy_storage const*)
    ./build/./src/Processors/Executors/CompletedPipelineExecutor.cpp:0
ThreadPoolImpl<std::__1::thread>::worker(std::__1::__list_iterator<std::__1::thread, void*>)
    ./build/./contrib/libcxx/include/functional:2210
void* std::__1::__thread_proxy<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct> >, void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, int, std::__1::optional<unsigned long>)::'lambda0'()> >(void*)
    ./build/./contrib/libcxx/include/memory:1655

The issue introduced here: https://github.com/ClickHouse/ClickHouse/pull/23051/files#diff-a118e043f3175929dbe8a96c6dcc7db80e31ddae5581f29b4286eb9e2d9d925dR391

It is doing unnecessary copy into std::string. We can use the same StringRef as a key.
Also it is using std::unordered_map for no reason. Better to use HashTable.
The profile contains allocations, hash map iteration and hash function calculation.

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions