Skip to content

Conversation

@mrhhsg
Copy link
Member

@mrhhsg mrhhsg commented May 8, 2025

What problem does this PR solve?

If there is a mark join condition, then even in a right semi join, the columns from the left table involved in the mark join condition will still appear in the intermediate tuple.

This PR also completes the missing handling logic for right semi joins with a mark join condition.

==2126730==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x503004ee02e0 at pc 0x55f4a7d3f354 bp 0x7f6b6b587e70 sp 0x7f6b6b587e68
READ of size 8 at 0x503004ee02e0 thread T1296 (brpc_light)
    #0 0x55f4a7d3f353 in std::__shared_ptr<doris::vectorized::IDataType const, (__gnu_cxx::_Lock_policy)2>::__shared_ptr(std::__shared_ptr<doris::vectorized::IDataType const, (__gnu_cxx::_Lock_policy)2> const&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/shared_ptr_base.h:1522:7
    #1 0x55f4a7d3f01e in std::shared_ptr<doris::vectorized::IDataType const>::shared_ptr(std::shared_ptr<doris::vectorized::IDataType const> const&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/shared_ptr.h:204:7
    #2 0x55f4f8aa231c in doris::pipeline::HashJoinProbeOperatorX::prepare(doris::RuntimeState*) /root/doris/be/src/pipeline/exec/hashjoin_probe_operator.cpp:577:33
    #3 0x55f4fb1e4a28 in doris::pipeline::Pipeline::prepare(doris::RuntimeState*) /root/doris/be/src/pipeline/pipeline.cpp:89:5
    #4 0x55f4fb089d3f in doris::pipeline::PipelineFragmentContext::prepare(doris::TPipelineFragmentParams const&, doris::ThreadPool*) /root/doris/be/src/pipeline/pipeline_fragment_context.cpp:352:9
    #5 0x55f4ad615a5c in doris::FragmentMgr::exec_plan_fragment(doris::TPipelineFragmentParams const&, doris::QuerySource, std::function<void (doris::RuntimeState*, doris::Status*)> const&, doris::TPipelineFragmentParamsList const&) /root/doris/be/src/runtime/fragment_mgr.cpp:855:9
    #6 0x55f4ad613ca6 in doris::FragmentMgr::exec_plan_fragment(doris::TPipelineFragmentParams const&, doris::QuerySource, doris::TPipelineFragmentParamsList const&) /root/doris/be/src/runtime/fragment_mgr.cpp:634:16
    #7 0x55f4ae2e867c in doris::PInternalService::_exec_plan_fragment_impl(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&, doris::PFragmentRequestVersion, bool, std::function<void (doris::RuntimeState*, doris::Status*)> const&) /root/doris/be/src/service/internal_service.cpp:613:17
    #8 0x55f4ae2e4b67 in doris::PInternalService::_exec_plan_fragment_in_pthread(google::protobuf::RpcController*, doris::PExecPlanFragmentRequest const*, doris::PExecPlanFragmentResult*, google::protobuf::Closure*) /root/doris/be/src/service/internal_service.cpp:343:14
    #9 0x55f4ae31d4e1 in doris::PInternalService::exec_plan_fragment_prepare(google::protobuf::RpcController*, doris::PExecPlanFragmentRequest const*, doris::PExecPlanFragmentResult*, google::protobuf::Closure*)::$_0::operator()() const /root/doris/be/src/service/internal_service.cpp:367:9
    #10 0x55f4ae31d33e in void std::__invoke_impl<void, doris::PInternalService::exec_plan_fragment_prepare(google::protobuf::RpcController*, doris::PExecPlanFragmentRequest const*, doris::PExecPlanFragmentResult*, google::protobuf::Closure*)::$_0&>(std::__invoke_other, doris::PInternalService::exec_plan_fragment_prepare(google::protobuf::RpcController*, doris::PExecPlanFragmentRequest const*, doris::PExecPlanFragmentResult*, google::protobuf::Closure*)::$_0&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:61:14
    #11 0x55f4ae31d27e in std::enable_if<is_invocable_r_v<void, doris::PInternalService::exec_plan_fragment_prepare(google::protobuf::RpcController*, doris::PExecPlanFragmentRequest const*, doris::PExecPlanFragmentResult*, google::protobuf::Closure*)::$_0&>, void>::type std::__invoke_r<void, doris::PInternalService::exec_plan_fragment_prepare(google::protobuf::RpcController*, doris::PExecPlanFragmentRequest const*, doris::PExecPlanFragmentResult*, google::protobuf::Closure*)::$_0&>(doris::PInternalService::exec_plan_fragment_prepare(google::protobuf::RpcController*, doris::PExecPlanFragmentRequest const*, doris::PExecPlanFragmentResult*, google::protobuf::Closure*)::$_0&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:111:2
    #12 0x55f4ae31cf55 in std::_Function_handler<void (), doris::PInternalService::exec_plan_fragment_prepare(google::protobuf::RpcController*, doris::PExecPlanFragmentRequest const*, doris::PExecPlanFragmentResult*, google::protobuf::Closure*)::$_0>::_M_invoke(std::_Any_data const&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:290:9
    #13 0x55f4a74175af in std::function<void ()>::operator()() const /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:591:9
    #14 0x55f4ae3c21c4 in doris::WorkThreadPool<false>::work_thread(int) /root/doris/be/src/util/work_thread_pool.hpp:158:17
    #15 0x55f4ae3c4cc8 in void std::__invoke_impl<void, void (doris::WorkThreadPool<false>::* const&)(int), doris::WorkThreadPool<false>*&, int&>(std::__invoke_memfun_deref, void (doris::WorkThreadPool<false>::* const&)(int), doris::WorkThreadPool<false>*&, int&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:74:14
    #16 0x55f4ae3c4a92 in std::__invoke_result<void (doris::WorkThreadPool<false>::* const&)(int), doris::WorkThreadPool<false>*&, int&>::type std::__invoke<void (doris::WorkThreadPool<false>::* const&)(int), doris::WorkThreadPool<false>*&, int&>(void (doris::WorkThreadPool<false>::* const&)(int), doris::WorkThreadPool<false>*&, int&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:96:14
    #17 0x55f4ae3c49f8 in decltype(std::__invoke((*this)._M_pmf, std::forward<doris::WorkThreadPool<false>*&>(fp), std::forward<int&>(fp))) std::_Mem_fn_base<void (doris::WorkThreadPool<false>::*)(int), true>::operator()<doris::WorkThreadPool<false>*&, int&>(doris::WorkThreadPool<false>*&, int&) const /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/functional:170:11
    #18 0x55f4ae3c4942 in void std::__invoke_impl<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)>&, doris::WorkThreadPool<false>*&, int&>(std::__invoke_other, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)>&, doris::WorkThreadPool<false>*&, int&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:61:14
    #19 0x55f4ae3c4752 in std::enable_if<is_invocable_r_v<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)>&, doris::WorkThreadPool<false>*&, int&>, void>::type std::__invoke_r<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)>&, doris::WorkThreadPool<false>*&, int&>(std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)>&, doris::WorkThreadPool<false>*&, int&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:111:2
    #20 0x55f4ae3c4664 in void std::_Bind_result<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)> (doris::WorkThreadPool<false>*, int)>::__call<void, 0ul, 1ul>(std::tuple<>&&, std::_Index_tuple<0ul, 1ul>) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/functional:654:11
    #21 0x55f4ae3c4395 in void std::_Bind_result<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)> (doris::WorkThreadPool<false>*, int)>::operator()<>() /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/functional:713:17
    #22 0x55f4ae3c428e in void std::__invoke_impl<void, std::_Bind_result<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)> (doris::WorkThreadPool<false>*, int)>>(std::__invoke_other, std::_Bind_result<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)> (doris::WorkThreadPool<false>*, int)>&&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:61:14
    #23 0x55f4ae3c41ce in std::__invoke_result<std::_Bind_result<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)> (doris::WorkThreadPool<false>*, int)>>::type std::__invoke<std::_Bind_result<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)> (doris::WorkThreadPool<false>*, int)>>(std::_Bind_result<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)> (doris::WorkThreadPool<false>*, int)>&&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:96:14
    #24 0x55f4ae3c417b in void std::thread::_Invoker<std::tuple<std::_Bind_result<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)> (doris::WorkThreadPool<false>*, int)>>>::_M_invoke<0ul>(std::_Index_tuple<0ul>) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_thread.h:292:13
    #25 0x55f4ae3c40f6 in std::thread::_Invoker<std::tuple<std::_Bind_result<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)> (doris::WorkThreadPool<false>*, int)>>>::operator()() /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_thread.h:299:11
    #26 0x55f4ae3c3f34 in std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::_Bind_result<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)> (doris::WorkThreadPool<false>*, int)>>>>::_M_run() /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_thread.h:244:13
    #27 0x55f4fea05d2e in execute_native_thread_routine pthread_atfork.c
    #28 0x55f4a7162e0a in asan_thread_start(void*) crtstuff.c
    #29 0x7f7149c421c9 in start_thread (/lib64/libpthread.so.0+0x81c9) (BuildId: 7c4add5c7a885e6ff4ce17867d6a2286e4420eec)
    #30 0x7f714a6318d2 in clone (/lib64/libc.so.6+0x398d2) (BuildId: 4ee3325955e3b55b6805f33959b7cb77745ad625)

Issue Number: close #xxx

Related PR: #xxx

Problem Summary:

Release note

None

Check List (For Author)

  • Test

    • Regression test
    • Unit Test
    • Manual test (add detailed scripts or steps below)
    • No need to test or manual test. Explain why:
      • This is a refactor/code format and no logic has been changed.
      • Previous test can cover this change.
      • No code files have been changed.
      • Other reason
  • Behavior changed:

    • No.
    • Yes.
  • Does this need documentation?

    • No.
    • Yes.

Check List (For Reviewer who merge this PR)

  • Confirm the release note
  • Confirm test cases
  • Confirm document
  • Add branch pick label

@hello-stephen
Copy link
Contributor

Thank you for your contribution to Apache Doris.
Don't know what should be done next? See How to process your PR.

Please clearly describe your PR:

  1. What problem was fixed (it's best to include specific error reporting information). How it was fixed.
  2. Which behaviors were modified. What was the previous behavior, what is it now, why was it modified, and what possible impacts might there be.
  3. What features were added. Why was this function added?
  4. Which code was refactored and why was this part of the code refactored?
  5. Which functions were optimized and what is the difference before and after the optimization?

@mrhhsg
Copy link
Member Author

mrhhsg commented May 8, 2025

run buildall

@doris-robot
Copy link

TPC-H: Total hot run time: 34116 ms
machine: 'aliyun_ecs.c7a.8xlarge_32C64G'
scripts: https://github.com/apache/doris/tree/master/tools/tpch-tools
Tpch sf100 test result on commit 3046af85c898e864dadeecbee49c3189e27aa36b, data reload: false

------ Round 1 ----------------------------------
q1	26190	5179	5110	5110
q2	2118	272	192	192
q3	10447	1278	734	734
q4	10251	994	532	532
q5	7984	2404	2347	2347
q6	191	169	134	134
q7	916	753	615	615
q8	9323	1264	1120	1120
q9	6829	5347	5157	5157
q10	6804	2312	1876	1876
q11	492	301	268	268
q12	354	358	210	210
q13	17782	3675	3081	3081
q14	232	227	219	219
q15	535	478	477	477
q16	423	429	374	374
q17	629	875	373	373
q18	7872	7258	7078	7078
q19	1225	981	560	560
q20	340	344	226	226
q21	4532	3431	2480	2480
q22	1050	1023	953	953
Total cold run time: 116519 ms
Total hot run time: 34116 ms

----- Round 2, with runtime_filter_mode=off -----
q1	5144	5149	5112	5112
q2	238	325	229	229
q3	2178	2689	2301	2301
q4	1425	1814	1511	1511
q5	4556	4432	4323	4323
q6	207	164	130	130
q7	1962	1897	1772	1772
q8	2577	2656	2516	2516
q9	7181	7130	7198	7130
q10	2975	3179	2684	2684
q11	581	524	485	485
q12	690	730	602	602
q13	3493	3871	3307	3307
q14	271	302	281	281
q15	520	478	475	475
q16	441	484	445	445
q17	1163	1592	1445	1445
q18	7775	7679	7271	7271
q19	831	849	938	849
q20	1976	2017	1914	1914
q21	5344	4510	4472	4472
q22	1022	1006	988	988
Total cold run time: 52550 ms
Total hot run time: 50242 ms

@doris-robot
Copy link

TPC-DS: Total hot run time: 184729 ms
machine: 'aliyun_ecs.c7a.8xlarge_32C64G'
scripts: https://github.com/apache/doris/tree/master/tools/tpcds-tools
TPC-DS sf100 test result on commit 3046af85c898e864dadeecbee49c3189e27aa36b, data reload: false

query1	1018	470	502	470
query2	6561	1785	1758	1758
query3	6749	231	224	224
query4	27132	23777	23154	23154
query5	4358	650	475	475
query6	308	201	180	180
query7	4613	484	287	287
query8	285	237	231	231
query9	8602	2535	2544	2535
query10	482	299	254	254
query11	15539	14976	14750	14750
query12	159	111	112	111
query13	1650	525	408	408
query14	8747	6096	6155	6096
query15	202	188	167	167
query16	7131	635	464	464
query17	926	700	565	565
query18	1967	392	320	320
query19	181	185	146	146
query20	114	114	123	114
query21	210	120	102	102
query22	4069	4103	3987	3987
query23	33793	32853	32789	32789
query24	8458	2382	2378	2378
query25	525	438	401	401
query26	1244	261	152	152
query27	2769	490	334	334
query28	4388	2098	2071	2071
query29	770	558	450	450
query30	289	219	188	188
query31	929	868	790	790
query32	75	71	64	64
query33	561	382	325	325
query34	792	850	501	501
query35	758	813	726	726
query36	962	958	871	871
query37	112	109	80	80
query38	4072	4186	4147	4147
query39	1475	1440	1391	1391
query40	204	118	110	110
query41	58	60	54	54
query42	118	114	115	114
query43	502	476	457	457
query44	1329	801	784	784
query45	183	179	167	167
query46	811	1026	619	619
query47	1709	1804	1679	1679
query48	377	400	284	284
query49	786	529	442	442
query50	668	671	394	394
query51	4026	4089	4024	4024
query52	109	104	101	101
query53	226	252	183	183
query54	593	572	502	502
query55	85	81	81	81
query56	320	324	279	279
query57	1131	1149	1107	1107
query58	262	251	248	248
query59	2504	2587	2432	2432
query60	328	311	305	305
query61	127	130	128	128
query62	787	750	665	665
query63	231	190	189	189
query64	4364	1042	676	676
query65	4336	4220	4261	4220
query66	1135	413	323	323
query67	15611	15266	15111	15111
query68	7716	879	509	509
query69	475	300	255	255
query70	1174	1098	1116	1098
query71	429	308	294	294
query72	5610	4729	4819	4729
query73	698	649	349	349
query74	9107	9245	8877	8877
query75	3307	3203	2657	2657
query76	3251	1173	754	754
query77	601	398	290	290
query78	9850	9909	9214	9214
query79	2014	836	563	563
query80	599	508	462	462
query81	466	254	220	220
query82	417	123	102	102
query83	262	250	232	232
query84	303	102	96	96
query85	780	356	319	319
query86	323	304	292	292
query87	4326	4366	4369	4366
query88	3577	2208	2204	2204
query89	383	308	283	283
query90	1927	215	220	215
query91	145	144	109	109
query92	75	60	55	55
query93	1156	950	581	581
query94	716	420	297	297
query95	375	296	286	286
query96	485	563	278	278
query97	3130	3250	3146	3146
query98	237	211	196	196
query99	1704	1385	1267	1267
Total cold run time: 271513 ms
Total hot run time: 184729 ms

@doris-robot
Copy link

ClickBench: Total hot run time: 28.95 s
machine: 'aliyun_ecs.c7a.8xlarge_32C64G'
scripts: https://github.com/apache/doris/tree/master/tools/clickbench-tools
ClickBench test result on commit 3046af85c898e864dadeecbee49c3189e27aa36b, data reload: false

query1	0.04	0.04	0.03
query2	0.15	0.11	0.11
query3	0.26	0.20	0.20
query4	1.60	0.20	0.11
query5	0.56	0.58	0.56
query6	1.22	0.73	0.72
query7	0.03	0.01	0.01
query8	0.04	0.03	0.04
query9	0.58	0.52	0.50
query10	0.57	0.56	0.56
query11	0.16	0.11	0.11
query12	0.15	0.11	0.12
query13	0.62	0.59	0.60
query14	0.78	0.80	0.80
query15	0.87	0.85	0.86
query16	0.39	0.37	0.38
query17	1.05	1.03	1.03
query18	0.21	0.19	0.20
query19	1.90	1.75	1.80
query20	0.01	0.01	0.01
query21	15.43	0.90	0.57
query22	0.76	1.34	0.69
query23	14.76	1.34	0.62
query24	6.88	0.76	0.76
query25	0.53	0.20	0.05
query26	0.60	0.17	0.13
query27	0.05	0.05	0.06
query28	9.75	0.96	0.43
query29	12.57	4.03	3.31
query30	0.25	0.09	0.06
query31	2.83	0.63	0.38
query32	3.22	0.55	0.47
query33	3.12	3.07	3.04
query34	15.75	5.11	4.52
query35	4.54	4.56	4.44
query36	0.68	0.51	0.48
query37	0.09	0.06	0.06
query38	0.05	0.04	0.03
query39	0.03	0.02	0.02
query40	0.18	0.15	0.13
query41	0.08	0.02	0.02
query42	0.03	0.02	0.02
query43	0.04	0.03	0.03
Total cold run time: 103.41 s
Total hot run time: 28.95 s

@hello-stephen
Copy link
Contributor

BE UT Coverage Report

Increment line coverage 100.00% (5/5) 🎉

Increment coverage report
Complete coverage report

Category Coverage
Function Coverage 55.68% (14895/26751)
Line Coverage 44.49% (131356/295261)
Region Coverage 43.39% (66183/152534)
Branch Coverage 38.06% (33797/88794)

@hello-stephen
Copy link
Contributor

BE Regression && UT Coverage Report

Increment line coverage 100.00% (5/5) 🎉

Increment coverage report
Complete coverage report

Category Coverage
Function Coverage 79.22% (20794/26248)
Line Coverage 72.47% (213621/294759)
Region Coverage 70.68% (126448/178894)
Branch Coverage 64.30% (65149/101326)

@mrhhsg
Copy link
Member Author

mrhhsg commented May 9, 2025

run buildall

@doris-robot
Copy link

TPC-H: Total hot run time: 33737 ms
machine: 'aliyun_ecs.c7a.8xlarge_32C64G'
scripts: https://github.com/apache/doris/tree/master/tools/tpch-tools
Tpch sf100 test result on commit e4d608b833c956be180b1e7454474da7986f411d, data reload: false

------ Round 1 ----------------------------------
q1	27621	5057	5078	5057
q2	2073	291	182	182
q3	10464	1250	703	703
q4	10225	1021	546	546
q5	7682	2423	2329	2329
q6	195	163	131	131
q7	942	766	615	615
q8	9321	1384	1106	1106
q9	6860	5071	5152	5071
q10	6869	2318	1883	1883
q11	489	273	284	273
q12	346	345	202	202
q13	17786	3726	3085	3085
q14	236	226	216	216
q15	554	484	487	484
q16	411	423	364	364
q17	603	861	360	360
q18	7657	7235	7043	7043
q19	1226	957	553	553
q20	338	336	226	226
q21	3991	2545	2345	2345
q22	1040	990	963	963
Total cold run time: 116929 ms
Total hot run time: 33737 ms

----- Round 2, with runtime_filter_mode=off -----
q1	5055	5030	5005	5005
q2	233	324	223	223
q3	2171	2672	2285	2285
q4	1364	1807	1382	1382
q5	4585	4481	4391	4391
q6	215	172	129	129
q7	2045	1937	1732	1732
q8	2582	2461	2424	2424
q9	7193	7212	7254	7212
q10	2984	3160	2698	2698
q11	565	508	499	499
q12	685	786	619	619
q13	3543	3913	3220	3220
q14	283	292	273	273
q15	513	475	474	474
q16	473	514	458	458
q17	1158	1572	1348	1348
q18	7682	7565	7417	7417
q19	808	872	929	872
q20	1982	2033	1844	1844
q21	4774	4376	4344	4344
q22	1066	1025	948	948
Total cold run time: 51959 ms
Total hot run time: 49797 ms

@doris-robot
Copy link

TPC-DS: Total hot run time: 185397 ms
machine: 'aliyun_ecs.c7a.8xlarge_32C64G'
scripts: https://github.com/apache/doris/tree/master/tools/tpcds-tools
TPC-DS sf100 test result on commit e4d608b833c956be180b1e7454474da7986f411d, data reload: false

query1	1062	488	484	484
query2	6577	1838	1780	1780
query3	6748	222	218	218
query4	26847	23538	23498	23498
query5	4331	631	472	472
query6	304	214	193	193
query7	4628	489	275	275
query8	283	238	221	221
query9	8608	2562	2557	2557
query10	470	311	267	267
query11	15607	14937	15047	14937
query12	169	109	103	103
query13	1655	523	408	408
query14	9571	6168	6210	6168
query15	207	186	170	170
query16	7253	621	504	504
query17	1199	723	572	572
query18	1976	413	314	314
query19	194	186	172	172
query20	123	118	117	117
query21	214	130	109	109
query22	4054	4155	4155	4155
query23	33957	32727	32843	32727
query24	8405	2416	2370	2370
query25	546	505	392	392
query26	1239	265	152	152
query27	2757	495	325	325
query28	4345	2091	2100	2091
query29	766	553	418	418
query30	278	219	192	192
query31	980	846	771	771
query32	71	64	60	60
query33	559	378	316	316
query34	792	848	500	500
query35	783	820	729	729
query36	961	1008	911	911
query37	109	106	76	76
query38	4097	4116	4113	4113
query39	1463	1410	1380	1380
query40	215	125	107	107
query41	58	55	53	53
query42	118	104	101	101
query43	500	521	485	485
query44	1289	785	776	776
query45	172	175	169	169
query46	809	1018	634	634
query47	1763	1783	1747	1747
query48	371	421	288	288
query49	771	497	413	413
query50	641	685	390	390
query51	4124	4081	4006	4006
query52	108	105	101	101
query53	248	255	183	183
query54	589	569	502	502
query55	82	80	84	80
query56	300	307	281	281
query57	1140	1132	1071	1071
query58	254	257	247	247
query59	2545	2644	2521	2521
query60	318	314	290	290
query61	130	142	128	128
query62	782	741	656	656
query63	222	184	183	183
query64	4328	998	675	675
query65	4281	4175	4290	4175
query66	1130	407	311	311
query67	15781	15532	15191	15191
query68	8055	874	507	507
query69	463	307	271	271
query70	1206	1100	1119	1100
query71	513	330	299	299
query72	5511	4770	4779	4770
query73	720	606	340	340
query74	8850	8921	8656	8656
query75	3886	3199	2707	2707
query76	3713	1211	744	744
query77	782	372	291	291
query78	9979	10246	9297	9297
query79	2130	826	561	561
query80	578	540	441	441
query81	487	262	213	213
query82	452	124	96	96
query83	257	243	237	237
query84	259	105	125	105
query85	809	359	328	328
query86	387	324	273	273
query87	4324	4412	4322	4322
query88	3483	2188	2160	2160
query89	393	325	294	294
query90	1877	215	211	211
query91	141	196	112	112
query92	74	59	58	58
query93	1645	943	573	573
query94	668	401	314	314
query95	370	290	286	286
query96	486	565	270	270
query97	3178	3255	3131	3131
query98	235	208	202	202
query99	1354	1406	1247	1247
Total cold run time: 274522 ms
Total hot run time: 185397 ms

@doris-robot
Copy link

ClickBench: Total hot run time: 29.08 s
machine: 'aliyun_ecs.c7a.8xlarge_32C64G'
scripts: https://github.com/apache/doris/tree/master/tools/clickbench-tools
ClickBench test result on commit e4d608b833c956be180b1e7454474da7986f411d, data reload: false

query1	0.04	0.04	0.03
query2	0.12	0.10	0.12
query3	0.25	0.19	0.20
query4	1.59	0.19	0.18
query5	0.59	0.57	0.58
query6	1.18	0.72	0.72
query7	0.02	0.02	0.02
query8	0.04	0.04	0.04
query9	0.56	0.54	0.52
query10	0.57	0.58	0.57
query11	0.15	0.11	0.11
query12	0.14	0.11	0.11
query13	0.61	0.60	0.59
query14	0.78	0.80	0.80
query15	0.88	0.85	0.86
query16	0.40	0.38	0.39
query17	1.04	1.02	1.02
query18	0.21	0.19	0.19
query19	1.90	1.83	1.81
query20	0.01	0.01	0.01
query21	15.40	0.90	0.54
query22	0.75	1.26	0.66
query23	14.87	1.39	0.65
query24	7.44	0.77	0.69
query25	0.49	0.12	0.08
query26	0.67	0.17	0.15
query27	0.06	0.05	0.04
query28	9.20	0.90	0.43
query29	12.55	3.98	3.26
query30	0.24	0.10	0.06
query31	2.84	0.58	0.37
query32	3.22	0.55	0.47
query33	3.05	3.13	3.07
query34	15.75	5.06	4.49
query35	4.55	4.50	4.52
query36	0.67	0.50	0.48
query37	0.08	0.06	0.06
query38	0.04	0.04	0.04
query39	0.03	0.02	0.02
query40	0.18	0.15	0.14
query41	0.08	0.03	0.03
query42	0.04	0.03	0.02
query43	0.04	0.03	0.02
Total cold run time: 103.32 s
Total hot run time: 29.08 s

@hello-stephen
Copy link
Contributor

BE UT Coverage Report

Increment line coverage 66.04% (70/106) 🎉

Increment coverage report
Complete coverage report

Category Coverage
Function Coverage 55.70% (14871/26696)
Line Coverage 44.51% (131552/295583)
Region Coverage 43.58% (66173/151848)
Branch Coverage 38.17% (33903/88812)

@hello-stephen
Copy link
Contributor

BE Regression && UT Coverage Report

Increment line coverage 83.02% (88/106) 🎉

Increment coverage report
Complete coverage report

Category Coverage
Function Coverage 79.39% (20852/26264)
Line Coverage 72.54% (214375/295524)
Region Coverage 70.70% (126106/178358)
Branch Coverage 64.43% (65343/101424)

template <int JoinOpType>
Status ProcessHashTableProbe<JoinOpType>::do_right_half_mark_join_conjuncts(
vectorized::Block* output_block) {
DCHECK(JoinOpType == TJoinOp::RIGHT_SEMI_JOIN || JoinOpType == TJoinOp::RIGHT_ANTI_JOIN)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Return an error status here instead of using DCHECK.

DCHECK_EQ(filter.size(), other_conjunct_filter.size());
const auto* other_filter_data = other_conjunct_filter.data();
for (size_t i = 0; i != filter.size(); ++i) {
// null & any(true or false) => null => false
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can abstract the part that is similar to do_mark_join_conjuncts so that the code can be reused?

_probe_side_output_timer(parent->_probe_side_output_timer),
_finish_probe_phase_timer(parent->_finish_probe_phase_timer),
_right_col_idx((_parent_operator->_is_right_semi_anti && !_have_other_join_conjunct)
_right_col_idx((_parent_operator->_is_right_semi_anti && !_have_other_join_conjunct &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be better to get right_col_idx directly from the parent operator, and use left + right = intermediate as a double check.

Copy link
Contributor

@BiteTheDDDDt BiteTheDDDDt left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@github-actions
Copy link
Contributor

PR approved by at least one committer and no changes requested.

@github-actions github-actions bot added approved Indicates a PR has been approved by one committer. reviewed labels May 12, 2025
@github-actions
Copy link
Contributor

PR approved by anyone and no changes requested.

@yiguolei
Copy link
Contributor

run buildall

@yiguolei yiguolei added usercase Important user case type label dev/2.1.x labels May 13, 2025
@doris-robot
Copy link

ClickBench: Total hot run time: 28.89 s
machine: 'aliyun_ecs.c7a.8xlarge_32C64G'
scripts: https://github.com/apache/doris/tree/master/tools/clickbench-tools
ClickBench test result on commit ba2202e8b77fded9c7c977b01ed43bb5446e45ae, data reload: false

query1	0.03	0.03	0.03
query2	0.13	0.10	0.11
query3	0.26	0.19	0.19
query4	1.60	0.18	0.18
query5	0.58	0.58	0.58
query6	1.19	0.73	0.72
query7	0.03	0.02	0.01
query8	0.04	0.04	0.04
query9	0.56	0.52	0.51
query10	0.57	0.58	0.57
query11	0.16	0.11	0.11
query12	0.15	0.12	0.11
query13	0.62	0.59	0.59
query14	0.78	0.81	0.82
query15	0.85	0.85	0.85
query16	0.37	0.38	0.39
query17	1.05	1.03	1.02
query18	0.22	0.21	0.21
query19	1.84	1.82	1.81
query20	0.01	0.01	0.01
query21	15.40	0.92	0.53
query22	0.75	1.22	0.76
query23	14.84	1.37	0.65
query24	7.63	0.83	0.32
query25	0.46	0.12	0.16
query26	0.59	0.16	0.14
query27	0.05	0.05	0.05
query28	9.70	0.88	0.43
query29	12.52	3.97	3.34
query30	0.25	0.09	0.06
query31	2.81	0.60	0.38
query32	3.23	0.54	0.46
query33	3.08	3.03	3.14
query34	15.77	5.12	4.52
query35	4.49	4.50	4.47
query36	0.69	0.49	0.49
query37	0.08	0.06	0.06
query38	0.06	0.04	0.04
query39	0.03	0.02	0.02
query40	0.18	0.13	0.12
query41	0.08	0.02	0.02
query42	0.03	0.02	0.02
query43	0.03	0.03	0.04
Total cold run time: 103.79 s
Total hot run time: 28.89 s

@hello-stephen
Copy link
Contributor

BE UT Coverage Report

Increment line coverage 86.36% (95/110) 🎉

Increment coverage report
Complete coverage report

Category Coverage
Function Coverage 55.79% (14894/26698)
Line Coverage 44.59% (131829/295636)
Region Coverage 43.64% (66292/151892)
Branch Coverage 38.26% (33965/88780)

@hello-stephen
Copy link
Contributor

BE Regression && UT Coverage Report

Increment line coverage 86.36% (95/110) 🎉

Increment coverage report
Complete coverage report

Category Coverage
Function Coverage 79.47% (20871/26264)
Line Coverage 72.69% (214862/295572)
Region Coverage 70.83% (126361/178397)
Branch Coverage 64.55% (65449/101390)

@yiguolei yiguolei merged commit 966865c into apache:master May 14, 2025
27 of 29 checks passed
@mrhhsg mrhhsg deleted the fix_join branch May 16, 2025 00:49
mrhhsg added a commit to mrhhsg/doris that referenced this pull request May 16, 2025
mrhhsg added a commit to mrhhsg/doris that referenced this pull request May 16, 2025
mrhhsg added a commit to mrhhsg/doris that referenced this pull request May 17, 2025
mrhhsg added a commit to mrhhsg/doris that referenced this pull request May 22, 2025
zddr added a commit to zddr/incubator-doris that referenced this pull request May 22, 2025
zddr added a commit to zddr/incubator-doris that referenced this pull request May 22, 2025
koarz pushed a commit to koarz/doris that referenced this pull request Jun 4, 2025
)

### What problem does this PR solve?

If there is a mark join condition, then even in a right semi join, the
columns from the left table involved in the mark join condition will
still appear in the intermediate tuple.

This PR also completes the missing handling logic for right semi joins
with mark join condition.

```
==2126730==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x503004ee02e0 at pc 0x55f4a7d3f354 bp 0x7f6b6b587e70 sp 0x7f6b6b587e68
READ of size 8 at 0x503004ee02e0 thread T1296 (brpc_light)
    #0 0x55f4a7d3f353 in std::__shared_ptr<doris::vectorized::IDataType const, (__gnu_cxx::_Lock_policy)2>::__shared_ptr(std::__shared_ptr<doris::vectorized::IDataType const, (__gnu_cxx::_Lock_policy)2> const&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/shared_ptr_base.h:1522:7
    #1 0x55f4a7d3f01e in std::shared_ptr<doris::vectorized::IDataType const>::shared_ptr(std::shared_ptr<doris::vectorized::IDataType const> const&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/shared_ptr.h:204:7
    #2 0x55f4f8aa231c in doris::pipeline::HashJoinProbeOperatorX::prepare(doris::RuntimeState*) /root/doris/be/src/pipeline/exec/hashjoin_probe_operator.cpp:577:33
    #3 0x55f4fb1e4a28 in doris::pipeline::Pipeline::prepare(doris::RuntimeState*) /root/doris/be/src/pipeline/pipeline.cpp:89:5
    #4 0x55f4fb089d3f in doris::pipeline::PipelineFragmentContext::prepare(doris::TPipelineFragmentParams const&, doris::ThreadPool*) /root/doris/be/src/pipeline/pipeline_fragment_context.cpp:352:9
    #5 0x55f4ad615a5c in doris::FragmentMgr::exec_plan_fragment(doris::TPipelineFragmentParams const&, doris::QuerySource, std::function<void (doris::RuntimeState*, doris::Status*)> const&, doris::TPipelineFragmentParamsList const&) /root/doris/be/src/runtime/fragment_mgr.cpp:855:9
    #6 0x55f4ad613ca6 in doris::FragmentMgr::exec_plan_fragment(doris::TPipelineFragmentParams const&, doris::QuerySource, doris::TPipelineFragmentParamsList const&) /root/doris/be/src/runtime/fragment_mgr.cpp:634:16
    #7 0x55f4ae2e867c in doris::PInternalService::_exec_plan_fragment_impl(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&, doris::PFragmentRequestVersion, bool, std::function<void (doris::RuntimeState*, doris::Status*)> const&) /root/doris/be/src/service/internal_service.cpp:613:17
    #8 0x55f4ae2e4b67 in doris::PInternalService::_exec_plan_fragment_in_pthread(google::protobuf::RpcController*, doris::PExecPlanFragmentRequest const*, doris::PExecPlanFragmentResult*, google::protobuf::Closure*) /root/doris/be/src/service/internal_service.cpp:343:14
    #9 0x55f4ae31d4e1 in doris::PInternalService::exec_plan_fragment_prepare(google::protobuf::RpcController*, doris::PExecPlanFragmentRequest const*, doris::PExecPlanFragmentResult*, google::protobuf::Closure*)::$_0::operator()() const /root/doris/be/src/service/internal_service.cpp:367:9
    #10 0x55f4ae31d33e in void std::__invoke_impl<void, doris::PInternalService::exec_plan_fragment_prepare(google::protobuf::RpcController*, doris::PExecPlanFragmentRequest const*, doris::PExecPlanFragmentResult*, google::protobuf::Closure*)::$_0&>(std::__invoke_other, doris::PInternalService::exec_plan_fragment_prepare(google::protobuf::RpcController*, doris::PExecPlanFragmentRequest const*, doris::PExecPlanFragmentResult*, google::protobuf::Closure*)::$_0&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:61:14
    #11 0x55f4ae31d27e in std::enable_if<is_invocable_r_v<void, doris::PInternalService::exec_plan_fragment_prepare(google::protobuf::RpcController*, doris::PExecPlanFragmentRequest const*, doris::PExecPlanFragmentResult*, google::protobuf::Closure*)::$_0&>, void>::type std::__invoke_r<void, doris::PInternalService::exec_plan_fragment_prepare(google::protobuf::RpcController*, doris::PExecPlanFragmentRequest const*, doris::PExecPlanFragmentResult*, google::protobuf::Closure*)::$_0&>(doris::PInternalService::exec_plan_fragment_prepare(google::protobuf::RpcController*, doris::PExecPlanFragmentRequest const*, doris::PExecPlanFragmentResult*, google::protobuf::Closure*)::$_0&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:111:2
    #12 0x55f4ae31cf55 in std::_Function_handler<void (), doris::PInternalService::exec_plan_fragment_prepare(google::protobuf::RpcController*, doris::PExecPlanFragmentRequest const*, doris::PExecPlanFragmentResult*, google::protobuf::Closure*)::$_0>::_M_invoke(std::_Any_data const&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:290:9
    #13 0x55f4a74175af in std::function<void ()>::operator()() const /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:591:9
    #14 0x55f4ae3c21c4 in doris::WorkThreadPool<false>::work_thread(int) /root/doris/be/src/util/work_thread_pool.hpp:158:17
    #15 0x55f4ae3c4cc8 in void std::__invoke_impl<void, void (doris::WorkThreadPool<false>::* const&)(int), doris::WorkThreadPool<false>*&, int&>(std::__invoke_memfun_deref, void (doris::WorkThreadPool<false>::* const&)(int), doris::WorkThreadPool<false>*&, int&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:74:14
    #16 0x55f4ae3c4a92 in std::__invoke_result<void (doris::WorkThreadPool<false>::* const&)(int), doris::WorkThreadPool<false>*&, int&>::type std::__invoke<void (doris::WorkThreadPool<false>::* const&)(int), doris::WorkThreadPool<false>*&, int&>(void (doris::WorkThreadPool<false>::* const&)(int), doris::WorkThreadPool<false>*&, int&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:96:14
    #17 0x55f4ae3c49f8 in decltype(std::__invoke((*this)._M_pmf, std::forward<doris::WorkThreadPool<false>*&>(fp), std::forward<int&>(fp))) std::_Mem_fn_base<void (doris::WorkThreadPool<false>::*)(int), true>::operator()<doris::WorkThreadPool<false>*&, int&>(doris::WorkThreadPool<false>*&, int&) const /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/functional:170:11
    #18 0x55f4ae3c4942 in void std::__invoke_impl<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)>&, doris::WorkThreadPool<false>*&, int&>(std::__invoke_other, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)>&, doris::WorkThreadPool<false>*&, int&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:61:14
    #19 0x55f4ae3c4752 in std::enable_if<is_invocable_r_v<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)>&, doris::WorkThreadPool<false>*&, int&>, void>::type std::__invoke_r<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)>&, doris::WorkThreadPool<false>*&, int&>(std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)>&, doris::WorkThreadPool<false>*&, int&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:111:2
    #20 0x55f4ae3c4664 in void std::_Bind_result<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)> (doris::WorkThreadPool<false>*, int)>::__call<void, 0ul, 1ul>(std::tuple<>&&, std::_Index_tuple<0ul, 1ul>) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/functional:654:11
    #21 0x55f4ae3c4395 in void std::_Bind_result<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)> (doris::WorkThreadPool<false>*, int)>::operator()<>() /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/functional:713:17
    #22 0x55f4ae3c428e in void std::__invoke_impl<void, std::_Bind_result<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)> (doris::WorkThreadPool<false>*, int)>>(std::__invoke_other, std::_Bind_result<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)> (doris::WorkThreadPool<false>*, int)>&&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:61:14
    #23 0x55f4ae3c41ce in std::__invoke_result<std::_Bind_result<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)> (doris::WorkThreadPool<false>*, int)>>::type std::__invoke<std::_Bind_result<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)> (doris::WorkThreadPool<false>*, int)>>(std::_Bind_result<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)> (doris::WorkThreadPool<false>*, int)>&&) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:96:14
    #24 0x55f4ae3c417b in void std::thread::_Invoker<std::tuple<std::_Bind_result<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)> (doris::WorkThreadPool<false>*, int)>>>::_M_invoke<0ul>(std::_Index_tuple<0ul>) /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_thread.h:292:13
    #25 0x55f4ae3c40f6 in std::thread::_Invoker<std::tuple<std::_Bind_result<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)> (doris::WorkThreadPool<false>*, int)>>>::operator()() /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_thread.h:299:11
    #26 0x55f4ae3c3f34 in std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::_Bind_result<void, std::_Mem_fn<void (doris::WorkThreadPool<false>::*)(int)> (doris::WorkThreadPool<false>*, int)>>>>::_M_run() /root/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_thread.h:244:13
    #27 0x55f4fea05d2e in execute_native_thread_routine pthread_atfork.c
    #28 0x55f4a7162e0a in asan_thread_start(void*) crtstuff.c
    #29 0x7f7149c421c9 in start_thread (/lib64/libpthread.so.0+0x81c9) (BuildId: 7c4add5c7a885e6ff4ce17867d6a2286e4420eec)
    #30 0x7f714a6318d2 in clone (/lib64/libc.so.6+0x398d2) (BuildId: 4ee3325955e3b55b6805f33959b7cb77745ad625)
```
dataroaring pushed a commit that referenced this pull request Jun 11, 2025
…51156)

### What problem does this PR solve?

Pick #50720 #50993 #51124

Co-authored-by: zhangdong <zhangdong@selectdb.com>
zddr added a commit to zddr/incubator-doris that referenced this pull request Jun 20, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

approved Indicates a PR has been approved by one committer. dev/2.1.11-merged dev/3.0.7-merged dev/3.1.0-merged reviewed usercase Important user case type label

Projects

None yet

Development

Successfully merging this pull request may close these issues.

7 participants