diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp index 079f7687bdb0b..61d5584f9c724 100644 --- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp @@ -2112,23 +2112,21 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86); } - bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall(); - bool IsSibcall = false; + // We cannot guarantee TCO for mismatched calling conventions. if (isTailCall && ShouldGuaranteeTCO) { - // If we need to guarantee TCO for a non-musttail call, we just need to make - // sure the conventions match. If a tail call uses one of the supported TCO - // conventions and the caller and callee match, we can tail call any - // function prototype. CallingConv::ID CallerCC = MF.getFunction().getCallingConv(); isTailCall = (CallConv == CallerCC); - IsSibcall = IsMustTail; - } else if (isTailCall) { - // Check if this tail call is a "sibling" call, which is loosely defined to - // be a tail call that doesn't require heroics like moving the return - // address or swapping byval arguments. We treat some musttail calls as - // sibling calls to avoid unnecessary argument copies. + } + + // Check if this tail call is a "sibling" call, which is loosely defined to + // be a tail call that doesn't require heroics like moving the return + // address or swapping byval arguments. We treat some musttail calls as + // sibling calls to avoid unnecessary argument copies. + bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall(); + bool IsSibcall = false; + if (isTailCall) { IsSibcall = isEligibleForSiblingCallOpt(CLI, CCInfo, ArgLocs); - isTailCall = IsSibcall || IsMustTail; + isTailCall = IsSibcall || IsMustTail || ShouldGuaranteeTCO; } if (isTailCall) diff --git a/llvm/test/CodeGen/X86/hipe-cc64.ll b/llvm/test/CodeGen/X86/hipe-cc64.ll index 4cb033b1a6580..3cdc7d11b4d5f 100644 --- a/llvm/test/CodeGen/X86/hipe-cc64.ll +++ b/llvm/test/CodeGen/X86/hipe-cc64.ll @@ -55,14 +55,12 @@ entry: define cc 11 void @foo(i64 %hp, i64 %p, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) nounwind { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $48, %rsp -; CHECK-NEXT: movq %r15, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rbp, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %r8, (%rsp) -; CHECK-NEXT: addq $48, %rsp +; CHECK-NEXT: movq %r15, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rbp, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %r8, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: jmp bar@PLT # TAILCALL entry: %hp_var = alloca i64 diff --git a/llvm/test/CodeGen/X86/musttail-tailcc.ll b/llvm/test/CodeGen/X86/musttail-tailcc.ll index f1ffbcb1142c5..b366416b7aec5 100644 --- a/llvm/test/CodeGen/X86/musttail-tailcc.ll +++ b/llvm/test/CodeGen/X86/musttail-tailcc.ll @@ -55,6 +55,15 @@ define dso_local tailcc void @void_test(i32, i32, i32, i32) { ; ; X86-LABEL: void_test: ; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: jmp void_test # TAILCALL entry: musttail call tailcc void @void_test( i32 %0, i32 %1, i32 %2, i32 %3) @@ -68,8 +77,59 @@ define dso_local tailcc i1 @i1test(i32, i32, i32, i32) { ; ; X86-LABEL: i1test: ; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: jmp i1test # TAILCALL entry: %4 = musttail call tailcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3) ret i1 %4 } + +; Regression test: musttail tailcc with non-forwarded stack args. +declare tailcc void @f1_64(i64, i64, i64, i64, i64, i64, i64) + +define tailcc void @stack_arg_const_64(i64, i64, i64, i64, i64, i64, i64) { +; X86-LABEL: stack_arg_const_64: +; X86: # %bb.0: +; X86-NEXT: movl $4, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $8, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $15, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $16, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $23, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $42, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $1, %ecx +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: jmp f1_64@PLT # TAILCALL + musttail call tailcc void @f1_64(i64 1, i64 4, i64 8, i64 15, i64 16, i64 23, i64 42) + ret void +} + +declare tailcc void @f1_32(i32, i32, i32, i32, i32, i32, i32) + +define tailcc void @stack_arg_const_32(i32, i32, i32, i32, i32, i32, i32) { +; X86-LABEL: stack_arg_const_32: +; X86: # %bb.0: +; X86-NEXT: movl $8, {{[0-9]+}}(%esp) +; X86-NEXT: movl $15, {{[0-9]+}}(%esp) +; X86-NEXT: movl $16, {{[0-9]+}}(%esp) +; X86-NEXT: movl $23, {{[0-9]+}}(%esp) +; X86-NEXT: movl $42, {{[0-9]+}}(%esp) +; X86-NEXT: movl $1, %ecx +; X86-NEXT: movl $4, %edx +; X86-NEXT: jmp f1_32@PLT # TAILCALL + musttail call tailcc void @f1_32(i32 1, i32 4, i32 8, i32 15, i32 16, i32 23, i32 42) + ret void +} diff --git a/llvm/test/CodeGen/X86/sibcall.ll b/llvm/test/CodeGen/X86/sibcall.ll index d1137cac7d365..733d8f98fca28 100644 --- a/llvm/test/CodeGen/X86/sibcall.ll +++ b/llvm/test/CodeGen/X86/sibcall.ll @@ -1075,6 +1075,84 @@ define void @t25_sret_to_sret_different_val(ptr noalias sret(%struct.foo) align ret void } +; Test musttail with non-forwarded stack arguments (more than 6 args). +declare void @f1_64(i64, i64, i64, i64, i64, i64, i64) + +define void @stack_arg_const_64(i64, i64, i64, i64, i64, i64, i64) { +; X86-LABEL: stack_arg_const_64: +; X86: # %bb.0: +; X86-NEXT: movaps {{.*#+}} xmm0 = [1,0,4,0] +; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) +; X86-NEXT: movaps {{.*#+}} xmm0 = [8,0,15,0] +; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) +; X86-NEXT: movaps {{.*#+}} xmm0 = [16,0,23,0] +; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $42, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: jmp f1_64@PLT # TAILCALL +; +; X64-LABEL: stack_arg_const_64: +; X64: # %bb.0: +; X64-NEXT: movq $42, {{[0-9]+}}(%rsp) +; X64-NEXT: movl $1, %edi +; X64-NEXT: movl $4, %esi +; X64-NEXT: movl $8, %edx +; X64-NEXT: movl $15, %ecx +; X64-NEXT: movl $16, %r8d +; X64-NEXT: movl $23, %r9d +; X64-NEXT: jmp f1_64@PLT # TAILCALL +; +; X32-LABEL: stack_arg_const_64: +; X32: # %bb.0: +; X32-NEXT: movq $42, {{[0-9]+}}(%esp) +; X32-NEXT: movl $1, %edi +; X32-NEXT: movl $4, %esi +; X32-NEXT: movl $8, %edx +; X32-NEXT: movl $15, %ecx +; X32-NEXT: movl $16, %r8d +; X32-NEXT: movl $23, %r9d +; X32-NEXT: jmp f1_64@PLT # TAILCALL + musttail call void @f1_64(i64 1, i64 4, i64 8, i64 15, i64 16, i64 23, i64 42) + ret void +} + +declare void @f1_32(i32, i32, i32, i32, i32, i32, i32) + +define void @stack_arg_const_32(i32, i32, i32, i32, i32, i32, i32) { +; X86-LABEL: stack_arg_const_32: +; X86: # %bb.0: +; X86-NEXT: movaps {{.*#+}} xmm0 = [1,4,8,15] +; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $16, {{[0-9]+}}(%esp) +; X86-NEXT: movl $23, {{[0-9]+}}(%esp) +; X86-NEXT: movl $42, {{[0-9]+}}(%esp) +; X86-NEXT: jmp f1_32@PLT # TAILCALL +; +; X64-LABEL: stack_arg_const_32: +; X64: # %bb.0: +; X64-NEXT: movl $42, {{[0-9]+}}(%rsp) +; X64-NEXT: movl $1, %edi +; X64-NEXT: movl $4, %esi +; X64-NEXT: movl $8, %edx +; X64-NEXT: movl $15, %ecx +; X64-NEXT: movl $16, %r8d +; X64-NEXT: movl $23, %r9d +; X64-NEXT: jmp f1_32@PLT # TAILCALL +; +; X32-LABEL: stack_arg_const_32: +; X32: # %bb.0: +; X32-NEXT: movl $42, {{[0-9]+}}(%esp) +; X32-NEXT: movl $1, %edi +; X32-NEXT: movl $4, %esi +; X32-NEXT: movl $8, %edx +; X32-NEXT: movl $15, %ecx +; X32-NEXT: movl $16, %r8d +; X32-NEXT: movl $23, %r9d +; X32-NEXT: jmp f1_32@PLT # TAILCALL + musttail call void @f1_32(i32 1, i32 4, i32 8, i32 15, i32 16, i32 23, i32 42) + ret void +} + declare void @llvm.memset.p0.i64(ptr, i8, i64, i1) declare void @callee_1(ptr) declare void @callee_2(ptr noalias sret(%struct.foo)) diff --git a/llvm/test/CodeGen/X86/swifttailcc-store-ret-address-aliasing-stack-slot.ll b/llvm/test/CodeGen/X86/swifttailcc-store-ret-address-aliasing-stack-slot.ll index b901d22f66392..b4aeaf30e745e 100644 --- a/llvm/test/CodeGen/X86/swifttailcc-store-ret-address-aliasing-stack-slot.ll +++ b/llvm/test/CodeGen/X86/swifttailcc-store-ret-address-aliasing-stack-slot.ll @@ -26,6 +26,8 @@ define swifttailcc void @test(ptr %0, ptr swiftasync %1, i64 %2, i64 %3, ptr %4, ; CHECK-NEXT: callq _foo ; CHECK-NEXT: movq %r14, (%rax) ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %edx +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movq %rax, %r14 ; CHECK-NEXT: movq %r13, %rdi ; CHECK-NEXT: movq %r15, %rsi @@ -33,6 +35,7 @@ define swifttailcc void @test(ptr %0, ptr swiftasync %1, i64 %2, i64 %3, ptr %4, ; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r15 +; CHECK-NEXT: addq $16, %rsp ; CHECK-NEXT: jmp _tc_fn ## TAILCALL entry: %res = tail call ptr @foo()