Revert "[HLSL][DirectX] Emit convergence control tokens when targeting DirectX"#193090
Conversation
…g Direct…" This reverts commit 2c8c2bd.
|
@llvm/pr-subscribers-backend-directx @llvm/pr-subscribers-backend-mips Author: Finn Plummer (inbelic) ChangesThis change appears to introduce complications when trying to do a full loop unroll that is exhibited here: https://github.com/llvm/llvm-project/actions/runs/24577221310/job/71865579618. This results in invalid DXIL as the unreachable branch is not correctly cleaned up. Initial leads look like this is because the instructions with convergence control tokens are still being used for analysis when they are within an unreachable branch. Reverts llvm/llvm-project#188792 Patch is 229.06 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/193090.diff 73 Files Affected:
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index d3dc1014471ec..3a4291719da74 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -715,9 +715,6 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
Builder.CreatePHI(element->getType(), 2, "arrayinit.cur");
currentElement->addIncoming(element, entryBB);
- if (CGF.CGM.shouldEmitConvergenceTokens())
- CGF.ConvergenceTokenStack.push_back(CGF.emitConvergenceLoopToken(bodyBB));
-
// Emit the actual filler expression.
{
// C++1z [class.temporary]p5:
@@ -749,9 +746,6 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
Builder.CreateCondBr(done, endBB, bodyBB);
currentElement->addIncoming(nextElement, Builder.GetInsertBlock());
- if (CGF.CGM.shouldEmitConvergenceTokens())
- CGF.ConvergenceTokenStack.pop_back();
-
CGF.EmitBlock(endBB);
}
}
@@ -1993,9 +1987,6 @@ void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E,
llvm::Value *element =
Builder.CreateInBoundsGEP(llvmElementType, begin, index);
- if (CGF.CGM.shouldEmitConvergenceTokens())
- CGF.ConvergenceTokenStack.push_back(CGF.emitConvergenceLoopToken(bodyBB));
-
// Prepare for a cleanup.
QualType::DestructionKind dtorKind = elementType.isDestructedType();
EHScopeStack::stable_iterator cleanup;
@@ -2043,9 +2034,6 @@ void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E,
llvm::BasicBlock *endBB = CGF.createBasicBlock("arrayinit.end");
Builder.CreateCondBr(done, endBB, bodyBB);
- if (CGF.CGM.shouldEmitConvergenceTokens())
- CGF.ConvergenceTokenStack.pop_back();
-
CGF.EmitBlock(endBB);
// Leave the partial-array cleanup if we entered one.
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index d0d8eed26d8c2..f510195dbd6cb 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -574,13 +574,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
Value *IndexOp = EmitScalarExpr(E->getArg(1));
llvm::Type *RetTy = ConvertType(E->getType());
- llvm::Function *IntrFn = llvm::Intrinsic::getOrInsertDeclaration(
- &CGM.getModule(),
- CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(),
- {RetTy, HandleOp->getType(), IndexOp->getType()});
- llvm::CallInst *CI = EmitRuntimeCall(IntrFn, {HandleOp, IndexOp});
- CI->setCallingConv(IntrFn->getCallingConv());
- return CI;
+ return Builder.CreateIntrinsic(
+ RetTy, CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(),
+ ArrayRef<Value *>{HandleOp, IndexOp});
}
case Builtin::BI__builtin_hlsl_resource_sample: {
Value *HandleOp = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 99b3a140b48c2..1e25172d18890 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -657,16 +657,8 @@ CGHLSLRuntime::emitDXILUserSemanticLoad(llvm::IRBuilder<> &B, llvm::Type *Type,
llvm::PoisonValue::get(B.getInt32Ty())};
llvm::Intrinsic::ID IntrinsicID = llvm::Intrinsic::dx_load_input;
-
- SmallVector<OperandBundleDef, 1> OB;
- if (auto *Token = getConvergenceToken(*B.GetInsertBlock())) {
- llvm::Value *bundleArgs[] = {Token};
- OB.emplace_back("convergencectrl", bundleArgs);
- }
-
- llvm::Function *IntrFn = llvm::Intrinsic::getOrInsertDeclaration(
- B.GetInsertBlock()->getModule(), IntrinsicID, {Type});
- llvm::Value *Value = B.CreateCall(IntrFn, Args, OB, VariableName);
+ llvm::Value *Value = B.CreateIntrinsic(/*ReturnType=*/Type, IntrinsicID, Args,
+ nullptr, VariableName);
return Value;
}
@@ -684,16 +676,7 @@ void CGHLSLRuntime::emitDXILUserSemanticStore(llvm::IRBuilder<> &B,
Source};
llvm::Intrinsic::ID IntrinsicID = llvm::Intrinsic::dx_store_output;
-
- SmallVector<OperandBundleDef, 1> OB;
- if (auto *Token = getConvergenceToken(*B.GetInsertBlock())) {
- llvm::Value *bundleArgs[] = {Token};
- OB.emplace_back("convergencectrl", bundleArgs);
- }
-
- llvm::Function *IntrFn = llvm::Intrinsic::getOrInsertDeclaration(
- B.GetInsertBlock()->getModule(), IntrinsicID, {Source->getType()});
- B.CreateCall(IntrFn, Args, OB);
+ B.CreateIntrinsic(/*ReturnType=*/CGM.VoidTy, IntrinsicID, Args, nullptr);
}
llvm::Value *CGHLSLRuntime::emitUserSemanticLoad(
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index d1752b86b6603..29b87a0616992 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -5437,11 +5437,11 @@ class CodeGenFunction : public CodeGenTypeCache {
void maybeAttachRangeForLoad(llvm::LoadInst *Load, QualType Ty,
SourceLocation Loc);
+private:
// Emits a convergence_loop instruction for the given |BB|, with |ParentToken|
// as it's parent convergence instr.
llvm::ConvergenceControlInst *emitConvergenceLoopToken(llvm::BasicBlock *BB);
-private:
// Adds a convergence_ctrl token with |ParentToken| as parent convergence
// instr to the call |Input|.
llvm::CallBase *addConvergenceControlToken(llvm::CallBase *Input);
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index dc296919aa32f..d62707a3355c9 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1815,7 +1815,7 @@ class CodeGenModule : public CodeGenTypeCache {
bool shouldEmitConvergenceTokens() const {
// TODO: this should probably become unconditional once the controlled
// convergence becomes the norm.
- return getTriple().isSPIRVLogical() || getTriple().isDXIL();
+ return getTriple().isSPIRVLogical();
}
void addUndefinedGlobalForTailCall(
diff --git a/clang/test/CodeGenDirectX/Builtins/dot2add.c b/clang/test/CodeGenDirectX/Builtins/dot2add.c
index bc5073995522e..4275a285012b0 100644
--- a/clang/test/CodeGenDirectX/Builtins/dot2add.c
+++ b/clang/test/CodeGenDirectX/Builtins/dot2add.c
@@ -8,7 +8,6 @@ typedef half half2 __attribute__((ext_vector_type(2)));
// CHECK-LABEL: define float @test_dot2add(
// CHECK-SAME: <2 x half> noundef [[X:%.*]], <2 x half> noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca <2 x half>, align 2
// CHECK-NEXT: [[Y_ADDR:%.*]] = alloca <2 x half>, align 2
// CHECK-NEXT: [[Z_ADDR:%.*]] = alloca float, align 4
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayReturn.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayReturn.hlsl
index b4235eed318e4..832c4ac9b10f5 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayReturn.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayReturn.hlsl
@@ -3,14 +3,12 @@
typedef int Foo[2];
// CHECK-LABEL: define void {{.*}}boop{{.*}}(ptr dead_on_unwind noalias writable sret([2 x i32]) align 4 %agg.result)
-// CHECK: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK: [[G:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[G]], ptr align 4 {{.*}}, i32 8, i1 false)
// CHECK-NEXT: [[AIB:%.*]] = getelementptr inbounds [2 x i32], ptr %agg.result, i32 0, i32 0
// CHECK-NEXT: br label %arrayinit.body
// CHECK: arrayinit.body:
// CHECK-NEXT: [[AII:%.*]] = phi i32 [ 0, %entry ], [ %arrayinit.next, %arrayinit.body ]
-// CHECK-NEXT: %[[#CV_LOOP:]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %[[#C_ENTRY]]) ]
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds i32, ptr [[AIB]], i32 [[AII]]
// CHECK-NEXT: [[AI:%.*]] = getelementptr inbounds nuw [2 x i32], ptr [[G]], i32 0, i32 [[AII]]
// CHECK-NEXT: [[Y:%.*]] = load i32, ptr [[AI]], align 4
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
index 40f32c28ad5ed..9c42da8962c2d 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
@@ -66,7 +66,6 @@ struct UnnamedDerived : UnnamedOnly {};
// CHECK-LABEL: define hidden void @_Z5case1v(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_RESULT]], ptr align 1 @__const._Z5case1v.TF1, i32 8, i1 false)
// CHECK-NEXT: ret void
//
@@ -79,7 +78,6 @@ TwoFloats case1() {
// CHECK-LABEL: define hidden void @_Z5case2v(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_RESULT]], ptr align 1 @__const._Z5case2v.TF2, i32 8, i1 false)
// CHECK-NEXT: ret void
//
@@ -92,7 +90,6 @@ TwoFloats case2() {
// CHECK-LABEL: define hidden void @_Z5case3i(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], i32 noundef [[VAL:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
@@ -113,7 +110,6 @@ TwoFloats case3(int Val) {
// CHECK-LABEL: define hidden void @_Z5case4Dv2_i(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], <2 x i32> noundef [[TWOVALS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[TWOVALS_ADDR:%.*]] = alloca <2 x i32>, align 4
// CHECK-NEXT: store <2 x i32> [[TWOVALS]], ptr [[TWOVALS_ADDR]], align 4
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
@@ -137,7 +133,6 @@ TwoFloats case4(int2 TwoVals) {
// CHECK-LABEL: define hidden void @_Z5case5Dv2_i(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOINTS:%.*]]) align 1 [[AGG_RESULT:%.*]], <2 x i32> noundef [[TWOVALS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[TWOVALS_ADDR:%.*]] = alloca <2 x i32>, align 4
// CHECK-NEXT: store <2 x i32> [[TWOVALS]], ptr [[TWOVALS_ADDR]], align 4
// CHECK-NEXT: [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 0
@@ -160,7 +155,6 @@ TwoInts case5(int2 TwoVals) {
// CHECK-LABEL: define hidden void @_Z5case69TwoFloats(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOINTS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS:%.*]]) align 1 [[TF4:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 0
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF4]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[X]], align 1
@@ -183,7 +177,6 @@ TwoInts case6(TwoFloats TF4) {
// CHECK-LABEL: define hidden void @_Z5case77TwoIntsS_i9TwoFloatsS0_S0_S0_(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_DOGGO:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_TWOINTS:%.*]]) align 1 [[TI1:%.*]], ptr noundef byval([[STRUCT_TWOINTS]]) align 1 [[TI2:%.*]], i32 noundef [[VAL:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS:%.*]]) align 1 [[TF1:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS]]) align 1 [[TF2:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS]]) align 1 [[TF3:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS]]) align 1 [[TF4:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
// CHECK-NEXT: [[LEGSTATE:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 0
@@ -248,7 +241,6 @@ Doggo case7(TwoInts TI1, TwoInts TI2, int Val, TwoFloats TF1, TwoFloats TF2,
// CHECK-LABEL: define hidden void @_Z5case85Doggo(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_ANIMALBITS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_DOGGO:%.*]]) align 1 [[D1:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[LEGS:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[AGG_RESULT]], i32 0, i32 0
// CHECK-NEXT: [[LEGSTATE:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[D1]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[LEGSTATE]], align 1
@@ -335,7 +327,6 @@ AnimalBits case8(Doggo D1) {
// CHECK-LABEL: define hidden void @_Z5case95Doggo10AnimalBits(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_ZOO:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_DOGGO:%.*]]) align 1 [[D1:%.*]], ptr noundef byval([[STRUCT_ANIMALBITS:%.*]]) align 1 [[A1:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[DOGS:%.*]] = getelementptr inbounds nuw [[STRUCT_ZOO]], ptr [[AGG_RESULT]], i32 0, i32 0
// CHECK-NEXT: [[LEGSTATE:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[DOGS]], i32 0, i32 0
// CHECK-NEXT: [[LEGSTATE1:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[D1]], i32 0, i32 0
@@ -752,7 +743,6 @@ Zoo case9(Doggo D1, AnimalBits A1) {
// CHECK-LABEL: define hidden void @_Z6case109TwoFloatsS_(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOURFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS:%.*]]) align 1 [[TF1:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS]]) align 1 [[TF2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
// CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF1]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[X1]], align 1
@@ -780,7 +770,6 @@ FourFloats case10(TwoFloats TF1, TwoFloats TF2) {
// CHECK-LABEL: define hidden void @_Z6case11f(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOURFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
// CHECK-NEXT: [[REF_TMP:%.*]] = alloca <4 x float>, align 4
// CHECK-NEXT: [[REF_TMP1:%.*]] = alloca <4 x float>, align 4
@@ -830,7 +819,6 @@ FourFloats case11(float F) {
// CHECK-LABEL: define hidden void @_Z6case12ii(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_SLICYBITS:%.*]]) align 1 [[AGG_RESULT:%.*]], i32 noundef [[I:%.*]], i32 noundef [[J:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[J_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
@@ -853,7 +841,6 @@ SlicyBits case12(int I, int J) {
// CHECK-LABEL: define hidden void @_Z6case137TwoInts(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_SLICYBITS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_TWOINTS:%.*]]) align 1 [[TI:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[TI]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Z]], align 1
// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
@@ -874,7 +861,6 @@ SlicyBits case13(TwoInts TI) {
// CHECK-LABEL: define hidden void @_Z6case149SlicyBits(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOINTS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_SLICYBITS:%.*]]) align 1 [[SB:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 0
// CHECK-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[SB]], align 1
// CHECK-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32
@@ -895,7 +881,6 @@ TwoInts case14(SlicyBits SB) {
// CHECK-LABEL: define hidden void @_Z6case159SlicyBits(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_SLICYBITS:%.*]]) align 1 [[SB:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
// CHECK-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[SB]], align 1
// CHECK-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32
@@ -919,7 +904,6 @@ TwoFloats case15(SlicyBits SB) {
// CHECK-LABEL: define hidden void @_Z7makeTwoRf(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noalias noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4
// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4
// CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
@@ -946,7 +930,6 @@ TwoFloats makeTwo(inout float X) {
// CHECK-LABEL: define hidden void @_Z6case16v(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOURFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[X:%.*]] = alloca float, align 4
// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_TWOFLOATS:%.*]], align 1
// CHECK-NEXT: [[TMP:%.*]] = alloca float, align 4
@@ -980,7 +963,6 @@ FourFloats case16() {
// CHECK-LABEL: define hidden noundef i32 @_Z12case17Helperi(
// CHECK-SAME: i32 noundef [[X:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
@@ -994,7 +976,6 @@ int case17Helper(int x) {
// CHECK-LABEL: define hidden void @_Z6case17v(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[X:%.*]] = alloca <2 x i32>, align 4
// CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_Z12case17Helperi(i32 noundef 0) #[[ATTR2]]
// CHECK-NEXT: [[CALL1:%.*]] = call noundef i32 @_Z12case17Helperi(i32 nou...
[truncated]
|
|
@llvm/pr-subscribers-clang-codegen Author: Finn Plummer (inbelic) ChangesThis change appears to introduce complications when trying to do a full loop unroll that is exhibited here: https://github.com/llvm/llvm-project/actions/runs/24577221310/job/71865579618. This results in invalid DXIL as the unreachable branch is not correctly cleaned up. Initial leads look like this is because the instructions with convergence control tokens are still being used for analysis when they are within an unreachable branch. Reverts llvm/llvm-project#188792 Patch is 229.06 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/193090.diff 73 Files Affected:
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index d3dc1014471ec..3a4291719da74 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -715,9 +715,6 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
Builder.CreatePHI(element->getType(), 2, "arrayinit.cur");
currentElement->addIncoming(element, entryBB);
- if (CGF.CGM.shouldEmitConvergenceTokens())
- CGF.ConvergenceTokenStack.push_back(CGF.emitConvergenceLoopToken(bodyBB));
-
// Emit the actual filler expression.
{
// C++1z [class.temporary]p5:
@@ -749,9 +746,6 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
Builder.CreateCondBr(done, endBB, bodyBB);
currentElement->addIncoming(nextElement, Builder.GetInsertBlock());
- if (CGF.CGM.shouldEmitConvergenceTokens())
- CGF.ConvergenceTokenStack.pop_back();
-
CGF.EmitBlock(endBB);
}
}
@@ -1993,9 +1987,6 @@ void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E,
llvm::Value *element =
Builder.CreateInBoundsGEP(llvmElementType, begin, index);
- if (CGF.CGM.shouldEmitConvergenceTokens())
- CGF.ConvergenceTokenStack.push_back(CGF.emitConvergenceLoopToken(bodyBB));
-
// Prepare for a cleanup.
QualType::DestructionKind dtorKind = elementType.isDestructedType();
EHScopeStack::stable_iterator cleanup;
@@ -2043,9 +2034,6 @@ void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E,
llvm::BasicBlock *endBB = CGF.createBasicBlock("arrayinit.end");
Builder.CreateCondBr(done, endBB, bodyBB);
- if (CGF.CGM.shouldEmitConvergenceTokens())
- CGF.ConvergenceTokenStack.pop_back();
-
CGF.EmitBlock(endBB);
// Leave the partial-array cleanup if we entered one.
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index d0d8eed26d8c2..f510195dbd6cb 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -574,13 +574,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
Value *IndexOp = EmitScalarExpr(E->getArg(1));
llvm::Type *RetTy = ConvertType(E->getType());
- llvm::Function *IntrFn = llvm::Intrinsic::getOrInsertDeclaration(
- &CGM.getModule(),
- CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(),
- {RetTy, HandleOp->getType(), IndexOp->getType()});
- llvm::CallInst *CI = EmitRuntimeCall(IntrFn, {HandleOp, IndexOp});
- CI->setCallingConv(IntrFn->getCallingConv());
- return CI;
+ return Builder.CreateIntrinsic(
+ RetTy, CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(),
+ ArrayRef<Value *>{HandleOp, IndexOp});
}
case Builtin::BI__builtin_hlsl_resource_sample: {
Value *HandleOp = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 99b3a140b48c2..1e25172d18890 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -657,16 +657,8 @@ CGHLSLRuntime::emitDXILUserSemanticLoad(llvm::IRBuilder<> &B, llvm::Type *Type,
llvm::PoisonValue::get(B.getInt32Ty())};
llvm::Intrinsic::ID IntrinsicID = llvm::Intrinsic::dx_load_input;
-
- SmallVector<OperandBundleDef, 1> OB;
- if (auto *Token = getConvergenceToken(*B.GetInsertBlock())) {
- llvm::Value *bundleArgs[] = {Token};
- OB.emplace_back("convergencectrl", bundleArgs);
- }
-
- llvm::Function *IntrFn = llvm::Intrinsic::getOrInsertDeclaration(
- B.GetInsertBlock()->getModule(), IntrinsicID, {Type});
- llvm::Value *Value = B.CreateCall(IntrFn, Args, OB, VariableName);
+ llvm::Value *Value = B.CreateIntrinsic(/*ReturnType=*/Type, IntrinsicID, Args,
+ nullptr, VariableName);
return Value;
}
@@ -684,16 +676,7 @@ void CGHLSLRuntime::emitDXILUserSemanticStore(llvm::IRBuilder<> &B,
Source};
llvm::Intrinsic::ID IntrinsicID = llvm::Intrinsic::dx_store_output;
-
- SmallVector<OperandBundleDef, 1> OB;
- if (auto *Token = getConvergenceToken(*B.GetInsertBlock())) {
- llvm::Value *bundleArgs[] = {Token};
- OB.emplace_back("convergencectrl", bundleArgs);
- }
-
- llvm::Function *IntrFn = llvm::Intrinsic::getOrInsertDeclaration(
- B.GetInsertBlock()->getModule(), IntrinsicID, {Source->getType()});
- B.CreateCall(IntrFn, Args, OB);
+ B.CreateIntrinsic(/*ReturnType=*/CGM.VoidTy, IntrinsicID, Args, nullptr);
}
llvm::Value *CGHLSLRuntime::emitUserSemanticLoad(
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index d1752b86b6603..29b87a0616992 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -5437,11 +5437,11 @@ class CodeGenFunction : public CodeGenTypeCache {
void maybeAttachRangeForLoad(llvm::LoadInst *Load, QualType Ty,
SourceLocation Loc);
+private:
// Emits a convergence_loop instruction for the given |BB|, with |ParentToken|
// as it's parent convergence instr.
llvm::ConvergenceControlInst *emitConvergenceLoopToken(llvm::BasicBlock *BB);
-private:
// Adds a convergence_ctrl token with |ParentToken| as parent convergence
// instr to the call |Input|.
llvm::CallBase *addConvergenceControlToken(llvm::CallBase *Input);
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index dc296919aa32f..d62707a3355c9 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1815,7 +1815,7 @@ class CodeGenModule : public CodeGenTypeCache {
bool shouldEmitConvergenceTokens() const {
// TODO: this should probably become unconditional once the controlled
// convergence becomes the norm.
- return getTriple().isSPIRVLogical() || getTriple().isDXIL();
+ return getTriple().isSPIRVLogical();
}
void addUndefinedGlobalForTailCall(
diff --git a/clang/test/CodeGenDirectX/Builtins/dot2add.c b/clang/test/CodeGenDirectX/Builtins/dot2add.c
index bc5073995522e..4275a285012b0 100644
--- a/clang/test/CodeGenDirectX/Builtins/dot2add.c
+++ b/clang/test/CodeGenDirectX/Builtins/dot2add.c
@@ -8,7 +8,6 @@ typedef half half2 __attribute__((ext_vector_type(2)));
// CHECK-LABEL: define float @test_dot2add(
// CHECK-SAME: <2 x half> noundef [[X:%.*]], <2 x half> noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca <2 x half>, align 2
// CHECK-NEXT: [[Y_ADDR:%.*]] = alloca <2 x half>, align 2
// CHECK-NEXT: [[Z_ADDR:%.*]] = alloca float, align 4
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayReturn.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayReturn.hlsl
index b4235eed318e4..832c4ac9b10f5 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayReturn.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayReturn.hlsl
@@ -3,14 +3,12 @@
typedef int Foo[2];
// CHECK-LABEL: define void {{.*}}boop{{.*}}(ptr dead_on_unwind noalias writable sret([2 x i32]) align 4 %agg.result)
-// CHECK: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK: [[G:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[G]], ptr align 4 {{.*}}, i32 8, i1 false)
// CHECK-NEXT: [[AIB:%.*]] = getelementptr inbounds [2 x i32], ptr %agg.result, i32 0, i32 0
// CHECK-NEXT: br label %arrayinit.body
// CHECK: arrayinit.body:
// CHECK-NEXT: [[AII:%.*]] = phi i32 [ 0, %entry ], [ %arrayinit.next, %arrayinit.body ]
-// CHECK-NEXT: %[[#CV_LOOP:]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %[[#C_ENTRY]]) ]
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds i32, ptr [[AIB]], i32 [[AII]]
// CHECK-NEXT: [[AI:%.*]] = getelementptr inbounds nuw [2 x i32], ptr [[G]], i32 0, i32 [[AII]]
// CHECK-NEXT: [[Y:%.*]] = load i32, ptr [[AI]], align 4
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
index 40f32c28ad5ed..9c42da8962c2d 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
@@ -66,7 +66,6 @@ struct UnnamedDerived : UnnamedOnly {};
// CHECK-LABEL: define hidden void @_Z5case1v(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_RESULT]], ptr align 1 @__const._Z5case1v.TF1, i32 8, i1 false)
// CHECK-NEXT: ret void
//
@@ -79,7 +78,6 @@ TwoFloats case1() {
// CHECK-LABEL: define hidden void @_Z5case2v(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_RESULT]], ptr align 1 @__const._Z5case2v.TF2, i32 8, i1 false)
// CHECK-NEXT: ret void
//
@@ -92,7 +90,6 @@ TwoFloats case2() {
// CHECK-LABEL: define hidden void @_Z5case3i(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], i32 noundef [[VAL:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
@@ -113,7 +110,6 @@ TwoFloats case3(int Val) {
// CHECK-LABEL: define hidden void @_Z5case4Dv2_i(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], <2 x i32> noundef [[TWOVALS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[TWOVALS_ADDR:%.*]] = alloca <2 x i32>, align 4
// CHECK-NEXT: store <2 x i32> [[TWOVALS]], ptr [[TWOVALS_ADDR]], align 4
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
@@ -137,7 +133,6 @@ TwoFloats case4(int2 TwoVals) {
// CHECK-LABEL: define hidden void @_Z5case5Dv2_i(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOINTS:%.*]]) align 1 [[AGG_RESULT:%.*]], <2 x i32> noundef [[TWOVALS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[TWOVALS_ADDR:%.*]] = alloca <2 x i32>, align 4
// CHECK-NEXT: store <2 x i32> [[TWOVALS]], ptr [[TWOVALS_ADDR]], align 4
// CHECK-NEXT: [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 0
@@ -160,7 +155,6 @@ TwoInts case5(int2 TwoVals) {
// CHECK-LABEL: define hidden void @_Z5case69TwoFloats(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOINTS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS:%.*]]) align 1 [[TF4:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 0
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF4]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[X]], align 1
@@ -183,7 +177,6 @@ TwoInts case6(TwoFloats TF4) {
// CHECK-LABEL: define hidden void @_Z5case77TwoIntsS_i9TwoFloatsS0_S0_S0_(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_DOGGO:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_TWOINTS:%.*]]) align 1 [[TI1:%.*]], ptr noundef byval([[STRUCT_TWOINTS]]) align 1 [[TI2:%.*]], i32 noundef [[VAL:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS:%.*]]) align 1 [[TF1:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS]]) align 1 [[TF2:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS]]) align 1 [[TF3:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS]]) align 1 [[TF4:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
// CHECK-NEXT: [[LEGSTATE:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 0
@@ -248,7 +241,6 @@ Doggo case7(TwoInts TI1, TwoInts TI2, int Val, TwoFloats TF1, TwoFloats TF2,
// CHECK-LABEL: define hidden void @_Z5case85Doggo(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_ANIMALBITS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_DOGGO:%.*]]) align 1 [[D1:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[LEGS:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[AGG_RESULT]], i32 0, i32 0
// CHECK-NEXT: [[LEGSTATE:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[D1]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[LEGSTATE]], align 1
@@ -335,7 +327,6 @@ AnimalBits case8(Doggo D1) {
// CHECK-LABEL: define hidden void @_Z5case95Doggo10AnimalBits(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_ZOO:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_DOGGO:%.*]]) align 1 [[D1:%.*]], ptr noundef byval([[STRUCT_ANIMALBITS:%.*]]) align 1 [[A1:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[DOGS:%.*]] = getelementptr inbounds nuw [[STRUCT_ZOO]], ptr [[AGG_RESULT]], i32 0, i32 0
// CHECK-NEXT: [[LEGSTATE:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[DOGS]], i32 0, i32 0
// CHECK-NEXT: [[LEGSTATE1:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[D1]], i32 0, i32 0
@@ -752,7 +743,6 @@ Zoo case9(Doggo D1, AnimalBits A1) {
// CHECK-LABEL: define hidden void @_Z6case109TwoFloatsS_(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOURFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS:%.*]]) align 1 [[TF1:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS]]) align 1 [[TF2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
// CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF1]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[X1]], align 1
@@ -780,7 +770,6 @@ FourFloats case10(TwoFloats TF1, TwoFloats TF2) {
// CHECK-LABEL: define hidden void @_Z6case11f(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOURFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
// CHECK-NEXT: [[REF_TMP:%.*]] = alloca <4 x float>, align 4
// CHECK-NEXT: [[REF_TMP1:%.*]] = alloca <4 x float>, align 4
@@ -830,7 +819,6 @@ FourFloats case11(float F) {
// CHECK-LABEL: define hidden void @_Z6case12ii(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_SLICYBITS:%.*]]) align 1 [[AGG_RESULT:%.*]], i32 noundef [[I:%.*]], i32 noundef [[J:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[J_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
@@ -853,7 +841,6 @@ SlicyBits case12(int I, int J) {
// CHECK-LABEL: define hidden void @_Z6case137TwoInts(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_SLICYBITS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_TWOINTS:%.*]]) align 1 [[TI:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[TI]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Z]], align 1
// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
@@ -874,7 +861,6 @@ SlicyBits case13(TwoInts TI) {
// CHECK-LABEL: define hidden void @_Z6case149SlicyBits(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOINTS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_SLICYBITS:%.*]]) align 1 [[SB:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 0
// CHECK-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[SB]], align 1
// CHECK-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32
@@ -895,7 +881,6 @@ TwoInts case14(SlicyBits SB) {
// CHECK-LABEL: define hidden void @_Z6case159SlicyBits(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_SLICYBITS:%.*]]) align 1 [[SB:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
// CHECK-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[SB]], align 1
// CHECK-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32
@@ -919,7 +904,6 @@ TwoFloats case15(SlicyBits SB) {
// CHECK-LABEL: define hidden void @_Z7makeTwoRf(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noalias noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4
// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4
// CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
@@ -946,7 +930,6 @@ TwoFloats makeTwo(inout float X) {
// CHECK-LABEL: define hidden void @_Z6case16v(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOURFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[X:%.*]] = alloca float, align 4
// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_TWOFLOATS:%.*]], align 1
// CHECK-NEXT: [[TMP:%.*]] = alloca float, align 4
@@ -980,7 +963,6 @@ FourFloats case16() {
// CHECK-LABEL: define hidden noundef i32 @_Z12case17Helperi(
// CHECK-SAME: i32 noundef [[X:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
@@ -994,7 +976,6 @@ int case17Helper(int x) {
// CHECK-LABEL: define hidden void @_Z6case17v(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[X:%.*]] = alloca <2 x i32>, align 4
// CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_Z12case17Helperi(i32 noundef 0) #[[ATTR2]]
// CHECK-NEXT: [[CALL1:%.*]] = call noundef i32 @_Z12case17Helperi(i32 nou...
[truncated]
|
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/40936 Here is the relevant piece of the build log for the reference |
…g DirectX" (llvm#193090) This change appears to introduce complications when trying to do a full loop unroll that is exhibited here: https://github.com/llvm/llvm-project/actions/runs/24577221310/job/71865579618. This results in invalid DXIL as the unreachable branch is not correctly cleaned up. Initial leads look like this is because the instructions with convergence control tokens are still being used for analysis when they are within an unreachable branch. Reverts llvm#188792
This change appears to introduce complications when trying to do a full loop unroll that is exhibited here: https://github.com/llvm/llvm-project/actions/runs/24577221310/job/71865579618. This results in invalid DXIL as the unreachable branch is not correctly cleaned up.
Initial leads look like this is because the instructions with convergence control tokens are still being used for analysis when they are within an unreachable branch.
Reverts #188792