Reland "[HLSL][DirectX] Emit convergence control tokens when targeting DirectX" #194452
Conversation
🪟 Windows x64 Test Results
✅ The build succeeded and all tests passed. |
|
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-llvm-transforms Author: Finn Plummer (inbelic) Changes: The initial landing surfaced 3 somewhat orthogonal issues related to loop unrolling. These were addressed here, here and here. The issues caused these tests to fail in the offload test suite. We can verify that the tests now pass as expected (fixing any one of the 3 issues would resolve the failures and allow us to reland). Some additional tests were added since the revert; they are now accounted for and updated in the reland fixes commit. Patch is 235.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/194452.diff 76 Files Affected:
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index a4282c4f51199..2dba4b98053cb 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -726,6 +726,9 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
Builder.CreatePHI(element->getType(), 2, "arrayinit.cur");
currentElement->addIncoming(element, entryBB);
+ if (CGF.CGM.shouldEmitConvergenceTokens())
+ CGF.ConvergenceTokenStack.push_back(CGF.emitConvergenceLoopToken(bodyBB));
+
// Emit the actual filler expression.
{
// C++1z [class.temporary]p5:
@@ -757,6 +760,9 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
Builder.CreateCondBr(done, endBB, bodyBB);
currentElement->addIncoming(nextElement, Builder.GetInsertBlock());
+ if (CGF.CGM.shouldEmitConvergenceTokens())
+ CGF.ConvergenceTokenStack.pop_back();
+
CGF.EmitBlock(endBB);
}
}
@@ -1998,6 +2004,9 @@ void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E,
llvm::Value *element =
Builder.CreateInBoundsGEP(llvmElementType, begin, index);
+ if (CGF.CGM.shouldEmitConvergenceTokens())
+ CGF.ConvergenceTokenStack.push_back(CGF.emitConvergenceLoopToken(bodyBB));
+
// Prepare for a cleanup.
QualType::DestructionKind dtorKind = elementType.isDestructedType();
EHScopeStack::stable_iterator cleanup;
@@ -2045,6 +2054,9 @@ void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E,
llvm::BasicBlock *endBB = CGF.createBasicBlock("arrayinit.end");
Builder.CreateCondBr(done, endBB, bodyBB);
+ if (CGF.CGM.shouldEmitConvergenceTokens())
+ CGF.ConvergenceTokenStack.pop_back();
+
CGF.EmitBlock(endBB);
// Leave the partial-array cleanup if we entered one.
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index b82a237ecefca..18e705d03cc7e 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -574,9 +574,13 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
Value *IndexOp = EmitScalarExpr(E->getArg(1));
llvm::Type *RetTy = ConvertType(E->getType());
- return Builder.CreateIntrinsic(
- RetTy, CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(),
- ArrayRef<Value *>{HandleOp, IndexOp});
+ llvm::Function *IntrFn = llvm::Intrinsic::getOrInsertDeclaration(
+ &CGM.getModule(),
+ CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(),
+ {RetTy, HandleOp->getType(), IndexOp->getType()});
+ llvm::CallInst *CI = EmitRuntimeCall(IntrFn, {HandleOp, IndexOp});
+ CI->setCallingConv(IntrFn->getCallingConv());
+ return CI;
}
case Builtin::BI__builtin_hlsl_resource_sample: {
Value *HandleOp = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index a134f6aab9490..cb006d6858c04 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -757,8 +757,16 @@ CGHLSLRuntime::emitDXILUserSemanticLoad(llvm::IRBuilder<> &B, llvm::Type *Type,
llvm::PoisonValue::get(B.getInt32Ty())};
llvm::Intrinsic::ID IntrinsicID = llvm::Intrinsic::dx_load_input;
- llvm::Value *Value = B.CreateIntrinsic(/*ReturnType=*/Type, IntrinsicID, Args,
- nullptr, VariableName);
+
+ SmallVector<OperandBundleDef, 1> OB;
+ if (auto *Token = getConvergenceToken(*B.GetInsertBlock())) {
+ llvm::Value *bundleArgs[] = {Token};
+ OB.emplace_back("convergencectrl", bundleArgs);
+ }
+
+ llvm::Function *IntrFn = llvm::Intrinsic::getOrInsertDeclaration(
+ B.GetInsertBlock()->getModule(), IntrinsicID, {Type});
+ llvm::Value *Value = B.CreateCall(IntrFn, Args, OB, VariableName);
return Value;
}
@@ -776,7 +784,16 @@ void CGHLSLRuntime::emitDXILUserSemanticStore(llvm::IRBuilder<> &B,
Source};
llvm::Intrinsic::ID IntrinsicID = llvm::Intrinsic::dx_store_output;
- B.CreateIntrinsic(/*ReturnType=*/CGM.VoidTy, IntrinsicID, Args, nullptr);
+
+ SmallVector<OperandBundleDef, 1> OB;
+ if (auto *Token = getConvergenceToken(*B.GetInsertBlock())) {
+ llvm::Value *bundleArgs[] = {Token};
+ OB.emplace_back("convergencectrl", bundleArgs);
+ }
+
+ llvm::Function *IntrFn = llvm::Intrinsic::getOrInsertDeclaration(
+ B.GetInsertBlock()->getModule(), IntrinsicID, {Source->getType()});
+ B.CreateCall(IntrFn, Args, OB);
}
llvm::Value *CGHLSLRuntime::emitUserSemanticLoad(
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 29b87a0616992..d1752b86b6603 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -5437,11 +5437,11 @@ class CodeGenFunction : public CodeGenTypeCache {
void maybeAttachRangeForLoad(llvm::LoadInst *Load, QualType Ty,
SourceLocation Loc);
-private:
// Emits a convergence_loop instruction for the given |BB|, with |ParentToken|
// as it's parent convergence instr.
llvm::ConvergenceControlInst *emitConvergenceLoopToken(llvm::BasicBlock *BB);
+private:
// Adds a convergence_ctrl token with |ParentToken| as parent convergence
// instr to the call |Input|.
llvm::CallBase *addConvergenceControlToken(llvm::CallBase *Input);
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index d62707a3355c9..dc296919aa32f 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1815,7 +1815,7 @@ class CodeGenModule : public CodeGenTypeCache {
bool shouldEmitConvergenceTokens() const {
// TODO: this should probably become unconditional once the controlled
// convergence becomes the norm.
- return getTriple().isSPIRVLogical();
+ return getTriple().isSPIRVLogical() || getTriple().isDXIL();
}
void addUndefinedGlobalForTailCall(
diff --git a/clang/test/CodeGenDirectX/Builtins/dot2add.c b/clang/test/CodeGenDirectX/Builtins/dot2add.c
index 4275a285012b0..bc5073995522e 100644
--- a/clang/test/CodeGenDirectX/Builtins/dot2add.c
+++ b/clang/test/CodeGenDirectX/Builtins/dot2add.c
@@ -8,6 +8,7 @@ typedef half half2 __attribute__((ext_vector_type(2)));
// CHECK-LABEL: define float @test_dot2add(
// CHECK-SAME: <2 x half> noundef [[X:%.*]], <2 x half> noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca <2 x half>, align 2
// CHECK-NEXT: [[Y_ADDR:%.*]] = alloca <2 x half>, align 2
// CHECK-NEXT: [[Z_ADDR:%.*]] = alloca float, align 4
diff --git a/clang/test/CodeGenHLSL/ArrayAssignable.logicalptr.hlsl b/clang/test/CodeGenHLSL/ArrayAssignable.logicalptr.hlsl
index 9816c79be2a23..e5fa7e62f739d 100644
--- a/clang/test/CodeGenHLSL/ArrayAssignable.logicalptr.hlsl
+++ b/clang/test/CodeGenHLSL/ArrayAssignable.logicalptr.hlsl
@@ -32,6 +32,7 @@ cbuffer CBArrays : register(b0) {
// CHECK-DXIL-LABEL: define hidden void @_Z11arr_assign1v(
// CHECK-DXIL-SAME: ) #[[ATTR2:[0-9]+]] {
// CHECK-DXIL-NEXT: [[ENTRY:.*:]]
+// CHECK-DXIL-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-DXIL-NEXT: [[ARR:%.*]] = call elementtype([2 x i32]) ptr @llvm.structured.alloca.p0()
// CHECK-DXIL-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARR]], ptr align 4 @__const._Z11arr_assign1v.Arr, i32 8, i1 false)
// CHECK-DXIL-NEXT: [[ARR2:%.*]] = call elementtype([2 x i32]) ptr @llvm.structured.alloca.p0()
@@ -59,6 +60,7 @@ void arr_assign1() {
// CHECK-DXIL-LABEL: define hidden void @_Z11arr_assign2v(
// CHECK-DXIL-SAME: ) #[[ATTR2]] {
// CHECK-DXIL-NEXT: [[ENTRY:.*:]]
+// CHECK-DXIL-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-DXIL-NEXT: [[ARR:%.*]] = call elementtype([2 x i32]) ptr @llvm.structured.alloca.p0()
// CHECK-DXIL-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARR]], ptr align 4 @__const._Z11arr_assign2v.Arr, i32 8, i1 false)
// CHECK-DXIL-NEXT: [[ARR2:%.*]] = call elementtype([2 x i32]) ptr @llvm.structured.alloca.p0()
@@ -93,6 +95,7 @@ void arr_assign2() {
// CHECK-DXIL-LABEL: define hidden void @_Z11arr_assign3v(
// CHECK-DXIL-SAME: ) #[[ATTR2]] {
// CHECK-DXIL-NEXT: [[ENTRY:.*:]]
+// CHECK-DXIL-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-DXIL-NEXT: [[ARR2:%.*]] = call elementtype([2 x [2 x i32]]) ptr @llvm.structured.alloca.p0()
// CHECK-DXIL-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARR2]], ptr align 4 @__const._Z11arr_assign3v.Arr2, i32 16, i1 false)
// CHECK-DXIL-NEXT: [[ARR3:%.*]] = call elementtype([2 x [2 x i32]]) ptr @llvm.structured.alloca.p0()
@@ -120,6 +123,7 @@ void arr_assign3() {
// CHECK-DXIL-LABEL: define hidden void @_Z11arr_assign4v(
// CHECK-DXIL-SAME: ) #[[ATTR2]] {
// CHECK-DXIL-NEXT: [[ENTRY:.*:]]
+// CHECK-DXIL-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-DXIL-NEXT: [[ARR:%.*]] = call elementtype([2 x i32]) ptr @llvm.structured.alloca.p0()
// CHECK-DXIL-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARR]], ptr align 4 @__const._Z11arr_assign4v.Arr, i32 8, i1 false)
// CHECK-DXIL-NEXT: [[ARR2:%.*]] = call elementtype([2 x i32]) ptr @llvm.structured.alloca.p0()
@@ -151,6 +155,7 @@ void arr_assign4() {
// CHECK-DXIL-LABEL: define hidden void @_Z11arr_assign5v(
// CHECK-DXIL-SAME: ) #[[ATTR2]] {
// CHECK-DXIL-NEXT: [[ENTRY:.*:]]
+// CHECK-DXIL-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-DXIL-NEXT: [[ARR:%.*]] = call elementtype([2 x i32]) ptr @llvm.structured.alloca.p0()
// CHECK-DXIL-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARR]], ptr align 4 @__const._Z11arr_assign5v.Arr, i32 8, i1 false)
// CHECK-DXIL-NEXT: [[ARR2:%.*]] = call elementtype([2 x i32]) ptr @llvm.structured.alloca.p0()
@@ -189,6 +194,7 @@ void arr_assign5() {
// CHECK-DXIL-LABEL: define hidden void @_Z11arr_assign6v(
// CHECK-DXIL-SAME: ) #[[ATTR2]] {
// CHECK-DXIL-NEXT: [[ENTRY:.*:]]
+// CHECK-DXIL-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-DXIL-NEXT: [[ARR:%.*]] = call elementtype([2 x [2 x i32]]) ptr @llvm.structured.alloca.p0()
// CHECK-DXIL-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARR]], ptr align 4 @__const._Z11arr_assign6v.Arr, i32 16, i1 false)
// CHECK-DXIL-NEXT: [[ARR2:%.*]] = call elementtype([2 x [2 x i32]]) ptr @llvm.structured.alloca.p0()
@@ -222,6 +228,7 @@ void arr_assign6() {
// CHECK-DXIL-LABEL: define hidden void @_Z11arr_assign7v(
// CHECK-DXIL-SAME: ) #[[ATTR2]] {
// CHECK-DXIL-NEXT: [[ENTRY:.*:]]
+// CHECK-DXIL-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-DXIL-NEXT: [[ARR:%.*]] = call elementtype([2 x [2 x i32]]) ptr @llvm.structured.alloca.p0()
// CHECK-DXIL-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARR]], ptr align 4 @__const._Z11arr_assign7v.Arr, i32 16, i1 false)
// CHECK-DXIL-NEXT: [[ARR2:%.*]] = call elementtype([2 x [2 x i32]]) ptr @llvm.structured.alloca.p0()
@@ -258,6 +265,7 @@ void arr_assign7() {
// CHECK-DXIL-LABEL: define hidden void @_Z11arr_assign8v(
// CHECK-DXIL-SAME: ) #[[ATTR2]] {
// CHECK-DXIL-NEXT: [[ENTRY:.*:]]
+// CHECK-DXIL-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-DXIL-NEXT: [[C:%.*]] = call elementtype([2 x float]) ptr @llvm.structured.alloca.p0()
// CHECK-DXIL-NEXT: [[TMP0:%.*]] = call ptr addrspace(2) (ptr addrspace(2), ...) @llvm.structured.gep.p2(ptr addrspace(2) elementtype(<{ [1 x <{ float, target("dx.Padding", 12) }>], float }>) @c1, i32 0, i32 0, i32 0)
// CHECK-DXIL-NEXT: [[TMP1:%.*]] = call ptr (ptr, ...) @llvm.structured.gep.p0(ptr elementtype([2 x float]) [[C]], i32 0)
@@ -292,6 +300,7 @@ void arr_assign8() {
// CHECK-DXIL-LABEL: define hidden void @_Z11arr_assign9v(
// CHECK-DXIL-SAME: ) #[[ATTR2]] {
// CHECK-DXIL-NEXT: [[ENTRY:.*:]]
+// CHECK-DXIL-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-DXIL-NEXT: [[C:%.*]] = call elementtype([2 x <4 x i32>]) ptr @llvm.structured.alloca.p0()
// CHECK-DXIL-NEXT: [[TMP0:%.*]] = call ptr addrspace(2) (ptr addrspace(2), ...) @llvm.structured.gep.p2(ptr addrspace(2) elementtype([2 x <4 x i32>]) @c2, i32 0)
// CHECK-DXIL-NEXT: [[TMP1:%.*]] = call ptr (ptr, ...) @llvm.structured.gep.p0(ptr elementtype([2 x <4 x i32>]) [[C]], i32 0)
@@ -329,6 +338,7 @@ void arr_assign9() {
// CHECK-DXIL-LABEL: define hidden void @_Z12arr_assign10v(
// CHECK-DXIL-SAME: ) #[[ATTR2]] {
// CHECK-DXIL-NEXT: [[ENTRY:.*:]]
+// CHECK-DXIL-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-DXIL-NEXT: [[C:%.*]] = call elementtype([2 x [2 x i32]]) ptr @llvm.structured.alloca.p0()
// CHECK-DXIL-NEXT: [[TMP0:%.*]] = call ptr addrspace(2) (ptr addrspace(2), ...) @llvm.structured.gep.p2(ptr addrspace(2) elementtype(<{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>) @c3, i32 0, i32 0, i32 0)
// CHECK-DXIL-NEXT: [[TMP1:%.*]] = call ptr (ptr, ...) @llvm.structured.gep.p0(ptr elementtype([2 x [2 x i32]]) [[C]], i32 0)
@@ -387,6 +397,7 @@ void arr_assign10() {
// CHECK-DXIL-LABEL: define hidden void @_Z12arr_assign11v(
// CHECK-DXIL-SAME: ) #[[ATTR2]] {
// CHECK-DXIL-NEXT: [[ENTRY:.*:]]
+// CHECK-DXIL-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-DXIL-NEXT: [[C:%.*]] = call elementtype([2 x [[STRUCT_S:%.*]]]) ptr @llvm.structured.alloca.p0()
// CHECK-DXIL-NEXT: [[TMP0:%.*]] = call ptr addrspace(2) (ptr addrspace(2), ...) @llvm.structured.gep.p2(ptr addrspace(2) elementtype(<{ [1 x <{ [[S:%.*]], target("dx.Padding", 8) }>], [[S]] }>) @c4, i32 0, i32 0, i32 0)
// CHECK-DXIL-NEXT: [[TMP1:%.*]] = call ptr (ptr, ...) @llvm.structured.gep.p0(ptr elementtype([2 x [[STRUCT_S]]]) [[C]], i32 0)
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayReturn.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayReturn.hlsl
index 832c4ac9b10f5..b4235eed318e4 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayReturn.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayReturn.hlsl
@@ -3,12 +3,14 @@
typedef int Foo[2];
// CHECK-LABEL: define void {{.*}}boop{{.*}}(ptr dead_on_unwind noalias writable sret([2 x i32]) align 4 %agg.result)
+// CHECK: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK: [[G:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[G]], ptr align 4 {{.*}}, i32 8, i1 false)
// CHECK-NEXT: [[AIB:%.*]] = getelementptr inbounds [2 x i32], ptr %agg.result, i32 0, i32 0
// CHECK-NEXT: br label %arrayinit.body
// CHECK: arrayinit.body:
// CHECK-NEXT: [[AII:%.*]] = phi i32 [ 0, %entry ], [ %arrayinit.next, %arrayinit.body ]
+// CHECK-NEXT: %[[#CV_LOOP:]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %[[#C_ENTRY]]) ]
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds i32, ptr [[AIB]], i32 [[AII]]
// CHECK-NEXT: [[AI:%.*]] = getelementptr inbounds nuw [2 x i32], ptr [[G]], i32 0, i32 [[AII]]
// CHECK-NEXT: [[Y:%.*]] = load i32, ptr [[AI]], align 4
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
index e9661a6e2b8be..c8960fecb80e8 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
@@ -66,6 +66,7 @@ struct UnnamedDerived : UnnamedOnly {};
// CHECK-LABEL: define hidden void @_Z5case1v(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_RESULT]], ptr align 1 @__const._Z5case1v.TF1, i32 8, i1 false)
// CHECK-NEXT: ret void
//
@@ -78,6 +79,7 @@ TwoFloats case1() {
// CHECK-LABEL: define hidden void @_Z5case2v(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_RESULT]], ptr align 1 @__const._Z5case2v.TF2, i32 8, i1 false)
// CHECK-NEXT: ret void
//
@@ -90,6 +92,7 @@ TwoFloats case2() {
// CHECK-LABEL: define hidden void @_Z5case3i(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], i32 noundef [[VAL:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
@@ -110,6 +113,7 @@ TwoFloats case3(int Val) {
// CHECK-LABEL: define hidden void @_Z5case4Dv2_i(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], <2 x i32> noundef [[TWOVALS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[TWOVALS_ADDR:%.*]] = alloca <2 x i32>, align 4
// CHECK-NEXT: store <2 x i32> [[TWOVALS]], ptr [[TWOVALS_ADDR]], align 4
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
@@ -133,6 +137,7 @@ TwoFloats case4(int2 TwoVals) {
// CHECK-LABEL: define hidden void @_Z5case5Dv2_i(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOINTS:%.*]]) align 1 [[AGG_RESULT:%.*]], <2 x i32> noundef [[TWOVALS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[TWOVALS_ADDR:%.*]] = alloca <2 x i32>, align 4
// CHECK-NEXT: store <2 x i32> [[TWOVALS]], ptr [[TWOVALS_ADDR]], align 4
// CHECK-NEXT: [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 0
@@ -155,6 +160,7 @@ TwoInts case5(int2 TwoVals) {
// CHECK-LABEL: define hidden void @_Z5case69TwoFloats(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOINTS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS:%.*]]) align 1 [[TF4:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 0
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF4]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[X]], align 1
@@ -177,6 +183,7 @@ TwoInts case6(TwoFloats TF4) {
// CHECK-LABEL: define hidden void @_Z5case77TwoIntsS_i9TwoFloatsS0_S0_S0_(
// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_DOGGO:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_TWOINTS:%.*]]) align 1 [[TI1:%.*]], ptr noundef byval([[STRUCT_TWOINTS]]) align 1 [[TI2:%.*]], i32 noundef [[VAL:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS:%.*]]) align 1 [[TF1:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS]]) align 1 [[TF2:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS]]) align 1 [[TF3:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS]]) align 1 [[TF4:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
// CHECK-NEXT: [[LEGSTATE:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 0
@@ -241,6 +248,7 @@ Doggo case7(TwoInts TI1, TwoInts TI2...
[truncated]
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed. |
The initial landing surfaced 3 somewhat orthogonal issues related to loop unrolling. These are addressed: here, here and here.
The issues caused these tests to fail in the offload test suite.
We can verify that the tests now pass as expected (fixing any one of the 3 issues would resolve the failures and allow us to reland).
Some additional tests were added since the revert; they are now accounted for and updated in the reland fixes commit.
This relands #188792