diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp index 3d75d7455101f..a077d82348e5e 100644 --- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp +++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp @@ -8,6 +8,7 @@ #include "DXILResourceAccess.h" #include "DirectX.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/DXILResource.h" #include "llvm/Frontend/HLSL/HLSLResource.h" @@ -22,6 +23,7 @@ #include "llvm/IR/User.h" #include "llvm/InitializePasses.h" #include "llvm/Support/FormatVariadic.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/ValueMapper.h" #define DEBUG_TYPE "dxil-resource-access" @@ -57,7 +59,7 @@ static Value *traverseGEPOffsets(const DataLayout &DL, IRBuilder<> &Builder, GEPOffset = *GEP->idx_begin(); } else if (NumIndices == 2) { // If we have two indices, this should be an access through a pointer. - auto IndexIt = GEP->idx_begin(); + auto *IndexIt = GEP->idx_begin(); assert(cast(IndexIt)->getZExtValue() == 0 && "GEP is not indexing through pointer"); GEPOffset = *(++IndexIt); @@ -419,6 +421,204 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, llvm_unreachable("Unhandled case in switch"); } +static Instruction *getPointerOperand(Instruction *AI) { + if (auto *LI = dyn_cast(AI)) + return dyn_cast(LI->getPointerOperand()); + if (auto *SI = dyn_cast(AI)) + return dyn_cast(SI->getPointerOperand()); + + return nullptr; +} + +static const std::array HandleIntrins = { + Intrinsic::dx_resource_handlefrombinding, + Intrinsic::dx_resource_handlefromimplicitbinding, +}; + +static SmallVector collectUsedHandles(Value *Ptr) { + SmallVector Worklist = {Ptr}; + SmallVector Handles; + + while (!Worklist.empty()) { + Value *X = Worklist.pop_back_val(); + + if (!X->getType()->isPointerTy() && !X->getType()->isTargetExtTy()) + return {}; // Early exit on store/load into non-resource + + if (auto *Phi = dyn_cast(X)) + for (Use &V 
: Phi->incoming_values()) + Worklist.push_back(V.get()); + else if (auto *Select = dyn_cast(X)) + for (Value *V : {Select->getTrueValue(), Select->getFalseValue()}) + Worklist.push_back(V); + else if (auto *II = dyn_cast(X)) { + Intrinsic::ID IID = II->getIntrinsicID(); + + if (IID == Intrinsic::dx_resource_getpointer) + Worklist.push_back(II->getArgOperand(/*Handle=*/0)); + + if (llvm::is_contained(HandleIntrins, IID)) + Handles.push_back(II); + } + } + + return Handles; +} + +static hlsl::Binding getHandleIntrinsicBinding(IntrinsicInst *Handle, + DXILResourceTypeMap &DRTM) { + assert(llvm::is_contained(HandleIntrins, Handle->getIntrinsicID()) && + "Only expects a Handle as determined from collectUsedHandles."); + + auto *HandleTy = cast(Handle->getType()); + dxil::ResourceClass Class = DRTM[HandleTy].getResourceClass(); + uint32_t Space = cast(Handle->getArgOperand(0))->getZExtValue(); + uint32_t LowerBound = + cast(Handle->getArgOperand(1))->getZExtValue(); + int32_t Size = cast(Handle->getArgOperand(2))->getZExtValue(); + uint32_t UpperBound = Size < 0 ? UINT32_MAX : LowerBound + Size - 1; + + return hlsl::Binding(Class, Space, LowerBound, UpperBound, nullptr); +} + +namespace { +/// Helper for propagating the current handle and ptr indices. 
+struct AccessIndicies { + Value *GetPtrIdx; + Value *HandleIdx; + + bool hasGetPtrIdx() { return GetPtrIdx != nullptr; } +}; +} // namespace + +// getAccessIndicies traverses up the control flow that a ptr came from and +// propagates back the indices used to access the resource (AccessIndicies): +// +// - GetPtrIdx is the index of dx.resource.getpointer +// - HandleIdx is the index of dx.resource.handlefrom.* +static AccessIndicies +getAccessIndicies(Instruction *I, + SmallSetVector &DeadInsts) { + if (auto *II = dyn_cast(I)) { + if (llvm::is_contained(HandleIntrins, II->getIntrinsicID())) { + DeadInsts.insert(II); + return {nullptr, II->getArgOperand(/*Index=*/3)}; + } + + if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer) { + auto *V = dyn_cast(II->getArgOperand(/*Handle=*/0)); + auto AccessIdx = getAccessIndicies(V, DeadInsts); + assert(!AccessIdx.hasGetPtrIdx() && + "Encountered multiple dx.resource.getpointers in ptr chain?"); + AccessIdx.GetPtrIdx = II->getArgOperand(1); + + DeadInsts.insert(II); + return AccessIdx; + } + } + + if (auto *Phi = dyn_cast(I)) { + unsigned NumEdges = Phi->getNumIncomingValues(); + assert(NumEdges != 0 && "Malformed Phi Node"); + + IRBuilder<> Builder(Phi); + PHINode *GetPtrPhi = PHINode::Create(Builder.getInt32Ty(), NumEdges); + PHINode *HandlePhi = PHINode::Create(Builder.getInt32Ty(), NumEdges); + + bool HasGetPtr = true; + for (unsigned I = 0; I < NumEdges; I++) { + auto *BB = Phi->getIncomingBlock(I); + auto *V = dyn_cast(Phi->getIncomingValue(I)); + auto AccessIdx = getAccessIndicies(V, DeadInsts); + HasGetPtr &= AccessIdx.hasGetPtrIdx(); + if (HasGetPtr) + GetPtrPhi->addIncoming(AccessIdx.GetPtrIdx, BB); + HandlePhi->addIncoming(AccessIdx.HandleIdx, BB); + } + + if (HasGetPtr) + Builder.Insert(GetPtrPhi); + else + GetPtrPhi = nullptr; + + Builder.Insert(HandlePhi); + + DeadInsts.insert(Phi); + return {GetPtrPhi, HandlePhi}; + } + + if (auto *Select = dyn_cast(I)) { + auto *TrueV = 
dyn_cast(Select->getTrueValue()); + auto TrueAccessIdx = getAccessIndicies(TrueV, DeadInsts); + + auto *FalseV = dyn_cast(Select->getFalseValue()); + auto FalseAccessIdx = getAccessIndicies(FalseV, DeadInsts); + + IRBuilder<> Builder(Select); + Value *GetPtrSelect = nullptr; + + if (TrueAccessIdx.hasGetPtrIdx() && FalseAccessIdx.hasGetPtrIdx()) + GetPtrSelect = + Builder.CreateSelect(Select->getCondition(), TrueAccessIdx.GetPtrIdx, + FalseAccessIdx.GetPtrIdx); + + auto *HandleSelect = + Builder.CreateSelect(Select->getCondition(), TrueAccessIdx.HandleIdx, + FalseAccessIdx.HandleIdx); + DeadInsts.insert(Select); + return {GetPtrSelect, HandleSelect}; + } + + llvm_unreachable("collectUsedHandles should assure this does not occur"); +} + +static void +replaceHandleWithIndicies(Instruction *Ptr, IntrinsicInst *OldHandle, + SmallSetVector &DeadInsts) { + auto AccessIdx = getAccessIndicies(Ptr, DeadInsts); + + IRBuilder<> Builder(Ptr); + IntrinsicInst *Handle = cast(OldHandle->clone()); + Handle->setArgOperand(/*Index=*/3, AccessIdx.HandleIdx); + Builder.Insert(Handle); + + auto *GetPtr = + Builder.CreateIntrinsic(Ptr->getType(), Intrinsic::dx_resource_getpointer, + {Handle, AccessIdx.GetPtrIdx}); + + Ptr->replaceAllUsesWith(GetPtr); + DeadInsts.insert(Ptr); +} + +static bool tryReplaceHandlesWithIndices(Function &F, + DXILResourceTypeMap &DRTM) { + SmallSetVector DeadInsts; + for (BasicBlock &BB : make_early_inc_range(F)) + for (Instruction &I : BB) + if (auto *PtrOp = getPointerOperand(&I)) { + SmallVector Handles = collectUsedHandles(PtrOp); + unsigned NumHandles = Handles.size(); + if (NumHandles <= 1) + continue; // No-replacement required + + bool CanReplace = true; + hlsl::Binding B = getHandleIntrinsicBinding(Handles[0], DRTM); + for (unsigned I = 1; I < NumHandles; I++) + CanReplace &= (B == getHandleIntrinsicBinding(Handles[I], DRTM)); + + if (CanReplace) + replaceHandleWithIndicies(PtrOp, Handles[0], DeadInsts); + } + + bool MadeChanges = DeadInsts.size() > 0; 
+ + for (auto *I : llvm::reverse(DeadInsts)) + if (I->hasNUses(0)) // Handle may be used elsewhere aside from replaced path + I->eraseFromParent(); + + return MadeChanges; +} + static SmallVector collectBlockUseDef(Instruction *Start) { SmallPtrSet Visited; SmallVector Worklist; @@ -525,6 +725,42 @@ static void phiNodeReplacement(IntrinsicInst *II, CurrBBDeadInsts.clear(); } +static bool hoistGetPtrUses(Function &F, DXILResourceTypeMap &DRTM) { + SetVector DeadBB; + SmallVector PrevBBDeadInsts; + + for (BasicBlock &BB : make_early_inc_range(F)) + for (Instruction &I : make_early_inc_range(BB)) + if (auto *II = dyn_cast(&I)) + if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer) + phiNodeReplacement(II, PrevBBDeadInsts, DeadBB); + + bool MadeChanges = DeadBB.size() > 0; + + for (auto *Dead : PrevBBDeadInsts) + Dead->eraseFromParent(); + PrevBBDeadInsts.clear(); + for (auto *Dead : DeadBB) + Dead->eraseFromParent(); + DeadBB.clear(); + + return MadeChanges; +} + +static bool legalizeResourceHandles(Function &F, DXILResourceTypeMap &DRTM) { + // Try to replace dx.resource.handlefrom.*.binding and dx.resource.getpointer + // calls with their respective index values and propagate the index values to + // be used at resource access. This legalizes the use of handles when: + // - A local resource is created from an Index into a global binding + // - GVN sink of store/load of a ptr/handle + bool MadeReplacements = tryReplaceHandlesWithIndices(F, DRTM); + // Since a Convergent op can't sink through control flow, and GVN is handled + // above, we can now undo any InstCombine optimizations that caused a + // dx.resource.getpointer ptr to sink by hoisting it back up. 
+ bool MadeHoistChanges = hoistGetPtrUses(F, DRTM); + return MadeReplacements || MadeHoistChanges; +} + static void replaceAccess(IntrinsicInst *II, dxil::ResourceTypeInfo &RTI) { SmallVector Worklist; for (User *U : II->users()) @@ -560,27 +796,13 @@ static void replaceAccess(IntrinsicInst *II, dxil::ResourceTypeInfo &RTI) { static bool transformResourcePointers(Function &F, DXILResourceTypeMap &DRTM) { SmallVector> Resources; - SetVector DeadBB; - SmallVector PrevBBDeadInsts; - for (BasicBlock &BB : make_early_inc_range(F)) { - for (Instruction &I : make_early_inc_range(BB)) - if (auto *II = dyn_cast(&I)) - if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer) - phiNodeReplacement(II, PrevBBDeadInsts, DeadBB); - + for (BasicBlock &BB : make_early_inc_range(F)) for (Instruction &I : BB) if (auto *II = dyn_cast(&I)) if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer) { auto *HandleTy = cast(II->getArgOperand(0)->getType()); Resources.emplace_back(II, DRTM[HandleTy]); } - } - for (auto *Dead : PrevBBDeadInsts) - Dead->eraseFromParent(); - PrevBBDeadInsts.clear(); - for (auto *Dead : DeadBB) - Dead->eraseFromParent(); - DeadBB.clear(); for (auto &[II, RI] : Resources) replaceAccess(II, RI); @@ -595,8 +817,9 @@ PreservedAnalyses DXILResourceAccess::run(Function &F, MAMProxy.getCachedResult(*F.getParent()); assert(DRTM && "DXILResourceTypeAnalysis must be available"); - bool MadeChanges = transformResourcePointers(F, *DRTM); - if (!MadeChanges) + bool MadeHandleChanges = legalizeResourceHandles(F, *DRTM); + bool MadeResourceChanges = transformResourcePointers(F, *DRTM); + if (!(MadeHandleChanges || MadeResourceChanges)) return PreservedAnalyses::all(); PreservedAnalyses PA; @@ -611,7 +834,9 @@ class DXILResourceAccessLegacy : public FunctionPass { bool runOnFunction(Function &F) override { DXILResourceTypeMap &DRTM = getAnalysis().getResourceTypeMap(); - return transformResourcePointers(F, DRTM); + bool MadeHandleChanges = legalizeResourceHandles(F, 
DRTM); + bool MadeResourceChanges = transformResourcePointers(F, DRTM); + return MadeHandleChanges || MadeResourceChanges; } StringRef getPassName() const override { return "DXIL Resource Access"; } DXILResourceAccessLegacy() : FunctionPass(ID) {} diff --git a/llvm/test/CodeGen/DirectX/phi-node-replacement.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/hoist-from-getptr.ll similarity index 100% rename from llvm/test/CodeGen/DirectX/phi-node-replacement.ll rename to llvm/test/CodeGen/DirectX/ResourceAccess/hoist-from-getptr.ll diff --git a/llvm/test/CodeGen/DirectX/issue-152348.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/issue-152348.ll similarity index 100% rename from llvm/test/CodeGen/DirectX/issue-152348.ll rename to llvm/test/CodeGen/DirectX/ResourceAccess/issue-152348.ll diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/legalize-handle-cases.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/legalize-handle-cases.ll new file mode 100644 index 0000000000000..8991536bd4797 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/legalize-handle-cases.ll @@ -0,0 +1,219 @@ +; RUN: opt -S -dxil-resource-type -dxil-resource-access -disable-verify \ +; RUN: -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +; The file contains examples of hlsl snippets that will generate invalid dxil +; resource access, either through code-gen or by an InstCombine/GVN sink +; optimization + +; NOTE: The below resources are generated with: +; +; RWBuffer In : register(u0); +; RWStructuredBuffer Out0 : register(u1); +; RWStructuredBuffer Out1 : register(u2); +; RWStructuredBuffer OutArr[]; + +; cbuffer c { +; bool cond; +; }; + +%__cblayout_c = type <{ i32 }> + +@.str = internal unnamed_addr constant [3 x i8] c"In\00", align 1 +@.str.2 = internal unnamed_addr constant [5 x i8] c"Out0\00", align 1 +@.str.3 = internal unnamed_addr constant [5 x i8] c"Out1\00", align 1 +@c.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_c) poison +@c.str = internal 
unnamed_addr constant [2 x i8] c"c\00", align 1 +@OutArr.str = internal unnamed_addr constant [7 x i8] c"OutArr\00", align 1 + +; Local select into global resource array: +; +; RWStructuredBuffer Out = cond ? OutArr[0] : OutArr[1]; +; Out[GI] = WaveActiveMax(In[GI]); +; +; CHECK-LABEL: @select_global_resource_array() +define void @select_global_resource_array() { +entry: + %c.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_c) @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_cst(i32 4, i32 0, i32 1, i32 0, ptr nonnull @c.str) + store target("dx.CBuffer", %__cblayout_c) %c.cb_h.i.i, ptr @c.cb, align 4 + %c.cb = load target("dx.CBuffer", %__cblayout_c), ptr @c.cb, align 4 + %0 = call ptr addrspace(2) @llvm.dx.resource.getpointer.p2.tdx.CBuffer_s___cblayout_cst(target("dx.CBuffer", %__cblayout_c) %c.cb, i32 0) + %1 = load i32, ptr addrspace(2) %0, align 4 + %loadedv.i = trunc nuw i32 %1 to i1 + br i1 %loadedv.i, label %cond.true.i, label %cond.false.i + +cond.true.i: +; CHECK: cond.true.i: +; CHECK-NEXT: br label %cond.end.i + %2 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 0, ptr nonnull @OutArr.str) + br label %cond.end.i + +cond.false.i: +; CHECK: cond.false.i: +; CHECK-NEXT: br label %cond.end.i + %3 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 1, ptr nonnull @OutArr.str) + br label %cond.end.i + +cond.end.i: +; CHECK: cond.end.i +; CHECK-NEXT: %[[HANDLE_IDX:.*]] = phi i32 [ 0, %cond.true.i ], [ 1, %cond.false.i ] +; CHECK: %[[TID:.*]] = tail call i32 @llvm.dx.flattened.thread.id.in.group() +; CHECK: %[[WAVE_MAX:.*]] = tail call i32 @llvm.dx.wave.reduce.max.i32(i32 %{{.*}}) +; CHECK-NEXT: %[[HANDLE:.*]] = tail call target("dx.RawBuffer", i32, 1, 0) +; CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 
%[[HANDLE_IDX]], ptr nonnull @OutArr.str) +; CHECK-NEXT: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i32_1_0t.i32(target("dx.RawBuffer", i32, 1, 0) %[[HANDLE]], i32 %[[TID]], i32 0, i32 %[[WAVE_MAX]]) +; CHECK-NEXT: ret void + %cond.i.sroa.speculated = phi target("dx.RawBuffer", i32, 1, 0) [ %2, %cond.true.i ], [ %3, %cond.false.i ] + %4 = tail call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0_1t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str) + %5 = tail call i32 @llvm.dx.flattened.thread.id.in.group() + %6 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %4, i32 %5) + %7 = load i32, ptr %6, align 4 + %hlsl.wave.active.max.i = tail call i32 @llvm.dx.wave.reduce.max.i32(i32 %7) + %8 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %cond.i.sroa.speculated, i32 %5) + store i32 %hlsl.wave.active.max.i, ptr %8, align 4 + ret void +} + +; GVN Sink of handle ptr +; +; if (cond) { +; Out0[GI] = WaveActiveSum(In[GI]); +; } else { +; Out0[0] = In[GI]; +; } +; Out0[GI] = WaveActiveSum(In[GI]); +; +; CHECK-LABEL: @gvn_sink() +define void @gvn_sink() { +entry: + %0 = tail call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0_1t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str) + %1 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 1, i32 1, i32 0, ptr nonnull @.str.2) + %c.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_c) @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_cst(i32 4, i32 0, i32 1, i32 0, ptr nonnull @c.str) + store target("dx.CBuffer", %__cblayout_c) %c.cb_h.i.i, ptr @c.cb, align 4 + %2 = tail call i32 @llvm.dx.flattened.thread.id.in.group() + %c.cb = load 
target("dx.CBuffer", %__cblayout_c), ptr @c.cb, align 4 + %3 = call ptr addrspace(2) @llvm.dx.resource.getpointer.p2.tdx.CBuffer_s___cblayout_cst(target("dx.CBuffer", %__cblayout_c) %c.cb, i32 0) + %4 = load i32, ptr addrspace(2) %3, align 4 + %loadedv.i = trunc nuw i32 %4 to i1 + %5 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %0, i32 %2) + %6 = load i32, ptr %5, align 4 + br i1 %loadedv.i, label %if.then.i, label %if.else.i + +if.then.i: + %hlsl.wave.active.sum.i = tail call i32 @llvm.dx.wave.reduce.sum.i32(i32 %6) + %7 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %1, i32 %2) + store i32 %hlsl.wave.active.sum.i, ptr %7, align 4 + br label %_Z4mainj.exit + +if.else.i: + %8 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %1, i32 0) + store i32 %6, ptr %8, align 4 + %.pre = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %1, i32 %2) + br label %_Z4mainj.exit + +_Z4mainj.exit: +; CHECK: _Z4mainj.exit: +; CHECK-NEXT: %[[TID:.*]] = phi i32 [ %2, %if.then.i ], [ %2, %if.else.i ] +; CHECK-NEXT: %[[HANDLE_IDX:.*]] = phi i32 [ 0, %if.then.i ], [ 0, %if.else.i ] +; CHECK-NEXT: %[[HANDLE:.*]] = tail call target("dx.RawBuffer", i32, 1, 0) +; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 1, i32 1, i32 %[[HANDLE_IDX]], ptr nonnull @.str.2) +; CHECK: %[[WAVE_SUM:.*]] = tail call i32 @llvm.dx.wave.reduce.sum.i32(i32 {{.*}}) +; CHECK-NEXT: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i32_1_0t.i32( +; CHECK-SAME: target("dx.RawBuffer", i32, 1, 0) %[[HANDLE]], i32 %[[TID]], i32 0, i32 %[[WAVE_SUM]]) +; CHECK-NEXT: ret void + 
%.pre-phi1 = phi ptr [ %7, %if.then.i ], [ %.pre, %if.else.i ] + %9 = load i32, ptr %5, align 4 + %hlsl.wave.active.sum5.i = tail call i32 @llvm.dx.wave.reduce.sum.i32(i32 %9) + store i32 %hlsl.wave.active.sum5.i, ptr %.pre-phi1, align 4 + ret void +} + +; Using a local array of global resources +; +; RWStructuredBuffer Outs[2] = {OutArr[0], OutArr[1]}; +; Outs[cond ? 0 : 1][GI] = In[GI]; +; +; CHECK-LABEL: @local_array_of_global_resources() +define void @local_array_of_global_resources() { +entry: + %0 = tail call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0_1t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str) + %c.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_c) @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_cst(i32 4, i32 0, i32 1, i32 0, ptr nonnull @c.str) + store target("dx.CBuffer", %__cblayout_c) %c.cb_h.i.i, ptr @c.cb, align 4 + %1 = tail call i32 @llvm.dx.flattened.thread.id.in.group() + %2 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 0, ptr nonnull @OutArr.str) + %3 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 1, ptr nonnull @OutArr.str) + %4 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %0, i32 %1) + %5 = load i32, ptr %4, align 4 + %c.cb = load target("dx.CBuffer", %__cblayout_c), ptr @c.cb, align 4 + %6 = call ptr addrspace(2) @llvm.dx.resource.getpointer.p2.tdx.CBuffer_s___cblayout_cst(target("dx.CBuffer", %__cblayout_c) %c.cb, i32 0) + %7 = load i32, ptr addrspace(2) %6, align 4 + %loadedv.i = trunc nuw i32 %7 to i1 + +; CHECK: %[[TID:.*]] = tail call i32 @llvm.dx.flattened.thread.id.in.group() +; CHECK: %[[HANDLE_IDX:.*]] = select i1 %loadedv.i, i32 0, i32 1 +; CHECK-NEXT: 
%[[HANDLE:.*]] = tail call target("dx.RawBuffer", i32, 1, 0) +; CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 %[[HANDLE_IDX]], ptr nonnull @OutArr.str) +; CHECK-NEXT: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i32_1_0t.i32(target("dx.RawBuffer", i32, 1, 0) %[[HANDLE]], i32 %[[TID]], i32 0, i32 {{.*}}) + %.sroa.speculated = select i1 %loadedv.i, target("dx.RawBuffer", i32, 1, 0) %2, target("dx.RawBuffer", i32, 1, 0) %3 + %8 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %.sroa.speculated, i32 %1) + store i32 %5, ptr %8, align 4 + ret void +} + +; Sink of a load/store +; +; if (cond) { +; Out0[GI] += In[GI]; +; } else { +; Out1[GI] += In[GI]; +; } +; +; CHECK-LABEL: @sink_load_store() +define void @sink_load_store() { +entry: +; CHECK: %[[IN_HANDLE:.*]] = tail call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0_1t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str) + %0 = tail call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0_1t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str) + %c.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_c) @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_cst(i32 4, i32 0, i32 1, i32 0, ptr nonnull @c.str) + store target("dx.CBuffer", %__cblayout_c) %c.cb_h.i.i, ptr @c.cb, align 4 +; CHECK: %[[TID:.*]] = tail call i32 @llvm.dx.flattened.thread.id.in.group() + %1 = tail call i32 @llvm.dx.flattened.thread.id.in.group() + %c.cb = load target("dx.CBuffer", %__cblayout_c), ptr @c.cb, align 4 + %2 = call ptr addrspace(2) @llvm.dx.resource.getpointer.p2.tdx.CBuffer_s___cblayout_cst(target("dx.CBuffer", %__cblayout_c) %c.cb, i32 0) + %3 = load i32, ptr addrspace(2) %2, align 4 + %loadedv.i = trunc nuw i32 %3 to i1 +; CHECK: %[[IN_LOAD:.*]] = call { i32, i1 
} @llvm.dx.resource.load.typedbuffer.i32.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %[[IN_HANDLE]], i32 %[[TID]]) +; CHECK: %[[IN_X:.*]] = extractvalue { i32, i1 } %[[IN_LOAD]], 0 + %4 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %0, i32 %1) + %5 = load i32, ptr %4, align 4 + br i1 %loadedv.i, label %if.then.i, label %if.else.i + +if.then.i: +; CHECK: if.then.i: +; CHECK-NEXT: %[[HANDLE0:.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 1, i32 1, i32 0, ptr nonnull @.str.2) +; CHECK-NEXT: %[[LOAD0:.*]] = call { i32, i1 } @llvm.dx.resource.load.rawbuffer.i32.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %[[HANDLE0]], i32 %[[TID]], i32 0) +; CHECK-NEXT: %[[X0:.*]] = extractvalue { i32, i1 } %[[LOAD0]], 0 +; CHECK-NEXT: %[[ADD0:.*]] = add i32 %[[X0]], %[[IN_X]] +; CHECK-NEXT: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i32_1_0t.i32(target("dx.RawBuffer", i32, 1, 0) %[[HANDLE0]], i32 %[[TID]], i32 0, i32 %[[ADD0]]) +; CHECK-NEXT: br label %_Z4mainj.exit + %6 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 1, i32 1, i32 0, ptr nonnull @.str.2) + %7 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %6, i32 %1) + br label %_Z4mainj.exit + +if.else.i: +; CHECK: if.else.i: +; CHECK-NEXT: %[[HANDLE1:.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 2, i32 1, i32 0, ptr nonnull @.str.3) +; CHECK-NEXT: %[[LOAD1:.*]] = call { i32, i1 } @llvm.dx.resource.load.rawbuffer.i32.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %[[HANDLE1]], i32 %[[TID]], i32 0) +; CHECK-NEXT: %[[X1:.*]] = extractvalue { i32, i1 } 
%[[LOAD1]], 0 +; CHECK-NEXT: %[[ADD1:.*]] = add i32 %[[X1]], %[[IN_X]] +; CHECK-NEXT: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i32_1_0t.i32(target("dx.RawBuffer", i32, 1, 0) %[[HANDLE1]], i32 %[[TID]], i32 0, i32 %[[ADD1]]) +; CHECK-NEXT: br label %_Z4mainj.exit + %8 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 2, i32 1, i32 0, ptr nonnull @.str.3) + %9 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %8, i32 %1) + br label %_Z4mainj.exit + +_Z4mainj.exit: + %.sink = phi ptr [ %7, %if.then.i ], [ %9, %if.else.i ] + %10 = load i32, ptr %.sink, align 4 + %add.i = add i32 %10, %5 + store i32 %add.i, ptr %.sink, align 4 + ret void +} diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/legalize-handle-to-index.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/legalize-handle-to-index.ll new file mode 100644 index 0000000000000..1e7304a7232fc --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/legalize-handle-to-index.ll @@ -0,0 +1,161 @@ +; RUN: opt -S -dxil-resource-type -dxil-resource-access -disable-verify \ +; RUN: -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +@OutArr.str = internal unnamed_addr constant [7 x i8] c"OutArr\00", align 1 + +; CHECK-LABEL: handle_phi_load( +; CHECK-SAME: i1 %[[COND:.*]], i32 %[[A:.*]], i32 %[[B:.*]]) +define i32 @handle_phi_load(i1 %cond, i32 %a, i32 %b) { +; CHECK-NOT: handlefromimplicitbinding +; CHECK: main: +; CHECK-NEXT: %[[IDX:.*]] = phi i32 [ 0, %entry ], [ 1, %if.then.i ] +; CHECK-NEXT: %[[C:.*]] = phi i32 [ %[[A]], %entry ], [ %[[B]], %if.then.i ] +; CHECK-NEXT: %[[HANDLE:.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 %[[IDX]], ptr nonnull @OutArr.str) +; CHECK-NEXT: %[[LOAD:.*]] = call { i32, i1 } 
@llvm.dx.resource.load.rawbuffer.i32.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %[[HANDLE]], i32 %[[C]], i32 0) +; CHECK-NEXT: %[[X:.*]] = extractvalue { i32, i1 } %[[LOAD]], 0 +; CHECK-NEXT: ret i32 %[[X]] +entry: + %handle0 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 0, ptr nonnull @OutArr.str) + br i1 %cond, label %if.then.i, label %main + +if.then.i: + %handle1 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 1, ptr nonnull @OutArr.str) + br label %main + +main: + %handle_phi = phi target("dx.RawBuffer", i32, 1, 0) [ %handle0, %entry ], [ %handle1, %if.then.i ] + %c = phi i32 [ %a, %entry ], [ %b, %if.then.i ] + %ptr = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %handle_phi, i32 %c) + %x = load i32, ptr %ptr, align 4 + ret i32 %x +} + +; CHECK-LABEL: handle_select_store( +; CHECK-SAME: i32 %[[X:.*]], i1 %[[COND:.*]], i32 %[[A:.*]], i32 %[[B:.*]]) +define void @handle_select_store(i32 %x, i1 %cond, i32 %a, i32 %b) { +; CHECK-NOT: handlefromimplicitbinding +; CHECK: entry: +; CHECK-NEXT: %[[IDX:.*]] = select i1 %[[COND]], i32 0, i32 1 +; CHECK-NEXT: %[[C:.*]] = select i1 %cond, i32 %[[A]], i32 %[[B]] +; CHECK-NEXT: %[[HANDLE:.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 %[[IDX]], ptr nonnull @OutArr.str) +; CHECK-NEXT: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i32_1_0t.i32(target("dx.RawBuffer", i32, 1, 0) %[[HANDLE]], i32 %[[C]], i32 0, i32 %[[X]]) +; CHECK-NEXT: ret void +entry: + %handle0 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 0, ptr nonnull @OutArr.str) 
+  %handle1 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 1, ptr nonnull @OutArr.str)
+  %handle = select i1 %cond, target("dx.RawBuffer", i32, 1, 0) %handle0, target("dx.RawBuffer", i32, 1, 0) %handle1
+  %c = select i1 %cond, i32 %a, i32 %b
+  %ptr = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %handle, i32 %c)
+  store i32 %x, ptr %ptr, align 4
+  ret void
+}
+
+; A store through a phi of two getpointer results (handles at index 0 and 1)
+; is expected to become a single rawbuffer store in %main, using one handle
+; whose index operand is a phi and an element index that is a phi of %a/%b.
+; CHECK-LABEL: ptr_phi_store(
+; CHECK-SAME: i32 %[[X:.*]], i1 %[[COND:.*]], i32 %[[A:.*]], i32 %[[B:.*]])
+define void @ptr_phi_store(i32 %x, i1 %cond, i32 %a, i32 %b) {
+; CHECK-NOT: handlefromimplicitbinding
+; CHECK: main:
+; CHECK-NEXT: %[[C:.*]] = phi i32 [ %[[A]], %entry ], [ %[[B]], %if.then.i ]
+; CHECK-NEXT: %[[IDX:.*]] = phi i32 [ 0, %entry ], [ 1, %if.then.i ]
+; CHECK-NEXT: %[[HANDLE:.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 %[[IDX]], ptr nonnull @OutArr.str)
+; CHECK-NEXT: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i32_1_0t.i32(target("dx.RawBuffer", i32, 1, 0) %[[HANDLE]], i32 %[[C]], i32 0, i32 %[[X]])
+; CHECK-NEXT: ret void
+entry:
+  %handle0 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 0, ptr nonnull @OutArr.str)
+  %ptr0 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %handle0, i32 %a)
+  br i1 %cond, label %if.then.i, label %main
+
+if.then.i:
+  %handle1 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 1, ptr nonnull @OutArr.str)
+  %ptr1 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %handle1, i32 %b)
+  br label %main
+
+main:
+  %ptr_phi = phi ptr [ %ptr0, %entry ], [ %ptr1, %if.then.i ]
+  store i32 %x, ptr %ptr_phi, align 4
+  ret void
+}
+
+; A load through a select of two getpointer results is expected to become a
+; single rawbuffer load whose handle index and element index are both selects
+; on the original condition.
+; CHECK-LABEL: ptr_select_load(
+; CHECK-SAME: i1 %[[COND:.*]], i32 %[[A:.*]], i32 %[[B:.*]])
+define i32 @ptr_select_load(i1 %cond, i32 %a, i32 %b) {
+; CHECK-NOT: handlefromimplicitbinding
+; CHECK: entry:
+; CHECK-NEXT: %[[C:.*]] = select i1 %[[COND]], i32 %[[A]], i32 %[[B]]
+; CHECK-NEXT: %[[IDX:.*]] = select i1 %[[COND]], i32 0, i32 1
+; CHECK-NEXT: %[[HANDLE:.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 %[[IDX]], ptr nonnull @OutArr.str)
+; CHECK-NEXT: %[[LOAD:.*]] = call { i32, i1 } @llvm.dx.resource.load.rawbuffer.i32.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %[[HANDLE]], i32 %[[C]], i32 0)
+; CHECK-NEXT: %[[X:.*]] = extractvalue { i32, i1 } %[[LOAD]], 0
+; CHECK-NEXT: ret i32 %[[X]]
+entry:
+  %handle0 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 0, ptr nonnull @OutArr.str)
+  %ptr0 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %handle0, i32 %a)
+  %handle1 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 1, ptr nonnull @OutArr.str)
+  %ptr1 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %handle1, i32 %b)
+  %ptr = select i1 %cond, ptr %ptr0, ptr %ptr1
+  %x = load i32, ptr %ptr, align 4
+  ret i32 %x
+}
+
+; Both incoming pointers come from TypedBuffer handles created with identical
+; binding arguments (index 0 on each path). The store is still expected to be
+; rewritten onto a single handle in %main whose index operand is a phi.
+; This function creates its handles with handlefrombinding, so that is the
+; name that must not appear before the main block.
+; CHECK-LABEL: gvn_ptr_store(
+; CHECK-SAME: i32 %[[X:.*]], i1 %[[COND:.*]], i32 %[[A:.*]], i32 %[[B:.*]])
+define void @gvn_ptr_store(i32 %x, i1 %cond, i32 %a, i32 %b) {
+; CHECK-NOT: handlefrombinding
+; CHECK: main:
+; CHECK-NEXT: %[[C:.*]] = phi i32 [ %[[A]], %entry ], [ %[[B]], %if.then.i ]
+; CHECK-NEXT: %[[IDX:.*]] = phi i32 [ 0, %entry ], [ 0, %if.then.i ]
+; CHECK-NEXT: %[[HANDLE:.*]] = tail call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0_1t(i32 2, i32 0, i32 1, i32 %[[IDX]], ptr nonnull @OutArr.str)
+; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_i32_1_0_1t.i32(target("dx.TypedBuffer", i32, 1, 0, 1) %[[HANDLE]], i32 %[[C]], i32 %[[X]])
+; CHECK-NEXT: ret void
+entry:
+  %handle0 = tail call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0t(i32 2, i32 0, i32 1, i32 0, ptr nonnull @OutArr.str)
+  %ptr0 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0t(target("dx.TypedBuffer", i32, 1, 0, 1) %handle0, i32 %a)
+  br i1 %cond, label %if.then.i, label %main
+
+if.then.i:
+  %handle1 = tail call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0t(i32 2, i32 0, i32 1, i32 0, ptr nonnull @OutArr.str)
+  %ptr1 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0t(target("dx.TypedBuffer", i32, 1, 0, 1) %handle1, i32 %b)
+  br label %main
+
+main:
+  %ptr = phi ptr [ %ptr0, %entry ], [ %ptr1, %if.then.i ]
+  store i32 %x, ptr %ptr, align 4
+  ret void
+}
+
+; %ptr0 is used twice: as an incoming value of the phi that feeds the load,
+; and directly by the final store. The original %handle0 is therefore
+; expected to remain in %entry for the store, while the load goes through a
+; separate handle rebuilt in %main.
+; CHECK-LABEL: multiple_use_handle(
+; CHECK-SAME: i32 %[[X:.*]], i1 %[[COND:.*]], i32 %[[A:.*]], i32 %[[B:.*]])
+define void @multiple_use_handle(i32 %x, i1 %cond, i32 %a, i32 %b) {
+; CHECK: entry:
+; CHECK-NEXT: %[[HANDLE0:.*]] = tail call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0_1t(i32 2, i32 0, i32 1, i32 0, ptr nonnull @OutArr.str)
+; CHECK: main:
+; CHECK-NEXT: %[[C:.*]] = phi i32 [ %[[A]], %entry ], [ %[[B]], %if.then.i ]
+; CHECK-NEXT: %[[IDX:.*]] = phi i32 [ 0, %entry ], [ 0, %if.then.i ]
+; CHECK-NEXT: %[[HANDLE1:.*]] = tail call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0_1t(i32 2, i32 0, i32 1, i32 %[[IDX]], ptr nonnull @OutArr.str)
+; CHECK-NEXT: %[[LOAD:.*]] = call { i32, i1 } @llvm.dx.resource.load.typedbuffer.i32.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %[[HANDLE1]], i32 %[[C]])
+; CHECK-NEXT: %[[Y:.*]] = extractvalue { i32, i1 } %[[LOAD]], 0
+; CHECK-NEXT: %[[ADD:.*]] = add i32 %[[Y]], %[[X]]
+; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_i32_1_0_1t.i32(target("dx.TypedBuffer", i32, 1, 0, 1) %[[HANDLE0]], i32 %[[A]], i32 %[[ADD]])
+; CHECK-NEXT: ret void
+entry:
+  %handle0 = tail call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0t(i32 2, i32 0, i32 1, i32 0, ptr nonnull @OutArr.str)
+  %ptr0 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0t(target("dx.TypedBuffer", i32, 1, 0, 1) %handle0, i32 %a)
+  br i1 %cond, label %if.then.i, label %main
+
+if.then.i:
+  %handle1 = tail call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0t(i32 2, i32 0, i32 1, i32 0, ptr nonnull @OutArr.str)
+  %ptr1 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0t(target("dx.TypedBuffer", i32, 1, 0, 1) %handle1, i32 %b)
+  br label %main
+
+main:
+  %ptr = phi ptr [ %ptr0, %entry ], [ %ptr1, %if.then.i ]
+  %y = load i32, ptr %ptr, align 4
+  %add = add i32 %y, %x
+  store i32 %add, ptr %ptr0, align 4
+  ret void
+}