[DirectX][ResourceAccess] Legalize resource handles into unique global resources#176797
[DirectX][ResourceAccess] Legalize resource handles into unique global resources#176797inbelic wants to merge 9 commits into
Conversation
|
@llvm/pr-subscribers-backend-directx Author: Finn Plummer (inbelic) ChangesThis change replaces the propagation of handles (or corresponding ptr) through control flow with an index into the unique global resource it is accessing. This legalizes code-generation of handle selection and various optimizations on the created ptr into the resource, as illustrated here. This specifically resolves all indexing into a global resource array, but also undoes the sink of a handle/ptr in the GVN pass. By handling GVN, this resolves #165288. Note: This does not handle the cases when a local resource appears to be assigned into by different global resources. To guarantee correct code-generation we are dependent on the assumptions provided by: #176793 and the existing Patch is 37.64 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/176797.diff 5 Files Affected:
diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
index 3d75d7455101f..22f39f964b99a 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/User.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#define DEBUG_TYPE "dxil-resource-access"
@@ -57,7 +58,7 @@ static Value *traverseGEPOffsets(const DataLayout &DL, IRBuilder<> &Builder,
GEPOffset = *GEP->idx_begin();
} else if (NumIndices == 2) {
// If we have two indices, this should be an access through a pointer.
- auto IndexIt = GEP->idx_begin();
+ auto *IndexIt = GEP->idx_begin();
assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
"GEP is not indexing through pointer");
GEPOffset = *(++IndexIt);
@@ -419,6 +420,196 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI,
llvm_unreachable("Unhandled case in switch");
}
+static Instruction *getPointerOperand(Instruction *AI) {
+ if (auto *LI = dyn_cast<LoadInst>(AI))
+ return dyn_cast<Instruction>(LI->getPointerOperand());
+ if (auto *SI = dyn_cast<StoreInst>(AI))
+ return dyn_cast<Instruction>(SI->getPointerOperand());
+
+ return nullptr;
+}
+
+static const std::array<Intrinsic::ID, 2> HandleIntrins = {
+ Intrinsic::dx_resource_handlefrombinding,
+ Intrinsic::dx_resource_handlefromimplicitbinding,
+};
+
+static SmallVector<IntrinsicInst *> collectUsedHandles(Value *Ptr) {
+ SmallVector<Value *> Worklist = {Ptr};
+ SmallVector<IntrinsicInst *> Handles;
+
+ while (!Worklist.empty()) {
+ Value *X = Worklist.pop_back_val();
+
+ if (!X->getType()->isPointerTy() && !X->getType()->isTargetExtTy())
+ return {}; // Early exit on store/load into non-resource
+
+ if (auto *Phi = dyn_cast<PHINode>(X))
+ for (Use &V : Phi->incoming_values())
+ Worklist.push_back(V.get());
+ else if (auto *Select = dyn_cast<SelectInst>(X))
+ for (Value *V : {Select->getTrueValue(), Select->getFalseValue()})
+ Worklist.push_back(V);
+ else if (auto *II = dyn_cast<IntrinsicInst>(X)) {
+ Intrinsic::ID IID = II->getIntrinsicID();
+
+ if (IID == Intrinsic::dx_resource_getpointer)
+ Worklist.push_back(II->getArgOperand(/*Handle=*/0));
+
+ if (llvm::is_contained(HandleIntrins, IID))
+ Handles.push_back(II);
+ }
+ }
+
+ return Handles;
+}
+
+static hlsl::Binding getBinding(IntrinsicInst *Handle,
+ DXILResourceTypeMap &DRTM) {
+ auto *HandleTy = cast<TargetExtType>(Handle->getType());
+ dxil::ResourceClass Class = DRTM[HandleTy].getResourceClass();
+ uint32_t Space = cast<ConstantInt>(Handle->getArgOperand(0))->getZExtValue();
+ uint32_t LowerBound =
+ cast<ConstantInt>(Handle->getArgOperand(1))->getZExtValue();
+ int32_t Size = cast<ConstantInt>(Handle->getArgOperand(2))->getZExtValue();
+ uint32_t UpperBound = Size < 0 ? UINT32_MAX : LowerBound + Size - 1;
+
+ return hlsl::Binding(Class, Space, LowerBound, UpperBound, nullptr);
+}
+
+static bool haveCommonBinding(ArrayRef<IntrinsicInst *> Handles,
+ DXILResourceTypeMap &DRTM) {
+ unsigned NumHandles = Handles.size();
+ if (NumHandles <= 1)
+ return false; // No-legalization required
+
+ hlsl::Binding B = getBinding(Handles[0], DRTM);
+ for (unsigned I = 1; I < NumHandles; I++)
+ if (B != getBinding(Handles[I], DRTM))
+ return false; // No-legalization is possible
+
+ return true;
+}
+
+// getHandleIndicies traverses up the control flow that a ptr came from and
+// propogates back the GetPtrIdx and HandleIdx:
+//
+// - GetPtrIdx is the index of dx.resource.getpointer
+// - HandleIdx is the index of dx.resource.handlefrom.*
+static std::pair<Value *, Value *>
+getHandleIndicies(Instruction *I,
+ SmallSetVector<Instruction *, 16> &DeadInsts) {
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ if (llvm::is_contained(HandleIntrins, II->getIntrinsicID())) {
+ DeadInsts.insert(II);
+ return {nullptr, II->getArgOperand(/*Index=*/3)};
+ }
+
+ if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer) {
+ auto *V = dyn_cast<Instruction>(II->getArgOperand(/*Handle=*/0));
+ auto Idx = getHandleIndicies(V, DeadInsts);
+ assert(Idx.first == nullptr &&
+ "Encountered multiple dx.resource.getpointers in ptr chain?");
+
+ DeadInsts.insert(II);
+ return {II->getArgOperand(1), Idx.second};
+ }
+ }
+
+ if (auto *Phi = dyn_cast<PHINode>(I)) {
+ unsigned NumEdges = Phi->getNumIncomingValues();
+ assert(NumEdges != 0 && "Malformed Phi Node");
+
+ IRBuilder<> Builder(Phi);
+ PHINode *GetPtrPhi = PHINode::Create(Builder.getInt32Ty(), NumEdges);
+ PHINode *HandlePhi = PHINode::Create(Builder.getInt32Ty(), NumEdges);
+
+ bool HasGetPtr = true;
+ for (unsigned I = 0; I < NumEdges; I++) {
+ auto *BB = Phi->getIncomingBlock(I);
+ auto *V = dyn_cast<Instruction>(Phi->getIncomingValue(I));
+ auto [GetPtrIdx, HandleIdx] = getHandleIndicies(V, DeadInsts);
+ HasGetPtr &= GetPtrIdx != nullptr;
+ if (HasGetPtr)
+ GetPtrPhi->addIncoming(GetPtrIdx, BB);
+ HandlePhi->addIncoming(HandleIdx, BB);
+ }
+
+ if (HasGetPtr)
+ Builder.Insert(GetPtrPhi);
+ else
+ GetPtrPhi = nullptr;
+
+ Builder.Insert(HandlePhi);
+
+ DeadInsts.insert(Phi);
+ return {GetPtrPhi, HandlePhi};
+ }
+
+ if (auto *Select = dyn_cast<SelectInst>(I)) {
+ auto *TrueV = dyn_cast<Instruction>(Select->getTrueValue());
+ auto [TrueGetPtrIdx, TrueHandleIdx] = getHandleIndicies(TrueV, DeadInsts);
+
+ auto *FalseV = dyn_cast<Instruction>(Select->getFalseValue());
+ auto [FalseGetPtrIdx, FalseHandleIdx] =
+ getHandleIndicies(FalseV, DeadInsts);
+
+ IRBuilder<> Builder(Select);
+ Value *GetPtrSelect = nullptr;
+
+ if (TrueGetPtrIdx && FalseGetPtrIdx)
+ GetPtrSelect = Builder.CreateSelect(Select->getCondition(), TrueGetPtrIdx,
+ FalseGetPtrIdx);
+
+ auto *HandleSelect = Builder.CreateSelect(Select->getCondition(),
+ TrueHandleIdx, FalseHandleIdx);
+ DeadInsts.insert(Select);
+ return {GetPtrSelect, HandleSelect};
+ }
+
+ llvm_unreachable("collectUsedHandles should assure this does not occur");
+}
+
+static void
+replaceHandleWithIndicies(Instruction *Ptr, IntrinsicInst *OldHandle,
+ SmallSetVector<Instruction *, 16> &DeadInsts) {
+ auto [GetPtrIdx, HandleIdx] = getHandleIndicies(Ptr, DeadInsts);
+
+ IRBuilder<> Builder(Ptr);
+ IntrinsicInst *Handle = cast<IntrinsicInst>(OldHandle->clone());
+ Handle->setArgOperand(/*Index=*/3, HandleIdx);
+ Builder.Insert(Handle);
+
+ auto *GetPtr = Builder.CreateIntrinsic(
+ Ptr->getType(), Intrinsic::dx_resource_getpointer, {Handle, GetPtrIdx});
+
+ Ptr->replaceAllUsesWith(GetPtr);
+ DeadInsts.insert(Ptr);
+}
+
+static bool tryReplaceHandlesWithIndices(Function &F,
+ DXILResourceTypeMap &DRTM) {
+ SmallSetVector<Instruction *, 16> DeadInsts;
+ for (BasicBlock &BB : make_early_inc_range(F))
+ for (Instruction &I : BB)
+ if (auto *PtrOp = getPointerOperand(&I)) {
+ SmallVector<IntrinsicInst *> Handles = collectUsedHandles(PtrOp);
+ if (Handles.size() <= 1)
+ continue;
+ // Can replace with an index into handle call
+ if (haveCommonBinding(Handles, DRTM))
+ replaceHandleWithIndicies(PtrOp, Handles[0], DeadInsts);
+ }
+
+ bool MadeChanges = DeadInsts.size() > 0;
+
+ for (auto *I : llvm::reverse(DeadInsts))
+ if (I->hasNUses(0)) // Handle maybe used elsewhere aside from replaced path
+ I->eraseFromParent();
+
+ return MadeChanges;
+}
+
static SmallVector<Instruction *> collectBlockUseDef(Instruction *Start) {
SmallPtrSet<Instruction *, 32> Visited;
SmallVector<Instruction *, 32> Worklist;
@@ -525,6 +716,42 @@ static void phiNodeReplacement(IntrinsicInst *II,
CurrBBDeadInsts.clear();
}
+static bool hoistGetPtrUses(Function &F, DXILResourceTypeMap &DRTM) {
+ SetVector<BasicBlock *> DeadBB;
+ SmallVector<Instruction *> PrevBBDeadInsts;
+
+ for (BasicBlock &BB : make_early_inc_range(F))
+ for (Instruction &I : make_early_inc_range(BB))
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer)
+ phiNodeReplacement(II, PrevBBDeadInsts, DeadBB);
+
+ bool MadeChanges = DeadBB.size() > 0;
+
+ for (auto *Dead : PrevBBDeadInsts)
+ Dead->eraseFromParent();
+ PrevBBDeadInsts.clear();
+ for (auto *Dead : DeadBB)
+ Dead->eraseFromParent();
+ DeadBB.clear();
+
+ return MadeChanges;
+}
+
+static bool legalizeResourceHandles(Function &F, DXILResourceTypeMap &DRTM) {
+ // Try to replace dx.resource.handlefrom.*.binding and dx.resource.getpointer
+ // calls with their respective index values and propogate the index values to
+ // be used at resource access. This legalizes the use of handles when:
+ // - A local resource is created from an Index into a global binding
+ // - GVN sink of store/load of a ptr/handle
+ bool MadeReplacements = tryReplaceHandlesWithIndices(F, DRTM);
+ // Since a Convergent op can't sink through control flow, and GVN is handled
+ // above, we can now undo any InstCombine optimizations that caused a
+ // dx.resource.getpointer ptr to sink by hoisting it back up.
+ bool MadeHoistChanges = hoistGetPtrUses(F, DRTM);
+ return MadeReplacements || MadeHoistChanges;
+}
+
static void replaceAccess(IntrinsicInst *II, dxil::ResourceTypeInfo &RTI) {
SmallVector<User *> Worklist;
for (User *U : II->users())
@@ -560,27 +787,13 @@ static void replaceAccess(IntrinsicInst *II, dxil::ResourceTypeInfo &RTI) {
static bool transformResourcePointers(Function &F, DXILResourceTypeMap &DRTM) {
SmallVector<std::pair<IntrinsicInst *, dxil::ResourceTypeInfo>> Resources;
- SetVector<BasicBlock *> DeadBB;
- SmallVector<Instruction *> PrevBBDeadInsts;
- for (BasicBlock &BB : make_early_inc_range(F)) {
- for (Instruction &I : make_early_inc_range(BB))
- if (auto *II = dyn_cast<IntrinsicInst>(&I))
- if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer)
- phiNodeReplacement(II, PrevBBDeadInsts, DeadBB);
-
+ for (BasicBlock &BB : make_early_inc_range(F))
for (Instruction &I : BB)
if (auto *II = dyn_cast<IntrinsicInst>(&I))
if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer) {
auto *HandleTy = cast<TargetExtType>(II->getArgOperand(0)->getType());
Resources.emplace_back(II, DRTM[HandleTy]);
}
- }
- for (auto *Dead : PrevBBDeadInsts)
- Dead->eraseFromParent();
- PrevBBDeadInsts.clear();
- for (auto *Dead : DeadBB)
- Dead->eraseFromParent();
- DeadBB.clear();
for (auto &[II, RI] : Resources)
replaceAccess(II, RI);
@@ -595,8 +808,9 @@ PreservedAnalyses DXILResourceAccess::run(Function &F,
MAMProxy.getCachedResult<DXILResourceTypeAnalysis>(*F.getParent());
assert(DRTM && "DXILResourceTypeAnalysis must be available");
- bool MadeChanges = transformResourcePointers(F, *DRTM);
- if (!MadeChanges)
+ bool MadeHandleChanges = legalizeResourceHandles(F, *DRTM);
+ bool MadeResourceChanges = transformResourcePointers(F, *DRTM);
+ if (!(MadeHandleChanges || MadeResourceChanges))
return PreservedAnalyses::all();
PreservedAnalyses PA;
@@ -611,7 +825,9 @@ class DXILResourceAccessLegacy : public FunctionPass {
bool runOnFunction(Function &F) override {
DXILResourceTypeMap &DRTM =
getAnalysis<DXILResourceTypeWrapperPass>().getResourceTypeMap();
- return transformResourcePointers(F, DRTM);
+ bool MadeHandleChanges = legalizeResourceHandles(F, DRTM);
+ bool MadeResourceChanges = transformResourcePointers(F, DRTM);
+ return MadeHandleChanges || MadeResourceChanges;
}
StringRef getPassName() const override { return "DXIL Resource Access"; }
DXILResourceAccessLegacy() : FunctionPass(ID) {}
diff --git a/llvm/test/CodeGen/DirectX/phi-node-replacement.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/hoist-from-getptr.ll
similarity index 100%
rename from llvm/test/CodeGen/DirectX/phi-node-replacement.ll
rename to llvm/test/CodeGen/DirectX/ResourceAccess/hoist-from-getptr.ll
diff --git a/llvm/test/CodeGen/DirectX/issue-152348.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/issue-152348.ll
similarity index 100%
rename from llvm/test/CodeGen/DirectX/issue-152348.ll
rename to llvm/test/CodeGen/DirectX/ResourceAccess/issue-152348.ll
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/legalize-handle-cases.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/legalize-handle-cases.ll
new file mode 100644
index 0000000000000..8991536bd4797
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/legalize-handle-cases.ll
@@ -0,0 +1,219 @@
+; RUN: opt -S -dxil-resource-type -dxil-resource-access -disable-verify \
+; RUN: -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+; The file contains examples of hlsl snippets that will generate invalid dxil
+; resource access, either through code-gen or by an InstCombine/GVN sink
+; optimization
+
+; NOTE: The below resources are generated with:
+;
+; RWBuffer<int> In : register(u0);
+; RWStructuredBuffer<int> Out0 : register(u1);
+; RWStructuredBuffer<int> Out1 : register(u2);
+; RWStructuredBuffer<int> OutArr[];
+
+; cbuffer c {
+; bool cond;
+; };
+
+%__cblayout_c = type <{ i32 }>
+
+@.str = internal unnamed_addr constant [3 x i8] c"In\00", align 1
+@.str.2 = internal unnamed_addr constant [5 x i8] c"Out0\00", align 1
+@.str.3 = internal unnamed_addr constant [5 x i8] c"Out1\00", align 1
+@c.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_c) poison
+@c.str = internal unnamed_addr constant [2 x i8] c"c\00", align 1
+@OutArr.str = internal unnamed_addr constant [7 x i8] c"OutArr\00", align 1
+
+; Local select into global resource array:
+;
+; RWStructuredBuffer<int> Out = cond ? OutArr[0] : OutArr[1];
+; Out[GI] = WaveActiveMax(In[GI]);
+;
+; CHECK-LABEL: @select_global_resource_array()
+define void @select_global_resource_array() {
+entry:
+ %c.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_c) @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_cst(i32 4, i32 0, i32 1, i32 0, ptr nonnull @c.str)
+ store target("dx.CBuffer", %__cblayout_c) %c.cb_h.i.i, ptr @c.cb, align 4
+ %c.cb = load target("dx.CBuffer", %__cblayout_c), ptr @c.cb, align 4
+ %0 = call ptr addrspace(2) @llvm.dx.resource.getpointer.p2.tdx.CBuffer_s___cblayout_cst(target("dx.CBuffer", %__cblayout_c) %c.cb, i32 0)
+ %1 = load i32, ptr addrspace(2) %0, align 4
+ %loadedv.i = trunc nuw i32 %1 to i1
+ br i1 %loadedv.i, label %cond.true.i, label %cond.false.i
+
+cond.true.i:
+; CHECK: cond.true.i:
+; CHECK-NEXT: br label %cond.end.i
+ %2 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 0, ptr nonnull @OutArr.str)
+ br label %cond.end.i
+
+cond.false.i:
+; CHECK: cond.false.i:
+; CHECK-NEXT: br label %cond.end.i
+ %3 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 1, ptr nonnull @OutArr.str)
+ br label %cond.end.i
+
+cond.end.i:
+; CHECK: cond.end.i
+; CHECK-NEXT: %[[HANDLE_IDX:.*]] = phi i32 [ 0, %cond.true.i ], [ 1, %cond.false.i ]
+; CHECK: %[[TID:.*]] = tail call i32 @llvm.dx.flattened.thread.id.in.group()
+; CHECK: %[[WAVE_MAX:.*]] = tail call i32 @llvm.dx.wave.reduce.max.i32(i32 %{{.*}})
+; CHECK-NEXT: %[[HANDLE:.*]] = tail call target("dx.RawBuffer", i32, 1, 0)
+; CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i32_1_0t(i32 2, i32 0, i32 -1, i32 %[[HANDLE_IDX]], ptr nonnull @OutArr.str)
+; CHECK-NEXT: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i32_1_0t.i32(target("dx.RawBuffer", i32, 1, 0) %[[HANDLE]], i32 %[[TID]], i32 0, i32 %[[WAVE_MAX]])
+; CHECK-NEXT: ret void
+ %cond.i.sroa.speculated = phi target("dx.RawBuffer", i32, 1, 0) [ %2, %cond.true.i ], [ %3, %cond.false.i ]
+ %4 = tail call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0_1t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str)
+ %5 = tail call i32 @llvm.dx.flattened.thread.id.in.group()
+ %6 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %4, i32 %5)
+ %7 = load i32, ptr %6, align 4
+ %hlsl.wave.active.max.i = tail call i32 @llvm.dx.wave.reduce.max.i32(i32 %7)
+ %8 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %cond.i.sroa.speculated, i32 %5)
+ store i32 %hlsl.wave.active.max.i, ptr %8, align 4
+ ret void
+}
+
+; GVN Sink of handle ptr
+;
+; if (cond) {
+; Out0[GI] = WaveActiveSum(In[GI]);
+; } else {
+; Out0[0] = In[GI];
+; }
+; Out0[GI] = WaveActiveSum(In[GI]);
+;
+; CHECK-LABEL: @gvn_sink()
+define void @gvn_sink() {
+entry:
+ %0 = tail call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0_1t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str)
+ %1 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 1, i32 1, i32 0, ptr nonnull @.str.2)
+ %c.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_c) @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_cst(i32 4, i32 0, i32 1, i32 0, ptr nonnull @c.str)
+ store target("dx.CBuffer", %__cblayout_c) %c.cb_h.i.i, ptr @c.cb, align 4
+ %2 = tail call i32 @llvm.dx.flattened.thread.id.in.group()
+ %c.cb = load target("dx.CBuffer", %__cblayout_c), ptr @c.cb, align 4
+ %3 = call ptr addrspace(2) @llvm.dx.resource.getpointer.p2.tdx.CBuffer_s___cblayout_cst(target("dx.CBuffer", %__cblayout_c) %c.cb, i32 0)
+ %4 = load i32, ptr addrspace(2) %3, align 4
+ %loadedv.i = trunc nuw i32 %4 to i1
+ %5 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %0, i32 %2)
+ %6 = load i32, ptr %5, align 4
+ br i1 %loadedv.i, label %if.then.i, label %if.else.i
+
+if.then.i:
+ %hlsl.wave.active.sum.i = tail call i32 @llvm.dx.wave.reduce.sum.i32(i32 %6)
+ %7 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %1, i32 %2)
+ store i32 %hlsl.wave.active.sum.i, ptr %7, align 4
+ br label %_Z4mainj.exit
+
+if.else.i:
+ %8 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %1, i32 0)
+ store i32 %6, ptr %8, align 4
+ %.pre = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %1, i32 %2)
+ br label %_Z4mainj.exit
+
+_Z4mainj.exit:
+; CHECK: _Z4mainj.exit:
+; CHECK-NEXT: %[[TID:.*]] = phi i32 [ %2, %if.then.i ], [ %2, %if.else.i ]
+; CHECK-NEXT: %[[HANDLE_IDX:.*]] = phi i32 [ 0, %if.then.i ], [ 0, %if.else.i ]
+; CHECK-NEXT: %[[HANDLE:.*]] = tail call target("dx.RawBuffer", i32, 1, 0)
+; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 1, i32 1, i32 %[[HANDLE_IDX]], ptr nonnull @.str.2)
+; CHECK: %[[WAVE_SUM:.*]] = tail call i32 @llvm.dx.wave.reduce.sum.i32(i32 {{.*}})
+; CHECK-NEXT: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i32_1_0t.i32(
+; CHECK-SAME: target("dx.RawBuffer", i32, 1, 0) %[[HANDLE]], i32 %[[TID]], i32 0, i32 %[[WAVE_SUM]])
+; CHECK-NEXT: ret void
+ %.pre-phi1 = phi ptr [ %7, %if.then.i ], [ %.pre, %if.else.i ]
+ %9 = load i32, ptr %5, align 4
+ %hlsl.wave.active.sum5.i = tail call i32 @llvm.dx.wave.reduce.sum.i32(i32 %9)
+ store i32 %hlsl.wave.active.sum5.i, ptr %.pre-phi1, align 4
+ ret void
+}
+
+; Using a local a...
[truncated]
|
joaosaffran
left a comment
There was a problem hiding this comment.
Left some questions and suggestions to help improve readability. I am not 100% with the problem edge cases, the logic looks good to me.
| hlsl::Binding B = getBinding(Handles[0], DRTM); | ||
| for (unsigned I = 1; I < NumHandles; I++) | ||
| if (B != getBinding(Handles[I], DRTM)) | ||
| return false; // No-legalization is possible |
There was a problem hiding this comment.
nit: Since the function is named haveCommonBinding, I was expecting a comparison across all handles (e.g., pairwise). Here, all handles are only compared against Handles[0].
Is there an assumption that guarantees that checking Handles[0] checks all bindings? Or is the intent simply to check whether all handles match the first one?
There was a problem hiding this comment.
Should be named haveSameBinding if we are simply checking if all the handles have the same binding.
There was a problem hiding this comment.
I inlined the function so that it is read with a context that makes more sense for the check
| hlsl::Binding B = getBinding(Handles[0], DRTM); | ||
| for (unsigned I = 1; I < NumHandles; I++) | ||
| if (B != getBinding(Handles[I], DRTM)) | ||
| return false; // No-legalization is possible |
There was a problem hiding this comment.
Should be named haveSameBinding if we are simply checking if all the handles have the same binding.
9d774fb to
eeb9236
Compare
|
|
||
| static hlsl::Binding getHandleIntrinsicBinding(IntrinsicInst *Handle, | ||
| DXILResourceTypeMap &DRTM) { | ||
| assert(llvm::is_contained(HandleIntrins, Handle->getIntrinsicID())); |
There was a problem hiding this comment.
Can you have this assert print something. It would be particularly more useful if the intrinsic was included in the assert message so you know what part of ir to look at.
There was a problem hiding this comment.
I am not quite sure what you mean. Do you mean that in the case this statement is false, I should dump some information related to what Handle is?
The intent is that this should only be called on the elements from collectUsedHandles as we have guaranteed that these are handles.
| unsigned NumEdges = Phi->getNumIncomingValues(); | ||
| assert(NumEdges != 0 && "Malformed Phi Node"); |
There was a problem hiding this comment.
this assert is because of how we defined token like types? should we add a comment to that effect?
There was a problem hiding this comment.
This isn't specific to TokenLikeTypes, we can expect any phi-node at this stage to be complete and have at least 1 predecessor.
| // | ||
| // - GetPtrIdx is the index of dx.resource.getpointer | ||
| // - HandleIdx is the index of dx.resource.handlefrom.* | ||
| static std::pair<Value *, Value *> |
There was a problem hiding this comment.
Would this be better as a struct so you can have named atributes instead of first\second?
|
This change is doing two somewhat unrelated things - dealing with In particular, I'm a bit uncomfortable with the hoops we're going through to undo GVN and instcombine here - I'd rather we try to avoid unhelpful transformations in the first place. For example, in your examples in https://godbolt.org/z/bhhqde77c cases F and G seem to be avoided by simply marking |
Is it really that different than what we were doing before? Yes we are undoing some of the GVN transformations, but we are using the same |
This change replaces the propagation of handles (or corresponding ptr) through control flow with an index into the unique global resource it is accessing.
This legalizes code-generation of handle selection and various optimizations on the created ptr into the resource, as illustrated here.
This specifically resolves all indexing into a global resource array, but also undoes the sink of a handle/ptr in the GVN pass.
By handling GVN, this resolves #165288.
Note: This does not handle the cases when a local resource appears to be assigned into by different global resources. This is to be handled by the resolution of #179303.