Skip to content

Commit b34136b

Browse files
alsepkowCopilot
andcommitted
Fix GVN and SROA miscompilation of min precision vector element access
Multiple optimization passes mishandle min precision vector types due to DXC's padded data layout (i16:32, f16:32), where getTypeSizeInBits returns padded sizes for vectors but primitive sizes for scalars. Bug 1 - GVN ICE: CanCoerceMustAliasedValueToLoad computes a padded integer type (e.g., i96 for <3 x half>) then attempts to bitcast from the 48-bit LLVM type, triggering an assert. Fix: reject coercion when type sizes include padding. Bug 2 - GVN incorrect store forwarding: processLoad forwards a 'store <3 x i16> zeroinitializer' past partial element stores because MemoryDependence uses padded sizes for aliasing. Fix: skip store-to-load forwarding for padded types. Bug 3 - SROA element misindexing: getNaturalGEPRecursively uses getTypeSizeInBits (2 bytes for i16) for element offsets while GEP uses getTypeAllocSize (4 bytes with i16:32). Byte offset 4 (element 1) maps to index 4/2=2 instead of 4/4=1, causing SROA to misplace or eliminate element stores. Fix: use getTypeAllocSizeInBits consistently for vector element sizes throughout SROA. Fixes microsoft#8268 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent fbc8aed commit b34136b

2 files changed

Lines changed: 35 additions & 4 deletions

File tree

lib/Transforms/Scalar/GVN.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -853,6 +853,20 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
853853
StoredVal->getType()->isArrayTy())
854854
return false;
855855

856+
// HLSL Change Begin - Don't coerce types that have padding in the data
857+
// layout (e.g., min precision types where f16:32 means half is stored in 32
858+
// bits). The coercion creates bitcasts between the LLVM type (based on
859+
// primitive bit width) and an integer type (based on padded store size),
860+
// which will fail when they differ.
861+
Type *StoredValTy = StoredVal->getType();
862+
uint64_t StoredPrimBits = StoredValTy->getPrimitiveSizeInBits();
863+
uint64_t LoadPrimBits = LoadTy->getPrimitiveSizeInBits();
864+
if (StoredPrimBits && DL.getTypeSizeInBits(StoredValTy) != StoredPrimBits)
865+
return false;
866+
if (LoadPrimBits && DL.getTypeSizeInBits(LoadTy) != LoadPrimBits)
867+
return false;
868+
// HLSL Change End
869+
856870
// The store has to be at least as big as the load.
857871
if (DL.getTypeSizeInBits(StoredVal->getType()) <
858872
DL.getTypeSizeInBits(LoadTy))
@@ -1942,6 +1956,17 @@ bool GVN::processLoad(LoadInst *L) {
19421956
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
19431957
Value *StoredVal = DepSI->getValueOperand();
19441958

1959+
// HLSL Change Begin - Don't forward stores of types with data layout
1960+
// padding (e.g., min precision vectors where i16:32/f16:32 means elements
1961+
// are padded to 32 bits). MemoryDependence may incorrectly classify
1962+
// intermediate partial stores as non-clobbering when sizes include padding,
1963+
// leading to incorrect store-to-load forwarding.
1964+
Type *StoredTy = StoredVal->getType();
1965+
uint64_t StoredPrimBits = StoredTy->getPrimitiveSizeInBits();
1966+
if (StoredPrimBits && DL.getTypeSizeInBits(StoredTy) != StoredPrimBits)
1967+
return false;
1968+
// HLSL Change End
1969+
19451970
// The store and load are to a must-aliased pointer, but they may not
19461971
// actually have the same type. See if we know how to reuse the stored
19471972
// value (depending on its type).

lib/Transforms/Scalar/SROA.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1671,7 +1671,11 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
16711671
// extremely poorly defined currently. The long-term goal is to remove GEPing
16721672
// over a vector from the IR completely.
16731673
if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
1674-
unsigned ElementSizeInBits = DL.getTypeSizeInBits(VecTy->getScalarType());
1674+
// HLSL Change: Use alloc size instead of primitive type size for vector
1675+
// elements. DXC's data layout pads min precision types (i16:32, f16:32),
1676+
// so getTypeAllocSize matches the GEP offset stride while
1677+
// getTypeSizeInBits returns the unpadded primitive width.
1678+
unsigned ElementSizeInBits = DL.getTypeAllocSizeInBits(VecTy->getScalarType());
16751679
if (ElementSizeInBits % 8 != 0) {
16761680
// GEPs over non-multiple of 8 size vector elements are invalid.
16771681
return nullptr;
@@ -2134,7 +2138,8 @@ static VectorType *isVectorPromotionViable(AllocaSlices::Partition &P,
21342138

21352139
// Try each vector type, and return the one which works.
21362140
auto CheckVectorTypeForPromotion = [&](VectorType *VTy) {
2137-
uint64_t ElementSize = DL.getTypeSizeInBits(VTy->getElementType());
2141+
// HLSL Change: Use alloc size to match GEP offset stride for padded types.
2142+
uint64_t ElementSize = DL.getTypeAllocSizeInBits(VTy->getElementType());
21382143

21392144
// While the definition of LLVM vectors is bitpacked, we don't support sizes
21402145
// that aren't byte sized.
@@ -2492,12 +2497,13 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
24922497
: nullptr),
24932498
VecTy(PromotableVecTy),
24942499
ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2495-
ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0),
2500+
// HLSL Change: Use alloc size to match GEP offset stride for padded types.
2501+
ElementSize(VecTy ? DL.getTypeAllocSizeInBits(ElementTy) / 8 : 0),
24962502
BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
24972503
OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers),
24982504
IRB(NewAI.getContext(), ConstantFolder()) {
24992505
if (VecTy) {
2500-
assert((DL.getTypeSizeInBits(ElementTy) % 8) == 0 &&
2506+
assert((DL.getTypeAllocSizeInBits(ElementTy) % 8) == 0 &&
25012507
"Only multiple-of-8 sized vector elements are viable");
25022508
++NumVectorized;
25032509
}

0 commit comments

Comments
 (0)