Revert "[AMDGPU][True16][CodeGen] true16 codegen pattern for fma (#122950)" #127175
Merged
broxigarchen merged 1 commit into llvm:main on Feb 14, 2025
Merged
Revert "[AMDGPU][True16][CodeGen] true16 codegen pattern for fma (#122950)" #127175 (broxigarchen merged 1 commit into llvm:main)
broxigarchen merged 1 commit into llvm:main
Conversation
Member
|
@llvm/pr-subscribers-backend-amdgpu. Author: Brox Chen (broxigarchen). Changes: Revert "[AMDGPU][True16][CodeGen] true16 codegen pattern for fma (#122950)". This reverts commit 2a7487c. The patch is 65.39 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/127175.diff. 10 files affected:
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 9cc74a7acd8ae..d8f3f9c54abc1 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -198,8 +198,6 @@ static unsigned macToMad(unsigned Opc) {
return AMDGPU::V_FMA_F32_e64;
case AMDGPU::V_FMAC_F16_e64:
return AMDGPU::V_FMA_F16_gfx9_e64;
- case AMDGPU::V_FMAC_F16_t16_e64:
- return AMDGPU::V_FMA_F16_gfx9_t16_e64;
case AMDGPU::V_FMAC_F16_fake16_e64:
return AMDGPU::V_FMA_F16_gfx9_fake16_e64;
case AMDGPU::V_FMAC_LEGACY_F32_e64:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0a01ee1dc3a71..baacb5d3d5455 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3544,7 +3544,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
- Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64) {
// Don't fold if we are using source or output modifiers. The new VOP2
// instructions don't have them.
@@ -3565,7 +3564,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
bool IsFMA =
Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
- Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64;
MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
@@ -3599,19 +3597,16 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned NewOpc =
IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
- : ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
- ? AMDGPU::V_FMAMK_F16_t16
- : AMDGPU::V_FMAMK_F16_fake16
+ : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
: AMDGPU::V_FMAMK_F16)
: (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);
if (pseudoToMCOpcode(NewOpc) == -1)
return false;
- // V_FMAMK_F16_t16 takes VGPR_16_Lo128 operands while V_FMAMK_F16_fake16
- // takes VGPR_32_Lo128 operands, so the rewrite would also require
- // restricting their register classes. For now just bail out.
- if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
- NewOpc == AMDGPU::V_FMAMK_F16_fake16)
+ // V_FMAMK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite
+ // would also require restricting their register classes. For now
+ // just bail out.
+ if (NewOpc == AMDGPU::V_FMAMK_F16_fake16)
return false;
const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);
@@ -3626,7 +3621,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Src0->setIsKill(RegSrc->isKill());
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
- Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
+ Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
UseMI.untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
@@ -3681,26 +3676,23 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned NewOpc =
IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
- : ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
- ? AMDGPU::V_FMAAK_F16_t16
- : AMDGPU::V_FMAAK_F16_fake16
+ : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
: AMDGPU::V_FMAAK_F16)
: (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);
if (pseudoToMCOpcode(NewOpc) == -1)
return false;
- // V_FMAAK_F16_t16 takes VGPR_16_Lo128 operands while V_FMAAK_F16_fake16
- // takes VGPR_32_Lo128 operands, so the rewrite would also require
- // restricting their register classes. For now just bail out.
- if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
- NewOpc == AMDGPU::V_FMAAK_F16_fake16)
+ // V_FMAAK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite
+ // would also require restricting their register classes. For now
+ // just bail out.
+ if (NewOpc == AMDGPU::V_FMAAK_F16_fake16)
return false;
// FIXME: This would be a lot easier if we could return a new instruction
// instead of having to modify in place.
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
- Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
+ Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
UseMI.untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
@@ -3887,11 +3879,8 @@ static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc) {
return AMDGPU::V_FMA_LEGACY_F32_e64;
case AMDGPU::V_FMAC_F16_e32:
case AMDGPU::V_FMAC_F16_e64:
- case AMDGPU::V_FMAC_F16_t16_e64:
case AMDGPU::V_FMAC_F16_fake16_e64:
- return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
- ? AMDGPU::V_FMA_F16_gfx9_t16_e64
- : AMDGPU::V_FMA_F16_gfx9_fake16_e64
+ return ST.hasTrue16BitInsts() ? AMDGPU::V_FMA_F16_gfx9_fake16_e64
: AMDGPU::V_FMA_F16_gfx9_e64;
case AMDGPU::V_FMAC_F32_e32:
case AMDGPU::V_FMAC_F32_e64:
@@ -3957,22 +3946,19 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
return MIB;
}
- assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
- Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
- "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "
- "present "
- "pre-RA");
+ assert(
+ Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
+ "V_FMAC_F16_fake16_e32 is not supported and not expected to be present "
+ "pre-RA");
// Handle MAC/FMAC.
bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
- Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64;
bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
- Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
@@ -3987,7 +3973,6 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
return nullptr;
case AMDGPU::V_MAC_F16_e64:
case AMDGPU::V_FMAC_F16_e64:
- case AMDGPU::V_FMAC_F16_t16_e64:
case AMDGPU::V_FMAC_F16_fake16_e64:
case AMDGPU::V_MAC_F32_e64:
case AMDGPU::V_MAC_LEGACY_F32_e64:
@@ -4073,11 +4058,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
int64_t Imm;
if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) {
unsigned NewOpc =
- IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts()
- ? ST.useRealTrue16Insts()
- ? AMDGPU::V_FMAAK_F16_t16
- : AMDGPU::V_FMAAK_F16_fake16
- : AMDGPU::V_FMAAK_F16)
+ IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
+ : AMDGPU::V_FMAAK_F16)
: AMDGPU::V_FMAAK_F32)
: (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
if (pseudoToMCOpcode(NewOpc) != -1) {
@@ -4094,14 +4076,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
return MIB;
}
}
- unsigned NewOpc = IsFMA
- ? (IsF16 ? (ST.hasTrue16BitInsts()
- ? ST.useRealTrue16Insts()
- ? AMDGPU::V_FMAMK_F16_t16
- : AMDGPU::V_FMAMK_F16_fake16
- : AMDGPU::V_FMAMK_F16)
- : AMDGPU::V_FMAMK_F32)
- : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
+ unsigned NewOpc =
+ IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
+ : AMDGPU::V_FMAMK_F16)
+ : AMDGPU::V_FMAMK_F32)
+ : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
if (!Src0Literal && getFoldableImm(Src1, Imm, &DefMI)) {
if (pseudoToMCOpcode(NewOpc) != -1) {
MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
@@ -4547,7 +4526,6 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
case AMDGPU::V_MAC_F32_e64:
case AMDGPU::V_MAC_LEGACY_F32_e64:
case AMDGPU::V_FMAC_F16_e64:
- case AMDGPU::V_FMAC_F16_t16_e64:
case AMDGPU::V_FMAC_F16_fake16_e64:
case AMDGPU::V_FMAC_F32_e64:
case AMDGPU::V_FMAC_F64_e64:
@@ -5604,9 +5582,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
- case AMDGPU::S_FMAC_F16:
- return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
- : AMDGPU::V_FMAC_F16_fake16_e64;
+ case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_fake16_e64;
case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 3faf0795157dc..6e08aff24ec23 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3287,14 +3287,6 @@ def : GCNPat <
(V_FMAC_F16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
SRCMODS.NONE, $src2)
>;
-let True16Predicate = UseRealTrue16Insts in
-def : GCNPat <
- (fma (f16 (VOP3NoMods f16:$src0)),
- (f16 (VOP3NoMods f16:$src1)),
- (f16 (VOP3NoMods f16:$src2))),
- (V_FMAC_F16_t16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
- SRCMODS.NONE, $src2)
->;
let True16Predicate = UseFakeTrue16Insts in
def : GCNPat <
(fma (f16 (VOP3NoMods f16:$src0)),
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index f03cde455f295..979812e07fc3f 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -455,13 +455,9 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
break;
case AMDGPU::V_FMA_F16_e64:
case AMDGPU::V_FMA_F16_gfx9_e64:
- NewOpcode = AMDGPU::V_FMAAK_F16;
- break;
- case AMDGPU::V_FMA_F16_gfx9_t16_e64:
- NewOpcode = AMDGPU::V_FMAAK_F16_t16;
- break;
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
- NewOpcode = AMDGPU::V_FMAAK_F16_fake16;
+ NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
+ : AMDGPU::V_FMAAK_F16;
break;
}
}
@@ -489,13 +485,9 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
break;
case AMDGPU::V_FMA_F16_e64:
case AMDGPU::V_FMA_F16_gfx9_e64:
- NewOpcode = AMDGPU::V_FMAMK_F16;
- break;
- case AMDGPU::V_FMA_F16_gfx9_t16_e64:
- NewOpcode = AMDGPU::V_FMAMK_F16_t16;
- break;
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
- NewOpcode = AMDGPU::V_FMAMK_F16_fake16;
+ NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
+ : AMDGPU::V_FMAMK_F16;
break;
}
}
@@ -967,7 +959,6 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
- MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_t16_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) {
shrinkMadFma(MI);
continue;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
index 0b09cabf25a16..99e6c5d06a0e1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
@@ -3,8 +3,7 @@
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
define float @v_fma_f32(float %x, float %y, float %z) {
; GFX6-LABEL: v_fma_f32:
@@ -108,18 +107,11 @@ define half @v_fma_f16(half %x, half %y, half %z) {
; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-TRUE16-LABEL: v_fma_f16:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_fmac_f16_e32 v2.l, v0.l, v1.l
-; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v2
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_fma_f16:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_fma_f16 v0, v0, v1, v2
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_fma_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%fma = call half @llvm.fma.f16(half %x, half %y, half %z)
ret half %fma
}
@@ -153,17 +145,11 @@ define half @v_fma_f16_fneg_lhs(half %x, half %y, half %z) {
; GFX10-NEXT: v_fma_f16 v0, -v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-TRUE16-LABEL: v_fma_f16_fneg_lhs:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, -v0.l, v1.l, v2.l
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_fma_f16_fneg_lhs:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_fma_f16 v0, -v0, v1, v2
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_fma_f16_fneg_lhs:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f16 v0, -v0, v1, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg half %x
%fma = call half @llvm.fma.f16(half %neg.x, half %y, half %z)
ret half %fma
@@ -198,17 +184,11 @@ define half @v_fma_f16_fneg_rhs(half %x, half %y, half %z) {
; GFX10-NEXT: v_fma_f16 v0, v0, -v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-TRUE16-LABEL: v_fma_f16_fneg_rhs:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, -v1.l, v2.l
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_fma_f16_fneg_rhs:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_fma_f16 v0, v0, -v1, v2
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_fma_f16_fneg_rhs:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f16 v0, v0, -v1, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.y = fneg half %y
%fma = call half @llvm.fma.f16(half %x, half %neg.y, half %z)
ret half %fma
@@ -243,17 +223,11 @@ define half @v_fma_f16_fneg_add(half %x, half %y, half %z) {
; GFX10-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-TRUE16-LABEL: v_fma_f16_fneg_add:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v1.l, -v2.l
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_fma_f16_fneg_add:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_fma_f16 v0, v0, v1, -v2
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_fma_f16_fneg_add:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f16 v0, v0, v1, -v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.z = fneg half %z
%fma = call half @llvm.fma.f16(half %x, half %y, half %neg.z)
ret half %fma
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir
index 23e4b80b61f69..ac7944f25fe37 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
-# FIXME-TRUE16. reenable after fix-sgpr-copies is fixed for true16 flow
-# XUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,REAL16 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,REAL16 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,FAKE16 %s
---
diff --git a/llvm/test/CodeGen/AMDGPU/fma.f16.ll b/llvm/test/CodeGen/AMDGPU/fma.f16.ll
index a33fd03e0ce03..52a23690dcf53 100644
--- a/llvm/test/CodeGen/AMDGPU/fma.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma.f16.ll
@@ -3,10 +3,8 @@
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX9,GFX9-GISEL
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX10,GFX10-SDAG
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11-SDAG-TRUE16
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11-SDAG-FAKE16
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11-GISEL-TRUE16
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11-GISEL-FAKE16
+; RUN: l...
[truncated]
|
Member
|
@llvm/pr-subscribers-llvm-globalisel. Author: Brox Chen (broxigarchen). Changes: Revert "[AMDGPU][True16][CodeGen] true16 codegen pattern for fma (#122950)". This reverts commit 2a7487c. The patch is 65.39 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/127175.diff. 10 files affected:
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 9cc74a7acd8ae..d8f3f9c54abc1 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -198,8 +198,6 @@ static unsigned macToMad(unsigned Opc) {
return AMDGPU::V_FMA_F32_e64;
case AMDGPU::V_FMAC_F16_e64:
return AMDGPU::V_FMA_F16_gfx9_e64;
- case AMDGPU::V_FMAC_F16_t16_e64:
- return AMDGPU::V_FMA_F16_gfx9_t16_e64;
case AMDGPU::V_FMAC_F16_fake16_e64:
return AMDGPU::V_FMA_F16_gfx9_fake16_e64;
case AMDGPU::V_FMAC_LEGACY_F32_e64:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0a01ee1dc3a71..baacb5d3d5455 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3544,7 +3544,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
- Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64) {
// Don't fold if we are using source or output modifiers. The new VOP2
// instructions don't have them.
@@ -3565,7 +3564,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
bool IsFMA =
Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
- Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64;
MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
@@ -3599,19 +3597,16 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned NewOpc =
IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
- : ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
- ? AMDGPU::V_FMAMK_F16_t16
- : AMDGPU::V_FMAMK_F16_fake16
+ : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
: AMDGPU::V_FMAMK_F16)
: (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);
if (pseudoToMCOpcode(NewOpc) == -1)
return false;
- // V_FMAMK_F16_t16 takes VGPR_16_Lo128 operands while V_FMAMK_F16_fake16
- // takes VGPR_32_Lo128 operands, so the rewrite would also require
- // restricting their register classes. For now just bail out.
- if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
- NewOpc == AMDGPU::V_FMAMK_F16_fake16)
+ // V_FMAMK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite
+ // would also require restricting their register classes. For now
+ // just bail out.
+ if (NewOpc == AMDGPU::V_FMAMK_F16_fake16)
return false;
const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);
@@ -3626,7 +3621,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Src0->setIsKill(RegSrc->isKill());
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
- Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
+ Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
UseMI.untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
@@ -3681,26 +3676,23 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned NewOpc =
IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
- : ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
- ? AMDGPU::V_FMAAK_F16_t16
- : AMDGPU::V_FMAAK_F16_fake16
+ : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
: AMDGPU::V_FMAAK_F16)
: (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);
if (pseudoToMCOpcode(NewOpc) == -1)
return false;
- // V_FMAAK_F16_t16 takes VGPR_16_Lo128 operands while V_FMAAK_F16_fake16
- // takes VGPR_32_Lo128 operands, so the rewrite would also require
- // restricting their register classes. For now just bail out.
- if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
- NewOpc == AMDGPU::V_FMAAK_F16_fake16)
+ // V_FMAAK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite
+ // would also require restricting their register classes. For now
+ // just bail out.
+ if (NewOpc == AMDGPU::V_FMAAK_F16_fake16)
return false;
// FIXME: This would be a lot easier if we could return a new instruction
// instead of having to modify in place.
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
- Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
+ Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
UseMI.untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
@@ -3887,11 +3879,8 @@ static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc) {
return AMDGPU::V_FMA_LEGACY_F32_e64;
case AMDGPU::V_FMAC_F16_e32:
case AMDGPU::V_FMAC_F16_e64:
- case AMDGPU::V_FMAC_F16_t16_e64:
case AMDGPU::V_FMAC_F16_fake16_e64:
- return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
- ? AMDGPU::V_FMA_F16_gfx9_t16_e64
- : AMDGPU::V_FMA_F16_gfx9_fake16_e64
+ return ST.hasTrue16BitInsts() ? AMDGPU::V_FMA_F16_gfx9_fake16_e64
: AMDGPU::V_FMA_F16_gfx9_e64;
case AMDGPU::V_FMAC_F32_e32:
case AMDGPU::V_FMAC_F32_e64:
@@ -3957,22 +3946,19 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
return MIB;
}
- assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
- Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
- "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "
- "present "
- "pre-RA");
+ assert(
+ Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
+ "V_FMAC_F16_fake16_e32 is not supported and not expected to be present "
+ "pre-RA");
// Handle MAC/FMAC.
bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
- Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64;
bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
- Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
@@ -3987,7 +3973,6 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
return nullptr;
case AMDGPU::V_MAC_F16_e64:
case AMDGPU::V_FMAC_F16_e64:
- case AMDGPU::V_FMAC_F16_t16_e64:
case AMDGPU::V_FMAC_F16_fake16_e64:
case AMDGPU::V_MAC_F32_e64:
case AMDGPU::V_MAC_LEGACY_F32_e64:
@@ -4073,11 +4058,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
int64_t Imm;
if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) {
unsigned NewOpc =
- IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts()
- ? ST.useRealTrue16Insts()
- ? AMDGPU::V_FMAAK_F16_t16
- : AMDGPU::V_FMAAK_F16_fake16
- : AMDGPU::V_FMAAK_F16)
+ IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
+ : AMDGPU::V_FMAAK_F16)
: AMDGPU::V_FMAAK_F32)
: (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
if (pseudoToMCOpcode(NewOpc) != -1) {
@@ -4094,14 +4076,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
return MIB;
}
}
- unsigned NewOpc = IsFMA
- ? (IsF16 ? (ST.hasTrue16BitInsts()
- ? ST.useRealTrue16Insts()
- ? AMDGPU::V_FMAMK_F16_t16
- : AMDGPU::V_FMAMK_F16_fake16
- : AMDGPU::V_FMAMK_F16)
- : AMDGPU::V_FMAMK_F32)
- : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
+ unsigned NewOpc =
+ IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
+ : AMDGPU::V_FMAMK_F16)
+ : AMDGPU::V_FMAMK_F32)
+ : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
if (!Src0Literal && getFoldableImm(Src1, Imm, &DefMI)) {
if (pseudoToMCOpcode(NewOpc) != -1) {
MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
@@ -4547,7 +4526,6 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
case AMDGPU::V_MAC_F32_e64:
case AMDGPU::V_MAC_LEGACY_F32_e64:
case AMDGPU::V_FMAC_F16_e64:
- case AMDGPU::V_FMAC_F16_t16_e64:
case AMDGPU::V_FMAC_F16_fake16_e64:
case AMDGPU::V_FMAC_F32_e64:
case AMDGPU::V_FMAC_F64_e64:
@@ -5604,9 +5582,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
- case AMDGPU::S_FMAC_F16:
- return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
- : AMDGPU::V_FMAC_F16_fake16_e64;
+ case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_fake16_e64;
case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 3faf0795157dc..6e08aff24ec23 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3287,14 +3287,6 @@ def : GCNPat <
(V_FMAC_F16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
SRCMODS.NONE, $src2)
>;
-let True16Predicate = UseRealTrue16Insts in
-def : GCNPat <
- (fma (f16 (VOP3NoMods f16:$src0)),
- (f16 (VOP3NoMods f16:$src1)),
- (f16 (VOP3NoMods f16:$src2))),
- (V_FMAC_F16_t16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
- SRCMODS.NONE, $src2)
->;
let True16Predicate = UseFakeTrue16Insts in
def : GCNPat <
(fma (f16 (VOP3NoMods f16:$src0)),
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index f03cde455f295..979812e07fc3f 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -455,13 +455,9 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
break;
case AMDGPU::V_FMA_F16_e64:
case AMDGPU::V_FMA_F16_gfx9_e64:
- NewOpcode = AMDGPU::V_FMAAK_F16;
- break;
- case AMDGPU::V_FMA_F16_gfx9_t16_e64:
- NewOpcode = AMDGPU::V_FMAAK_F16_t16;
- break;
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
- NewOpcode = AMDGPU::V_FMAAK_F16_fake16;
+ NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
+ : AMDGPU::V_FMAAK_F16;
break;
}
}
@@ -489,13 +485,9 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
break;
case AMDGPU::V_FMA_F16_e64:
case AMDGPU::V_FMA_F16_gfx9_e64:
- NewOpcode = AMDGPU::V_FMAMK_F16;
- break;
- case AMDGPU::V_FMA_F16_gfx9_t16_e64:
- NewOpcode = AMDGPU::V_FMAMK_F16_t16;
- break;
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
- NewOpcode = AMDGPU::V_FMAMK_F16_fake16;
+ NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
+ : AMDGPU::V_FMAMK_F16;
break;
}
}
@@ -967,7 +959,6 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
- MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_t16_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) {
shrinkMadFma(MI);
continue;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
index 0b09cabf25a16..99e6c5d06a0e1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
@@ -3,8 +3,7 @@
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
define float @v_fma_f32(float %x, float %y, float %z) {
; GFX6-LABEL: v_fma_f32:
@@ -108,18 +107,11 @@ define half @v_fma_f16(half %x, half %y, half %z) {
; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-TRUE16-LABEL: v_fma_f16:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_fmac_f16_e32 v2.l, v0.l, v1.l
-; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v2
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_fma_f16:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_fma_f16 v0, v0, v1, v2
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_fma_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%fma = call half @llvm.fma.f16(half %x, half %y, half %z)
ret half %fma
}
@@ -153,17 +145,11 @@ define half @v_fma_f16_fneg_lhs(half %x, half %y, half %z) {
; GFX10-NEXT: v_fma_f16 v0, -v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-TRUE16-LABEL: v_fma_f16_fneg_lhs:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, -v0.l, v1.l, v2.l
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_fma_f16_fneg_lhs:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_fma_f16 v0, -v0, v1, v2
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_fma_f16_fneg_lhs:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f16 v0, -v0, v1, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg half %x
%fma = call half @llvm.fma.f16(half %neg.x, half %y, half %z)
ret half %fma
@@ -198,17 +184,11 @@ define half @v_fma_f16_fneg_rhs(half %x, half %y, half %z) {
; GFX10-NEXT: v_fma_f16 v0, v0, -v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-TRUE16-LABEL: v_fma_f16_fneg_rhs:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, -v1.l, v2.l
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_fma_f16_fneg_rhs:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_fma_f16 v0, v0, -v1, v2
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_fma_f16_fneg_rhs:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f16 v0, v0, -v1, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.y = fneg half %y
%fma = call half @llvm.fma.f16(half %x, half %neg.y, half %z)
ret half %fma
@@ -243,17 +223,11 @@ define half @v_fma_f16_fneg_add(half %x, half %y, half %z) {
; GFX10-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-TRUE16-LABEL: v_fma_f16_fneg_add:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v1.l, -v2.l
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_fma_f16_fneg_add:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_fma_f16 v0, v0, v1, -v2
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_fma_f16_fneg_add:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_fma_f16 v0, v0, v1, -v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.z = fneg half %z
%fma = call half @llvm.fma.f16(half %x, half %y, half %neg.z)
ret half %fma
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir
index 23e4b80b61f69..ac7944f25fe37 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
-# FIXME-TRUE16. reenable after fix-sgpr-copies is fixed for true16 flow
-# XUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,REAL16 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,REAL16 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,FAKE16 %s
---
diff --git a/llvm/test/CodeGen/AMDGPU/fma.f16.ll b/llvm/test/CodeGen/AMDGPU/fma.f16.ll
index a33fd03e0ce03..52a23690dcf53 100644
--- a/llvm/test/CodeGen/AMDGPU/fma.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma.f16.ll
@@ -3,10 +3,8 @@
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX9,GFX9-GISEL
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX10,GFX10-SDAG
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11-SDAG-TRUE16
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11-SDAG-FAKE16
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11-GISEL-TRUE16
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11-GISEL-FAKE16
+; RUN: l...
[truncated]
|
joaosaffran
pushed a commit
to joaosaffran/llvm-project
that referenced
this pull request
Feb 14, 2025
…m#12… (llvm#127175) Reverting this patch since it raises a buildbot failure. This reverts commit 2a7487c.
sivan-shani
pushed a commit
to sivan-shani/llvm-project
that referenced
this pull request
Feb 24, 2025
…m#12… (llvm#127175) Reverting this patch since it raises a buildbot failure. This reverts commit 2a7487c.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
Reverting this patch since it raises a buildbot failure.
This reverts commit 2a7487c.