diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index a032dd2092140..c8ce3aab3f303 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -28,20 +28,11 @@ #include "GCNRegPressure.h" #include "SIMachineFunctionInfo.h" #include "Utils/AMDGPUBaseInfo.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CalcSpillWeights.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" -#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/MC/LaneBitmask.h" -#include "llvm/MC/MCInstrItineraries.h" -#include "llvm/MC/MCSchedule.h" -#include "llvm/MC/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" -#include -#include #define DEBUG_TYPE "machine-scheduler" @@ -979,8 +970,6 @@ void GCNScheduleDAGMILive::schedule() { GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const { - if (Regions[RegionIdx].first == Regions[RegionIdx].second) - return llvm::getRegPressure(MRI, LiveIns[RegionIdx]); GCNDownwardRPTracker RPTracker(*LIS); RPTracker.advance(Regions[RegionIdx].first, Regions[RegionIdx].second, &LiveIns[RegionIdx]); @@ -1283,222 +1272,33 @@ bool ClusteredLowOccStage::initGCNSchedStage() { #define REMAT_PREFIX "[PreRARemat] " #define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;) -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -Printable PreRARematStage::ScoredRemat::print() const { - return Printable([&](raw_ostream &OS) { - OS << '(' << MaxFreq << ", " << FreqDiff << ", " << RegionImpact << ')'; - }); -} -#endif - bool PreRARematStage::initGCNSchedStage() { // FIXME: This pass will invalidate cached BBLiveInMap and MBBLiveIns for // regions inbetween the defs and region we sinked the def to. Will need to be // fixed if there is another pass after this pass. 
assert(!S.hasNextStage()); - if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() <= 1) + if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() == 1) return false; - // Maps all MIs (except lone terminators, which are not part of any region) to - // their parent region. Non-lone terminators are considered part of the region - // they delimitate. - DenseMap MIRegion(MF.getInstructionCount()); - // Before performing any IR modification record the parent region of each MI // and the parent MBB of each region. const unsigned NumRegions = DAG.Regions.size(); + RegionBB.reserve(NumRegions); for (unsigned I = 0; I < NumRegions; ++I) { RegionBoundaries Region = DAG.Regions[I]; for (auto MI = Region.first; MI != Region.second; ++MI) MIRegion.insert({&*MI, I}); - MachineBasicBlock *ParentMBB = Region.first->getParent(); - if (Region.second != ParentMBB->end()) - MIRegion.insert({&*Region.second, I}); - RegionBB.push_back(ParentMBB); - } - -#ifndef NDEBUG - auto PrintTargetRegions = [&]() -> void { - if (TargetRegions.none()) { - dbgs() << REMAT_PREFIX << "No target regions\n"; - return; - } - dbgs() << REMAT_PREFIX << "Target regions:\n"; - for (unsigned I : TargetRegions.set_bits()) - dbgs() << REMAT_PREFIX << " [" << I << "] " << RPTargets[I] << '\n'; - }; - auto PrintRematReg = [&](const RematReg &Remat) -> Printable { - return Printable([&, Remat](raw_ostream &OS) { - // Concatenate all region numbers in which the register is unused and - // live-through. 
- bool HasLiveThroughRegion = false; - OS << '[' << Remat.DefRegion << " -"; - for (unsigned I = 0; I < NumRegions; ++I) { - if (Remat.isUnusedLiveThrough(I)) { - if (HasLiveThroughRegion) { - OS << ','; - } else { - OS << "- "; - HasLiveThroughRegion = true; - } - OS << I; - } - } - if (HasLiveThroughRegion) - OS << " -"; - OS << "-> " << Remat.UseRegion << "] "; - Remat.DefMI->print(OS, /*IsStandalone=*/true, /*SkipOpers=*/false, - /*SkipDebugLoc=*/false, /*AddNewLine=*/false); - }); - }; -#endif - - // Set an objective for the stage based on current RP in each region. - REMAT_DEBUG({ - dbgs() << "Analyzing "; - MF.getFunction().printAsOperand(dbgs(), false); - dbgs() << ": "; - }); - if (!setObjective()) { - LLVM_DEBUG(dbgs() << "no objective to achieve, occupancy is maximal at " - << MFI.getMaxWavesPerEU() << '\n'); - return false; + RegionBB.push_back(Region.first->getParent()); } - LLVM_DEBUG({ - if (TargetOcc) { - dbgs() << "increase occupancy from " << *TargetOcc - 1 << '\n'; - } else { - dbgs() << "reduce spilling (minimum target occupancy is " - << MFI.getMinWavesPerEU() << ")\n"; - } - PrintTargetRegions(); - }); - - if (!collectRematRegs(MIRegion)) { - REMAT_DEBUG(dbgs() << "No rematerializable registers\n"); - return false; - } - const ScoredRemat::FreqInfo FreqInfo(MF, DAG); - REMAT_DEBUG({ - dbgs() << "Rematerializable registers:\n"; - for (const RematReg &Remat : RematRegs) - dbgs() << REMAT_PREFIX << " " << PrintRematReg(Remat) << '\n'; - dbgs() << REMAT_PREFIX << "Region frequencies\n"; - for (auto [I, Freq] : enumerate(FreqInfo.Regions)) { - dbgs() << REMAT_PREFIX << " [" << I << "] "; - if (Freq) - dbgs() << Freq; - else - dbgs() << "unknown "; - dbgs() << " | " << *DAG.Regions[I].first; - } - }); - SmallVector ScoredRemats; - for (RematReg &Remat : RematRegs) - ScoredRemats.emplace_back(&Remat, FreqInfo, DAG); - -// Rematerialize registers in successive rounds until all RP targets are -// satisifed or until we run out of rematerialization 
candidates. -#ifndef NDEBUG - unsigned RoundNum = 0; -#endif - BitVector RecomputeRP(NumRegions); - do { - assert(!ScoredRemats.empty() && "no more remat candidates"); - - // (Re-)Score and (re-)sort all remats in increasing score order. - for (ScoredRemat &Remat : ScoredRemats) - Remat.update(TargetRegions, RPTargets, FreqInfo, !TargetOcc); - sort(ScoredRemats); - - REMAT_DEBUG({ - dbgs() << "==== ROUND " << RoundNum++ << " ====\n" - << REMAT_PREFIX - << "Candidates with non-null score, in rematerialization order:\n"; - for (const ScoredRemat &RematDecision : reverse(ScoredRemats)) { - if (RematDecision.hasNullScore()) - break; - dbgs() << REMAT_PREFIX << " " << RematDecision.print() << " | " - << *RematDecision.Remat->DefMI; - } - PrintTargetRegions(); - }); - - RecomputeRP.reset(); - unsigned RematIdx = ScoredRemats.size(); - - // Rematerialize registers in decreasing score order until we estimate - // that all RP targets are satisfied or until rematerialization candidates - // are no longer useful to decrease RP. - for (; RematIdx && TargetRegions.any(); --RematIdx) { - const ScoredRemat &Candidate = ScoredRemats[RematIdx - 1]; - // Stop rematerializing on encountering a null score. Since scores - // monotonically decrease as we rematerialize, we know there is nothing - // useful left to do in such cases, even if we were to re-score. - if (Candidate.hasNullScore()) { - RematIdx = 0; - break; - } - - const RematReg &Remat = *Candidate.Remat; - // When previous rematerializations in this round have already satisfied - // RP targets in all regions this rematerialization can impact, we have a - // good indication that our scores have diverged significantly from - // reality, in which case we interrupt this round and re-score. This also - // ensures that every rematerialization we perform is possibly impactful - // in at least one target region. 
- if (!Remat.maybeBeneficial(TargetRegions, RPTargets)) - break; - - REMAT_DEBUG(dbgs() << "** REMAT " << PrintRematReg(Remat) << '\n';); - // Every rematerialization we do here is likely to move the instruction - // into a higher frequency region, increasing the total sum latency of the - // instruction itself. This is acceptable if we are eliminating a spill in - // the process, but when the goal is increasing occupancy we get nothing - // out of rematerialization if occupancy is not increased in the end; in - // such cases we want to roll back the rematerialization. - RollbackInfo *Rollback = - TargetOcc ? &Rollbacks.emplace_back(&Remat) : nullptr; - rematerialize(Remat, RecomputeRP, Rollback); - unsetSatisifedRPTargets(Remat.Live); - } - - REMAT_DEBUG({ - if (!TargetRegions.any()) { - dbgs() << "** Interrupt round on all targets achieved\n"; - } else if (RematIdx) { - dbgs() << "** Interrupt round on stale score for " - << *ScoredRemats[RematIdx - 1].Remat->DefMI; - } else { - dbgs() << "** Stop on exhausted rematerialization candidates\n"; - } - }); - - // Peel off registers we already rematerialized from the vector's tail. - ScoredRemats.truncate(RematIdx); - } while ((updateAndVerifyRPTargets(RecomputeRP) || TargetRegions.any()) && - !ScoredRemats.empty()); - if (RescheduleRegions.none()) + if (!canIncreaseOccupancyOrReduceSpill()) return false; - // Commit all pressure changes to the DAG and compute minimum achieved - // occupancy in impacted regions. 
- REMAT_DEBUG(dbgs() << "==== REMAT RESULTS ====\n"); - unsigned DynamicVGPRBlockSize = MFI.getDynamicVGPRBlockSize(); - for (unsigned I : RescheduleRegions.set_bits()) { - DAG.Pressure[I] = RPTargets[I].getCurrentRP(); - REMAT_DEBUG(dbgs() << '[' << I << "] Achieved occupancy " - << DAG.Pressure[I].getOccupancy(ST, DynamicVGPRBlockSize) - << " (" << RPTargets[I] << ")\n"); - } - AchievedOcc = MFI.getMaxWavesPerEU(); - for (const GCNRegPressure &RP : DAG.Pressure) { - AchievedOcc = - std::min(AchievedOcc, RP.getOccupancy(ST, DynamicVGPRBlockSize)); - } - + // Rematerialize identified instructions and update scheduler's state. + rematerialize(); + if (GCNTrackers) + DAG.RegionLiveOuts.buildLiveRegMap(); REMAT_DEBUG({ dbgs() << "Retrying function scheduling with new min. occupancy of " << AchievedOcc << " from rematerializing (original was " @@ -1507,6 +1307,7 @@ bool PreRARematStage::initGCNSchedStage() { dbgs() << ", target was " << *TargetOcc; dbgs() << ")\n"; }); + if (AchievedOcc > DAG.MinOccupancy) { DAG.MinOccupancy = AchievedOcc; SIMachineFunctionInfo &MFI = *MF.getInfo(); @@ -1540,10 +1341,6 @@ void UnclusteredHighRPStage::finalizeGCNSchedStage() { } bool GCNSchedStage::initGCNRegion() { - // Skip empty scheduling region. - if (DAG.begin() == DAG.end()) - return false; - // Check whether this new region is also a new block. if (DAG.RegionBegin->getParent() != CurrentMBB) setupNewBlock(); @@ -1551,8 +1348,8 @@ bool GCNSchedStage::initGCNRegion() { unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end()); DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs); - // Skip regions with 1 schedulable instruction. - if (DAG.begin() == std::prev(DAG.end())) + // Skip empty scheduling regions (0 or 1 schedulable instructions). 
+ if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end())) return false; LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n"); @@ -2040,20 +1837,27 @@ void GCNSchedStage::revertScheduling() { DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd); } -bool PreRARematStage::setObjective() { +bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() { const Function &F = MF.getFunction(); - // Set up "spilling targets" for all regions. + // Maps optimizable regions (i.e., regions at minimum and register-limited + // occupancy, or regions with spilling) to the target RP we would like to + // reach. + DenseMap OptRegions; unsigned MaxSGPRs = ST.getMaxNumSGPRs(F); unsigned MaxVGPRs = ST.getMaxNumVGPRs(F); - for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) { - const GCNRegPressure &RP = DAG.Pressure[I]; - GCNRPTarget &Target = RPTargets.emplace_back(MaxSGPRs, MaxVGPRs, MF, RP); - if (!Target.satisfied()) - TargetRegions.set(I); - } + auto ResetTargetRegions = [&]() { + OptRegions.clear(); + for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) { + const GCNRegPressure &RP = DAG.Pressure[I]; + GCNRPTarget Target(MaxSGPRs, MaxVGPRs, MF, RP); + if (!Target.satisfied()) + OptRegions.insert({I, Target}); + } + }; - if (TargetRegions.any() || DAG.MinOccupancy >= MFI.getMaxWavesPerEU()) { + ResetTargetRegions(); + if (!OptRegions.empty() || DAG.MinOccupancy >= MFI.getMaxWavesPerEU()) { // In addition to register usage being above addressable limits, occupancy // below the minimum is considered like "spilling" as well. TargetOcc = std::nullopt; @@ -2061,68 +1865,94 @@ bool PreRARematStage::setObjective() { // There is no spilling and room to improve occupancy; set up "increased // occupancy targets" for all regions. 
TargetOcc = DAG.MinOccupancy + 1; - const unsigned VGPRBlockSize = MFI.getDynamicVGPRBlockSize(); + unsigned VGPRBlockSize = + MF.getInfo()->getDynamicVGPRBlockSize(); MaxSGPRs = ST.getMaxNumSGPRs(*TargetOcc, false); MaxVGPRs = ST.getMaxNumVGPRs(*TargetOcc, VGPRBlockSize); - for (auto [I, Target] : enumerate(RPTargets)) { - Target.setTarget(MaxSGPRs, MaxVGPRs); - if (!Target.satisfied()) - TargetRegions.set(I); - } + ResetTargetRegions(); } + REMAT_DEBUG({ + dbgs() << "Analyzing "; + MF.getFunction().printAsOperand(dbgs(), false); + dbgs() << ": "; + if (OptRegions.empty()) { + dbgs() << "no objective to achieve, occupancy is maximal at " + << MFI.getMaxWavesPerEU(); + } else if (!TargetOcc) { + dbgs() << "reduce spilling (minimum target occupancy is " + << MFI.getMinWavesPerEU() << ')'; + } else { + dbgs() << "increase occupancy from " << DAG.MinOccupancy << " to " + << TargetOcc; + } + dbgs() << '\n'; + for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) { + if (auto OptIt = OptRegions.find(I); OptIt != OptRegions.end()) { + dbgs() << REMAT_PREFIX << " [" << I << "] " << OptIt->getSecond() + << '\n'; + } + } + }); + if (OptRegions.empty()) + return false; - return TargetRegions.any(); -} + // Accounts for a reduction in RP in an optimizable region. Returns whether we + // estimate that we have identified enough rematerialization opportunities to + // achieve our goal, and sets Progress to true when this particular reduction + // in pressure was helpful toward that goal. + auto ReduceRPInRegion = [&](auto OptIt, Register Reg, LaneBitmask Mask, + bool &Progress) -> bool { + GCNRPTarget &Target = OptIt->getSecond(); + if (!Target.isSaveBeneficial(Reg)) + return false; + Progress = true; + Target.saveReg(Reg, Mask, DAG.MRI); + if (Target.satisfied()) + OptRegions.erase(OptIt->getFirst()); + return OptRegions.empty(); + }; -bool PreRARematStage::collectRematRegs( - const DenseMap &MIRegion) { // We need up-to-date live-out info. 
to query live-out register masks in // regions containing rematerializable instructions. DAG.RegionLiveOuts.buildLiveRegMap(); - // Set of registers already marked for potential remterialization; used to - // avoid rematerialization chains. - SmallSet MarkedRegs; - auto IsMarkedForRemat = [&MarkedRegs](const MachineOperand &MO) -> bool { - return MO.isReg() && MarkedRegs.contains(MO.getReg()); - }; + // Cache set of registers that are going to be rematerialized. + DenseSet RematRegs; // Identify rematerializable instructions in the function. for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) { - RegionBoundaries Bounds = DAG.Regions[I]; - for (auto MI = Bounds.first; MI != Bounds.second; ++MI) { + auto Region = DAG.Regions[I]; + for (auto MI = Region.first; MI != Region.second; ++MI) { // The instruction must be rematerializable. MachineInstr &DefMI = *MI; if (!isReMaterializable(DefMI)) continue; - // We only support rematerializing virtual registers with one - // definition. + // We only support rematerializing virtual registers with one definition. Register Reg = DefMI.getOperand(0).getReg(); if (!Reg.isVirtual() || !DAG.MRI.hasOneDef(Reg)) continue; // We only care to rematerialize the instruction if it has a single - // non-debug user in a different region. - // FIXME: Allow rematerializations with multiple uses. This should be - // relatively easy to support using the current cost model. + // non-debug user in a different region. The using MI may not belong to a + // region if it is a lone region terminator. MachineInstr *UseMI = DAG.MRI.getOneNonDBGUser(Reg); if (!UseMI) continue; auto UseRegion = MIRegion.find(UseMI); - if (UseRegion == MIRegion.end() || UseRegion->second == I) + if (UseRegion != MIRegion.end() && UseRegion->second == I) continue; // Do not rematerialize an instruction if it uses or is used by an // instruction that we have designated for rematerialization. // FIXME: Allow for rematerialization chains: this requires 1. 
updating - // remat points to account for uses that are rematerialized, and 2. - // either rematerializing the candidates in careful ordering, or - // deferring the MBB RP walk until the entire chain has been - // rematerialized. - const MachineOperand &UseMO = UseMI->getOperand(0); - if (IsMarkedForRemat(UseMO) || - llvm::any_of(DefMI.operands(), IsMarkedForRemat)) + // remat points to account for uses that are rematerialized, and 2. either + // rematerializing the candidates in careful ordering, or deferring the + // MBB RP walk until the entire chain has been rematerialized. + if (Rematerializations.contains(UseMI) || + llvm::any_of(DefMI.operands(), [&RematRegs](MachineOperand &MO) { + return MO.isReg() && RematRegs.contains(MO.getReg()); + })) continue; // Do not rematerialize an instruction it it uses registers that aren't @@ -2133,181 +1963,106 @@ bool PreRARematStage::collectRematRegs( *DAG.TII)) continue; - // Add the instruction to the rematerializable list. - MarkedRegs.insert(Reg); - RematRegs.emplace_back(&DefMI, UseMI, DAG, MIRegion); - } - } - - return !RematRegs.empty(); -} + REMAT_DEBUG(dbgs() << "Region " << I << ": remat instruction " << DefMI); + RematInstruction &Remat = + Rematerializations.try_emplace(&DefMI, UseMI).first->second; + + bool RematUseful = false; + if (auto It = OptRegions.find(I); It != OptRegions.end()) { + // Optimistically consider that moving the instruction out of its + // defining region will reduce RP in the latter; this assumes that + // maximum RP in the region is reached somewhere between the defining + // instruction and the end of the region. 
+ REMAT_DEBUG(dbgs() << " Defining region is optimizable\n"); + LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg]; + if (ReduceRPInRegion(It, Reg, Mask, RematUseful)) + return true; + } -PreRARematStage::RematReg::RematReg( - MachineInstr *DefMI, MachineInstr *UseMI, GCNScheduleDAGMILive &DAG, - const DenseMap &MIRegion) - : DefMI(DefMI), UseMI(UseMI), LiveIn(DAG.Regions.size()), - LiveOut(DAG.Regions.size()), Live(DAG.Regions.size()), - DefRegion(MIRegion.at(DefMI)), UseRegion(MIRegion.at(UseMI)) { + for (unsigned LIRegion = 0; LIRegion != E; ++LIRegion) { + // We are only collecting regions in which the register is a live-in + // (and may be live-through). + auto It = DAG.LiveIns[LIRegion].find(Reg); + if (It == DAG.LiveIns[LIRegion].end() || It->second.none()) + continue; + Remat.LiveInRegions.insert(LIRegion); + + // Account for the reduction in RP due to the rematerialization in an + // optimizable region in which the defined register is a live-in. This + // is exact for live-through region but optimistic in the using region, + // where RP is actually reduced only if maximum RP is reached somewhere + // between the beginning of the region and the rematerializable + // instruction's use. + if (auto It = OptRegions.find(LIRegion); It != OptRegions.end()) { + REMAT_DEBUG(dbgs() << " Live-in in region " << LIRegion << '\n'); + if (ReduceRPInRegion(It, Reg, DAG.LiveIns[LIRegion][Reg], + RematUseful)) + return true; + } + } - // Mark regions in which the rematerializable register is live. 
- Register Reg = getReg(); - for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) { - auto LiveInIt = DAG.LiveIns[I].find(Reg); - if (LiveInIt != DAG.LiveIns[I].end()) - LiveIn.set(I); - const auto &LiveOuts = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I); - if (auto LiveOutIt = LiveOuts.find(Reg); LiveOutIt != LiveOuts.end()) - LiveOut.set(I); - } - Live |= LiveIn; - Live |= LiveOut; - Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(DefRegion).at(Reg); -} - -bool PreRARematStage::RematReg::maybeBeneficial( - const BitVector &TargetRegions, ArrayRef RPTargets) const { - Register Reg = getReg(); - for (unsigned I : TargetRegions.set_bits()) { - if (Live[I] && RPTargets[I].isSaveBeneficial(Reg)) - return true; + // If the instruction is not a live-in or live-out in any optimizable + // region then there is no point in rematerializing it. + if (!RematUseful) { + Rematerializations.pop_back(); + REMAT_DEBUG(dbgs() << " No impact, not rematerializing instruction\n"); + } else { + RematRegs.insert(Reg); + } + } } - return false; -} -void PreRARematStage::RematReg::insertMI(unsigned RegionIdx, - MachineInstr *RematMI, - GCNScheduleDAGMILive &DAG) const { - RegionBoundaries &Bounds = DAG.Regions[RegionIdx]; - if (Bounds.first == std::next(MachineBasicBlock::iterator(RematMI))) - Bounds.first = RematMI; - DAG.LIS->InsertMachineInstrInMaps(*RematMI); - DAG.LIS->createAndComputeVirtRegInterval(RematMI->getOperand(0).getReg()); + if (TargetOcc) { + // We were trying to increase occupancy but failed, abort the stage. 
+ REMAT_DEBUG(dbgs() << "Cannot increase occupancy\n"); + Rematerializations.clear(); + return false; + } + REMAT_DEBUG(dbgs() << "Can reduce but not eliminate spilling\n"); + return !Rematerializations.empty(); } -PreRARematStage::ScoredRemat::FreqInfo::FreqInfo( - MachineFunction &MF, const GCNScheduleDAGMILive &DAG) { - assert(DAG.MLI && "MLI not defined in DAG"); - MachineBranchProbabilityInfo MBPI; - MachineBlockFrequencyInfo MBFI(MF, MBPI, *DAG.MLI); - - const unsigned NumRegions = DAG.Regions.size(); - MinFreq = MBFI.getEntryFreq().getFrequency(); - MaxFreq = 0; - Regions.reserve(NumRegions); - for (unsigned I = 0; I < NumRegions; ++I) { - MachineBasicBlock *MBB = DAG.Regions[I].first->getParent(); - uint64_t BlockFreq = MBFI.getBlockFreq(MBB).getFrequency(); - Regions.push_back(BlockFreq); - if (BlockFreq && BlockFreq < MinFreq) - MinFreq = BlockFreq; - else if (BlockFreq > MaxFreq) - MaxFreq = BlockFreq; - } - if (!MinFreq) - return; - - // Scale everything down if frequencies are high. - if (MinFreq >= ScaleFactor * ScaleFactor) { - for (uint64_t &Freq : Regions) - Freq /= ScaleFactor; - MinFreq /= ScaleFactor; - MaxFreq /= ScaleFactor; - } -} - -PreRARematStage::ScoredRemat::ScoredRemat(RematReg *Remat, const FreqInfo &Freq, - const GCNScheduleDAGMILive &DAG) - : Remat(Remat), NumRegs(getNumRegs(DAG)), FreqDiff(getFreqDiff(Freq)) {} - -unsigned PreRARematStage::ScoredRemat::getNumRegs( - const GCNScheduleDAGMILive &DAG) const { - const TargetRegisterClass &RC = *DAG.MRI.getRegClass(Remat->getReg()); - unsigned RegSize = DAG.TRI->getRegSizeInBits(RC); - if (unsigned SubIdx = Remat->DefMI->getOperand(0).getSubReg()) { - // The following may return -1 (i.e., a large unsigned number) on indices - // that may be used to access subregisters of multiple sizes; in such cases - // fallback on the size derived from the register class. 
- unsigned SubRegSize = DAG.TRI->getSubRegIdxSize(SubIdx); - if (SubRegSize < RegSize) - RegSize = SubRegSize; - } - return divideCeil(RegSize, 32); -} - -int64_t PreRARematStage::ScoredRemat::getFreqDiff(const FreqInfo &Freq) const { - // Get frequencies of defining and using regions. A rematerialization from the - // least frequent region to the most frequent region will yield the greatest - // latency penalty and therefore should get minimum score. Reciprocally, a - // rematerialization in the other direction should get maximum score. Default - // to values that will yield the worst possible score given known frequencies - // in order to penalize rematerializations from or into regions whose - // frequency is unknown. - int64_t DefOrMin = std::max(Freq.Regions[Remat->DefRegion], Freq.MinFreq); - int64_t UseOrMax = Freq.Regions[Remat->UseRegion]; - if (!UseOrMax) - UseOrMax = Freq.MaxFreq; - return DefOrMin - UseOrMax; -} - -void PreRARematStage::ScoredRemat::update(const BitVector &TargetRegions, - ArrayRef RPTargets, - const FreqInfo &FreqInfo, - bool ReduceSpill) { - MaxFreq = 0; - RegionImpact = 0; - for (unsigned I : TargetRegions.set_bits()) { - if (!Remat->Live[I] || !RPTargets[I].isSaveBeneficial(Remat->getReg())) - continue; - bool UnusedLT = Remat->isUnusedLiveThrough(I); - - // Regions in which RP is guaranteed to decrease have more weight. - RegionImpact += UnusedLT ? 2 : 1; +void PreRARematStage::rematerialize() { + const SIInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - if (ReduceSpill) { - uint64_t Freq = FreqInfo.Regions[I]; - if (!UnusedLT) { - // Apply a frequency penalty in regions in which we are not sure that RP - // will decrease. - Freq /= 2; - } - MaxFreq = std::max(MaxFreq, Freq); + // Collect regions whose RP changes in unpredictable way; we will have to + // fully recompute their RP after all rematerializations. + DenseSet RecomputeRP; + + // Rematerialize all instructions. 
+ for (auto &[DefMI, Remat] : Rematerializations) { + MachineBasicBlock::iterator InsertPos(Remat.UseMI); + Register Reg = DefMI->getOperand(0).getReg(); + unsigned DefRegion = MIRegion.at(DefMI); + + // Rematerialize DefMI to its use block. + TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg, + AMDGPU::NoSubRegister, *DefMI); + Remat.RematMI = &*std::prev(InsertPos); + DAG.LIS->InsertMachineInstrInMaps(*Remat.RematMI); + + // Update region boundaries in regions we sinked from (remove defining MI) + // and to (insert MI rematerialized in use block). Only then we can erase + // the original MI. + DAG.updateRegionBoundaries(DAG.Regions[DefRegion], DefMI, nullptr); + auto UseRegion = MIRegion.find(Remat.UseMI); + if (UseRegion != MIRegion.end()) { + DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], InsertPos, + Remat.RematMI); } - } - RegionImpact *= NumRegs; -} + DAG.LIS->RemoveMachineInstrFromMaps(*DefMI); + DefMI->eraseFromParent(); -void PreRARematStage::rematerialize(const RematReg &Remat, - BitVector &RecomputeRP, - RollbackInfo *Rollback) { - const SIInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - MachineInstr &DefMI = *Remat.DefMI; - Register Reg = DefMI.getOperand(0).getReg(); - Register NewReg = DAG.MRI.cloneVirtualRegister(Reg); - - // Rematerialize the register in the region where it is used. - MachineBasicBlock::iterator InsertPos = Remat.UseMI; - TII->reMaterialize(*InsertPos->getParent(), InsertPos, NewReg, 0, DefMI); - MachineInstr *RematMI = &*std::prev(InsertPos); - Remat.UseMI->substituteRegister(Reg, NewReg, 0, *DAG.TRI); - Remat.insertMI(Remat.UseRegion, RematMI, DAG); - if (Rollback) { - Rollback->RematMI = RematMI; - // Make the original MI a debug instruction so that it does not influence - // scheduling. - DefMI.setDesc(TII->get(TargetOpcode::DBG_VALUE)); - } else { - // Just delete the original instruction if it cannot be rolled back. 
- DAG.deleteMI(Remat.DefRegion, &DefMI); - } + // Collect all regions impacted by the rematerialization and update their + // live-in/RP information. + for (unsigned I : Remat.LiveInRegions) { + ImpactedRegions.insert({I, DAG.Pressure[I]}); + GCNRPTracker::LiveRegSet &RegionLiveIns = DAG.LiveIns[I]; - // Remove the register from all regions where it is a live-in or live-out - // and adjust RP targets. - for (unsigned I : Remat.Live.set_bits()) { #ifdef EXPENSIVE_CHECKS - if (!Remat.LiveIn[I] && Remat.LiveOut[I]) { - // All uses are known to be available / live at the remat point. Thus, - // the uses should already be live in to the region. - for (MachineOperand &MO : DefMI.operands()) { + // All uses are known to be available / live at the remat point. Thus, the + // uses should already be live in to the region. + for (MachineOperand &MO : DefMI->operands()) { if (!MO.isReg() || !MO.getReg() || !MO.readsReg()) continue; @@ -2320,7 +2075,7 @@ void PreRARematStage::rematerialize(const RematReg &Remat, if (LI.hasSubRanges() && MO.getSubReg()) LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg()); - LaneBitmask LiveInMask = DAG.LiveIns[I].at(UseReg); + LaneBitmask LiveInMask = RegionLiveIns.at(UseReg); LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM); // If this register has lanes not covered by the LiveIns, be sure they // do not map to any subrange. ref: @@ -2331,74 +2086,65 @@ void PreRARematStage::rematerialize(const RematReg &Remat, assert((SR.LaneMask & UncoveredLanes).none()); } } - } #endif - // This save is guaranteed in regions in which the register is live-through - // and unused but optimistic in all other regions where the register is - // live. 
- RPTargets[I].saveReg(Reg, Remat.Mask, DAG.MRI); - DAG.LiveIns[I].erase(Reg); - DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I).erase(Reg); - if (!Remat.isUnusedLiveThrough(I)) - RecomputeRP.set(I); - } - - RescheduleRegions |= Remat.Live; -} - -void PreRARematStage::rollback(const RollbackInfo &Rollback, - BitVector &RecomputeRP) const { - auto &[Remat, RematMI] = Rollback; - - // Switch back to using the original register and delete the - // rematerialization. - Remat->DefMI->setDesc(DAG.TII->get(RematMI->getOpcode())); - Register Reg = RematMI->getOperand(0).getReg(); - Register OriginalReg = Remat->DefMI->getOperand(0).getReg(); - Remat->UseMI->substituteRegister(Reg, OriginalReg, 0, *DAG.TRI); - REMAT_DEBUG(dbgs() << '[' << Remat->UseRegion - << "] Deleting rematerialization " << *RematMI); - DAG.deleteMI(Remat->UseRegion, RematMI); - - // Regenerate the original register's interval as slot indices may have - // changed slightly from before re-scheduling, and re-add it as a - // live-in/live-out in all regions it used to be one in. - DAG.LIS->removeInterval(OriginalReg); - DAG.LIS->createAndComputeVirtRegInterval(OriginalReg); - std::pair LiveReg(OriginalReg, Remat->Mask); - for (unsigned I : Remat->LiveIn.set_bits()) - DAG.LiveIns[I].insert(LiveReg); - for (unsigned I : Remat->LiveOut.set_bits()) - DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I).insert(LiveReg); - RecomputeRP |= Remat->Live; -} - -void PreRARematStage::unsetSatisifedRPTargets(const BitVector &Regions) { - for (unsigned I : Regions.set_bits()) { - if (TargetRegions[I] && RPTargets[I].satisfied()) { - REMAT_DEBUG(dbgs() << " [" << I << "] Target reached!\n"); - TargetRegions.reset(I); + // The register is no longer a live-in in all regions but the one that + // contains the single use. In live-through regions, maximum register + // pressure decreases predictably so we can directly update it. 
In the + // using region, maximum RP may or may not decrease, so we will mark it + // for re-computation after all materializations have taken place. + LaneBitmask PrevMask = RegionLiveIns[Reg]; + RegionLiveIns.erase(Reg); + RegMasks.insert({{I, Remat.RematMI->getOperand(0).getReg()}, PrevMask}); + if (Remat.UseMI->getParent() != DAG.Regions[I].first->getParent()) + DAG.Pressure[I].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI); + else + RecomputeRP.insert(I); } - } -} - -bool PreRARematStage::updateAndVerifyRPTargets(const BitVector &Regions) { - bool TooOptimistic = false; - for (unsigned I : Regions.set_bits()) { - GCNRPTarget &Target = RPTargets[I]; - Target.setRP(DAG.getRealRegPressure(I)); + // RP in the region from which the instruction was rematerialized may or may + // not decrease. + ImpactedRegions.insert({DefRegion, DAG.Pressure[DefRegion]}); + RecomputeRP.insert(DefRegion); + + // Recompute live interval to reflect the register's rematerialization. + Register RematReg = Remat.RematMI->getOperand(0).getReg(); + DAG.LIS->removeInterval(RematReg); + DAG.LIS->createAndComputeVirtRegInterval(RematReg); + } + + // All regions impacted by at least one rematerialization must be rescheduled. + // Maximum pressure must also be recomputed for all regions where it changed + // non-predictably and checked against the target occupancy. + unsigned DynamicVGPRBlockSize = + MF.getInfo()->getDynamicVGPRBlockSize(); + AchievedOcc = MFI.getMaxWavesPerEU(); + for (auto &[I, OriginalRP] : ImpactedRegions) { + bool IsEmptyRegion = DAG.Regions[I].first == DAG.Regions[I].second; + RescheduleRegions[I] = !IsEmptyRegion; + if (!RecomputeRP.contains(I)) + continue; - // Since we were optimistic in assessing RP decreases in these regions, we - // may need to remark the target as a target region if RP didn't decrease - // as expected. 
- if (!TargetRegions[I] && !Target.satisfied()) { - REMAT_DEBUG(dbgs() << " [" << I << "] Incorrect RP estimation\n"); - TooOptimistic = true; - TargetRegions.set(I); + GCNRegPressure RP; + if (IsEmptyRegion) { + RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]); + } else { + GCNDownwardRPTracker RPT(*DAG.LIS); + auto *NonDbgMI = &*skipDebugInstructionsForward(DAG.Regions[I].first, + DAG.Regions[I].second); + if (NonDbgMI == DAG.Regions[I].second) { + // Region is non-empty but contains only debug instructions. + RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]); + } else { + RPT.reset(*NonDbgMI, &DAG.LiveIns[I]); + RPT.advance(DAG.Regions[I].second); + RP = RPT.moveMaxPressure(); + } } + DAG.Pressure[I] = RP; + AchievedOcc = + std::min(AchievedOcc, RP.getOccupancy(ST, DynamicVGPRBlockSize)); } - return TooOptimistic; + REMAT_DEBUG(dbgs() << "Achieved occupancy " << AchievedOcc << "\n"); } // Copied from MachineLICM @@ -2422,37 +2168,78 @@ bool PreRARematStage::isReMaterializable(const MachineInstr &MI) { void PreRARematStage::finalizeGCNSchedStage() { // We consider that reducing spilling is always beneficial so we never // rollback rematerializations in such cases. It's also possible that - // rescheduling lowers occupancy over the one achieved just through remats, - // in which case we do not want to rollback either (the rescheduling was - // already reverted in PreRARematStage::shouldRevertScheduling in such - // cases). + // rescheduling lowers occupancy over the one achieved just through remats, in + // which case we do not want to rollback either (the rescheduling was already + // reverted in PreRARematStage::shouldRevertScheduling in such cases). unsigned MaxOcc = std::max(AchievedOcc, DAG.MinOccupancy); - if (!TargetOcc || MaxOcc >= *TargetOcc) { - // Fully delete the original MIs that were rematerialized. 
- for (const RollbackInfo &Rollback : Rollbacks) - DAG.deleteMI(Rollback.Remat->DefRegion, Rollback.Remat->DefMI); + if (!TargetOcc || MaxOcc >= *TargetOcc) return; + + REMAT_DEBUG(dbgs() << "Rolling back all rematerializations\n"); + const SIInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + + // Rollback the rematerializations. + for (const auto &[DefMI, Remat] : Rematerializations) { + MachineInstr &RematMI = *Remat.RematMI; + unsigned DefRegion = MIRegion.at(DefMI); + MachineBasicBlock::iterator InsertPos(DAG.Regions[DefRegion].second); + MachineBasicBlock *MBB = RegionBB[DefRegion]; + Register Reg = RematMI.getOperand(0).getReg(); + + // Re-rematerialize MI at the end of its original region. Note that it may + // not be rematerialized exactly in the same position as originally within + // the region, but it should not matter much. + TII->reMaterialize(*MBB, InsertPos, Reg, AMDGPU::NoSubRegister, RematMI); + MachineInstr *NewMI = &*std::prev(InsertPos); + DAG.LIS->InsertMachineInstrInMaps(*NewMI); + + auto UseRegion = MIRegion.find(Remat.UseMI); + if (UseRegion != MIRegion.end()) { + DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], RematMI, + nullptr); + } + DAG.updateRegionBoundaries(DAG.Regions[DefRegion], InsertPos, NewMI); + + // Erase rematerialized MI. + DAG.LIS->RemoveMachineInstrFromMaps(RematMI); + RematMI.eraseFromParent(); + + // Recompute live interval for the re-rematerialized register + DAG.LIS->removeInterval(Reg); + DAG.LIS->createAndComputeVirtRegInterval(Reg); + + // Re-add the register as a live-in in all regions it used to be one in. + for (unsigned LIRegion : Remat.LiveInRegions) + DAG.LiveIns[LIRegion].insert({Reg, RegMasks.at({LIRegion, Reg})}); } - // Rollback, then recompute pressure in all affected regions. 
- REMAT_DEBUG(dbgs() << "==== ROLLBACK ====\n"); - BitVector RecomputeRP(DAG.Regions.size()); - for (const RollbackInfo &Rollback : Rollbacks) - rollback(Rollback, RecomputeRP); - for (unsigned I : RecomputeRP.set_bits()) - DAG.Pressure[I] = DAG.getRealRegPressure(I); + // Reset RP in all impacted regions. + for (auto &[I, OriginalRP] : ImpactedRegions) + DAG.Pressure[I] = OriginalRP; GCNSchedStage::finalizeGCNSchedStage(); } -void GCNScheduleDAGMILive::deleteMI(unsigned RegionIdx, MachineInstr *MI) { - // It's not possible for the deleted instruction to be upper region boundary - // since we don't delete region terminators. - if (Regions[RegionIdx].first == MI) - Regions[RegionIdx].first = std::next(MachineBasicBlock::iterator(MI)); - LIS->removeInterval(MI->getOperand(0).getReg()); - LIS->RemoveMachineInstrFromMaps(*MI); - MI->eraseFromParent(); +void GCNScheduleDAGMILive::updateRegionBoundaries( + RegionBoundaries &RegionBounds, MachineBasicBlock::iterator MI, + MachineInstr *NewMI) { + assert((!NewMI || NewMI != RegionBounds.second) && + "cannot remove at region end"); + + if (RegionBounds.first == RegionBounds.second) { + assert(NewMI && "cannot remove from an empty region"); + RegionBounds.first = NewMI; + return; + } + + // We only care for modifications at the beginning of a non-empty region since + // the upper region boundary is exclusive. 
+ if (MI != RegionBounds.first) + return; + if (!NewMI) + RegionBounds.first = std::next(MI); // Removal + else + RegionBounds.first = NewMI; // Insertion } static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) { diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index 00876601cbc77..95a931b9beb2a 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -18,8 +18,6 @@ #include "llvm/ADT/MapVector.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineScheduler.h" -#include -#include namespace llvm { @@ -302,12 +300,18 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive { // Compute and cache live-ins and pressure for all regions in block. void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB); + /// If necessary, updates a region's boundaries following insertion ( \p NewMI + /// != nullptr) or removal ( \p NewMI == nullptr) of a \p MI in the region. + /// For an MI removal, this must be called before the MI is actually erased + /// from its parent MBB. + void updateRegionBoundaries(RegionBoundaries &RegionBounds, + MachineBasicBlock::iterator MI, + MachineInstr *NewMI); + void runSchedStages(); std::unique_ptr createSchedStage(GCNSchedStageID SchedStageID); - void deleteMI(unsigned RegionIdx, MachineInstr *MI); - public: GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr S); @@ -447,222 +451,65 @@ class ClusteredLowOccStage : public GCNSchedStage { }; /// Attempts to reduce function spilling or, if there is no spilling, to -/// increase function occupancy by one with respect to register usage by sinking -/// rematerializable instructions to their use. 
When the stage estimates that -/// reducing spilling or increasing occupancy is possible, it tries to -/// rematerialize as few registers as possible to reduce potential negative +/// increase function occupancy by one with respect to ArchVGPR usage by sinking +/// rematerializable instructions to their use. When the stage +/// estimates reducing spilling or increasing occupancy is possible, as few +/// instructions as possible are rematerialized to reduce potential negative /// effects on function latency. -/// -/// The stage only supports rematerializing registers that meet all of the -/// following constraints. -/// 1. The register is virtual and has a single defining instruction. -/// 2. The single defining instruction is either deemed rematerializable by the -/// target-independent logic, or if not, has no non-constant and -/// non-ignorable physical register use. -/// 3 The register has no virtual register use whose live range would be -/// extended by the rematerialization. -/// 4. The register has a single non-debug user in a different region from its -/// defining region. -/// 5. The register is not used by or using another register that is going to be -/// rematerialized. class PreRARematStage : public GCNSchedStage { private: - /// A rematerializable register. - struct RematReg { - /// Single MI defining the rematerializable register. - MachineInstr *DefMI; - /// Single user of the rematerializable register. + /// Useful information about a rematerializable instruction. + struct RematInstruction { + /// Single use of the rematerializable instruction's defined register, + /// located in a different block. MachineInstr *UseMI; - /// Regions in which the register is live-in/live-out/live anywhere. - BitVector LiveIn, LiveOut, Live; - /// The rematerializable register's lane bitmask. - LaneBitmask Mask; - /// Defining and using regions. 
- unsigned DefRegion, UseRegion; - - RematReg(MachineInstr *DefMI, MachineInstr *UseMI, - GCNScheduleDAGMILive &DAG, - const DenseMap &MIRegion); - - /// Returns the rematerializable register. Do not call after deleting the - /// original defining instruction. - Register getReg() const { return DefMI->getOperand(0).getReg(); } - - /// Determines whether this rematerialization may be beneficial in at least - /// one target region. - bool maybeBeneficial(const BitVector &TargetRegions, - ArrayRef RPTargets) const; - - /// Determines if the register is both unused and live-through in region \p - /// I. This guarantees that rematerializing it will reduce RP in the region. - bool isUnusedLiveThrough(unsigned I) const { - assert(I < Live.size() && "region index out of range"); - return LiveIn[I] && LiveOut[I] && I != UseRegion; - } - - /// Updates internal structures following a MI rematerialization. Part of - /// the stage instead of the DAG because it makes assumptions that are - /// specific to the rematerialization process. - void insertMI(unsigned RegionIdx, MachineInstr *RematMI, - GCNScheduleDAGMILive &DAG) const; - }; - - /// A scored rematerialization candidate. Higher scores indicate more - /// beneficial rematerializations. A null score indicate the rematerialization - /// is not helpful to reduce RP in target regions. - struct ScoredRemat { - /// The rematerializable register under consideration. - RematReg *Remat; - - /// Execution frequency information required by scoring heuristics. - /// Frequencies are scaled down if they are high to avoid overflow/underflow - /// when combining them. - struct FreqInfo { - /// Per-region execution frequencies. 0 when unknown. - SmallVector Regions; - /// Minimum and maximum observed frequencies. 
- uint64_t MinFreq, MaxFreq; - - FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG); - - private: - static const uint64_t ScaleFactor = 1024; - }; - - /// This only initializes state-independent characteristics of \p Remat, not - /// the actual score. - ScoredRemat(RematReg *Remat, const FreqInfo &Freq, - const GCNScheduleDAGMILive &DAG); - - /// Updates the rematerialization's score w.r.t. the current \p RPTargets. - /// \p RegionFreq indicates the frequency of each region - void update(const BitVector &TargetRegions, ArrayRef RPTargets, - const FreqInfo &Freq, bool ReduceSpill); - - /// Returns whether the current score is null, indicating the - /// rematerialization is useless. - bool hasNullScore() const { return !RegionImpact; } - - /// Compare score components of non-null scores pair-wise. A null score is - /// always strictly lesser than another non-null score. - bool operator<(const ScoredRemat &O) const { - if (hasNullScore()) - return !O.hasNullScore(); - if (O.hasNullScore()) - return false; - if (MaxFreq != O.MaxFreq) - return MaxFreq < O.MaxFreq; - if (FreqDiff != O.FreqDiff) - return FreqDiff < O.FreqDiff; - if (RegionImpact != O.RegionImpact) - return RegionImpact < O.RegionImpact; - // Break ties using pointer to rematerializable register. Rematerializable - // registers are collected in instruction order so, within the same - // region, this will prefer registers defined earlier that have longer - // live ranges in their defining region (since the registers we consider - // are always live-out in their defining region). - return Remat > O.Remat; - } - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - Printable print() const; -#endif - - private: - /// Number of 32-bit registers this rematerialization covers. - unsigned NumRegs; - - // The three members below are the scoring components, top to bottom from - // most important to least important when comparing candidates. 
- - /// Frequency of impacted target region with highest known frequency. This - /// only matters when the stage is trying to reduce spilling, so it is - /// always 0 when it is not. - uint64_t MaxFreq; - /// Frequency difference between defining and using regions. Negative values - /// indicate we are rematerializing to higher frequency regions; positive - /// values indicate the contrary. - int64_t FreqDiff; - /// Expected number of target regions impacted by the rematerialization, - /// scaled by the size of the register being rematerialized. - unsigned RegionImpact; - - unsigned getNumRegs(const GCNScheduleDAGMILive &DAG) const; - - int64_t getFreqDiff(const FreqInfo &Freq) const; - }; - - /// Holds enough information to rollback a rematerialization decision post - /// re-scheduling. - struct RollbackInfo { - /// The rematerializable register under consideration. - const RematReg *Remat; - /// The rematerialized MI replacing the original defining MI. + /// Rematerialized version of \p DefMI, set in + /// PreRARematStage::rematerialize. Used for reverting rematerializations. MachineInstr *RematMI; + /// Set of regions in which the rematerializable instruction's defined + /// register is a live-in. + SmallDenseSet LiveInRegions; - RollbackInfo(const RematReg *Remat) : Remat(Remat) {} + RematInstruction(MachineInstr *UseMI) : UseMI(UseMI) {} }; + /// Maps all MIs to their parent region. MI terminators are considered to be + /// outside the region they delimitate, and as such are not stored in the map. + DenseMap MIRegion; /// Parent MBB to each region, in region order. SmallVector RegionBB; - - /// Register pressure targets for all regions. - SmallVector RPTargets; - /// Regions which are above the stage's RP target. - BitVector TargetRegions; - /// The target occupancy the set is trying to achieve. Empty when the + /// Collects instructions to rematerialize. 
+ MapVector Rematerializations; + /// Collects regions whose live-ins or register pressure will change due to + /// rematerializations. + DenseMap ImpactedRegions; + /// In case we need to rollback rematerializations, save lane masks for all + /// rematerialized registers in all regions in which they are live-ins. + DenseMap, LaneBitmask> RegMasks; + /// After successful stage initialization, indicates which regions should be + /// rescheduled. + BitVector RescheduleRegions; + /// The target occupancy the stage is trying to achieve. Empty when the /// objective is spilling reduction. std::optional TargetOcc; /// Achieved occupancy *only* through rematerializations (pre-rescheduling). - /// Smaller than or equal to the target occupancy, when it is defined. + /// Smaller than or equal to the target occupancy. unsigned AchievedOcc; - /// List of rematerializable registers. - SmallVector RematRegs; - /// List of rematerializations to rollback if rematerialization does not end - /// up being beneficial. - SmallVector Rollbacks; - /// After successful stage initialization, indicates which regions should be - /// rescheduled. - BitVector RescheduleRegions; - - /// Determines the stage's objective (increasing occupancy or reducing - /// spilling, set in \ref TargetOcc). Defines \ref RPTargets in all regions to - /// achieve that objective and mark those that don't achieve it in \ref - /// TargetRegions. Returns whether there is any target region. - bool setObjective(); - - /// Unsets target regions in \p Regions whose RP target has been reached. - void unsetSatisifedRPTargets(const BitVector &Regions); - - /// Fully recomputes RP from the DAG in \p Regions. Among those regions, sets - /// again all \ref TargetRegions that were optimistically marked as satisfied - /// but are actually not, and returns whether there were any such regions. 
- bool updateAndVerifyRPTargets(const BitVector &Regions); - - /// Collects all rematerializable registers and appends them to \ref - /// RematRegs. \p MIRegion maps MIs to their region. Returns whether any - /// rematerializable register was found. - bool collectRematRegs(const DenseMap &MIRegion); - - /// Rematerializes \p Remat. This removes the rematerialized register from - /// live-in/out lists in the DAG and updates RP targets in all affected - /// regions, which are also marked in \ref RescheduleRegions. Regions in which - /// RP savings are not guaranteed are set in \p RecomputeRP. When \p Rollback - /// is non-null, fills it with required information to be able to rollback the - /// rematerialization post-rescheduling. - void rematerialize(const RematReg &Remat, BitVector &RecomputeRP, - RollbackInfo *Rollback); - - /// Rollbacks the rematerialization decision represented by \p Rollback. This - /// update live-in/out lists in the DAG but does not update cached register - /// pressures. Regions in which RP may be impacted are marked in \ref - /// RecomputeRP. - void rollback(const RollbackInfo &Rollback, BitVector &RecomputeRP) const; + /// Returns whether remat can reduce spilling or increase function occupancy + /// by 1 through rematerialization. If it can do one, collects instructions in + /// PreRARematStage::Rematerializations and sets the target occupancy in + /// PreRARematStage::TargetOccupancy. + bool canIncreaseOccupancyOrReduceSpill(); /// Whether the MI is rematerializable bool isReMaterializable(const MachineInstr &MI); + /// Rematerializes all instructions in PreRARematStage::Rematerializations + /// and stores the achieved occupancy after remat in + /// PreRARematStage::AchievedOcc. + void rematerialize(); + /// If remat alone did not increase occupancy to the target one, rollbacks all /// rematerializations and resets live-ins/RP in all regions impacted by the /// stage to their pre-stage values. 
@@ -676,12 +523,7 @@ class PreRARematStage : public GCNSchedStage { bool shouldRevertScheduling(unsigned WavesAfter) override; PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) - : GCNSchedStage(StageID, DAG), TargetRegions(DAG.Regions.size()), - RescheduleRegions(DAG.Regions.size()) { - const unsigned NumRegions = DAG.Regions.size(); - RPTargets.reserve(NumRegions); - RegionBB.reserve(NumRegions); - } + : GCNSchedStage(StageID, DAG), RescheduleRegions(DAG.Regions.size()) {} }; class ILPInitialScheduleStage : public GCNSchedStage { diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir deleted file mode 100644 index 0bfcb638038fd..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-rematerialization-scoring.mir +++ /dev/null @@ -1,523 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=machine-scheduler -amdgpu-disable-unclustered-high-rp-reschedule -verify-machineinstrs %s -o - | FileCheck %s - -# All tests are almost identical, the only differences being that some -# VGPR-defining instructions are progressively made artificially -# unrematerializable with an implicit def to test rematerialization -# priorities. The CFG is the following for all tests in the file. -# -# +---+ -# | 0 | -# +---+ -# | -# v -# +---+ -# +------>| 1 |-----+ -# | +---+ | -# | | v -# | | +---+ -# | | | 2 | -# | | +-+-+ -# | v | -# +---+ +---+ | -# | 4 |<----| 3 |<----+ -# +---+ +---+ -# | -# v -# +---+ -# | 5 | -# +---+ - -# %32's defining and using region frequencies are identical therefore it is the -# best register to rematerialize. 
-name: favor_same_frequency -tracksRegLiveness: true -machineFunctionInfo: - isEntryFunction: true -body: | - ; CHECK-LABEL: name: favor_same_frequency - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %loop_if_bound:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4) - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode - ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound, implicit $exec - ; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1 - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, 
implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode 
- ; CHECK-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %exec_save_if:sreg_64 = COPY $exec, implicit-def $exec - ; CHECK-NEXT: %exec_if:sreg_64 = S_AND_B64 %exec_save_if, %exec_loop_mask, implicit-def dead $scc - ; CHECK-NEXT: $exec = S_MOV_B64_term %exec_if - ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec - ; CHECK-NEXT: S_BRANCH %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: successors: %bb.3(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]] - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.3: - ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.5(0x04000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $exec = S_OR_B64 $exec, %exec_save_if, implicit-def $scc - ; CHECK-NEXT: %loop_counter:sreg_32 = S_ADD_I32 %loop_counter, -1, implicit-def dead $scc - ; CHECK-NEXT: S_CMP_LG_U32 %loop_counter, 0, implicit-def $scc - ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.5, implicit killed $scc - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.4: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: 
{{ $}} - ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]] - ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]], implicit [[V_CVT_I32_F64_e32_15]] - ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]], implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]] - ; CHECK-NEXT: S_BRANCH %bb.1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode - ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]] - ; CHECK-NEXT: S_ENDPGM 0 - bb.0: - liveins: $vgpr0, $sgpr0_sgpr1 - - %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1 - %loop_if_bound:vgpr_32 = COPY $vgpr0 - %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4) - %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound, implicit $exec - %loop_counter:sreg_32 = COPY %mem_data.sub1 - - %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode - %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode - %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode - %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode - %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode - %5:vgpr_32 = 
nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode - %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode - %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode - %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode - %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode - %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode - %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode - %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode - %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode - %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode - %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode - %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode - %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode - %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode - %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode - %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode - %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode - %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode - %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode - %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode - %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode - %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode - %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode - %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode - %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode - %30:vgpr_32 = 
nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode - %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode - %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode - - bb.1: - successors: %bb.2, %bb.3 - - %exec_save_if:sreg_64 = COPY $exec, implicit-def $exec - %exec_if:sreg_64 = S_AND_B64 %exec_save_if, %exec_loop_mask, implicit-def dead $scc - $exec = S_MOV_B64_term %exec_if - S_CBRANCH_EXECZ %bb.3, implicit $exec - S_BRANCH %bb.2 - - bb.2: - successors: %bb.3 - - S_NOP 0, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31 - - bb.3: - successors: %bb.4(0x7c000000), %bb.5(0x04000000) - - $exec = S_OR_B64 $exec, %exec_save_if, implicit-def $scc - %loop_counter:sreg_32 = S_ADD_I32 %loop_counter, -1, implicit-def dead $scc - S_CMP_LG_U32 %loop_counter, 0, implicit-def $scc - S_CBRANCH_SCC0 %bb.5, implicit killed $scc - - bb.4: - successors: %bb.1 - - S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7 - S_NOP 0, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15 - S_NOP 0, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23 - - S_BRANCH %bb.1 - - bb.5: - - S_NOP 0, implicit %32 - - S_ENDPGM 0 -... ---- -# bb.2's frequency is lesser than bb.4's therefore it is preferable to -# rematerialize registers in bb.2 instead of bb.4. 
-name: favor_lower_frequency -tracksRegLiveness: true -machineFunctionInfo: - isEntryFunction: true -body: | - ; CHECK-LABEL: name: favor_lower_frequency - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %loop_if_bound:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4) - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode - ; 
CHECK-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0 - ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound, implicit $exec - ; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %exec_save_if:sreg_64 = COPY $exec, implicit-def $exec - ; CHECK-NEXT: %exec_if:sreg_64 = S_AND_B64 %exec_save_if, %exec_loop_mask, implicit-def dead $scc - ; CHECK-NEXT: $exec = S_MOV_B64_term 
%exec_if - ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec - ; CHECK-NEXT: S_BRANCH %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: successors: %bb.3(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode - ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]] - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.3: - ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.5(0x04000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $exec = S_OR_B64 $exec, %exec_save_if, implicit-def $scc - ; CHECK-NEXT: %loop_counter:sreg_32 = S_ADD_I32 %loop_counter, -1, implicit-def dead $scc - ; CHECK-NEXT: S_CMP_LG_U32 %loop_counter, 0, implicit-def $scc - ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.5, implicit killed $scc - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.4: - ; CHECK-NEXT: successors: 
%bb.1(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode - ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]] - ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]] - ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]], implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]] - ; CHECK-NEXT: S_BRANCH %bb.1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]] - ; CHECK-NEXT: S_ENDPGM 0 - bb.0: - liveins: $vgpr0, $sgpr0_sgpr1 - - %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1 - %loop_if_bound:vgpr_32 = COPY $vgpr0 - %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4) - %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound, implicit $exec - %loop_counter:sreg_32 = COPY %mem_data.sub1 - - %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode - %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode - %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode - %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode - %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, 
implicit $mode - %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode - %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode - %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode - %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode - %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode - %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode - %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode - %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode - %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode - %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode - %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode - %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode - %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode - %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode - %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode - %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode - %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode - %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode - %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode - %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode - %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode - %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode - %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode - %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode - %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit 
$mode - %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode - %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode - %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0 - - bb.1: - successors: %bb.2, %bb.3 - - %exec_save_if:sreg_64 = COPY $exec, implicit-def $exec - %exec_if:sreg_64 = S_AND_B64 %exec_save_if, %exec_loop_mask, implicit-def dead $scc - $exec = S_MOV_B64_term %exec_if - S_CBRANCH_EXECZ %bb.3, implicit $exec - S_BRANCH %bb.2 - - bb.2: - successors: %bb.3 - - S_NOP 0, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31 - - bb.3: - successors: %bb.4(0x7c000000), %bb.5(0x04000000) - - $exec = S_OR_B64 $exec, %exec_save_if, implicit-def $scc - %loop_counter:sreg_32 = S_ADD_I32 %loop_counter, -1, implicit-def dead $scc - S_CMP_LG_U32 %loop_counter, 0, implicit-def $scc - S_CBRANCH_SCC0 %bb.5, implicit killed $scc - - bb.4: - successors: %bb.1 - - S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7 - S_NOP 0, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15 - S_NOP 0, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23 - - S_BRANCH %bb.1 - - bb.5: - - S_NOP 0, implicit %32 - - S_ENDPGM 0 -... ---- -# Rematerializing registers used in bb.4 is the only option. 
-name: remat_in_only_possible_region -tracksRegLiveness: true -machineFunctionInfo: - isEntryFunction: true -body: | - ; CHECK-LABEL: name: remat_in_only_possible_region - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %loop_if_bound:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4) - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, 
implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0 - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0 - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0 - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0 - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0 - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0 - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0 - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0 - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0 - ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound, implicit $exec - ; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %exec_save_if:sreg_64 = COPY 
$exec, implicit-def $exec - ; CHECK-NEXT: %exec_if:sreg_64 = S_AND_B64 %exec_save_if, %exec_loop_mask, implicit-def dead $scc - ; CHECK-NEXT: $exec = S_MOV_B64_term %exec_if - ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec - ; CHECK-NEXT: S_BRANCH %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: successors: %bb.3(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]], implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]] - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.3: - ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.5(0x04000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $exec = S_OR_B64 $exec, %exec_save_if, implicit-def $scc - ; CHECK-NEXT: %loop_counter:sreg_32 = S_ADD_I32 %loop_counter, -1, implicit-def dead $scc - ; CHECK-NEXT: S_CMP_LG_U32 %loop_counter, 0, implicit-def $scc - ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.5, implicit killed $scc - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.4: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode - ; CHECK-NEXT: 
[[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode - ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]] - ; CHECK-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode - ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]] - ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]] - ; CHECK-NEXT: S_BRANCH %bb.1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]] - ; CHECK-NEXT: S_ENDPGM 0 - bb.0: - liveins: $vgpr0, $sgpr0_sgpr1 - - %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1 - %loop_if_bound:vgpr_32 = COPY $vgpr0 - %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4) - %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound, implicit $exec - %loop_counter:sreg_32 = COPY %mem_data.sub1 - - %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode - %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode - %2:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode - %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode - %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode - %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode - %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode - %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode - %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode - %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode - %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode - %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode - %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode - %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode - %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode - %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode - %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode - %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode - %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode - %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode - %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode - %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode - %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode - %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode - %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0 - %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0 - %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, 
implicit-def $m0 - %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0 - %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0 - %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0 - %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0 - %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0 - %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0 - - bb.1: - successors: %bb.2, %bb.3 - - %exec_save_if:sreg_64 = COPY $exec, implicit-def $exec - %exec_if:sreg_64 = S_AND_B64 %exec_save_if, %exec_loop_mask, implicit-def dead $scc - $exec = S_MOV_B64_term %exec_if - S_CBRANCH_EXECZ %bb.3, implicit $exec - S_BRANCH %bb.2 - - bb.2: - successors: %bb.3 - - S_NOP 0, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31 - - bb.3: - successors: %bb.4(0x7c000000), %bb.5(0x04000000) - - $exec = S_OR_B64 $exec, %exec_save_if, implicit-def $scc - %loop_counter:sreg_32 = S_ADD_I32 %loop_counter, -1, implicit-def dead $scc - S_CMP_LG_U32 %loop_counter, 0, implicit-def $scc - S_CBRANCH_SCC0 %bb.5, implicit killed $scc - - bb.4: - successors: %bb.1 - - S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7 - S_NOP 0, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15 - S_NOP 0, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23 - - S_BRANCH %bb.1 - - bb.5: - - S_NOP 0, implicit %32 - - S_ENDPGM 0 -... 
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir index 1daa709ab6439..3b3ea3f37db80 100644 --- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir +++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir @@ -37,89 +37,88 @@ body: | ; GFX908: bb.0: ; GFX908-NEXT: successors: %bb.1(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 6 - ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 7 - ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 8 - ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 9 - ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 10 - ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 11 - ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 12 - ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 13 - ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 14 - ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 15 - ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 16 - ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 17 - ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 18 - ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 19 - ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 20 - ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 21 - ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 22 - ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 23 - ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 24 - ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 25 - ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 26 - ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 27 - ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 28 - ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = 
S_MOV_B32 29 - ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 30 - ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 31 - ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 32 - ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 33 - ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 34 - ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 35 - ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 36 - ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 37 - ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 38 - ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 39 - ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 40 - ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 41 - ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 42 - ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 43 - ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 44 - ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 45 - ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 46 - ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 47 - ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 48 - ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 49 - ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 50 - ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 51 - ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 52 - ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 53 - ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 54 - ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 55 - ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 56 - ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 57 - ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 58 - ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 59 - ; GFX908-NEXT: 
[[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 60 - ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 61 - ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 62 - ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 63 - ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 64 - ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 65 - ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 66 - ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 67 - ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 68 - ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 69 - ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 70 - ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 71 - ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 72 - ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 73 - ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 74 - ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 75 - ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 76 - ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 77 - ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 78 - ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 79 + ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 1 + ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 2 + ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 3 + ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 4 + ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 5 + ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 6 + ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 7 + ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 8 + ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 9 + ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 10 + ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 11 + ; GFX908-NEXT: 
[[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 12 + ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 13 + ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 14 + ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 15 + ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 16 + ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 17 + ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 18 + ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 19 + ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 20 + ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 21 + ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 22 + ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 23 + ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 24 + ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 25 + ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 26 + ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 27 + ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 28 + ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 29 + ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 30 + ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 31 + ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 32 + ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 33 + ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 34 + ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 35 + ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 36 + ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 37 + ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 38 + ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 39 + ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 40 + ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 41 + ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 42 + 
; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 43 + ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 44 + ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 45 + ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 46 + ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 47 + ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 48 + ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 49 + ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 50 + ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 51 + ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 52 + ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 53 + ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 54 + ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 55 + ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 56 + ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 57 + ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 58 + ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 59 + ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 60 + ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 61 + ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 62 + ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 63 + ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 64 + ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 65 + ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 66 + ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 67 + ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 68 + ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 69 + ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 70 + ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 71 + ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 72 + ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = 
S_MOV_B32 73 + ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 74 + ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 75 + ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 76 + ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 77 + ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 78 + ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 79 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.1: - ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 - ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 1 - ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 2 - ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 3 - ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 4 - ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_74]], implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]] - ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 5 + ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_79]], implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]] ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_4]], implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]], implicit [[S_MOV_B32_7]], implicit [[S_MOV_B32_8]] ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_9]], implicit [[S_MOV_B32_10]], implicit [[S_MOV_B32_11]], implicit [[S_MOV_B32_12]], implicit [[S_MOV_B32_13]] @@ -135,95 +134,95 @@ body: | ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_59]], implicit [[S_MOV_B32_60]], implicit [[S_MOV_B32_61]], implicit [[S_MOV_B32_62]], implicit [[S_MOV_B32_63]] ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_64]], implicit [[S_MOV_B32_65]], implicit [[S_MOV_B32_66]], implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]] ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_69]], implicit [[S_MOV_B32_70]], implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]], 
implicit [[S_MOV_B32_73]] + ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_74]], implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]] ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: small_num_sgprs_as_spill ; GFX90A: bb.0: ; GFX90A-NEXT: successors: %bb.1(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 6 - ; GFX90A-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 7 - ; GFX90A-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 8 - ; GFX90A-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 9 - ; GFX90A-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 10 - ; GFX90A-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 11 - ; GFX90A-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 12 - ; GFX90A-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 13 - ; GFX90A-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 14 - ; GFX90A-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 15 - ; GFX90A-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 16 - ; GFX90A-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 17 - ; GFX90A-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 18 - ; GFX90A-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 19 - ; GFX90A-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 20 - ; GFX90A-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 21 - ; GFX90A-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 22 - ; GFX90A-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 23 - ; GFX90A-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 24 - ; GFX90A-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 25 - ; GFX90A-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 26 - ; GFX90A-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 27 - ; GFX90A-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 28 - ; GFX90A-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 29 - ; GFX90A-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 30 - ; GFX90A-NEXT: 
[[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 31 - ; GFX90A-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 32 - ; GFX90A-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 33 - ; GFX90A-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 34 - ; GFX90A-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 35 - ; GFX90A-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 36 - ; GFX90A-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 37 - ; GFX90A-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 38 - ; GFX90A-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 39 - ; GFX90A-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 40 - ; GFX90A-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 41 - ; GFX90A-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 42 - ; GFX90A-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 43 - ; GFX90A-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 44 - ; GFX90A-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 45 - ; GFX90A-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 46 - ; GFX90A-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 47 - ; GFX90A-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 48 - ; GFX90A-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 49 - ; GFX90A-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 50 - ; GFX90A-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 51 - ; GFX90A-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 52 - ; GFX90A-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 53 - ; GFX90A-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 54 - ; GFX90A-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 55 - ; GFX90A-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 56 - ; GFX90A-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 57 - ; GFX90A-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 58 - ; GFX90A-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 59 - ; GFX90A-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 60 - ; GFX90A-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 61 - 
; GFX90A-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 62 - ; GFX90A-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 63 - ; GFX90A-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 64 - ; GFX90A-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 65 - ; GFX90A-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 66 - ; GFX90A-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 67 - ; GFX90A-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 68 - ; GFX90A-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 69 - ; GFX90A-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 70 - ; GFX90A-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 71 - ; GFX90A-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 72 - ; GFX90A-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 73 - ; GFX90A-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 74 - ; GFX90A-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 75 - ; GFX90A-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 76 - ; GFX90A-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 77 - ; GFX90A-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 78 - ; GFX90A-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 79 + ; GFX90A-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 1 + ; GFX90A-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 2 + ; GFX90A-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 3 + ; GFX90A-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 4 + ; GFX90A-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 5 + ; GFX90A-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 6 + ; GFX90A-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 7 + ; GFX90A-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 8 + ; GFX90A-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 9 + ; GFX90A-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 10 + ; GFX90A-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 11 + ; GFX90A-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 12 + ; GFX90A-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 13 + ; 
GFX90A-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 14 + ; GFX90A-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 15 + ; GFX90A-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 16 + ; GFX90A-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 17 + ; GFX90A-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 18 + ; GFX90A-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 19 + ; GFX90A-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 20 + ; GFX90A-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 21 + ; GFX90A-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 22 + ; GFX90A-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 23 + ; GFX90A-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 24 + ; GFX90A-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 25 + ; GFX90A-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 26 + ; GFX90A-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 27 + ; GFX90A-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 28 + ; GFX90A-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 29 + ; GFX90A-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 30 + ; GFX90A-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 31 + ; GFX90A-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 32 + ; GFX90A-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 33 + ; GFX90A-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 34 + ; GFX90A-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 35 + ; GFX90A-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 36 + ; GFX90A-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 37 + ; GFX90A-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 38 + ; GFX90A-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 39 + ; GFX90A-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 40 + ; GFX90A-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 41 + ; GFX90A-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 42 + ; GFX90A-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 43 + ; GFX90A-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = 
S_MOV_B32 44 + ; GFX90A-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 45 + ; GFX90A-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 46 + ; GFX90A-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 47 + ; GFX90A-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 48 + ; GFX90A-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 49 + ; GFX90A-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 50 + ; GFX90A-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 51 + ; GFX90A-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 52 + ; GFX90A-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 53 + ; GFX90A-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 54 + ; GFX90A-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 55 + ; GFX90A-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 56 + ; GFX90A-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 57 + ; GFX90A-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 58 + ; GFX90A-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 59 + ; GFX90A-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 60 + ; GFX90A-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 61 + ; GFX90A-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 62 + ; GFX90A-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 63 + ; GFX90A-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 64 + ; GFX90A-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 65 + ; GFX90A-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 66 + ; GFX90A-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 67 + ; GFX90A-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 68 + ; GFX90A-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 69 + ; GFX90A-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 70 + ; GFX90A-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 71 + ; GFX90A-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 72 + ; GFX90A-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 73 + ; GFX90A-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 74 + ; GFX90A-NEXT: 
[[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 75 + ; GFX90A-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 76 + ; GFX90A-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 77 + ; GFX90A-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 78 + ; GFX90A-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 79 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: - ; GFX90A-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 - ; GFX90A-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 1 - ; GFX90A-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 2 - ; GFX90A-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 3 - ; GFX90A-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 4 - ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_74]], implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]] - ; GFX90A-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 5 + ; GFX90A-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_79]], implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]] ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_4]], implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]], implicit [[S_MOV_B32_7]], implicit [[S_MOV_B32_8]] ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_9]], implicit [[S_MOV_B32_10]], implicit [[S_MOV_B32_11]], implicit [[S_MOV_B32_12]], implicit [[S_MOV_B32_13]] @@ -239,6 +238,7 @@ body: | ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_59]], implicit [[S_MOV_B32_60]], implicit [[S_MOV_B32_61]], implicit [[S_MOV_B32_62]], implicit [[S_MOV_B32_63]] ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_64]], implicit [[S_MOV_B32_65]], implicit [[S_MOV_B32_66]], implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]] ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_69]], implicit [[S_MOV_B32_70]], implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]], implicit [[S_MOV_B32_73]] + ; GFX90A-NEXT: S_NOP 0, implicit [[S_MOV_B32_74]], implicit 
[[S_MOV_B32_75]], implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]] ; GFX90A-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1 @@ -796,6 +796,9 @@ body: | ; GFX908-NEXT: [[DEF26:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GFX908-NEXT: [[DEF27:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GFX908-NEXT: [[DEF28:%[0-9]+]]:agpr_32 = IMPLICIT_DEF + ; GFX908-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF + ; GFX908-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF + ; GFX908-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0 @@ -836,18 +839,15 @@ body: | ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]] ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]] - ; GFX908-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF - ; GFX908-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF - ; GFX908-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF - ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]] ; GFX908-NEXT: [[DEF32:%[0-9]+]]:agpr_32 = IMPLICIT_DEF - ; GFX908-NEXT: S_NOP 0, implicit [[DEF32]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]] - ; 
GFX908-NEXT: S_NOP 0, implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]] - ; GFX908-NEXT: S_NOP 0, implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]] - ; GFX908-NEXT: S_NOP 0, implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]] - ; GFX908-NEXT: S_NOP 0, implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]] - ; GFX908-NEXT: S_NOP 0, implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[V_CVT_I32_F64_e32_31]] - ; GFX908-NEXT: S_NOP 0, implicit [[DEF28]] + ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[DEF32]], implicit [[DEF]], implicit [[DEF1]] + ; GFX908-NEXT: S_NOP 0, implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]] + ; GFX908-NEXT: S_NOP 0, implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]] + ; GFX908-NEXT: S_NOP 0, implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]] + ; GFX908-NEXT: S_NOP 0, implicit [[DEF17]], implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]] + ; GFX908-NEXT: S_NOP 0, implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]] + ; GFX908-NEXT: S_NOP 0, implicit [[DEF27]], implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[V_CVT_I32_F64_e32_31]] + ; GFX908-NEXT: S_NOP 0, implicit [[DEF31]] ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: reduce_arch_and_acc_vgrp_spill @@ -910,6 +910,9 @@ body: | ; GFX90A-NEXT: [[DEF26:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GFX90A-NEXT: [[DEF27:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GFX90A-NEXT: [[DEF28:%[0-9]+]]:agpr_32 = IMPLICIT_DEF + ; GFX90A-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF + ; GFX90A-NEXT: 
[[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF + ; GFX90A-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode @@ -924,18 +927,15 @@ body: | ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]] ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]] - ; GFX90A-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF - ; GFX90A-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF - ; GFX90A-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF - ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]] ; GFX90A-NEXT: [[DEF32:%[0-9]+]]:agpr_32 = IMPLICIT_DEF - ; GFX90A-NEXT: S_NOP 0, implicit [[DEF32]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]] - ; GFX90A-NEXT: S_NOP 0, implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]] - ; GFX90A-NEXT: S_NOP 0, implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]] - ; GFX90A-NEXT: S_NOP 0, implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]] - ; GFX90A-NEXT: S_NOP 0, implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], 
implicit [[DEF23]] - ; GFX90A-NEXT: S_NOP 0, implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[V_CVT_I32_F64_e32_31]] - ; GFX90A-NEXT: S_NOP 0, implicit [[DEF28]] + ; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[DEF32]], implicit [[DEF]], implicit [[DEF1]] + ; GFX90A-NEXT: S_NOP 0, implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]] + ; GFX90A-NEXT: S_NOP 0, implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]] + ; GFX90A-NEXT: S_NOP 0, implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]] + ; GFX90A-NEXT: S_NOP 0, implicit [[DEF17]], implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]] + ; GFX90A-NEXT: S_NOP 0, implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]] + ; GFX90A-NEXT: S_NOP 0, implicit [[DEF27]], implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[V_CVT_I32_F64_e32_31]] + ; GFX90A-NEXT: S_NOP 0, implicit [[DEF31]] ; GFX90A-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1 @@ -2174,8 +2174,6 @@ body: | ; GFX908-NEXT: [[DEF243:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GFX908-NEXT: [[DEF244:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GFX908-NEXT: [[DEF245:%[0-9]+]]:agpr_32 = IMPLICIT_DEF - ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: bb.1: ; GFX908-NEXT: [[DEF246:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GFX908-NEXT: [[DEF247:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GFX908-NEXT: [[DEF248:%[0-9]+]]:agpr_32 = IMPLICIT_DEF @@ -2186,7 +2184,8 @@ body: | ; GFX908-NEXT: [[DEF253:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GFX908-NEXT: [[DEF254:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GFX908-NEXT: [[DEF255:%[0-9]+]]:agpr_32 = IMPLICIT_DEF - ; GFX908-NEXT: S_NOP 0, implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]], implicit [[DEF249]], implicit [[DEF250]], implicit [[DEF251]], implicit 
[[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]] + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: bb.1: ; GFX908-NEXT: [[DEF256:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GFX908-NEXT: S_NOP 0, implicit [[DEF256]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]] ; GFX908-NEXT: S_NOP 0, implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]] @@ -2212,7 +2211,8 @@ body: | ; GFX908-NEXT: S_NOP 0, implicit [[DEF209]], implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]] ; GFX908-NEXT: S_NOP 0, implicit [[DEF219]], implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]] ; GFX908-NEXT: S_NOP 0, implicit [[DEF229]], implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]] - ; GFX908-NEXT: S_NOP 0, implicit [[DEF239]], implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]] + ; GFX908-NEXT: S_NOP 0, implicit [[DEF239]], implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]] + ; GFX908-NEXT: S_NOP 0, implicit [[DEF249]], implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]], implicit 
[[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]] ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX90A-LABEL: name: reduce_spill_agpr_above_addressable_limit @@ -2465,10 +2465,6 @@ body: | ; GFX90A-NEXT: [[DEF243:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GFX90A-NEXT: [[DEF244:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GFX90A-NEXT: [[DEF245:%[0-9]+]]:agpr_32 = IMPLICIT_DEF - ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode - ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode - ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: bb.1: ; GFX90A-NEXT: [[DEF246:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GFX90A-NEXT: [[DEF247:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GFX90A-NEXT: [[DEF248:%[0-9]+]]:agpr_32 = IMPLICIT_DEF @@ -2479,7 +2475,10 @@ body: | ; GFX90A-NEXT: [[DEF253:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GFX90A-NEXT: [[DEF254:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GFX90A-NEXT: [[DEF255:%[0-9]+]]:agpr_32 = IMPLICIT_DEF - ; GFX90A-NEXT: S_NOP 0, implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]], implicit [[DEF249]], implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]] + ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode + ; GFX90A-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: bb.1: ; GFX90A-NEXT: [[DEF256:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GFX90A-NEXT: S_NOP 0, implicit [[DEF256]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]] ; GFX90A-NEXT: S_NOP 0, implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit 
[[DEF16]], implicit [[DEF17]], implicit [[DEF18]] @@ -2505,7 +2504,8 @@ body: | ; GFX90A-NEXT: S_NOP 0, implicit [[DEF209]], implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]] ; GFX90A-NEXT: S_NOP 0, implicit [[DEF219]], implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]] ; GFX90A-NEXT: S_NOP 0, implicit [[DEF229]], implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]] - ; GFX90A-NEXT: S_NOP 0, implicit [[DEF239]], implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]] + ; GFX90A-NEXT: S_NOP 0, implicit [[DEF239]], implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[DEF246]], implicit [[DEF247]], implicit [[DEF248]] + ; GFX90A-NEXT: S_NOP 0, implicit [[DEF249]], implicit [[DEF250]], implicit [[DEF251]], implicit [[DEF252]], implicit [[DEF253]], implicit [[DEF254]], implicit [[DEF255]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]] ; GFX90A-NEXT: S_ENDPGM 0 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir index 06b742f14e682..371753801d1a3 100644 --- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir +++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir @@ -19,8 +19,8 @@ body: | ; DEBUG: Machine code for function sink_and_inc_idx_when_skipping_small_region_1: IsSSA, 
NoPHIs, TracksLiveness ; DEBUG: [PreRARemat] Retrying function scheduling with new min. occupancy of 10 from rematerializing (original was 9, target was 10) ; DEBUG-NEXT: ********** MI Scheduling ********** - ; DEBUG-NEXT: sink_and_inc_idx_when_skipping_small_region_1:%bb.1 - ; DEBUG-NEXT: From: %23:vgpr_32 = nofpexcept DBG_VALUE 23, implicit $exec, implicit $mode + ; DEBUG-NEXT: sink_and_inc_idx_when_skipping_small_region_1:%bb.2 + ; DEBUG-NEXT: From: %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0 ; DEBUG-NEXT: To: End RegionInstrs: 2 bb.0: successors: %bb.1 @@ -91,9 +91,9 @@ body: | ; DEBUG: Machine code for function sink_and_inc_idx_when_skipping_small_regions_2: IsSSA, NoPHIs, TracksLiveness ; DEBUG: [PreRARemat] Retrying function scheduling with new min. occupancy of 10 from rematerializing (original was 9, target was 10) ; DEBUG-NEXT: ********** MI Scheduling ********** - ; DEBUG-NEXT: sink_and_inc_idx_when_skipping_small_regions_2:%bb.1 - ; DEBUG-NEXT: From: %23:vgpr_32 = nofpexcept DBG_VALUE 23, implicit $exec, implicit $mode - ; DEBUG-NEXT: To: End RegionInstrs: 2 + ; DEBUG-NEXT: sink_and_inc_idx_when_skipping_small_regions_2:%bb.2 + ; DEBUG-NEXT: From: %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0 + ; DEBUG-NEXT: To: End RegionInstrs: 4 bb.0: successors: %bb.1 diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir index 2cf1aea037b7c..8d24f6ba66968 100644 --- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir +++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir @@ -3084,13 +3084,9 @@ body: | ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 69 ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 70 ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 71 - ; GFX908-NEXT: 
[[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 72 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 73 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 74 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 75 ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 76 ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 77 @@ -3100,7 +3096,11 @@ body: | ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 81 ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 82 ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 83 + ; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 84 + ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0 + ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0 + ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.1: ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) @@ -9419,7 +9419,7 @@ body: | ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]] ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: - ; GFX908-NEXT: 
DBG_VALUE %23:vgpr_32, 0, 0 + ; GFX908-NEXT: DBG_VALUE %23, 0, 0 ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]] ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]] @@ -9471,7 +9471,7 @@ body: | ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]] ; GFX908-GCNTRACKERS-NEXT: {{ $}} ; GFX908-GCNTRACKERS-NEXT: bb.2: - ; GFX908-GCNTRACKERS-NEXT: DBG_VALUE %23:vgpr_32, 0, 0 + ; GFX908-GCNTRACKERS-NEXT: DBG_VALUE %23, 0, 0 ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]] ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]] @@ -10269,13 +10269,13 @@ body: | ; GFX908-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode + ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0 + ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit 
$exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode + ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0 @@ -10291,9 +10291,7 @@ body: | ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit 
$mode, implicit-def $m0 + ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.1: ; GFX908-NEXT: successors: %bb.2(0x80000000) @@ -10304,16 +10302,16 @@ body: | ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]] ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]] ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]] - ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_34]] + ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]] ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]] ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]] - ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]] - ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]] + ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit 
[[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]] + ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]] ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]] ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]] - ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_34]] + ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]] ; GFX908-NEXT: S_ENDPGM 0 ; ; GFX908-GCNTRACKERS-LABEL: name: test_rollback_remat_defregion_above_target @@ -10330,13 +10328,13 @@ body: | ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode + ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit 
$exec, implicit $mode, implicit-def $m0 + ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode + ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0 @@ -10352,9 +10350,7 @@ body: | ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit 
$exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode, implicit-def $m0 + ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode ; GFX908-GCNTRACKERS-NEXT: {{ $}} ; GFX908-GCNTRACKERS-NEXT: bb.1: ; GFX908-GCNTRACKERS-NEXT: successors: %bb.2(0x80000000) @@ -10365,16 +10361,16 @@ body: | ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]] ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]] ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]] - ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_34]] + ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]] ; GFX908-GCNTRACKERS-NEXT: {{ $}} ; GFX908-GCNTRACKERS-NEXT: bb.2: ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit 
[[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]] ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]] - ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]] - ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]] + ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]] + ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]] ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]] ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]] - ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]], implicit [[V_CVT_I32_F64_e32_34]] + ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]] ; GFX908-GCNTRACKERS-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1 @@ -10389,13 +10385,13 @@ body: | %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0 %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, 
implicit $mode, implicit-def $m0 %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0 - %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode - %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode + %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0 + %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0 %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0 %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0 %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0 %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0 - %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode + %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0 %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0 %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0 %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0 @@ -10411,30 +10407,28 @@ body: | %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0 %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0 %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0 - %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0 - %33:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode, implicit-def $m0 - %34:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode, implicit-def $m0 + %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, 
implicit $exec, implicit $mode bb.1: successors: %bb.2 - S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4 + S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, S_NOP 0, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9 S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14 S_NOP 0, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19 S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24 S_NOP 0, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29 - S_NOP 0, implicit %30, implicit %31, implicit %32, implicit %33, implicit %34 + S_NOP 0, implicit %30, implicit %31, implicit %32 bb.2: - S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4 + S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, S_NOP 0, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9 - S_NOP 0, implicit %12, implicit %13, implicit %14 - S_NOP 0, implicit %15, implicit %17, implicit %18, implicit %19 + S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14 + S_NOP 0, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19 S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24 S_NOP 0, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29 - S_NOP 0, implicit %30, implicit %31, implicit %32, implicit %33, implicit %34 + S_NOP 0, implicit %30, implicit %31 S_ENDPGM 0 ... @@ -10633,12 +10627,12 @@ body: | S_ENDPGM 0 ... 
--- -name: test_rollback_remats_emptydefregion_block +name: test_rollback_remats_emptydefregion tracksRegLiveness: true machineFunctionInfo: isEntryFunction: true body: | - ; GFX908-LABEL: name: test_rollback_remats_emptydefregion_block + ; GFX908-LABEL: name: test_rollback_remats_emptydefregion ; GFX908: bb.0: ; GFX908-NEXT: successors: %bb.1(0x80000000) ; GFX908-NEXT: {{ $}} @@ -10702,7 +10696,7 @@ body: | ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]] ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX908-GCNTRACKERS-LABEL: name: test_rollback_remats_emptydefregion_block + ; GFX908-GCNTRACKERS-LABEL: name: test_rollback_remats_emptydefregion ; GFX908-GCNTRACKERS: bb.0: ; GFX908-GCNTRACKERS-NEXT: successors: %bb.1(0x80000000) ; GFX908-GCNTRACKERS-NEXT: {{ $}} @@ -10831,207 +10825,6 @@ body: | S_ENDPGM 0 ... --- -name: test_rollback_remats_emptydefregion_barrier -tracksRegLiveness: true -machineFunctionInfo: - isEntryFunction: true -body: | - ; GFX908-LABEL: name: test_rollback_remats_emptydefregion_barrier - ; GFX908: bb.0: - ; GFX908-NEXT: successors: %bb.1(0x80000000) - ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: 
[[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: 
[[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: bb.1: - ; GFX908-NEXT: successors: %bb.2(0x80000000) - ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode - ; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode - ; GFX908-NEXT: SCHED_BARRIER 0 - ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]] - ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: bb.2: - ; GFX908-NEXT: successors: %bb.3(0x80000000) - ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]] - ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]] - ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]] - ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]] - ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]] - ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]] - ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: bb.3: - ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]] - ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]] - ; 
GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]] - ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]] - ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]] - ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]] - ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]] - ; GFX908-NEXT: S_ENDPGM 0 - ; - ; GFX908-GCNTRACKERS-LABEL: name: test_rollback_remats_emptydefregion_barrier - ; GFX908-GCNTRACKERS: bb.0: - ; GFX908-GCNTRACKERS-NEXT: successors: %bb.1(0x80000000) - ; GFX908-GCNTRACKERS-NEXT: {{ $}} - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit 
$exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit 
$exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, 
implicit $exec, implicit $mode, implicit-def $m0 - ; GFX908-GCNTRACKERS-NEXT: {{ $}} - ; GFX908-GCNTRACKERS-NEXT: bb.1: - ; GFX908-GCNTRACKERS-NEXT: successors: %bb.2(0x80000000) - ; GFX908-GCNTRACKERS-NEXT: {{ $}} - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode - ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode - ; GFX908-GCNTRACKERS-NEXT: SCHED_BARRIER 0 - ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]] - ; GFX908-GCNTRACKERS-NEXT: {{ $}} - ; GFX908-GCNTRACKERS-NEXT: bb.2: - ; GFX908-GCNTRACKERS-NEXT: successors: %bb.3(0x80000000) - ; GFX908-GCNTRACKERS-NEXT: {{ $}} - ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]] - ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]] - ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]] - ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]] - ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]] - ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, 
implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_33]] - ; GFX908-GCNTRACKERS-NEXT: {{ $}} - ; GFX908-GCNTRACKERS-NEXT: bb.3: - ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]] - ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]] - ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]] - ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]] - ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]] - ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]] - ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]] - ; GFX908-GCNTRACKERS-NEXT: S_ENDPGM 0 - bb.0: - successors: %bb.1 - - %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0 - %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0 - %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0 - %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, 
implicit $mode, implicit-def $m0 - %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0 - %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0 - %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0 - %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0 - %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0 - %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0 - %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0 - %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0 - %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0 - %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0 - %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0 - %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0 - %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0 - %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0 - %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0 - %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0 - %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0 - %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0 - %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0 - %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0 - %24:vgpr_32 = 
nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0 - %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0 - %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0 - %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0 - %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0 - %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0 - %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0 - %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0 - - bb.1: - successors: %bb.2 - - %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode - %33:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode - SCHED_BARRIER 0 - S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4 - - bb.2: - successors: %bb.3 - - S_NOP 0, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9 - S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14 - S_NOP 0, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19 - S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24 - S_NOP 0, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29 - S_NOP 0, implicit %30, implicit %31, implicit %32, implicit %33 - - bb.3: - - S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, - S_NOP 0, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9 - S_NOP 0, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14 - S_NOP 0, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19 - S_NOP 0, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24 - S_NOP 0, implicit %25, implicit %26, implicit 
%27, implicit %28, implicit %29 - S_NOP 0, implicit %30, implicit %31 - - S_ENDPGM 0 -... ---- name: test_occ_8_physreg_use tracksRegLiveness: true machineFunctionInfo: diff --git a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll index 63c48121e40c9..8b6bb9b8c5fcd 100644 --- a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll @@ -3902,8 +3902,8 @@ define <32 x float> @test_mfma_loop_non_splat_ret_use() #0 { ; GFX908-NEXT: v_accvgpr_write_b32 a3, 0 ; GFX908-NEXT: v_accvgpr_write_b32 a2, 0 ; GFX908-NEXT: v_accvgpr_write_b32 a0, 0 -; GFX908-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX908-NEXT: s_mov_b32 s4, 16 +; GFX908-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX908-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX908-NEXT: .LBB11_1: ; %for.cond.preheader ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1