Conversation
…eraction (llvm#175755)" This reverts commit 125d24a.
…vm#175050)" This reverts commit 6aaa7fd.
Member
|
@llvm/pr-subscribers-backend-amdgpu Author: Lucas Ramirez (lucas-rami) Changes: This reverts 8ab7937 and f21e359, which are causing a HIP failure in a Blender test. Patch is 174.85 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/175813.diff 7 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index a032dd2092140..c8ce3aab3f303 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -28,20 +28,11 @@
#include "GCNRegPressure.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/MC/LaneBitmask.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/MC/MCSchedule.h"
-#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include <limits>
-#include <string>
#define DEBUG_TYPE "machine-scheduler"
@@ -979,8 +970,6 @@ void GCNScheduleDAGMILive::schedule() {
GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
- if (Regions[RegionIdx].first == Regions[RegionIdx].second)
- return llvm::getRegPressure(MRI, LiveIns[RegionIdx]);
GCNDownwardRPTracker RPTracker(*LIS);
RPTracker.advance(Regions[RegionIdx].first, Regions[RegionIdx].second,
&LiveIns[RegionIdx]);
@@ -1283,222 +1272,33 @@ bool ClusteredLowOccStage::initGCNSchedStage() {
#define REMAT_PREFIX "[PreRARemat] "
#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-Printable PreRARematStage::ScoredRemat::print() const {
- return Printable([&](raw_ostream &OS) {
- OS << '(' << MaxFreq << ", " << FreqDiff << ", " << RegionImpact << ')';
- });
-}
-#endif
-
bool PreRARematStage::initGCNSchedStage() {
// FIXME: This pass will invalidate cached BBLiveInMap and MBBLiveIns for
// regions inbetween the defs and region we sinked the def to. Will need to be
// fixed if there is another pass after this pass.
assert(!S.hasNextStage());
- if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() <= 1)
+ if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() == 1)
return false;
- // Maps all MIs (except lone terminators, which are not part of any region) to
- // their parent region. Non-lone terminators are considered part of the region
- // they delimitate.
- DenseMap<MachineInstr *, unsigned> MIRegion(MF.getInstructionCount());
-
// Before performing any IR modification record the parent region of each MI
// and the parent MBB of each region.
const unsigned NumRegions = DAG.Regions.size();
+ RegionBB.reserve(NumRegions);
for (unsigned I = 0; I < NumRegions; ++I) {
RegionBoundaries Region = DAG.Regions[I];
for (auto MI = Region.first; MI != Region.second; ++MI)
MIRegion.insert({&*MI, I});
- MachineBasicBlock *ParentMBB = Region.first->getParent();
- if (Region.second != ParentMBB->end())
- MIRegion.insert({&*Region.second, I});
- RegionBB.push_back(ParentMBB);
- }
-
-#ifndef NDEBUG
- auto PrintTargetRegions = [&]() -> void {
- if (TargetRegions.none()) {
- dbgs() << REMAT_PREFIX << "No target regions\n";
- return;
- }
- dbgs() << REMAT_PREFIX << "Target regions:\n";
- for (unsigned I : TargetRegions.set_bits())
- dbgs() << REMAT_PREFIX << " [" << I << "] " << RPTargets[I] << '\n';
- };
- auto PrintRematReg = [&](const RematReg &Remat) -> Printable {
- return Printable([&, Remat](raw_ostream &OS) {
- // Concatenate all region numbers in which the register is unused and
- // live-through.
- bool HasLiveThroughRegion = false;
- OS << '[' << Remat.DefRegion << " -";
- for (unsigned I = 0; I < NumRegions; ++I) {
- if (Remat.isUnusedLiveThrough(I)) {
- if (HasLiveThroughRegion) {
- OS << ',';
- } else {
- OS << "- ";
- HasLiveThroughRegion = true;
- }
- OS << I;
- }
- }
- if (HasLiveThroughRegion)
- OS << " -";
- OS << "-> " << Remat.UseRegion << "] ";
- Remat.DefMI->print(OS, /*IsStandalone=*/true, /*SkipOpers=*/false,
- /*SkipDebugLoc=*/false, /*AddNewLine=*/false);
- });
- };
-#endif
-
- // Set an objective for the stage based on current RP in each region.
- REMAT_DEBUG({
- dbgs() << "Analyzing ";
- MF.getFunction().printAsOperand(dbgs(), false);
- dbgs() << ": ";
- });
- if (!setObjective()) {
- LLVM_DEBUG(dbgs() << "no objective to achieve, occupancy is maximal at "
- << MFI.getMaxWavesPerEU() << '\n');
- return false;
+ RegionBB.push_back(Region.first->getParent());
}
- LLVM_DEBUG({
- if (TargetOcc) {
- dbgs() << "increase occupancy from " << *TargetOcc - 1 << '\n';
- } else {
- dbgs() << "reduce spilling (minimum target occupancy is "
- << MFI.getMinWavesPerEU() << ")\n";
- }
- PrintTargetRegions();
- });
-
- if (!collectRematRegs(MIRegion)) {
- REMAT_DEBUG(dbgs() << "No rematerializable registers\n");
- return false;
- }
- const ScoredRemat::FreqInfo FreqInfo(MF, DAG);
- REMAT_DEBUG({
- dbgs() << "Rematerializable registers:\n";
- for (const RematReg &Remat : RematRegs)
- dbgs() << REMAT_PREFIX << " " << PrintRematReg(Remat) << '\n';
- dbgs() << REMAT_PREFIX << "Region frequencies\n";
- for (auto [I, Freq] : enumerate(FreqInfo.Regions)) {
- dbgs() << REMAT_PREFIX << " [" << I << "] ";
- if (Freq)
- dbgs() << Freq;
- else
- dbgs() << "unknown ";
- dbgs() << " | " << *DAG.Regions[I].first;
- }
- });
- SmallVector<ScoredRemat> ScoredRemats;
- for (RematReg &Remat : RematRegs)
- ScoredRemats.emplace_back(&Remat, FreqInfo, DAG);
-
-// Rematerialize registers in successive rounds until all RP targets are
-// satisifed or until we run out of rematerialization candidates.
-#ifndef NDEBUG
- unsigned RoundNum = 0;
-#endif
- BitVector RecomputeRP(NumRegions);
- do {
- assert(!ScoredRemats.empty() && "no more remat candidates");
-
- // (Re-)Score and (re-)sort all remats in increasing score order.
- for (ScoredRemat &Remat : ScoredRemats)
- Remat.update(TargetRegions, RPTargets, FreqInfo, !TargetOcc);
- sort(ScoredRemats);
-
- REMAT_DEBUG({
- dbgs() << "==== ROUND " << RoundNum++ << " ====\n"
- << REMAT_PREFIX
- << "Candidates with non-null score, in rematerialization order:\n";
- for (const ScoredRemat &RematDecision : reverse(ScoredRemats)) {
- if (RematDecision.hasNullScore())
- break;
- dbgs() << REMAT_PREFIX << " " << RematDecision.print() << " | "
- << *RematDecision.Remat->DefMI;
- }
- PrintTargetRegions();
- });
-
- RecomputeRP.reset();
- unsigned RematIdx = ScoredRemats.size();
-
- // Rematerialize registers in decreasing score order until we estimate
- // that all RP targets are satisfied or until rematerialization candidates
- // are no longer useful to decrease RP.
- for (; RematIdx && TargetRegions.any(); --RematIdx) {
- const ScoredRemat &Candidate = ScoredRemats[RematIdx - 1];
- // Stop rematerializing on encountering a null score. Since scores
- // monotonically decrease as we rematerialize, we know there is nothing
- // useful left to do in such cases, even if we were to re-score.
- if (Candidate.hasNullScore()) {
- RematIdx = 0;
- break;
- }
-
- const RematReg &Remat = *Candidate.Remat;
- // When previous rematerializations in this round have already satisfied
- // RP targets in all regions this rematerialization can impact, we have a
- // good indication that our scores have diverged significantly from
- // reality, in which case we interrupt this round and re-score. This also
- // ensures that every rematerialization we perform is possibly impactful
- // in at least one target region.
- if (!Remat.maybeBeneficial(TargetRegions, RPTargets))
- break;
-
- REMAT_DEBUG(dbgs() << "** REMAT " << PrintRematReg(Remat) << '\n';);
- // Every rematerialization we do here is likely to move the instruction
- // into a higher frequency region, increasing the total sum latency of the
- // instruction itself. This is acceptable if we are eliminating a spill in
- // the process, but when the goal is increasing occupancy we get nothing
- // out of rematerialization if occupancy is not increased in the end; in
- // such cases we want to roll back the rematerialization.
- RollbackInfo *Rollback =
- TargetOcc ? &Rollbacks.emplace_back(&Remat) : nullptr;
- rematerialize(Remat, RecomputeRP, Rollback);
- unsetSatisifedRPTargets(Remat.Live);
- }
-
- REMAT_DEBUG({
- if (!TargetRegions.any()) {
- dbgs() << "** Interrupt round on all targets achieved\n";
- } else if (RematIdx) {
- dbgs() << "** Interrupt round on stale score for "
- << *ScoredRemats[RematIdx - 1].Remat->DefMI;
- } else {
- dbgs() << "** Stop on exhausted rematerialization candidates\n";
- }
- });
-
- // Peel off registers we already rematerialized from the vector's tail.
- ScoredRemats.truncate(RematIdx);
- } while ((updateAndVerifyRPTargets(RecomputeRP) || TargetRegions.any()) &&
- !ScoredRemats.empty());
- if (RescheduleRegions.none())
+ if (!canIncreaseOccupancyOrReduceSpill())
return false;
- // Commit all pressure changes to the DAG and compute minimum achieved
- // occupancy in impacted regions.
- REMAT_DEBUG(dbgs() << "==== REMAT RESULTS ====\n");
- unsigned DynamicVGPRBlockSize = MFI.getDynamicVGPRBlockSize();
- for (unsigned I : RescheduleRegions.set_bits()) {
- DAG.Pressure[I] = RPTargets[I].getCurrentRP();
- REMAT_DEBUG(dbgs() << '[' << I << "] Achieved occupancy "
- << DAG.Pressure[I].getOccupancy(ST, DynamicVGPRBlockSize)
- << " (" << RPTargets[I] << ")\n");
- }
- AchievedOcc = MFI.getMaxWavesPerEU();
- for (const GCNRegPressure &RP : DAG.Pressure) {
- AchievedOcc =
- std::min(AchievedOcc, RP.getOccupancy(ST, DynamicVGPRBlockSize));
- }
-
+ // Rematerialize identified instructions and update scheduler's state.
+ rematerialize();
+ if (GCNTrackers)
+ DAG.RegionLiveOuts.buildLiveRegMap();
REMAT_DEBUG({
dbgs() << "Retrying function scheduling with new min. occupancy of "
<< AchievedOcc << " from rematerializing (original was "
@@ -1507,6 +1307,7 @@ bool PreRARematStage::initGCNSchedStage() {
dbgs() << ", target was " << *TargetOcc;
dbgs() << ")\n";
});
+
if (AchievedOcc > DAG.MinOccupancy) {
DAG.MinOccupancy = AchievedOcc;
SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
@@ -1540,10 +1341,6 @@ void UnclusteredHighRPStage::finalizeGCNSchedStage() {
}
bool GCNSchedStage::initGCNRegion() {
- // Skip empty scheduling region.
- if (DAG.begin() == DAG.end())
- return false;
-
// Check whether this new region is also a new block.
if (DAG.RegionBegin->getParent() != CurrentMBB)
setupNewBlock();
@@ -1551,8 +1348,8 @@ bool GCNSchedStage::initGCNRegion() {
unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);
- // Skip regions with 1 schedulable instruction.
- if (DAG.begin() == std::prev(DAG.end()))
+ // Skip empty scheduling regions (0 or 1 schedulable instructions).
+ if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
return false;
LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
@@ -2040,20 +1837,27 @@ void GCNSchedStage::revertScheduling() {
DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
}
-bool PreRARematStage::setObjective() {
+bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
const Function &F = MF.getFunction();
- // Set up "spilling targets" for all regions.
+ // Maps optimizable regions (i.e., regions at minimum and register-limited
+ // occupancy, or regions with spilling) to the target RP we would like to
+ // reach.
+ DenseMap<unsigned, GCNRPTarget> OptRegions;
unsigned MaxSGPRs = ST.getMaxNumSGPRs(F);
unsigned MaxVGPRs = ST.getMaxNumVGPRs(F);
- for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
- const GCNRegPressure &RP = DAG.Pressure[I];
- GCNRPTarget &Target = RPTargets.emplace_back(MaxSGPRs, MaxVGPRs, MF, RP);
- if (!Target.satisfied())
- TargetRegions.set(I);
- }
+ auto ResetTargetRegions = [&]() {
+ OptRegions.clear();
+ for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
+ const GCNRegPressure &RP = DAG.Pressure[I];
+ GCNRPTarget Target(MaxSGPRs, MaxVGPRs, MF, RP);
+ if (!Target.satisfied())
+ OptRegions.insert({I, Target});
+ }
+ };
- if (TargetRegions.any() || DAG.MinOccupancy >= MFI.getMaxWavesPerEU()) {
+ ResetTargetRegions();
+ if (!OptRegions.empty() || DAG.MinOccupancy >= MFI.getMaxWavesPerEU()) {
// In addition to register usage being above addressable limits, occupancy
// below the minimum is considered like "spilling" as well.
TargetOcc = std::nullopt;
@@ -2061,68 +1865,94 @@ bool PreRARematStage::setObjective() {
// There is no spilling and room to improve occupancy; set up "increased
// occupancy targets" for all regions.
TargetOcc = DAG.MinOccupancy + 1;
- const unsigned VGPRBlockSize = MFI.getDynamicVGPRBlockSize();
+ unsigned VGPRBlockSize =
+ MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
MaxSGPRs = ST.getMaxNumSGPRs(*TargetOcc, false);
MaxVGPRs = ST.getMaxNumVGPRs(*TargetOcc, VGPRBlockSize);
- for (auto [I, Target] : enumerate(RPTargets)) {
- Target.setTarget(MaxSGPRs, MaxVGPRs);
- if (!Target.satisfied())
- TargetRegions.set(I);
- }
+ ResetTargetRegions();
}
+ REMAT_DEBUG({
+ dbgs() << "Analyzing ";
+ MF.getFunction().printAsOperand(dbgs(), false);
+ dbgs() << ": ";
+ if (OptRegions.empty()) {
+ dbgs() << "no objective to achieve, occupancy is maximal at "
+ << MFI.getMaxWavesPerEU();
+ } else if (!TargetOcc) {
+ dbgs() << "reduce spilling (minimum target occupancy is "
+ << MFI.getMinWavesPerEU() << ')';
+ } else {
+ dbgs() << "increase occupancy from " << DAG.MinOccupancy << " to "
+ << TargetOcc;
+ }
+ dbgs() << '\n';
+ for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
+ if (auto OptIt = OptRegions.find(I); OptIt != OptRegions.end()) {
+ dbgs() << REMAT_PREFIX << " [" << I << "] " << OptIt->getSecond()
+ << '\n';
+ }
+ }
+ });
+ if (OptRegions.empty())
+ return false;
- return TargetRegions.any();
-}
+ // Accounts for a reduction in RP in an optimizable region. Returns whether we
+ // estimate that we have identified enough rematerialization opportunities to
+ // achieve our goal, and sets Progress to true when this particular reduction
+ // in pressure was helpful toward that goal.
+ auto ReduceRPInRegion = [&](auto OptIt, Register Reg, LaneBitmask Mask,
+ bool &Progress) -> bool {
+ GCNRPTarget &Target = OptIt->getSecond();
+ if (!Target.isSaveBeneficial(Reg))
+ return false;
+ Progress = true;
+ Target.saveReg(Reg, Mask, DAG.MRI);
+ if (Target.satisfied())
+ OptRegions.erase(OptIt->getFirst());
+ return OptRegions.empty();
+ };
-bool PreRARematStage::collectRematRegs(
- const DenseMap<MachineInstr *, unsigned> &MIRegion) {
// We need up-to-date live-out info. to query live-out register masks in
// regions containing rematerializable instructions.
DAG.RegionLiveOuts.buildLiveRegMap();
- // Set of registers already marked for potential remterialization; used to
- // avoid rematerialization chains.
- SmallSet<Register, 4> MarkedRegs;
- auto IsMarkedForRemat = [&MarkedRegs](const MachineOperand &MO) -> bool {
- return MO.isReg() && MarkedRegs.contains(MO.getReg());
- };
+ // Cache set of registers that are going to be rematerialized.
+ DenseSet<unsigned> RematRegs;
// Identify rematerializable instructions in the function.
for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
- RegionBoundaries Bounds = DAG.Regions[I];
- for (auto MI = Bounds.first; MI != Bounds.second; ++MI) {
+ auto Region = DAG.Regions[I];
+ for (auto MI = Region.first; MI != Region.second; ++MI) {
// The instruction must be rematerializable.
MachineInstr &DefMI = *MI;
if (!isReMaterializable(DefMI))
continue;
- // We only support rematerializing virtual registers with one
- // definition.
+ // We only support rematerializing virtual registers with one definition.
Register Reg = DefMI.getOperand(0).getReg();
if (!Reg.isVirtual() || !DAG.MRI.hasOneDef(Reg))
continue;
// We only care to rematerialize the instruction if it has a single
- // non-debug user in a different region.
- // FIXME: Allow rematerializations with multiple uses. This should be
- // relatively easy to support using the current cost model.
+ // non-debug user in a different region. The using MI may not belong to a
+ // region if it is a lone region terminator.
MachineInstr *UseMI = DAG.MRI.getOneNonDBGUser(Reg);
if (!UseMI)
continue;
auto UseRegion = MIRegion.find(UseMI);
- if (UseRegion == MIRegion.end() || UseRegion->second == I)
+ if (UseRegion != MIRegion.end() && UseRegion->second == I)
continue;
// Do not rematerialize an instruction if it uses or is used by an
// instruction that we have designated for rematerialization.
// FIXME: Allow for rematerialization chains: this requires 1. updating
- // remat points to account for uses that are rematerialized, and 2.
- // either rematerializing the candidates in careful ordering, or
- // deferring the MBB RP walk until the entire chain has been
- // rematerialized.
- const MachineOperand &UseMO = UseMI->getOperand(0);
- if (IsMarkedForRemat(UseMO) ||
- llvm::any_of(DefMI.operands(), IsMarkedForRemat))
+ // remat points to account for uses that are rematerialized, and 2. either
+ // rematerializing the candidates in careful ordering, or deferring the
+ // MBB RP walk until the entire chain has been rematerialized.
+ if (Rematerializations.contains(UseMI) ||
+ llvm::any_of(DefMI.operands(), [&RematRegs](MachineOperand &MO) {
+ return MO.isReg() && RematRegs.contains(MO.getReg());
+ }))
continue;
// Do not rematerialize an instruction it it uses registers that aren't
@@ -2133,181 +1963,106 @@ bool PreRARematStage::collectRematRegs(
*DAG.TII))
continue;
- // Add the instruction to the rematerializable list.
- MarkedRegs.insert(Reg);
- RematRegs.emplace_back(&DefMI, UseMI, DAG, MIRegion);
- }
- }
-
- return !RematRegs.empty();
-}
+ REMAT_DEBUG(dbgs() << "Region " << I << ": remat instruction " << DefMI);
+ RematInstruction &Remat =
+ Rematerializations.try_emplace(&DefMI, UseMI).first->second;
+
+ bool RematUseful = false;
+ if (auto It = OptRegions.find(I); It != OptRegions.end()) {
+ // Optimistically consider that moving the instruction out of its
+ // defining region will reduce RP in the latter; this assumes that
+ // maximum RP in the region is reached somewhere between the defining
+ // instruction and the end of the region.
+ REMAT_DEBUG(dbgs() << " Defining region is optimizable\n");
+ LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
+ if (ReduceRPInRegion(It, Reg, Mask, RematUseful))
+ return true;
+ }
-PreRARematStage::RematReg::RematReg(
- MachineInstr *DefMI, MachineInstr *UseMI, GCNScheduleDAGMILive &DAG,
- const DenseMap<MachineInstr *, unsigned> &MIRegion)
- : DefMI(DefMI), UseMI(UseMI), LiveIn(DAG.Regions.size()),
- LiveOut(DAG.Regions.size()), Live(DAG.Regions.size()),
- De...
[truncated]
|
Priyanshu3820
pushed a commit
to Priyanshu3820/llvm-project
that referenced
this pull request
Jan 18, 2026
…vm#175050)" (llvm#175813) This reverts 8ab7937 and f21e359 which are causing a HIP failure in a Blender test.
BStott6
pushed a commit
to BStott6/llvm-project
that referenced
this pull request
Jan 22, 2026
…vm#175050)" (llvm#175813) This reverts 8ab7937 and f21e359 which are causing a HIP failure in a Blender test.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
This reverts 8ab7937 and f21e359 which are causing a HIP failure in a Blender test.