Skip to content

Commit a6a8d5c

Browse files
committed
kvserver: introduce Allocator.ValidLeaseTargets()
This commit is a minor refactor of the `Allocator.TransferLeaseTarget` logic in order to make it more readable and, to abstract out a new exported `Allocator` method called `ValidLeaseTargets()`. The contract of `ValidLeaseTargets()` is as follows: ``` // ValidLeaseTargets returns a set of candidate stores that are suitable to be // transferred a lease for the given range. // // - It excludes stores that are dead, or marked draining or suspect. // - If the range has lease_preferences, and there are any non-draining, // non-suspect nodes that match those preferences, it excludes stores that don't // match those preferences. // - It excludes replicas that may need snapshots. If replica calling this // method is not the Raft leader (meaning that it doesn't know whether follower // replicas need a snapshot or not), produces no results. ``` Previously, there were multiple places where we were performing the logic that's encapsulated by `ValidLeaseTargets()`, which was a potential source of bugs. This is an attempt to unify this logic in one place that's relatively well-tested. This commit is only a refactor, and does not attempt to change any behavior. As such, no existing tests have been changed, with the exception of a subtest inside `TestAllocatorTransferLeaseTargetDraining`. See the comment over that subtest to understand why the behavior change made by this patch is desirable. The next commit in this PR uses this method to fix (at least part of) cockroachdb#74691. Release note: none
1 parent 60b250f commit a6a8d5c

3 files changed

Lines changed: 185 additions & 110 deletions

File tree

pkg/kv/kvserver/allocator.go

Lines changed: 160 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -1474,6 +1474,120 @@ func (a *Allocator) scorerOptionsForScatter() *scatterScorerOptions {
14741474
}
14751475
}
14761476

1477+
// ValidLeaseTargets returns a set of candidate stores that are suitable to be
1478+
// transferred a lease for the given range.
1479+
//
1480+
// - It excludes stores that are dead, or marked draining or suspect.
1481+
// - If the range has lease_preferences, and there are any non-draining,
1482+
// non-suspect nodes that match those preferences, it excludes stores that don't
1483+
// match those preferences.
1484+
// - It excludes replicas that may need snapshots. If replica calling this
1485+
// method is not the Raft leader (meaning that it doesn't know whether follower
1486+
// replicas need a snapshot or not), produces no results.
1487+
func (a *Allocator) ValidLeaseTargets(
1488+
ctx context.Context,
1489+
conf roachpb.SpanConfig,
1490+
existing []roachpb.ReplicaDescriptor,
1491+
leaseRepl interface {
1492+
RaftStatus() *raft.Status
1493+
StoreID() roachpb.StoreID
1494+
},
1495+
// excludeLeaseRepl dictates whether the result set can include the source
1496+
// replica.
1497+
excludeLeaseRepl bool,
1498+
) []roachpb.ReplicaDescriptor {
1499+
candidates := make([]roachpb.ReplicaDescriptor, 0, len(existing))
1500+
for i := range existing {
1501+
if existing[i].GetType() != roachpb.VOTER_FULL {
1502+
continue
1503+
}
1504+
// If we're not allowed to include the current replica, remove it from
1505+
// consideration here.
1506+
if existing[i].StoreID == leaseRepl.StoreID() && excludeLeaseRepl {
1507+
continue
1508+
}
1509+
candidates = append(candidates, existing[i])
1510+
}
1511+
candidates, _ = a.storePool.liveAndDeadReplicas(
1512+
candidates, false, /* includeSuspectAndDrainingStores */
1513+
)
1514+
1515+
if a.knobs == nil || !a.knobs.AllowLeaseTransfersToReplicasNeedingSnapshots {
1516+
// Only proceed with the lease transfer if we are also the raft leader (we
1517+
// already know we are the leaseholder at this point), and only consider
1518+
// replicas that are in `StateReplicate` as potential candidates.
1519+
//
1520+
// NB: The RaftStatus() only returns a non-empty and non-nil result on the
1521+
// Raft leader (since Raft followers do not track the progress of other
1522+
// replicas, only the leader does).
1523+
//
1524+
// NB: On every Raft tick, we try to ensure that leadership is collocated with
1525+
// leaseholdership (see
1526+
// Replica.maybeTransferRaftLeadershipToLeaseholderLocked()). This means that
1527+
// on a range that is not already borked (i.e. can accept writes), periods of
1528+
// leader/leaseholder misalignment should be ephemeral and rare. We choose to
1529+
// be pessimistic here and choose to bail on the lease transfer, as opposed to
1530+
// potentially transferring the lease to a replica that may be waiting for a
1531+
// snapshot (which will wedge the range until the replica applies that
1532+
// snapshot).
1533+
candidates = excludeReplicasInNeedOfSnapshots(ctx, leaseRepl.RaftStatus(), candidates)
1534+
}
1535+
1536+
// Determine which store(s) is preferred based on user-specified preferences.
1537+
// If any stores match, only consider those stores as candidates.
1538+
preferred := a.preferredLeaseholders(conf, candidates)
1539+
if len(preferred) > 0 {
1540+
candidates = preferred
1541+
}
1542+
return candidates
1543+
}
1544+
1545+
// leaseholderShouldMoveDueToPreferences returns true if the current leaseholder
1546+
// is in violation of lease preferences _that can otherwise be satisfied_ by
1547+
// some existing replica.
1548+
//
1549+
// INVARIANT: This method should only be called with an `allExistingReplicas`
1550+
// slice that contains `leaseRepl`.
1551+
func (a *Allocator) leaseholderShouldMoveDueToPreferences(
1552+
ctx context.Context,
1553+
conf roachpb.SpanConfig,
1554+
leaseRepl interface {
1555+
RaftStatus() *raft.Status
1556+
StoreID() roachpb.StoreID
1557+
},
1558+
allExistingReplicas []roachpb.ReplicaDescriptor,
1559+
) bool {
1560+
// Defensive check to ensure that this is never called with a replica set that
1561+
// does not contain the leaseholder.
1562+
var leaseholderInExisting bool
1563+
for _, repl := range allExistingReplicas {
1564+
if repl.StoreID == leaseRepl.StoreID() {
1565+
leaseholderInExisting = true
1566+
break
1567+
}
1568+
}
1569+
if !leaseholderInExisting {
1570+
log.Errorf(ctx, "programming error: expected leaseholder store to be in the slice of existing replicas")
1571+
}
1572+
1573+
// Exclude suspect/draining/dead stores.
1574+
candidates, _ := a.storePool.liveAndDeadReplicas(
1575+
allExistingReplicas, false, /* includeSuspectAndDrainingStores */
1576+
)
1577+
// If there are any replicas that do match lease preferences, then we check if
1578+
// the existing leaseholder is one of them.
1579+
preferred := a.preferredLeaseholders(conf, candidates)
1580+
if len(preferred) == 0 {
1581+
return false
1582+
}
1583+
for _, repl := range preferred {
1584+
if repl.StoreID == leaseRepl.StoreID() {
1585+
return false
1586+
}
1587+
}
1588+
return true
1589+
}
1590+
14771591
// TransferLeaseTarget returns a suitable replica to transfer the range lease
14781592
// to from the provided list. It excludes the current lease holder replica
14791593
// unless asked to do otherwise by the checkTransferLeaseSource parameter.
@@ -1501,83 +1615,27 @@ func (a *Allocator) TransferLeaseTarget(
15011615
forceDecisionWithoutStats bool,
15021616
opts transferLeaseOptions,
15031617
) roachpb.ReplicaDescriptor {
1618+
excludeLeaseRepl := !opts.checkTransferLeaseSource
1619+
if a.leaseholderShouldMoveDueToPreferences(ctx, conf, leaseRepl, existing) {
1620+
// Explicitly exclude the current leaseholder from the result set if it is
1621+
// in violation of lease preferences that can be satisfied by some other
1622+
// replica.
1623+
excludeLeaseRepl = true
1624+
}
1625+
15041626
allStoresList, _, _ := a.storePool.getStoreList(storeFilterNone)
15051627
storeDescMap := storeListToMap(allStoresList)
1506-
15071628
sl, _, _ := a.storePool.getStoreList(storeFilterSuspect)
15081629
sl = sl.excludeInvalid(conf.Constraints)
15091630
sl = sl.excludeInvalid(conf.VoterConstraints)
1510-
15111631
candidateLeasesMean := sl.candidateLeases.mean
15121632

15131633
source, ok := a.storePool.getStoreDescriptor(leaseRepl.StoreID())
15141634
if !ok {
15151635
return roachpb.ReplicaDescriptor{}
15161636
}
15171637

1518-
// Determine which store(s) is preferred based on user-specified preferences.
1519-
// If any stores match, only consider those stores as candidates. If only one
1520-
// store matches, it's where the lease should be (unless the preferred store
1521-
// is the current one and checkTransferLeaseSource is false).
1522-
var preferred []roachpb.ReplicaDescriptor
1523-
checkTransferLeaseSource := opts.checkTransferLeaseSource
1524-
if checkTransferLeaseSource {
1525-
preferred = a.preferredLeaseholders(conf, existing)
1526-
} else {
1527-
// TODO(a-robinson): Should we just always remove the source store from
1528-
// existing when checkTransferLeaseSource is false? I'd do it now, but
1529-
// it's too big a change to make right before a major release.
1530-
var candidates []roachpb.ReplicaDescriptor
1531-
for _, repl := range existing {
1532-
if repl.StoreID != leaseRepl.StoreID() {
1533-
candidates = append(candidates, repl)
1534-
}
1535-
}
1536-
preferred = a.preferredLeaseholders(conf, candidates)
1537-
}
1538-
if len(preferred) == 1 {
1539-
if preferred[0].StoreID == leaseRepl.StoreID() {
1540-
return roachpb.ReplicaDescriptor{}
1541-
}
1542-
// Verify that the preferred replica is eligible to receive the lease.
1543-
preferred, _ = a.storePool.liveAndDeadReplicas(preferred, false /* includeSuspectAndDrainingStores */)
1544-
if len(preferred) == 1 {
1545-
return preferred[0]
1546-
}
1547-
return roachpb.ReplicaDescriptor{}
1548-
} else if len(preferred) > 1 {
1549-
// If the current leaseholder is not preferred, set checkTransferLeaseSource
1550-
// to false to motivate the below logic to transfer the lease.
1551-
existing = preferred
1552-
if !storeHasReplica(leaseRepl.StoreID(), roachpb.MakeReplicaSet(preferred).ReplicationTargets()) {
1553-
checkTransferLeaseSource = false
1554-
}
1555-
}
1556-
1557-
// Only consider live, non-draining, non-suspect replicas.
1558-
existing, _ = a.storePool.liveAndDeadReplicas(existing, false /* includeSuspectAndDrainingStores */)
1559-
1560-
if a.knobs == nil || !a.knobs.AllowLeaseTransfersToReplicasNeedingSnapshots {
1561-
// Only proceed with the lease transfer if we are also the raft leader (we
1562-
// already know we are the leaseholder at this point), and only consider
1563-
// replicas that are in `StateReplicate` as potential candidates.
1564-
//
1565-
// NB: The RaftStatus() only returns a non-empty and non-nil result on the
1566-
// Raft leader (since Raft followers do not track the progress of other
1567-
// replicas, only the leader does).
1568-
//
1569-
// NB: On every Raft tick, we try to ensure that leadership is collocated with
1570-
// leaseholdership (see
1571-
// Replica.maybeTransferRaftLeadershipToLeaseholderLocked()). This means that
1572-
// on a range that is not already borked (i.e. can accept writes), periods of
1573-
// leader/leaseholder misalignment should be ephemeral and rare. We choose to
1574-
// be pessimistic here and choose to bail on the lease transfer, as opposed to
1575-
// potentially transferring the lease to a replica that may be waiting for a
1576-
// snapshot (which will wedge the range until the replica applies that
1577-
// snapshot).
1578-
existing = excludeReplicasInNeedOfSnapshots(ctx, leaseRepl.RaftStatus(), existing)
1579-
}
1580-
1638+
existing = a.ValidLeaseTargets(ctx, conf, existing, leaseRepl, excludeLeaseRepl)
15811639
// Short-circuit if there are no valid targets out there.
15821640
if len(existing) == 0 || (len(existing) == 1 && existing[0].StoreID == leaseRepl.StoreID()) {
15831641
log.VEventf(ctx, 2, "no lease transfer target found for r%d", leaseRepl.GetRangeID())
@@ -1592,7 +1650,7 @@ func (a *Allocator) TransferLeaseTarget(
15921650
transferDec, repl := a.shouldTransferLeaseForAccessLocality(
15931651
ctx, source, existing, stats, nil, candidateLeasesMean,
15941652
)
1595-
if checkTransferLeaseSource {
1653+
if !excludeLeaseRepl {
15961654
switch transferDec {
15971655
case shouldNotTransfer:
15981656
if !forceDecisionWithoutStats {
@@ -1611,13 +1669,21 @@ func (a *Allocator) TransferLeaseTarget(
16111669
if repl != (roachpb.ReplicaDescriptor{}) {
16121670
return repl
16131671
}
1672+
// Fall back to logic that doesn't take request counts and latency into
1673+
// account if the counts/latency-based logic couldn't pick a best replica.
16141674
fallthrough
16151675

16161676
case leaseCountConvergence:
1617-
// Fall back to logic that doesn't take request counts and latency into
1618-
// account if the counts/latency-based logic couldn't pick a best replica.
1619-
candidates := make([]roachpb.ReplicaDescriptor, 0, len(existing))
1677+
// If we want to ignore the existing lease counts on replicas, just do a
1678+
// random transfer.
1679+
if !opts.checkCandidateFullness {
1680+
a.randGen.Lock()
1681+
defer a.randGen.Unlock()
1682+
return existing[a.randGen.Intn(len(existing))]
1683+
}
1684+
16201685
var bestOption roachpb.ReplicaDescriptor
1686+
candidates := make([]roachpb.ReplicaDescriptor, 0, len(existing))
16211687
bestOptionLeaseCount := int32(math.MaxInt32)
16221688
for _, repl := range existing {
16231689
if leaseRepl.StoreID() == repl.StoreID {
@@ -1627,18 +1693,19 @@ func (a *Allocator) TransferLeaseTarget(
16271693
if !ok {
16281694
continue
16291695
}
1630-
if !opts.checkCandidateFullness || float64(storeDesc.Capacity.LeaseCount) < candidateLeasesMean-0.5 {
1696+
if float64(storeDesc.Capacity.LeaseCount) < candidateLeasesMean-0.5 {
16311697
candidates = append(candidates, repl)
1632-
} else if storeDesc.Capacity.LeaseCount < bestOptionLeaseCount {
1698+
}
1699+
if storeDesc.Capacity.LeaseCount < bestOptionLeaseCount {
16331700
bestOption = repl
16341701
bestOptionLeaseCount = storeDesc.Capacity.LeaseCount
16351702
}
16361703
}
16371704
if len(candidates) == 0 {
1638-
// If we aren't supposed to be considering the current leaseholder (e.g.
1639-
// because we need to remove this replica for some reason), return
1640-
// our best option if we otherwise wouldn't want to do anything.
1641-
if !checkTransferLeaseSource {
1705+
// If there were no existing replicas on stores with less-than-mean
1706+
// leases, and we _must_ move the lease away (indicated by
1707+
// `opts.excludeLeaseRepl`), just return the best possible option.
1708+
if excludeLeaseRepl {
16421709
return bestOption
16431710
}
16441711
return roachpb.ReplicaDescriptor{}
@@ -1784,41 +1851,30 @@ func (a *Allocator) ShouldTransferLease(
17841851
ctx context.Context,
17851852
conf roachpb.SpanConfig,
17861853
existing []roachpb.ReplicaDescriptor,
1787-
leaseStoreID roachpb.StoreID,
1854+
leaseRepl interface {
1855+
RaftStatus() *raft.Status
1856+
StoreID() roachpb.StoreID
1857+
},
17881858
stats *replicaStats,
17891859
) bool {
1790-
source, ok := a.storePool.getStoreDescriptor(leaseStoreID)
1791-
if !ok {
1792-
return false
1860+
if a.leaseholderShouldMoveDueToPreferences(ctx, conf, leaseRepl, existing) {
1861+
return true
17931862
}
1863+
existing = a.ValidLeaseTargets(ctx, conf, existing, leaseRepl, false /* excludeLeaseRepl */)
17941864

1795-
// Determine which store(s) is preferred based on user-specified preferences.
1796-
// If any stores match, only consider those stores as options. If only one
1797-
// store matches, it's where the lease should be.
1798-
preferred := a.preferredLeaseholders(conf, existing)
1799-
if len(preferred) == 1 {
1800-
return preferred[0].StoreID != leaseStoreID
1801-
} else if len(preferred) > 1 {
1802-
existing = preferred
1803-
// If the current leaseholder isn't one of the preferred stores, then we
1804-
// should try to transfer the lease.
1805-
if !storeHasReplica(leaseStoreID, roachpb.MakeReplicaSet(existing).ReplicationTargets()) {
1806-
return true
1807-
}
1865+
// Short-circuit if there are no valid targets out there.
1866+
if len(existing) == 0 || (len(existing) == 1 && existing[0].StoreID == leaseRepl.StoreID()) {
1867+
return false
1868+
}
1869+
source, ok := a.storePool.getStoreDescriptor(leaseRepl.StoreID())
1870+
if !ok {
1871+
return false
18081872
}
18091873

18101874
sl, _, _ := a.storePool.getStoreList(storeFilterSuspect)
18111875
sl = sl.excludeInvalid(conf.Constraints)
18121876
sl = sl.excludeInvalid(conf.VoterConstraints)
1813-
log.VEventf(ctx, 3, "ShouldTransferLease (lease-holder=%d):\n%s", leaseStoreID, sl)
1814-
1815-
// Only consider live, non-draining, non-suspect replicas.
1816-
existing, _ = a.storePool.liveAndDeadReplicas(existing, false /* includeSuspectNodes */)
1817-
1818-
// Short-circuit if there are no valid targets out there.
1819-
if len(existing) == 0 || (len(existing) == 1 && existing[0].StoreID == source.StoreID) {
1820-
return false
1821-
}
1877+
log.VEventf(ctx, 3, "ShouldTransferLease (lease-holder=%d):\n%s", leaseRepl, sl)
18221878

18231879
transferDec, _ := a.shouldTransferLeaseForAccessLocality(
18241880
ctx,
@@ -1840,7 +1896,9 @@ func (a *Allocator) ShouldTransferLease(
18401896
log.Fatalf(ctx, "unexpected transfer decision %d", transferDec)
18411897
}
18421898

1843-
log.VEventf(ctx, 3, "ShouldTransferLease decision (lease-holder=%d): %t", leaseStoreID, result)
1899+
log.VEventf(
1900+
ctx, 3, "ShouldTransferLease decision (lease-holder=s%d): %t", leaseRepl.StoreID(), result,
1901+
)
18441902
return result
18451903
}
18461904

0 commit comments

Comments
 (0)