@@ -1474,6 +1474,120 @@ func (a *Allocator) scorerOptionsForScatter() *scatterScorerOptions {
14741474 }
14751475}
14761476
1477+ // ValidLeaseTargets returns a set of candidate stores that are suitable to be
1478+ // transferred a lease for the given range.
1479+ //
1480+ // - It excludes stores that are dead, or marked draining or suspect.
1481+ // - If the range has lease_preferences, and there are any non-draining,
1482+ // non-suspect nodes that match those preferences, it excludes stores that don't
1483+ // match those preferences.
1484+ // - It excludes replicas that may need snapshots. If replica calling this
1485+ // method is not the Raft leader (meaning that it doesn't know whether follower
1486+ // replicas need a snapshot or not), produces no results.
1487+ func (a * Allocator ) ValidLeaseTargets (
1488+ ctx context.Context ,
1489+ conf roachpb.SpanConfig ,
1490+ existing []roachpb.ReplicaDescriptor ,
1491+ leaseRepl interface {
1492+ RaftStatus () * raft.Status
1493+ StoreID () roachpb.StoreID
1494+ },
1495+ // excludeLeaseRepl dictates whether the result set can include the source
1496+ // replica.
1497+ excludeLeaseRepl bool ,
1498+ ) []roachpb.ReplicaDescriptor {
1499+ candidates := make ([]roachpb.ReplicaDescriptor , 0 , len (existing ))
1500+ for i := range existing {
1501+ if existing [i ].GetType () != roachpb .VOTER_FULL {
1502+ continue
1503+ }
1504+ // If we're not allowed to include the current replica, remove it from
1505+ // consideration here.
1506+ if existing [i ].StoreID == leaseRepl .StoreID () && excludeLeaseRepl {
1507+ continue
1508+ }
1509+ candidates = append (candidates , existing [i ])
1510+ }
1511+ candidates , _ = a .storePool .liveAndDeadReplicas (
1512+ candidates , false , /* includeSuspectAndDrainingStores */
1513+ )
1514+
1515+ if a .knobs == nil || ! a .knobs .AllowLeaseTransfersToReplicasNeedingSnapshots {
1516+ // Only proceed with the lease transfer if we are also the raft leader (we
1517+ // already know we are the leaseholder at this point), and only consider
1518+ // replicas that are in `StateReplicate` as potential candidates.
1519+ //
1520+ // NB: The RaftStatus() only returns a non-empty and non-nil result on the
1521+ // Raft leader (since Raft followers do not track the progress of other
1522+ // replicas, only the leader does).
1523+ //
1524+ // NB: On every Raft tick, we try to ensure that leadership is collocated with
1525+ // leaseholdership (see
1526+ // Replica.maybeTransferRaftLeadershipToLeaseholderLocked()). This means that
1527+ // on a range that is not already borked (i.e. can accept writes), periods of
1528+ // leader/leaseholder misalignment should be ephemeral and rare. We choose to
1529+ // be pessimistic here and choose to bail on the lease transfer, as opposed to
1530+ // potentially transferring the lease to a replica that may be waiting for a
1531+ // snapshot (which will wedge the range until the replica applies that
1532+ // snapshot).
1533+ candidates = excludeReplicasInNeedOfSnapshots (ctx , leaseRepl .RaftStatus (), candidates )
1534+ }
1535+
1536+ // Determine which store(s) is preferred based on user-specified preferences.
1537+ // If any stores match, only consider those stores as candidates.
1538+ preferred := a .preferredLeaseholders (conf , candidates )
1539+ if len (preferred ) > 0 {
1540+ candidates = preferred
1541+ }
1542+ return candidates
1543+ }
1544+
1545+ // leaseholderShouldMoveDueToPreferences returns true if the current leaseholder
1546+ // is in violation of lease preferences _that can otherwise be satisfied_ by
1547+ // some existing replica.
1548+ //
1549+ // INVARIANT: This method should only be called with an `allExistingReplicas`
1550+ // slice that contains `leaseRepl`.
1551+ func (a * Allocator ) leaseholderShouldMoveDueToPreferences (
1552+ ctx context.Context ,
1553+ conf roachpb.SpanConfig ,
1554+ leaseRepl interface {
1555+ RaftStatus () * raft.Status
1556+ StoreID () roachpb.StoreID
1557+ },
1558+ allExistingReplicas []roachpb.ReplicaDescriptor ,
1559+ ) bool {
1560+ // Defensive check to ensure that this is never called with a replica set that
1561+ // does not contain the leaseholder.
1562+ var leaseholderInExisting bool
1563+ for _ , repl := range allExistingReplicas {
1564+ if repl .StoreID == leaseRepl .StoreID () {
1565+ leaseholderInExisting = true
1566+ break
1567+ }
1568+ }
1569+ if ! leaseholderInExisting {
1570+ log .Errorf (ctx , "programming error: expected leaseholder store to be in the slice of existing replicas" )
1571+ }
1572+
1573+ // Exclude suspect/draining/dead stores.
1574+ candidates , _ := a .storePool .liveAndDeadReplicas (
1575+ allExistingReplicas , false , /* includeSuspectAndDrainingStores */
1576+ )
1577+ // If there are any replicas that do match lease preferences, then we check if
1578+ // the existing leaseholder is one of them.
1579+ preferred := a .preferredLeaseholders (conf , candidates )
1580+ if len (preferred ) == 0 {
1581+ return false
1582+ }
1583+ for _ , repl := range preferred {
1584+ if repl .StoreID == leaseRepl .StoreID () {
1585+ return false
1586+ }
1587+ }
1588+ return true
1589+ }
1590+
14771591// TransferLeaseTarget returns a suitable replica to transfer the range lease
14781592// to from the provided list. It excludes the current lease holder replica
14791593// unless asked to do otherwise by the checkTransferLeaseSource parameter.
@@ -1501,83 +1615,27 @@ func (a *Allocator) TransferLeaseTarget(
15011615 forceDecisionWithoutStats bool ,
15021616 opts transferLeaseOptions ,
15031617) roachpb.ReplicaDescriptor {
1618+ excludeLeaseRepl := ! opts .checkTransferLeaseSource
1619+ if a .leaseholderShouldMoveDueToPreferences (ctx , conf , leaseRepl , existing ) {
1620+ // Explicitly exclude the current leaseholder from the result set if it is
1621+ // in violation of lease preferences that can be satisfied by some other
1622+ // replica.
1623+ excludeLeaseRepl = true
1624+ }
1625+
15041626 allStoresList , _ , _ := a .storePool .getStoreList (storeFilterNone )
15051627 storeDescMap := storeListToMap (allStoresList )
1506-
15071628 sl , _ , _ := a .storePool .getStoreList (storeFilterSuspect )
15081629 sl = sl .excludeInvalid (conf .Constraints )
15091630 sl = sl .excludeInvalid (conf .VoterConstraints )
1510-
15111631 candidateLeasesMean := sl .candidateLeases .mean
15121632
15131633 source , ok := a .storePool .getStoreDescriptor (leaseRepl .StoreID ())
15141634 if ! ok {
15151635 return roachpb.ReplicaDescriptor {}
15161636 }
15171637
1518- // Determine which store(s) is preferred based on user-specified preferences.
1519- // If any stores match, only consider those stores as candidates. If only one
1520- // store matches, it's where the lease should be (unless the preferred store
1521- // is the current one and checkTransferLeaseSource is false).
1522- var preferred []roachpb.ReplicaDescriptor
1523- checkTransferLeaseSource := opts .checkTransferLeaseSource
1524- if checkTransferLeaseSource {
1525- preferred = a .preferredLeaseholders (conf , existing )
1526- } else {
1527- // TODO(a-robinson): Should we just always remove the source store from
1528- // existing when checkTransferLeaseSource is false? I'd do it now, but
1529- // it's too big a change to make right before a major release.
1530- var candidates []roachpb.ReplicaDescriptor
1531- for _ , repl := range existing {
1532- if repl .StoreID != leaseRepl .StoreID () {
1533- candidates = append (candidates , repl )
1534- }
1535- }
1536- preferred = a .preferredLeaseholders (conf , candidates )
1537- }
1538- if len (preferred ) == 1 {
1539- if preferred [0 ].StoreID == leaseRepl .StoreID () {
1540- return roachpb.ReplicaDescriptor {}
1541- }
1542- // Verify that the preferred replica is eligible to receive the lease.
1543- preferred , _ = a .storePool .liveAndDeadReplicas (preferred , false /* includeSuspectAndDrainingStores */ )
1544- if len (preferred ) == 1 {
1545- return preferred [0 ]
1546- }
1547- return roachpb.ReplicaDescriptor {}
1548- } else if len (preferred ) > 1 {
1549- // If the current leaseholder is not preferred, set checkTransferLeaseSource
1550- // to false to motivate the below logic to transfer the lease.
1551- existing = preferred
1552- if ! storeHasReplica (leaseRepl .StoreID (), roachpb .MakeReplicaSet (preferred ).ReplicationTargets ()) {
1553- checkTransferLeaseSource = false
1554- }
1555- }
1556-
1557- // Only consider live, non-draining, non-suspect replicas.
1558- existing , _ = a .storePool .liveAndDeadReplicas (existing , false /* includeSuspectAndDrainingStores */ )
1559-
1560- if a .knobs == nil || ! a .knobs .AllowLeaseTransfersToReplicasNeedingSnapshots {
1561- // Only proceed with the lease transfer if we are also the raft leader (we
1562- // already know we are the leaseholder at this point), and only consider
1563- // replicas that are in `StateReplicate` as potential candidates.
1564- //
1565- // NB: The RaftStatus() only returns a non-empty and non-nil result on the
1566- // Raft leader (since Raft followers do not track the progress of other
1567- // replicas, only the leader does).
1568- //
1569- // NB: On every Raft tick, we try to ensure that leadership is collocated with
1570- // leaseholdership (see
1571- // Replica.maybeTransferRaftLeadershipToLeaseholderLocked()). This means that
1572- // on a range that is not already borked (i.e. can accept writes), periods of
1573- // leader/leaseholder misalignment should be ephemeral and rare. We choose to
1574- // be pessimistic here and choose to bail on the lease transfer, as opposed to
1575- // potentially transferring the lease to a replica that may be waiting for a
1576- // snapshot (which will wedge the range until the replica applies that
1577- // snapshot).
1578- existing = excludeReplicasInNeedOfSnapshots (ctx , leaseRepl .RaftStatus (), existing )
1579- }
1580-
1638+ existing = a .ValidLeaseTargets (ctx , conf , existing , leaseRepl , excludeLeaseRepl )
15811639 // Short-circuit if there are no valid targets out there.
15821640 if len (existing ) == 0 || (len (existing ) == 1 && existing [0 ].StoreID == leaseRepl .StoreID ()) {
15831641 log .VEventf (ctx , 2 , "no lease transfer target found for r%d" , leaseRepl .GetRangeID ())
@@ -1592,7 +1650,7 @@ func (a *Allocator) TransferLeaseTarget(
15921650 transferDec , repl := a .shouldTransferLeaseForAccessLocality (
15931651 ctx , source , existing , stats , nil , candidateLeasesMean ,
15941652 )
1595- if checkTransferLeaseSource {
1653+ if ! excludeLeaseRepl {
15961654 switch transferDec {
15971655 case shouldNotTransfer :
15981656 if ! forceDecisionWithoutStats {
@@ -1611,13 +1669,21 @@ func (a *Allocator) TransferLeaseTarget(
16111669 if repl != (roachpb.ReplicaDescriptor {}) {
16121670 return repl
16131671 }
1672+ // Fall back to logic that doesn't take request counts and latency into
1673+ // account if the counts/latency-based logic couldn't pick a best replica.
16141674 fallthrough
16151675
16161676 case leaseCountConvergence :
1617- // Fall back to logic that doesn't take request counts and latency into
1618- // account if the counts/latency-based logic couldn't pick a best replica.
1619- candidates := make ([]roachpb.ReplicaDescriptor , 0 , len (existing ))
1677+ // If we want to ignore the existing lease counts on replicas, just do a
1678+ // random transfer.
1679+ if ! opts .checkCandidateFullness {
1680+ a .randGen .Lock ()
1681+ defer a .randGen .Unlock ()
1682+ return existing [a .randGen .Intn (len (existing ))]
1683+ }
1684+
16201685 var bestOption roachpb.ReplicaDescriptor
1686+ candidates := make ([]roachpb.ReplicaDescriptor , 0 , len (existing ))
16211687 bestOptionLeaseCount := int32 (math .MaxInt32 )
16221688 for _ , repl := range existing {
16231689 if leaseRepl .StoreID () == repl .StoreID {
@@ -1627,18 +1693,19 @@ func (a *Allocator) TransferLeaseTarget(
16271693 if ! ok {
16281694 continue
16291695 }
1630- if ! opts . checkCandidateFullness || float64 (storeDesc .Capacity .LeaseCount ) < candidateLeasesMean - 0.5 {
1696+ if float64 (storeDesc .Capacity .LeaseCount ) < candidateLeasesMean - 0.5 {
16311697 candidates = append (candidates , repl )
1632- } else if storeDesc .Capacity .LeaseCount < bestOptionLeaseCount {
1698+ }
1699+ if storeDesc .Capacity .LeaseCount < bestOptionLeaseCount {
16331700 bestOption = repl
16341701 bestOptionLeaseCount = storeDesc .Capacity .LeaseCount
16351702 }
16361703 }
16371704 if len (candidates ) == 0 {
1638- // If we aren't supposed to be considering the current leaseholder (e.g.
1639- // because we need to remove this replica for some reason), return
1640- // our best option if we otherwise wouldn't want to do anything .
1641- if ! checkTransferLeaseSource {
1705+ // If there were no existing replicas on stores with less-than-mean
1706+ // leases, and we _must_ move the lease away (indicated by
1707+ // `opts.excludeLeaseRepl`), just return the best possible option .
1708+ if excludeLeaseRepl {
16421709 return bestOption
16431710 }
16441711 return roachpb.ReplicaDescriptor {}
@@ -1784,41 +1851,30 @@ func (a *Allocator) ShouldTransferLease(
17841851 ctx context.Context ,
17851852 conf roachpb.SpanConfig ,
17861853 existing []roachpb.ReplicaDescriptor ,
1787- leaseStoreID roachpb.StoreID ,
1854+ leaseRepl interface {
1855+ RaftStatus () * raft.Status
1856+ StoreID () roachpb.StoreID
1857+ },
17881858 stats * replicaStats ,
17891859) bool {
1790- source , ok := a .storePool .getStoreDescriptor (leaseStoreID )
1791- if ! ok {
1792- return false
1860+ if a .leaseholderShouldMoveDueToPreferences (ctx , conf , leaseRepl , existing ) {
1861+ return true
17931862 }
1863+ existing = a .ValidLeaseTargets (ctx , conf , existing , leaseRepl , false /* excludeLeaseRepl */ )
17941864
1795- // Determine which store(s) is preferred based on user-specified preferences.
1796- // If any stores match, only consider those stores as options. If only one
1797- // store matches, it's where the lease should be.
1798- preferred := a .preferredLeaseholders (conf , existing )
1799- if len (preferred ) == 1 {
1800- return preferred [0 ].StoreID != leaseStoreID
1801- } else if len (preferred ) > 1 {
1802- existing = preferred
1803- // If the current leaseholder isn't one of the preferred stores, then we
1804- // should try to transfer the lease.
1805- if ! storeHasReplica (leaseStoreID , roachpb .MakeReplicaSet (existing ).ReplicationTargets ()) {
1806- return true
1807- }
1865+ // Short-circuit if there are no valid targets out there.
1866+ if len (existing ) == 0 || (len (existing ) == 1 && existing [0 ].StoreID == leaseRepl .StoreID ()) {
1867+ return false
1868+ }
1869+ source , ok := a .storePool .getStoreDescriptor (leaseRepl .StoreID ())
1870+ if ! ok {
1871+ return false
18081872 }
18091873
18101874 sl , _ , _ := a .storePool .getStoreList (storeFilterSuspect )
18111875 sl = sl .excludeInvalid (conf .Constraints )
18121876 sl = sl .excludeInvalid (conf .VoterConstraints )
1813- log .VEventf (ctx , 3 , "ShouldTransferLease (lease-holder=%d):\n %s" , leaseStoreID , sl )
1814-
1815- // Only consider live, non-draining, non-suspect replicas.
1816- existing , _ = a .storePool .liveAndDeadReplicas (existing , false /* includeSuspectNodes */ )
1817-
1818- // Short-circuit if there are no valid targets out there.
1819- if len (existing ) == 0 || (len (existing ) == 1 && existing [0 ].StoreID == source .StoreID ) {
1820- return false
1821- }
1877+ log .VEventf (ctx , 3 , "ShouldTransferLease (lease-holder=%d):\n %s" , leaseRepl , sl )
18221878
18231879 transferDec , _ := a .shouldTransferLeaseForAccessLocality (
18241880 ctx ,
@@ -1840,7 +1896,9 @@ func (a *Allocator) ShouldTransferLease(
18401896 log .Fatalf (ctx , "unexpected transfer decision %d" , transferDec )
18411897 }
18421898
1843- log .VEventf (ctx , 3 , "ShouldTransferLease decision (lease-holder=%d): %t" , leaseStoreID , result )
1899+ log .VEventf (
1900+ ctx , 3 , "ShouldTransferLease decision (lease-holder=s%d): %t" , leaseRepl .StoreID (), result ,
1901+ )
18441902 return result
18451903}
18461904
0 commit comments