Skip to content

Commit ecc931b

Browse files
committed
kvserver: metamorphically enable kv.expiration_leases_only.enabled
Epic: none Release note: None
1 parent 85c6e38 commit ecc931b

11 files changed

Lines changed: 109 additions & 46 deletions

pkg/kv/kvserver/client_lease_test.go

Lines changed: 39 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ import (
3131
"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
3232
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/storepool"
3333
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
34-
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness"
3534
"github.com/cockroachdb/cockroach/pkg/roachpb"
3635
"github.com/cockroachdb/cockroach/pkg/server"
3736
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
@@ -57,10 +56,15 @@ func TestStoreRangeLease(t *testing.T) {
5756
defer leaktest.AfterTest(t)()
5857
defer log.Scope(t).Close(t)
5958

59+
ctx := context.Background()
60+
st := cluster.MakeTestingClusterSettings()
61+
kvserver.ExpirationLeasesOnly.Override(ctx, &st.SV, false) // override metamorphism
62+
6063
tc := testcluster.StartTestCluster(t, 1,
6164
base.TestClusterArgs{
6265
ReplicationMode: base.ReplicationManual,
6366
ServerArgs: base.TestServerArgs{
67+
Settings: st,
6468
Knobs: base.TestingKnobs{
6569
Store: &kvserver.StoreTestingKnobs{
6670
DisableMergeQueue: true,
@@ -69,7 +73,7 @@ func TestStoreRangeLease(t *testing.T) {
6973
},
7074
},
7175
)
72-
defer tc.Stopper().Stop(context.Background())
76+
defer tc.Stopper().Stop(ctx)
7377

7478
store := tc.GetFirstStoreFromServer(t, 0)
7579
// NodeLivenessKeyMax is a static split point, so this is always
@@ -839,7 +843,6 @@ func TestLeaseholderRelocate(t *testing.T) {
839843

840844
// Make sure the lease is on 3 and is fully upgraded.
841845
tc.TransferRangeLeaseOrFatal(t, rhsDesc, tc.Target(2))
842-
tc.WaitForLeaseUpgrade(ctx, t, rhsDesc)
843846

844847
// Check that the lease moved to 3.
845848
leaseHolder, err := tc.FindRangeLeaseHolder(rhsDesc, nil)
@@ -872,17 +875,24 @@ func TestLeaseholderRelocate(t *testing.T) {
872875
require.NoError(t, err)
873876
require.Equal(t, tc.Target(3), leaseHolder)
874877

875-
// Double check that lease moved directly.
878+
// Double check that lease moved directly. The tail of the lease history
879+
// should all be on leaseHolder.NodeID. We may metamorphically enable
880+
// kv.expiration_leases_only.enabled, in which case there will be a single
881+
// expiration lease, but otherwise we'll have transferred an expiration lease
882+
// and then upgraded to an epoch lease.
876883
repl := tc.GetFirstStoreFromServer(t, 3).
877884
LookupReplica(roachpb.RKey(rhsDesc.StartKey.AsRawKey()))
878885
history := repl.GetLeaseHistory()
879886

880-
require.Equal(t, leaseHolder.NodeID,
881-
history[len(history)-1].Replica.NodeID)
882-
require.Equal(t, leaseHolder.NodeID,
883-
history[len(history)-2].Replica.NodeID) // account for the lease upgrade
884-
require.Equal(t, tc.Target(2).NodeID,
885-
history[len(history)-3].Replica.NodeID)
887+
require.Equal(t, leaseHolder.NodeID, history[len(history)-1].Replica.NodeID)
888+
var prevLeaseHolder roachpb.NodeID
889+
for i := len(history) - 1; i >= 0; i-- {
890+
if id := history[i].Replica.NodeID; id != leaseHolder.NodeID {
891+
prevLeaseHolder = id
892+
break
893+
}
894+
}
895+
require.Equal(t, tc.Target(2).NodeID, prevLeaseHolder)
886896
}
887897

888898
func gossipLiveness(t *testing.T, tc *testcluster.TestCluster) {
@@ -1100,15 +1110,20 @@ func TestLeasesDontThrashWhenNodeBecomesSuspect(t *testing.T) {
11001110
locality("us-west"),
11011111
locality("us-west"),
11021112
}
1113+
1114+
ctx := context.Background()
1115+
st := cluster.MakeTestingClusterSettings()
1116+
kvserver.ExpirationLeasesOnly.Override(ctx, &st.SV, false) // override metamorphism
1117+
11031118
// Speed up lease transfers.
11041119
stickyRegistry := server.NewStickyInMemEnginesRegistry()
11051120
defer stickyRegistry.CloseAllStickyInMemEngines()
1106-
ctx := context.Background()
11071121
manualClock := hlc.NewHybridManualClock()
11081122
serverArgs := make(map[int]base.TestServerArgs)
11091123
numNodes := 4
11101124
for i := 0; i < numNodes; i++ {
11111125
serverArgs[i] = base.TestServerArgs{
1126+
Settings: st,
11121127
Locality: localities[i],
11131128
Knobs: base.TestingKnobs{
11141129
Server: &server.TestingKnobs{
@@ -1346,21 +1361,18 @@ func TestAcquireLeaseTimeout(t *testing.T) {
13461361
return nil
13471362
}
13481363

1349-
// The lease request timeout depends on the Raft election timeout, so we set
1350-
// it low to get faster timeouts (800 ms) and speed up the test.
1351-
var raftCfg base.RaftConfig
1352-
raftCfg.SetDefaults()
1353-
raftCfg.RaftHeartbeatIntervalTicks = 1
1354-
raftCfg.RaftElectionTimeoutTicks = 2
1355-
13561364
manualClock := hlc.NewHybridManualClock()
13571365

13581366
// Start a two-node cluster.
13591367
const numNodes = 2
13601368
tc := testcluster.StartTestCluster(t, numNodes, base.TestClusterArgs{
13611369
ReplicationMode: base.ReplicationManual,
13621370
ServerArgs: base.TestServerArgs{
1363-
RaftConfig: raftCfg,
1371+
RaftConfig: base.RaftConfig{
1372+
// Lease request timeout depends on Raft election timeout, speed it up.
1373+
RaftHeartbeatIntervalTicks: 1,
1374+
RaftElectionTimeoutTicks: 2,
1375+
},
13641376
Knobs: base.TestingKnobs{
13651377
Server: &server.TestingKnobs{
13661378
WallClock: manualClock,
@@ -1383,27 +1395,10 @@ func TestAcquireLeaseTimeout(t *testing.T) {
13831395
repl, err := tc.GetFirstStoreFromServer(t, 0).GetReplica(desc.RangeID)
13841396
require.NoError(t, err)
13851397

1386-
tc.IncrClockForLeaseUpgrade(t, manualClock)
1387-
tc.WaitForLeaseUpgrade(ctx, t, desc)
1388-
1389-
// Stop n2 and increment its epoch to invalidate the lease.
1398+
// Stop n2 and invalidate its leases by forwarding the clock.
13901399
tc.StopServer(1)
1391-
n2ID := tc.Server(1).NodeID()
1392-
lv, ok := tc.Server(0).NodeLiveness().(*liveness.NodeLiveness)
1393-
require.True(t, ok)
1394-
lvNode2, ok := lv.GetLiveness(n2ID)
1395-
require.True(t, ok)
1396-
manualClock.Forward(lvNode2.Expiration.WallTime)
1397-
1398-
testutils.SucceedsSoon(t, func() error {
1399-
lvNode2, ok = lv.GetLiveness(n2ID)
1400-
require.True(t, ok)
1401-
err := lv.IncrementEpoch(context.Background(), lvNode2.Liveness)
1402-
if errors.Is(err, liveness.ErrEpochAlreadyIncremented) {
1403-
return nil
1404-
}
1405-
return err
1406-
})
1400+
leaseDuration := tc.GetFirstStoreFromServer(t, 0).GetStoreConfig().RangeLeaseDuration
1401+
manualClock.Increment(leaseDuration.Nanoseconds())
14071402
require.False(t, repl.CurrentLeaseStatus(ctx).IsValid())
14081403

14091404
// Trying to acquire the lease should error with an empty NLHE, since the
@@ -1456,11 +1451,14 @@ func TestLeaseTransfersUseExpirationLeasesAndBumpToEpochBasedOnes(t *testing.T)
14561451
}{}
14571452

14581453
ctx := context.Background()
1454+
st := cluster.MakeTestingClusterSettings()
1455+
kvserver.ExpirationLeasesOnly.Override(ctx, &st.SV, false) // override metamorphism
14591456

14601457
manualClock := hlc.NewHybridManualClock()
14611458
tci := serverutils.StartNewTestCluster(t, 2, base.TestClusterArgs{
14621459
ReplicationMode: base.ReplicationManual,
14631460
ServerArgs: base.TestServerArgs{
1461+
Settings: st,
14641462
Knobs: base.TestingKnobs{
14651463
Server: &server.TestingKnobs{
14661464
// Never ticked -- demonstrating that we're not relying on
@@ -1527,6 +1525,8 @@ func TestLeaseUpgradeVersionGate(t *testing.T) {
15271525
clusterversion.ByKey(clusterversion.TODODelete_V22_2EnableLeaseUpgrade-1),
15281526
false, /* initializeVersion */
15291527
)
1528+
kvserver.ExpirationLeasesOnly.Override(ctx, &st.SV, false) // override metamorphism
1529+
15301530
tci := serverutils.StartNewTestCluster(t, 2, base.TestClusterArgs{
15311531
ReplicationMode: base.ReplicationManual,
15321532
ServerArgs: base.TestServerArgs{

pkg/kv/kvserver/client_merge_test.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -486,10 +486,15 @@ func mergeCheckingTimestampCaches(
486486

487487
manualClock := hlc.NewHybridManualClock()
488488
ctx := context.Background()
489+
st := cluster.MakeTestingClusterSettings()
490+
// This test explicitly sets up a leader/leaseholder partition, which doesn't
491+
// work with expiration leases (the lease expires).
492+
kvserver.ExpirationLeasesOnly.Override(ctx, &st.SV, false) // override metamorphism
489493
tc := testcluster.StartTestCluster(t, 3,
490494
base.TestClusterArgs{
491495
ReplicationMode: base.ReplicationManual,
492496
ServerArgs: base.TestServerArgs{
497+
Settings: st,
493498
Knobs: base.TestingKnobs{
494499
Server: &server.TestingKnobs{
495500
WallClock: manualClock,
@@ -1012,9 +1017,6 @@ func TestStoreRangeMergeTimestampCacheCausality(t *testing.T) {
10121017
if !lhsRepl1.OwnsValidLease(ctx, tc.Servers[1].Clock().NowAsClockTimestamp()) {
10131018
return errors.New("s2 does not own valid lease for lhs range")
10141019
}
1015-
if lhsRepl1.CurrentLeaseStatus(ctx).Lease.Type() != roachpb.LeaseEpoch {
1016-
return errors.Errorf("lease still an expiration based lease")
1017-
}
10181020
return nil
10191021
})
10201022

pkg/kv/kvserver/client_raft_test.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1315,9 +1315,13 @@ func TestRequestsOnFollowerWithNonLiveLeaseholder(t *testing.T) {
13151315
return nil
13161316
}
13171317

1318+
st := cluster.MakeTestingClusterSettings()
1319+
kvserver.ExpirationLeasesOnly.Override(ctx, &st.SV, false) // override metamorphism
1320+
13181321
clusterArgs := base.TestClusterArgs{
13191322
ReplicationMode: base.ReplicationManual,
13201323
ServerArgs: base.TestServerArgs{
1324+
Settings: st,
13211325
// Reduce the election timeout some to speed up the test.
13221326
RaftConfig: base.RaftConfig{RaftElectionTimeoutTicks: 10},
13231327
Knobs: base.TestingKnobs{
@@ -1749,7 +1753,7 @@ func TestLogGrowthWhenRefreshingPendingCommands(t *testing.T) {
17491753
}
17501754
propNode := tc.GetFirstStoreFromServer(t, propIdx).TestSender()
17511755
tc.TransferRangeLeaseOrFatal(t, *leaderRepl.Desc(), tc.Target(propIdx))
1752-
tc.WaitForLeaseUpgrade(ctx, t, *leaderRepl.Desc())
1756+
tc.MaybeWaitForLeaseUpgrade(ctx, t, *leaderRepl.Desc())
17531757
testutils.SucceedsSoon(t, func() error {
17541758
// Lease transfers may not be immediately observed by the new
17551759
// leaseholder. Wait until the new leaseholder is aware.
@@ -4872,10 +4876,17 @@ func TestDefaultConnectionDisruptionDoesNotInterfereWithSystemTraffic(t *testing
48724876
},
48734877
}
48744878

4879+
// This test relies on epoch leases being invalidated when a node restart,
4880+
// which isn't true for expiration leases, so we disable expiration lease
4881+
// metamorphism.
4882+
st := cluster.MakeTestingClusterSettings()
4883+
kvserver.ExpirationLeasesOnly.Override(ctx, &st.SV, false) // override metamorphism
4884+
48754885
const numServers int = 3
48764886
stickyServerArgs := make(map[int]base.TestServerArgs)
48774887
for i := 0; i < numServers; i++ {
48784888
stickyServerArgs[i] = base.TestServerArgs{
4889+
Settings: st,
48794890
StoreSpecs: []base.StoreSpec{
48804891
{
48814892
InMemory: true,

pkg/kv/kvserver/client_replica_circuit_breaker_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -700,6 +700,10 @@ func setupCircuitBreakerTest(t *testing.T) *circuitBreakerTest {
700700
var rangeID int64 // atomic
701701
slowThresh := &atomic.Value{} // supports .SetSlowThreshold(x)
702702
slowThresh.Store(time.Duration(0))
703+
ctx := context.Background()
704+
st := cluster.MakeTestingClusterSettings()
705+
// TODO(erikgrinaker): We may not need this for all circuit breaker tests.
706+
kvserver.ExpirationLeasesOnly.Override(ctx, &st.SV, false) // override metamorphism
703707
storeKnobs := &kvserver.StoreTestingKnobs{
704708
SlowReplicationThresholdOverride: func(ba *kvpb.BatchRequest) time.Duration {
705709
t.Helper()
@@ -748,6 +752,7 @@ func setupCircuitBreakerTest(t *testing.T) *circuitBreakerTest {
748752
args := base.TestClusterArgs{
749753
ReplicationMode: base.ReplicationManual,
750754
ServerArgs: base.TestServerArgs{
755+
Settings: st,
751756
RaftConfig: raftCfg,
752757
Knobs: base.TestingKnobs{
753758
Server: &server.TestingKnobs{

pkg/kv/kvserver/client_replica_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2060,11 +2060,14 @@ func TestLeaseMetricsOnSplitAndTransfer(t *testing.T) {
20602060
return nil
20612061
}
20622062
ctx := context.Background()
2063+
st := cluster.MakeTestingClusterSettings()
2064+
kvserver.ExpirationLeasesOnly.Override(ctx, &st.SV, false) // override metamorphism
20632065
manualClock := hlc.NewHybridManualClock()
20642066
tc := testcluster.StartTestCluster(t, 2,
20652067
base.TestClusterArgs{
20662068
ReplicationMode: base.ReplicationManual,
20672069
ServerArgs: base.TestServerArgs{
2070+
Settings: st,
20682071
Knobs: base.TestingKnobs{
20692072
Store: &kvserver.StoreTestingKnobs{
20702073
EvalKnobs: kvserverbase.BatchEvalTestingKnobs{
@@ -4358,6 +4361,11 @@ func TestStrictGCEnforcement(t *testing.T) {
43584361
protectedts.PollInterval.Override(ctx, &tc.Server(0).ClusterSettings().SV, 500*time.Hour)
43594362
defer protectedts.PollInterval.Override(ctx, &tc.Server(0).ClusterSettings().SV, 2*time.Minute)
43604363

4364+
// Disable follower reads. When metamorphically enabling expiration-based
4365+
// leases, an expired lease will cause a follower read which bypasses the
4366+
// strict GC enforcement.
4367+
sqlDB.Exec(t, "SET CLUSTER SETTING kv.closed_timestamp.follower_reads_enabled = false")
4368+
43614369
sqlDB.Exec(t, "SET CLUSTER SETTING kv.closed_timestamp.target_duration = '10 ms'")
43624370
defer sqlDB.Exec(t, `SET CLUSTER SETTING kv.gc_ttl.strict_enforcement.enabled = DEFAULT`)
43634371
setStrictGC(t, true)

pkg/kv/kvserver/client_split_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ import (
4343
"github.com/cockroachdb/cockroach/pkg/roachpb"
4444
"github.com/cockroachdb/cockroach/pkg/rpc"
4545
"github.com/cockroachdb/cockroach/pkg/server"
46+
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
4647
"github.com/cockroachdb/cockroach/pkg/spanconfig"
4748
"github.com/cockroachdb/cockroach/pkg/sql/catalog/bootstrap"
4849
"github.com/cockroachdb/cockroach/pkg/sql/catalog/systemschema"
@@ -2784,11 +2785,14 @@ func TestStoreCapacityAfterSplit(t *testing.T) {
27842785
defer leaktest.AfterTest(t)()
27852786
defer log.Scope(t).Close(t)
27862787
ctx := context.Background()
2788+
st := cluster.MakeTestingClusterSettings()
2789+
kvserver.ExpirationLeasesOnly.Override(ctx, &st.SV, false) // override metamorphism
27872790
manualClock := hlc.NewHybridManualClock()
27882791
tc := testcluster.StartTestCluster(t, 2,
27892792
base.TestClusterArgs{
27902793
ReplicationMode: base.ReplicationManual,
27912794
ServerArgs: base.TestServerArgs{
2795+
Settings: st,
27922796
Knobs: base.TestingKnobs{
27932797
Server: &server.TestingKnobs{
27942798
WallClock: manualClock,

pkg/kv/kvserver/closed_timestamp_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/txnwait"
3232
"github.com/cockroachdb/cockroach/pkg/roachpb"
3333
"github.com/cockroachdb/cockroach/pkg/server"
34+
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
3435
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
3536
"github.com/cockroachdb/cockroach/pkg/sql/rowenc/keyside"
3637
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
@@ -671,8 +672,13 @@ func TestClosedTimestampFrozenAfterSubsumption(t *testing.T) {
671672
st := mergeFilter{}
672673
manual := hlc.NewHybridManualClock()
673674
pinnedLeases := kvserver.NewPinnedLeases()
675+
676+
cs := cluster.MakeTestingClusterSettings()
677+
kvserver.ExpirationLeasesOnly.Override(ctx, &cs.SV, false) // override metamorphism
678+
674679
clusterArgs := base.TestClusterArgs{
675680
ServerArgs: base.TestServerArgs{
681+
Settings: cs,
676682
RaftConfig: base.RaftConfig{
677683
// We set the raft election timeout to a small duration. This should
678684
// result in the node liveness duration being ~3.6 seconds. Note that

pkg/kv/kvserver/replica_range_lease.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ import (
5757
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftutil"
5858
"github.com/cockroachdb/cockroach/pkg/roachpb"
5959
"github.com/cockroachdb/cockroach/pkg/settings"
60+
"github.com/cockroachdb/cockroach/pkg/util"
6061
"github.com/cockroachdb/cockroach/pkg/util/hlc"
6162
"github.com/cockroachdb/cockroach/pkg/util/log"
6263
"github.com/cockroachdb/cockroach/pkg/util/retry"
@@ -75,11 +76,11 @@ var transferExpirationLeasesFirstEnabled = settings.RegisterBoolSetting(
7576
true,
7677
)
7778

78-
var expirationLeasesOnly = settings.RegisterBoolSetting(
79+
var ExpirationLeasesOnly = settings.RegisterBoolSetting(
7980
settings.SystemOnly,
8081
"kv.expiration_leases_only.enabled",
8182
"only use expiration-based leases, never epoch-based ones (experimental, affects performance)",
82-
false,
83+
util.ConstantWithMetamorphicTestBool("kv.expiration_leases_only.enabled", false),
8384
)
8485

8586
var leaseStatusLogLimiter = func() *log.EveryN {
@@ -789,7 +790,7 @@ func (r *Replica) requiresExpirationLeaseRLocked() bool {
789790
// expiration-based lease, either because it requires one or because
790791
// kv.expiration_leases_only.enabled is enabled.
791792
func (r *Replica) shouldUseExpirationLeaseRLocked() bool {
792-
return expirationLeasesOnly.Get(&r.ClusterSettings().SV) || r.requiresExpirationLeaseRLocked()
793+
return ExpirationLeasesOnly.Get(&r.ClusterSettings().SV) || r.requiresExpirationLeaseRLocked()
793794
}
794795

795796
// requestLeaseLocked executes a request to obtain or extend a lease

pkg/kv/kvserver/replica_rangefeed_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1242,10 +1242,14 @@ func TestRangefeedCheckpointsRecoverFromLeaseExpiration(t *testing.T) {
12421242
// evaluating on the scratch range.
12431243
var rejectExtraneousRequests int64 // accessed atomically
12441244

1245+
st := cluster.MakeTestingClusterSettings()
1246+
kvserver.ExpirationLeasesOnly.Override(ctx, &st.SV, false) // override metamorphism
1247+
12451248
cargs := aggressiveResolvedTimestampClusterArgs
12461249
cargs.ReplicationMode = base.ReplicationManual
12471250
manualClock := hlc.NewHybridManualClock()
12481251
cargs.ServerArgs = base.TestServerArgs{
1252+
Settings: st,
12491253
Knobs: base.TestingKnobs{
12501254
Server: &server.TestingKnobs{
12511255
WallClock: manualClock,

0 commit comments

Comments
 (0)