cockroachdb
diff --git a/‎pkg/kv/kvserver/batcheval/cmd_lease_test.go‎
Lines changed: 64 additions & 0 deletions b/‎pkg/kv/kvserver/batcheval/cmd_lease_test.go‎
Lines changed: 64 additions & 0 deletions
diff --git a/‎pkg/kv/kvserver/batcheval/cmd_lease_transfer.go‎
Lines changed: 12 additions & 3 deletions b/‎pkg/kv/kvserver/batcheval/cmd_lease_transfer.go‎
Lines changed: 12 additions & 3 deletions
diff --git a/‎pkg/kv/kvserver/batcheval/cmd_push_txn.go‎
Lines changed: 2 additions & 0 deletions b/‎pkg/kv/kvserver/batcheval/cmd_push_txn.go‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎pkg/kv/kvserver/client_merge_test.go‎
Lines changed: 32 additions & 32 deletions b/‎pkg/kv/kvserver/client_merge_test.go‎
Lines changed: 32 additions & 32 deletions
diff --git a/‎pkg/kv/kvserver/client_raft_test.go‎
Lines changed: 1 addition & 1 deletion b/‎pkg/kv/kvserver/client_raft_test.go‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pkg/kv/kvserver/client_replica_test.go‎
Lines changed: 6 additions & 0 deletions b/‎pkg/kv/kvserver/client_replica_test.go‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎pkg/kv/kvserver/concurrency/concurrency_manager.go‎
Lines changed: 4 additions & 2 deletions b/‎pkg/kv/kvserver/concurrency/concurrency_manager.go‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎pkg/kv/kvserver/concurrency/concurrency_manager_test.go‎
Lines changed: 19 additions & 2 deletions b/‎pkg/kv/kvserver/concurrency/concurrency_manager_test.go‎
Lines changed: 19 additions & 2 deletions
@@ -17,7 +17,10 @@ import (
 
 	"github.com/cockroachdb/cockroach/pkg/base"
 	"github.com/cockroachdb/cockroach/pkg/roachpb"
+	"github.com/cockroachdb/cockroach/pkg/storage"
+	"github.com/cockroachdb/cockroach/pkg/testutils"
 	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
+	"github.com/cockroachdb/cockroach/pkg/util/hlc"
 	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
 	"github.com/cockroachdb/cockroach/pkg/util/log"
 	"github.com/stretchr/testify/require"
@@ -120,10 +123,13 @@ func TestLeaseCommandLearnerReplica(t *testing.T) {
 	}
 	desc := roachpb.RangeDescriptor{}
 	desc.SetReplicas(roachpb.MakeReplicaSet(replicas))
+	manual := hlc.NewManualClock(123)
+	clock := hlc.NewClock(manual.UnixNano, time.Nanosecond)
 	cArgs := CommandArgs{
 		EvalCtx: (&MockEvalCtx{
 			StoreID: voterStoreID,
 			Desc:    &desc,
+			Clock:   clock,
 		}).EvalContext(),
 		Args: &roachpb.TransferLeaseRequest{
 			Lease: roachpb.Lease{
@@ -157,6 +163,64 @@ func TestLeaseCommandLearnerReplica(t *testing.T) {
 	require.EqualError(t, err, expForLearner)
 }
 
+// TestLeaseTransferForwardsStartTime tests that during a lease transfer, the
+// start time of the new lease is determined during evaluation, after latches
+// have granted the lease transfer full mutual exclusion over the leaseholder.
+func TestLeaseTransferForwardsStartTime(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+	defer log.Scope(t).Close(t)
+
+	testutils.RunTrueAndFalse(t, "epoch", func(t *testing.T, epoch bool) { // epoch selects which lease fields are populated below
+		ctx := context.Background()
+		db := storage.NewDefaultInMem()
+		defer db.Close()
+		batch := db.NewBatch()
+		defer batch.Close()
+
+		replicas := []roachpb.ReplicaDescriptor{
+			{NodeID: 1, StoreID: 1, Type: roachpb.ReplicaTypeVoterFull(), ReplicaID: 1},
+			{NodeID: 2, StoreID: 2, Type: roachpb.ReplicaTypeVoterFull(), ReplicaID: 2},
+		}
+		desc := roachpb.RangeDescriptor{}
+		desc.SetReplicas(roachpb.MakeReplicaSet(replicas))
+		manual := hlc.NewManualClock(123) // manually driven time source that backs clock
+		clock := hlc.NewClock(manual.UnixNano, time.Nanosecond)
+
+		nextLease := roachpb.Lease{
+			Replica: replicas[1],
+			Start:   clock.NowAsClockTimestamp(), // requested start, read before the manual clock is advanced below
+		}
+		if epoch {
+			nextLease.Epoch = 1 // epoch case: Epoch is set and Expiration is left nil
+		} else {
+			exp := nextLease.Start.ToTimestamp().Add(9*time.Second.Nanoseconds(), 0) // expiration derived from the requested start (9s worth of nanoseconds passed to Add)
+			nextLease.Expiration = &exp
+		}
+		cArgs := CommandArgs{
+			EvalCtx: (&MockEvalCtx{
+				StoreID: 1, // evaluating store matches replicas[0]; the lease target is replicas[1]
+				Desc:    &desc,
+				Clock:   clock,
+			}).EvalContext(),
+			Args: &roachpb.TransferLeaseRequest{
+				Lease: nextLease,
+			},
+		}
+
+		manual.Increment(1000)                    // advance the manual clock after nextLease.Start was read
+		beforeEval := clock.NowAsClockTimestamp() // clock reading taken just before TransferLease is called
+
+		res, err := TransferLease(ctx, batch, cArgs, nil)
+		require.NoError(t, err)
+
+		// The proposed lease start time should be assigned at eval time: it must exceed both the requested start and the reading taken before evaluation.
+		propLease := res.Replicated.State.Lease
+		require.NotNil(t, propLease)
+		require.True(t, nextLease.Start.Less(propLease.Start))
+		require.True(t, beforeEval.Less(propLease.Start))
+	})
+}
+
 func TestCheckCanReceiveLease(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	defer log.Scope(t).Close(t)
 
@@ -67,13 +67,22 @@ func TransferLease(
 	// LeaseRejectedError before going through Raft.
 	prevLease, _ := cArgs.EvalCtx.GetLease()
 
+	// Forward the lease's start time to a current clock reading. At this
+	// point, we're holding latches across the entire range, so we know that
+	// this time is greater than the timestamps at which any request was
+	// serviced by the leaseholder before it stopped serving requests (i.e.
+	// before the TransferLease request acquired latches).
+	newLease := args.Lease
+	newLease.Start.Forward(cArgs.EvalCtx.Clock().NowAsClockTimestamp())
+	args.Lease = roachpb.Lease{} // prevent accidental use below
+
 	// If this check is removed at some point, the filtering of learners on the
 	// sending side would have to be removed as well.
-	if err := roachpb.CheckCanReceiveLease(args.Lease.Replica, cArgs.EvalCtx.Desc()); err != nil {
+	if err := roachpb.CheckCanReceiveLease(newLease.Replica, cArgs.EvalCtx.Desc()); err != nil {
 		return newFailedLeaseTrigger(true /* isTransfer */), err
 	}
 
-	log.VEventf(ctx, 2, "lease transfer: prev lease: %+v, new lease: %+v", prevLease, args.Lease)
+	log.VEventf(ctx, 2, "lease transfer: prev lease: %+v, new lease: %+v", prevLease, newLease)
 	return evalNewLease(ctx, cArgs.EvalCtx, readWriter, cArgs.Stats,
-		args.Lease, prevLease, false /* isExtension */, true /* isTransfer */)
+		newLease, prevLease, false /* isExtension */, true /* isTransfer */)
 }
@@ -124,6 +124,8 @@ func PushTxn(
 		return result.Result{}, errors.Errorf("request timestamp %s less than pushee txn timestamp %s", h.Timestamp, args.PusheeTxn.WriteTimestamp)
 	}
 	now := cArgs.EvalCtx.Clock().Now()
+	// TODO(nvanbenschoten): remove this limitation. But when doing so,
+	// keep the h.Timestamp.Less(args.PushTo) check above.
 	if now.Less(h.Timestamp) {
 		// The batch's timestamp should have been used to update the clock.
 		return result.Result{}, errors.Errorf("request timestamp %s less than current clock time %s", h.Timestamp, now)
 
@@ -34,7 +34,6 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
 	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock"
 	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
-	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness"
 	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/rditer"
 	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/stateloader"
 	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/txnwait"
@@ -1373,21 +1372,22 @@ func TestStoreRangeMergeRHSLeaseExpiration(t *testing.T) {
 	}
 
 	// Install a hook to observe when a get or a put request for a special key,
-	// rhsSentinel, acquires latches and begins evaluating.
+	// rhsSentinel, hits a MergeInProgressError and begins waiting on the merge.
 	const reqConcurrency = 10
 	var rhsSentinel roachpb.Key
-	reqAcquiredLatch := make(chan struct{}, reqConcurrency)
-	testingLatchFilter := func(_ context.Context, ba roachpb.BatchRequest) *roachpb.Error {
-		for _, r := range ba.Requests {
-			req := r.GetInner()
-			switch req.Method() {
-			case roachpb.Get, roachpb.Put:
-				if req.Header().Key.Equal(rhsSentinel) {
-					reqAcquiredLatch <- struct{}{}
+	reqWaitingOnMerge := make(chan struct{}, reqConcurrency)
+	testingConcurrencyRetryFilter := func(_ context.Context, ba roachpb.BatchRequest, pErr *roachpb.Error) {
+		if _, ok := pErr.GetDetail().(*roachpb.MergeInProgressError); ok {
+			for _, r := range ba.Requests {
+				req := r.GetInner()
+				switch req.Method() {
+				case roachpb.Get, roachpb.Put:
+					if req.Header().Key.Equal(rhsSentinel) {
+						reqWaitingOnMerge <- struct{}{}
+					}
 				}
 			}
 		}
-		return nil
 	}
 
 	manualClock := hlc.NewHybridManualClock()
@@ -1401,8 +1401,9 @@ func TestStoreRangeMergeRHSLeaseExpiration(t *testing.T) {
 						ClockSource: manualClock.UnixNano,
 					},
 					Store: &kvserver.StoreTestingKnobs{
-						TestingRequestFilter: testingRequestFilter,
-						TestingLatchFilter:   testingLatchFilter,
+						TestingRequestFilter:                    testingRequestFilter,
+						TestingConcurrencyRetryFilter:           testingConcurrencyRetryFilter,
+						AllowLeaseRequestProposalsWhenNotLeader: true,
 					},
 				},
 			},
@@ -1416,6 +1417,7 @@ func TestStoreRangeMergeRHSLeaseExpiration(t *testing.T) {
 	// during the merge.
 	lhsDesc, rhsDesc, err := tc.Servers[0].ScratchRangeWithExpirationLeaseEx()
 	require.NoError(t, err)
+	rhsSentinel = rhsDesc.StartKey.AsRawKey()
 
 	tc.AddVotersOrFatal(t, lhsDesc.StartKey.AsRawKey(), tc.Target(1))
 	tc.AddVotersOrFatal(t, rhsDesc.StartKey.AsRawKey(), tc.Target(1))
@@ -1438,26 +1440,26 @@ func TestStoreRangeMergeRHSLeaseExpiration(t *testing.T) {
 	// is aware of the merge and is refusing all traffic, so we can't just send a
 	// TransferLease request. Instead, we need to expire the second store's lease,
 	// then acquire the lease on the first store.
+	toAdvance := store.GetStoreConfig().LeaseExpiration()
 
 	// Before doing so, however, ensure that the merge transaction has written
 	// its transaction record so that it doesn't run into trouble with the low
 	// water mark of the new leaseholder's timestamp cache. This could result in
 	// the transaction being inadvertently aborted during its first attempt,
 	// which this test is not designed to handle. If the merge transaction did
 	// abort then the get requests could complete on r2 before the merge retried.
-	hb, hbH := heartbeatArgs(mergeTxn, tc.Servers[0].Clock().Now())
+	//
+	// We heartbeat the merge's transaction record with a timestamp forwarded by
+	// the duration we plan to advance the clock by so that the transaction does
+	// not look expired even after the manual clock update.
+	afterAdvance := tc.Servers[0].Clock().Now().Add(toAdvance, 0)
+	hb, hbH := heartbeatArgs(mergeTxn, afterAdvance)
 	if _, pErr := kv.SendWrappedWith(ctx, store.TestSender(), hbH, hb); pErr != nil {
 		t.Fatal(pErr)
 	}
 
-	// Turn off liveness heartbeats on the second store, then advance the clock
-	// past the liveness expiration time. This expires all leases on all stores.
-	tc.Servers[1].NodeLiveness().(*liveness.NodeLiveness).PauseHeartbeatLoopForTest()
-	manualClock.Increment(store.GetStoreConfig().LeaseExpiration())
-
-	// Manually heartbeat the liveness on the first store to ensure it's
-	// considered live. The automatic heartbeat might not come for a while.
-	require.NoError(t, tc.HeartbeatNodeLiveness(0))
+	// Then increment the clock to expire all leases.
+	manualClock.Increment(toAdvance)
 
 	// Send several get and put requests to the RHS. The first of these to
 	// arrive will acquire the lease; the remaining requests will wait for that
@@ -1513,19 +1515,17 @@ func TestStoreRangeMergeRHSLeaseExpiration(t *testing.T) {
 		time.Sleep(time.Millisecond)
 	}
 
-	// Wait for the get and put requests to acquire latches, which is as far as
-	// they can get while the merge is in progress. Then wait a little bit
-	// longer. This tests that the requests really do get stuck waiting for the
-	// merge to complete without depending too heavily on implementation
-	// details.
+	// Wait for the get and put requests to begin waiting on the merge to
+	// complete. Then wait a little bit longer. This tests that the requests
+	// really do get stuck waiting for the merge to complete without depending
+	// too heavily on implementation details.
 	for i := 0; i < reqConcurrency; i++ {
 		select {
-		case <-reqAcquiredLatch:
-			// Latch acquired.
+		case <-reqWaitingOnMerge:
+			// Waiting on merge.
 		case pErr := <-reqErrs:
-			// Requests may never make it to the latch acquisition if s1 has not
-			// yet learned s2's lease is expired. Instead, we'll see a
-			// NotLeaseholderError.
+			// Requests may never wait on the merge if s1 has not yet learned
+			// s2's lease is expired. Instead, we'll see a NotLeaseHolderError.
 			require.IsType(t, &roachpb.NotLeaseHolderError{}, pErr.GetDetail())
 		}
 	}
 
@@ -1089,7 +1089,7 @@ func TestRequestsOnLaggingReplica(t *testing.T) {
 	tc := testcluster.StartTestCluster(t, 3, clusterArgs)
 	defer tc.Stopper().Stop(ctx)
 
-	rngDesc, err := tc.Servers[0].ScratchRangeEx()
+	_, rngDesc, err := tc.Servers[0].ScratchRangeEx()
 	require.NoError(t, err)
 	key := rngDesc.StartKey.AsRawKey()
 	// Add replicas on all the stores.
 
@@ -3439,6 +3439,12 @@ func TestDiscoverIntentAcrossLeaseTransferAwayAndBack(t *testing.T) {
 	err = tc.MoveRangeLeaseNonCooperatively(rangeDesc, tc.Target(1), manual)
 	require.NoError(t, err)
 
+	// Send an arbitrary request to the range to update the range descriptor
+	// cache with the new lease. This prevents the rollback from getting stuck
+	// waiting on latches held by txn2's read on the old leaseholder.
+	_, err = kvDB.Get(ctx, "c")
+	require.NoError(t, err)
+
 	// Roll back txn1.
 	err = txn1.Rollback(ctx)
 	require.NoError(t, err)
 
@@ -89,6 +89,7 @@ func NewManager(cfg Config) Manager {
 		lt: lt,
 		ltw: &lockTableWaiterImpl{
 			st:                cfg.Settings,
+			clock:             cfg.Clock,
 			stopper:           cfg.Stopper,
 			ir:                cfg.IntentResolver,
 			lt:                lt,
@@ -443,9 +444,10 @@ func (g *Guard) HoldingLatches() bool {
 	return g != nil && g.lg != nil
 }
 
-// AssertLatches asserts that the guard is non-nil and holding latches.
+// AssertLatches asserts that the guard is non-nil and holding latches, if the
+// request is supposed to hold latches while evaluating in the first place.
 func (g *Guard) AssertLatches() {
-	if !g.HoldingLatches() {
+	if shouldAcquireLatches(g.Req) && !g.HoldingLatches() {
 		panic("expected latches held, found none")
 	}
 }
 
@@ -73,6 +73,7 @@ import (
 // debug-latch-manager
 // debug-lock-table
 // debug-disable-txn-pushes
+// debug-set-clock           ts=<secs>
 // reset
 //
 func TestConcurrencyManagerBasic(t *testing.T) {
@@ -119,7 +120,6 @@ func TestConcurrencyManagerBasic(t *testing.T) {
 					ReadTimestamp: ts,
 					MaxTimestamp:  maxTS,
 				}
-				txn.UpdateObservedTimestamp(c.nodeDesc.NodeID, ts.UnsafeToClockTimestamp())
 				c.registerTxn(txnName, txn)
 				return ""
 
@@ -459,6 +459,17 @@ func TestConcurrencyManagerBasic(t *testing.T) {
 				c.disableTxnPushes()
 				return ""
 
+			case "debug-set-clock":
+				var secs int
+				d.ScanArgs(t, "ts", &secs)
+
+				nanos := int64(secs) * time.Second.Nanoseconds()
+				if nanos < c.manual.UnixNano() {
+					d.Fatalf(t, "manual clock must advance")
+				}
+				c.manual.Set(nanos)
+				return ""
+
 			case "reset":
 				if n := mon.numMonitored(); n > 0 {
 					d.Fatalf(t, "%d requests still in flight", n)
@@ -494,6 +505,8 @@ type cluster struct {
 	nodeDesc  *roachpb.NodeDescriptor
 	rangeDesc *roachpb.RangeDescriptor
 	st        *clustersettings.Settings
+	manual    *hlc.ManualClock
+	clock     *hlc.Clock
 	m         concurrency.Manager
 
 	// Definitions.
@@ -523,10 +536,13 @@ type txnPush struct {
 }
 
 func newCluster() *cluster {
+	manual := hlc.NewManualClock(123 * time.Second.Nanoseconds())
 	return &cluster{
-		st:        clustersettings.MakeTestingClusterSettings(),
 		nodeDesc:  &roachpb.NodeDescriptor{NodeID: 1},
 		rangeDesc: &roachpb.RangeDescriptor{RangeID: 1},
+		st:        clustersettings.MakeTestingClusterSettings(),
+		manual:    manual,
+		clock:     hlc.NewClock(manual.UnixNano, time.Nanosecond),
 
 		txnsByName:      make(map[string]*roachpb.Transaction),
 		requestsByName:  make(map[string]testReq),
@@ -541,6 +557,7 @@ func (c *cluster) makeConfig() concurrency.Config {
 		NodeDesc:       c.nodeDesc,
 		RangeDesc:      c.rangeDesc,
 		Settings:       c.st,
+		Clock:          c.clock,
 		IntentResolver: c,
 		OnContentionEvent: func(ev *roachpb.ContentionEvent) {
 			ev.Duration = 1234 * time.Millisecond // for determinism
Original file line number	Diff line number	Diff line change
`@@ -124,6 +124,8 @@ func PushTxn(`
`124`	`124`	`return result.Result{}, errors.Errorf("request timestamp %s less than pushee txn timestamp %s", h.Timestamp, args.PusheeTxn.WriteTimestamp)`
`125`	`125`	`}`
`126`	`126`	`now := cArgs.EvalCtx.Clock().Now()`
	`127`	`+ // TODO(nvanbenschoten): remove this limitation. But when doing so,`
	`128`	`+ // keep the h.Timestamp.Less(args.PushTo) check above.`
`127`	`129`	`if now.Less(h.Timestamp) {`
`128`	`130`	`// The batch's timestamp should have been used to update the clock.`
`129`	`131`	`return result.Result{}, errors.Errorf("request timestamp %s less than current clock time %s", h.Timestamp, now)`
Original file line number	Diff line number	Diff line change
`@@ -89,6 +89,7 @@ func NewManager(cfg Config) Manager {`
`89`	`89`	`lt: lt,`
`90`	`90`	`ltw: &lockTableWaiterImpl{`
`91`	`91`	`st: cfg.Settings,`
	`92`	`+ clock: cfg.Clock,`
`92`	`93`	`stopper: cfg.Stopper,`
`93`	`94`	`ir: cfg.IntentResolver,`
`94`	`95`	`lt: lt,`
`@@ -443,9 +444,10 @@ func (g *Guard) HoldingLatches() bool {`
`443`	`444`	`return g != nil && g.lg != nil`
`444`	`445`	`}`
`445`	`446`
`446`		`-// AssertLatches asserts that the guard is non-nil and holding latches.`
	`447`	`+// AssertLatches asserts that the guard is non-nil and holding latches, if the`
	`448`	`+// request is supposed to hold latches while evaluating in the first place.`
`447`	`449`	`func (g *Guard) AssertLatches() {`
`448`		`- if !g.HoldingLatches() {`
	`450`	`+ if shouldAcquireLatches(g.Req) && !g.HoldingLatches() {`
`449`	`451`	`panic("expected latches held, found none")`
`450`	`452`	`}`
`451`	`453`	`}`