@@ -207,18 +207,19 @@ static class FailedJoinAttempt {
207207 private final DiscoveryNode destination ;
208208 private final JoinRequest joinRequest ;
209209 private final ElasticsearchException exception ;
210- private final long timestamp ;
210+ private final long attemptTimeMillis ;
211211
212212 /**
213- * @param destination the master node targeted by the join request.
214- * @param joinRequest the join request that was sent to the perceived master node.
215- * @param exception the error response received in reply to the join request attempt.
213+ * @param destination the master node targeted by the join request.
214+ * @param joinRequest the join request that was sent to the perceived master node.
215+ * @param exception the error response received in reply to the join request attempt.
216+ * @param attemptTimeMillis the (relative milliseconds) time at which the failed attempt occurred.
216217 */
217- FailedJoinAttempt (DiscoveryNode destination , JoinRequest joinRequest , ElasticsearchException exception ) {
218+ FailedJoinAttempt (DiscoveryNode destination , JoinRequest joinRequest , ElasticsearchException exception , long attemptTimeMillis ) {
218219 this .destination = destination ;
219220 this .joinRequest = joinRequest ;
220221 this .exception = exception ;
221- this .timestamp = System . nanoTime () ;
222+ this .attemptTimeMillis = attemptTimeMillis ;
222223 }
223224
224225 /**
@@ -243,15 +244,15 @@ static Level getLogLevel(ElasticsearchException e) {
243244 return Level .INFO ;
244245 }
245246
246- void logWarnWithTimestamp () {
247+ void logWarnWithTimestamp (LongSupplier relativeTimeMillisSupplier ) {
247248 logger .warn (
248249 () -> format (
249250 "last failed join attempt was %s ago, failed to join %s with %s" ,
250- // 'timestamp ' is when this error exception was received by the local node. If the time that has passed since the error
251- // was originally received is quite large, it could indicate that this is a stale error exception from some prior
252- // out-of-order request response (where a later sent request but earlier received response was successful); or
251+ // 'attemptTimeMillis ' is when this error exception was received by the local node. If the time that has passed since
252+ // the error was originally received is quite large, it could indicate that this is a stale error exception from some
253+ // prior out-of-order request response (where a later sent request but earlier received response was successful); or
253254 // alternatively an old error could indicate that this node did not retry the join request for a very long time.
254- TimeValue .timeValueMillis (TimeValue . nsecToMSec ( System . nanoTime ( ) - timestamp ) ),
255+ TimeValue .timeValueMillis (relativeTimeMillisSupplier . getAsLong ( ) - attemptTimeMillis ),
255256 destination ,
256257 joinRequest
257258 ),
@@ -266,7 +267,7 @@ void logWarnWithTimestamp() {
266267 void logLastFailedJoinAttempt () {
267268 FailedJoinAttempt attempt = lastFailedJoinAttempt .get ();
268269 if (attempt != null ) {
269- attempt .logWarnWithTimestamp ();
270+ attempt .logWarnWithTimestamp (transportService . getThreadPool (). relativeTimeInMillisSupplier () );
270271 lastFailedJoinAttempt .compareAndSet (attempt , null );
271272 }
272273 }
@@ -299,7 +300,12 @@ public void sendJoinRequest(DiscoveryNode destination, long term, Optional<Join>
299300 pendingJoinInfo .message = PENDING_JOIN_FAILED ;
300301 pendingOutgoingJoins .remove (dedupKey );
301302 if (e instanceof ElasticsearchException elasticsearchException ) {
302- final var attempt = new FailedJoinAttempt (destination , joinRequest , elasticsearchException );
303+ final var attempt = new FailedJoinAttempt (
304+ destination ,
305+ joinRequest ,
306+ elasticsearchException ,
307+ transportService .getThreadPool ().relativeTimeInMillis ()
308+ );
303309 attempt .logNow ();
304310 lastFailedJoinAttempt .set (attempt );
305311 assert elasticsearchException instanceof CircuitBreakingException : e ; // others shouldn't happen, handle them anyway
@@ -374,7 +380,12 @@ public void onFailure(Exception e) {
374380 private void cleanUpOnFailure (TransportException exp ) {
375381 pendingJoinInfo .message = PENDING_JOIN_FAILED ;
376382 pendingOutgoingJoins .remove (dedupKey );
377- final var attempt = new FailedJoinAttempt (destination , joinRequest , exp );
383+ final var attempt = new FailedJoinAttempt (
384+ destination ,
385+ joinRequest ,
386+ exp ,
387+ transportService .getThreadPool ().relativeTimeInMillis ()
388+ );
378389 attempt .logNow ();
379390 lastFailedJoinAttempt .set (attempt );
380391 unregisterAndReleaseConnection (destination , connectionReference );
@@ -390,7 +401,8 @@ public void onFailure(Exception e) {
390401 final var attempt = new FailedJoinAttempt (
391402 destination ,
392403 joinRequest ,
393- new ConnectTransportException (destination , "failed to acquire connection" , e )
404+ new ConnectTransportException (destination , "failed to acquire connection" , e ),
405+ transportService .getThreadPool ().relativeTimeInMillis ()
394406 );
395407 attempt .logNow ();
396408 lastFailedJoinAttempt .set (attempt );
0 commit comments