44 | 44 | import org.elasticsearch.core.Releasable; |
45 | 45 | import org.elasticsearch.core.Releasables; |
46 | 46 | import org.elasticsearch.core.TimeValue; |
| 47 | +import org.elasticsearch.core.Tuple; |
47 | 48 | import org.elasticsearch.core.internal.io.IOUtils; |
48 | 49 | import org.elasticsearch.index.engine.Engine; |
49 | 50 | import org.elasticsearch.index.engine.RecoveryEngineException; |
@@ -560,77 +561,107 @@ void recoverFilesFromSourceAndSnapshot(ShardRecoveryPlan shardRecoveryPlan, |
560 | 561 | phase1ExistingFileNames.size(), new ByteSizeValue(existingTotalSize)); |
561 | 562 | } |
562 | 563 |
 | 564 | +        // We need to pass the ShardRecoveryPlan between steps instead of capturing it in the closures, |
 | 565 | +        // since the plan can change after a failure recovering, from the snapshot, files that cannot be |
 | 566 | +        // recovered from the source node; in that case we have to start from scratch, using the fallback |
 | 567 | +        // recovery plan in the subsequent steps. |
563 | 568 | final StepListener<Void> sendFileInfoStep = new StepListener<>(); |
564 | | - final StepListener<List<StoreFileMetadata>> recoverSnapshotFilesStep = new StepListener<>(); |
565 | | - final StepListener<Void> sendFilesStep = new StepListener<>(); |
566 | | - final StepListener<RetentionLease> createRetentionLeaseStep = new StepListener<>(); |
567 | | - final StepListener<Void> cleanFilesStep = new StepListener<>(); |
| 569 | + final StepListener<Tuple<ShardRecoveryPlan, List<StoreFileMetadata>>> recoverSnapshotFilesStep = new StepListener<>(); |
| 570 | + final StepListener<ShardRecoveryPlan> sendFilesStep = new StepListener<>(); |
| 571 | + final StepListener<Tuple<ShardRecoveryPlan, RetentionLease>> createRetentionLeaseStep = new StepListener<>(); |
| 572 | + final StepListener<ShardRecoveryPlan> cleanFilesStep = new StepListener<>(); |
568 | 573 |
569 | 574 | final int translogOps = shardRecoveryPlan.getTranslogOps(); |
570 | 575 | recoveryTarget.receiveFileInfo(filesToRecoverNames, |
571 | 576 | filesToRecoverSizes, |
572 | 577 | phase1ExistingFileNames, |
573 | 578 | phase1ExistingFileSizes, |
574 | 579 | translogOps, |
| 580 | + false, |
575 | 581 | sendFileInfoStep |
576 | 582 | ); |
577 | 583 |
578 | | - sendFileInfoStep.whenComplete(r -> { |
579 | | - recoverSnapshotFiles(shardRecoveryPlan, recoverSnapshotFilesStep.delegateResponse((delegate, e) -> { |
580 | | - if (shardRecoveryPlan.canRecoverSnapshotFilesFromSourceNode() == false && |
581 | | - e instanceof CancellableThreads.ExecutionCancelledException == false) { |
582 | | - recoveryTarget.deleteRecoveredFiles(new ActionListener<>() { |
583 | | - @Override |
584 | | - public void onResponse(Void unused) { |
585 | | - recoverFilesFromSourceAndSnapshot(shardRecoveryPlan.getFallbackPlan(), store, stopWatch, listener); |
586 | | - } |
| 584 | + sendFileInfoStep.whenComplete(unused -> { |
| 585 | + recoverSnapshotFiles(shardRecoveryPlan, new ActionListener<>() { |
| 586 | + @Override |
| 587 | + public void onResponse(List<StoreFileMetadata> filesFailedToRecoverFromSnapshot) { |
| 588 | + recoverSnapshotFilesStep.onResponse(Tuple.tuple(shardRecoveryPlan, filesFailedToRecoverFromSnapshot)); |
| 589 | + } |
587 | 590 |
588 | | - @Override |
589 | | - public void onFailure(Exception e) { |
590 | | - listener.onFailure(e); |
591 | | - } |
592 | | - }); |
593 | | - } else { |
594 | | - delegate.onFailure(e); |
| 591 | + @Override |
| 592 | + public void onFailure(Exception e) { |
| 593 | + if (shardRecoveryPlan.canRecoverSnapshotFilesFromSourceNode() == false && |
| 594 | + e instanceof CancellableThreads.ExecutionCancelledException == false) { |
| 595 | + ShardRecoveryPlan fallbackPlan = shardRecoveryPlan.getFallbackPlan(); |
| 596 | + recoveryTarget.receiveFileInfo(fallbackPlan.getFilesToRecoverNames(), |
| 597 | + fallbackPlan.getFilesToRecoverSizes(), |
| 598 | + fallbackPlan.getFilesPresentInTargetNames(), |
| 599 | + fallbackPlan.getFilesPresentInTargetSizes(), |
| 600 | + fallbackPlan.getTranslogOps(), |
| 601 | + true, |
| 602 | + recoverSnapshotFilesStep.map(r -> Tuple.tuple(fallbackPlan, Collections.emptyList())) |
| 603 | + ); |
| 604 | + } else { |
| 605 | + recoverSnapshotFilesStep.onFailure(e); |
| 606 | + } |
595 | 607 | } |
596 | | - })); |
| 608 | + }); |
597 | 609 | }, listener::onFailure); |
598 | 610 |
599 | | - recoverSnapshotFilesStep.whenComplete(filesFailedToRecoverFromSnapshot -> { |
| 611 | + recoverSnapshotFilesStep.whenComplete(planAndFilesFailedToRecoverFromSnapshot -> { |
| 612 | + ShardRecoveryPlan recoveryPlan = planAndFilesFailedToRecoverFromSnapshot.v1(); |
| 613 | + List<StoreFileMetadata> filesFailedToRecoverFromSnapshot = planAndFilesFailedToRecoverFromSnapshot.v2(); |
600 | 614 | final List<StoreFileMetadata> filesToRecoverFromSource; |
601 | 615 | if (filesFailedToRecoverFromSnapshot.isEmpty()) { |
602 | | - filesToRecoverFromSource = shardRecoveryPlan.getSourceFilesToRecover(); |
| 616 | + filesToRecoverFromSource = recoveryPlan.getSourceFilesToRecover(); |
603 | 617 | } else { |
604 | | - filesToRecoverFromSource = concatLists(shardRecoveryPlan.getSourceFilesToRecover(), filesFailedToRecoverFromSnapshot); |
| 618 | + filesToRecoverFromSource = concatLists(recoveryPlan.getSourceFilesToRecover(), filesFailedToRecoverFromSnapshot); |
605 | 619 | } |
606 | 620 |
607 | 621 | sendFiles(store, |
608 | | - filesToRecoverFromSource.toArray(new StoreFileMetadata[0]), shardRecoveryPlan::getTranslogOps, sendFilesStep); |
| 622 | + filesToRecoverFromSource.toArray(new StoreFileMetadata[0]), |
| 623 | + recoveryPlan::getTranslogOps, |
| 624 | + sendFilesStep.map(unused -> recoveryPlan) |
| 625 | + ); |
609 | 626 | }, listener::onFailure); |
610 | 627 |
611 | | - final long startingSeqNo = shardRecoveryPlan.getStartingSeqNo(); |
612 | | - sendFilesStep.whenComplete(r -> createRetentionLease(startingSeqNo, createRetentionLeaseStep), listener::onFailure); |
613 | | - |
614 | | - final Store.MetadataSnapshot recoverySourceMetadata = shardRecoveryPlan.getSourceMetadataSnapshot(); |
615 | | - createRetentionLeaseStep.whenComplete(retentionLease -> |
616 | | - { |
617 | | - final long lastKnownGlobalCheckpoint = shard.getLastKnownGlobalCheckpoint(); |
618 | | - assert retentionLease == null || retentionLease.retainingSequenceNumber() - 1 <= lastKnownGlobalCheckpoint |
619 | | - : retentionLease + " vs " + lastKnownGlobalCheckpoint; |
620 | | - // Establishes new empty translog on the replica with global checkpoint set to lastKnownGlobalCheckpoint. We want |
621 | | - // the commit we just copied to be a safe commit on the replica, so why not set the global checkpoint on the replica |
622 | | - // to the max seqno of this commit? Because (in rare corner cases) this commit might not be a safe commit here on |
623 | | - // the primary, and in these cases the max seqno would be too high to be valid as a global checkpoint. |
624 | | - cleanFiles(store, recoverySourceMetadata, () -> translogOps, lastKnownGlobalCheckpoint, cleanFilesStep); |
625 | | - }, |
626 | | - listener::onFailure); |
| 628 | + sendFilesStep.whenComplete(recoveryPlan -> { |
| 629 | + createRetentionLease(recoveryPlan.getStartingSeqNo(), |
| 630 | + createRetentionLeaseStep.map(retentionLease -> Tuple.tuple(recoveryPlan, retentionLease)) |
| 631 | + ); |
| 632 | + }, listener::onFailure); |
| 633 | + |
| 634 | + createRetentionLeaseStep.whenComplete(recoveryPlanAndRetentionLease -> { |
| 635 | + final ShardRecoveryPlan recoveryPlan = recoveryPlanAndRetentionLease.v1(); |
| 636 | + final RetentionLease retentionLease = recoveryPlanAndRetentionLease.v2(); |
| 637 | + final Store.MetadataSnapshot recoverySourceMetadata = recoveryPlan.getSourceMetadataSnapshot(); |
| 638 | + final long lastKnownGlobalCheckpoint = shard.getLastKnownGlobalCheckpoint(); |
| 639 | + assert retentionLease == null || retentionLease.retainingSequenceNumber() - 1 <= lastKnownGlobalCheckpoint |
| 640 | + : retentionLease + " vs " + lastKnownGlobalCheckpoint; |
| 641 | + // Establishes new empty translog on the replica with global checkpoint set to lastKnownGlobalCheckpoint. We want |
| 642 | + // the commit we just copied to be a safe commit on the replica, so why not set the global checkpoint on the replica |
| 643 | + // to the max seqno of this commit? Because (in rare corner cases) this commit might not be a safe commit here on |
| 644 | + // the primary, and in these cases the max seqno would be too high to be valid as a global checkpoint. |
| 645 | + cleanFiles(store, |
| 646 | + recoverySourceMetadata, |
| 647 | + () -> translogOps, |
| 648 | + lastKnownGlobalCheckpoint, |
| 649 | + cleanFilesStep.map(unused -> recoveryPlan) |
| 650 | + ); |
| 651 | + }, listener::onFailure); |
627 | 652 |
628 | | - cleanFilesStep.whenComplete(r -> { |
| 653 | + cleanFilesStep.whenComplete(recoveryPlan -> { |
629 | 654 | final TimeValue took = stopWatch.totalTime(); |
630 | 655 | logger.trace("recovery [phase1]: took [{}]", took); |
631 | 656 | listener.onResponse( |
632 | | - new SendFileResult(filesToRecoverNames, filesToRecoverSizes, totalSize, |
633 | | - phase1ExistingFileNames, phase1ExistingFileSizes, existingTotalSize, took) |
| 657 | + new SendFileResult(recoveryPlan.getFilesToRecoverNames(), |
| 658 | + recoveryPlan.getFilesToRecoverSizes(), |
| 659 | + recoveryPlan.getTotalSize(), |
| 660 | + recoveryPlan.getFilesPresentInTargetNames(), |
| 661 | + recoveryPlan.getFilesPresentInTargetSizes(), |
| 662 | + recoveryPlan.getExistingSize(), |
| 663 | + took |
| 664 | + ) |
634 | 665 | ); |
635 | 666 | }, listener::onFailure); |
636 | 667 | } |
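
The pattern worth noting in this hunk: each step now reports the ShardRecoveryPlan it actually used as part of its result (directly or inside a Tuple), and the map(...) calls rewrap a downstream listener so a step that completes with one type can deliver the converted value the next step expects. Below is a minimal self-contained sketch of both ideas; the Listener interface, Plan record, and step methods are hypothetical stand-ins, not the real ActionListener/StepListener API.

import java.util.function.Function;

// Sketch: thread the plan through the chain as data instead of capturing it in
// closures, so a fallback plan chosen mid-chain reaches all later steps.
public class PlanThreadingSketch {

    interface Listener<T> {
        void onResponse(T result);
        void onFailure(Exception e);

        // Mirrors the map(...) calls in the diff: accepts an R, converts it to
        // T, and delivers it to this listener; failures pass through unchanged.
        default <R> Listener<R> map(Function<R, T> fn) {
            Listener<T> outer = this;
            return new Listener<>() {
                @Override public void onResponse(R result) { outer.onResponse(fn.apply(result)); }
                @Override public void onFailure(Exception e) { outer.onFailure(e); }
            };
        }
    }

    record Plan(String name) {} // stands in for ShardRecoveryPlan

    // Step 1: try the snapshot-based plan; on failure, hand the fallback plan
    // to the next step instead of failing the whole chain.
    static void recoverFromSnapshot(Plan primary, Plan fallback, Listener<Plan> next) {
        boolean snapshotDownloadFailed = true; // simulate a snapshot failure
        next.onResponse(snapshotDownloadFailed ? fallback : primary);
    }

    // Step 2 only sees the plan delivered by step 1, never a captured one.
    static void sendFiles(Plan plan, Listener<Void> next) {
        System.out.println("sending files for plan " + plan.name());
        next.onResponse(null);
    }

    public static void main(String[] args) {
        Listener<Plan> done = new Listener<>() {
            @Override public void onResponse(Plan plan) {
                System.out.println("finished with plan " + plan.name());
            }
            @Override public void onFailure(Exception e) { e.printStackTrace(); }
        };
        recoverFromSnapshot(new Plan("snapshot"), new Plan("fallback"), new Listener<>() {
            @Override public void onResponse(Plan plan) {
                // map(unused -> plan): sendFiles completes with Void, but the
                // final listener wants the plan that was actually used.
                sendFiles(plan, done.map(unused -> plan));
            }
            @Override public void onFailure(Exception e) { done.onFailure(e); }
        });
    }
}

In the real change, Tuple plays the role of this sketch's plain Plan result wherever a step must also return extra data, such as the files that failed to recover from the snapshot.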