Add following recovery test

dnhatn · dnhatn · commit b8c1a4e0da35 · 2018-10-17T17:54:39.000-04:00
diff --git a/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/action/ShardFollowTaskReplicationTests.java b/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/action/ShardFollowTaskReplicationTests.java
@@ -14,7 +14,6 @@
 import org.elasticsearch.action.admin.indices.flush.FlushRequest;
 import org.elasticsearch.action.bulk.BulkItemResponse;
 import org.elasticsearch.action.delete.DeleteRequest;
-import org.elasticsearch.action.index.IndexRequest;
 import org.elasticsearch.action.support.PlainActionFuture;
 import org.elasticsearch.action.support.replication.TransportWriteAction;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
@@ -25,16 +24,15 @@
 import org.elasticsearch.common.unit.ByteSizeUnit;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.unit.TimeValue;
-import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.engine.EngineFactory;
-import org.elasticsearch.index.engine.InternalEngineFactory;
 import org.elasticsearch.index.replication.ESIndexLevelReplicationTestCase;
 import org.elasticsearch.index.seqno.SeqNoStats;
 import org.elasticsearch.index.shard.IndexShard;
 import org.elasticsearch.index.shard.IndexShardTestCase;
 import org.elasticsearch.index.shard.ShardId;
 import org.elasticsearch.index.translog.Translog;
+import org.elasticsearch.indices.recovery.RecoveryTarget;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.xpack.ccr.CcrSettings;
 import org.elasticsearch.xpack.ccr.action.bulk.BulkShardOperationsRequest;
@@ -44,21 +42,21 @@
 import org.elasticsearch.xpack.ccr.index.engine.FollowingEngineFactory;
 
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
+import java.util.concurrent.Future;
 import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.BiConsumer;
 import java.util.function.Consumer;
 import java.util.function.LongConsumer;
 import java.util.stream.Collectors;
 
 import static org.hamcrest.Matchers.equalTo;
-import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 import static org.hamcrest.Matchers.is;
 import static org.hamcrest.Matchers.nullValue;
 
@@ -287,56 +285,38 @@ public void testRetryBulkShardOperations() throws Exception {
         }
     }
 
-    /**
-     * This is not a following test; it's mostly copied from RecoveryDuringReplicationTests#testAddNewReplicas.
-     * This test verifies the MSU optimization which is currently only enabled on {@link FollowingEngine}.
-     */
-    public void testAddNewReplicasWithFollowingEngine() throws Exception {
-        Settings settings = Settings.builder()
-            .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true)
-            .put(CcrSettings.CCR_FOLLOWING_INDEX_SETTING.getKey(), true)
-            .build();
-        IndexMetaData indexMetaData = buildIndexMetaData(between(0, 1), settings, indexMapping);
-        try (ReplicationGroup shards = new ReplicationGroup(indexMetaData) {
-            @Override
-            protected EngineFactory getEngineFactory(ShardRouting routing) {
-                if (routing.primary()) {
-                    return new InternalEngineFactory(); // use the internal engine so we can index directly
-                } else {
-                    return new FollowingEngineFactory();
+    public void testAddNewFollowingReplica() throws Exception {
+        final byte[] source = "{}".getBytes(StandardCharsets.UTF_8);
+        final int numDocs = between(1, 100);
+        final List<Translog.Operation> operations = new ArrayList<>(numDocs);
+        for (int i = 0; i < numDocs; i++) {
+            operations.add(new Translog.Index("type", Integer.toString(i), i, primaryTerm, 0, source, null, -1));
+        }
+        Future<Void> recoveryFuture = null;
+        try (ReplicationGroup group = createFollowGroup(between(0, 1))) {
+            group.startAll();
+            while (operations.isEmpty() == false) {
+                List<Translog.Operation> bulkOps = randomSubsetOf(between(1, operations.size()), operations);
+                operations.removeAll(bulkOps);
+                BulkShardOperationsRequest bulkRequest = new BulkShardOperationsRequest(group.getPrimary().shardId(),
+                    group.getPrimary().getHistoryUUID(), bulkOps, -1);
+                new CCRAction(bulkRequest, new PlainActionFuture<>(), group).execute();
+                if (randomInt(100) < 10) {
+                    group.getPrimary().flush(new FlushRequest());
+                }
+                if (recoveryFuture == null && (randomInt(100) < 10 || operations.isEmpty())) {
+                    group.getPrimary().sync();
+                    IndexShard newReplica = group.addReplica();
+                    // We need to recover async to release the main thread for the following task to continue
+                    // to fill ops up to the current max_seq_no which the recovering replica is waiting for.
+                    recoveryFuture = group.asyncRecoverReplica(newReplica,
+                        (shard, sourceNode) -> new RecoveryTarget(shard, sourceNode, recoveryListener, l -> {}) {});
                 }
             }
-        }) {
-            shards.startAll();
-            Thread[] threads = new Thread[between(1, 3)];
-            AtomicBoolean isStopped = new AtomicBoolean();
-            AtomicInteger docId = new AtomicInteger();
-            for (int i = 0; i < threads.length; i++) {
-                threads[i] = new Thread(() -> {
-                    while (isStopped.get() == false) {
-                        try {
-                            String id = Integer.toString(docId.incrementAndGet());
-                            shards.index(new IndexRequest(index.getName(), "type", id).source("{}", XContentType.JSON));
-                            if (randomInt(100) < 10) {
-                                shards.getPrimary().flush(new FlushRequest());
-                            }
-                        } catch (Exception ex) {
-                            throw new AssertionError(ex);
-                        }
-                    }
-                });
-                threads[i].start();
-            }
-            assertBusy(() -> assertThat(docId.get(), greaterThanOrEqualTo(50)));
-            shards.getPrimary().sync();
-            IndexShard newReplica = shards.addReplica();
-            shards.recoverReplica(newReplica);
-            assertBusy(() -> assertThat(docId.get(), greaterThanOrEqualTo(100)));
-            isStopped.set(true);
-            for (Thread thread : threads) {
-                thread.join();
+            if (recoveryFuture != null) {
+                recoveryFuture.get();
             }
-            assertBusy(() -> assertThat(getDocIdAndSeqNos(newReplica), equalTo(getDocIdAndSeqNos(shards.getPrimary()))));
+            group.assertAllEqual(numDocs);
         }
     }