Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -186,10 +186,11 @@ private Thread getIndexingThread() {
indexSingleDoc(indexName);
long currentDocCount = indexedDocs.incrementAndGet();
if (currentDocCount > 0 && currentDocCount % refreshFrequency == 0) {
logger.info("--> [iteration {}] flushing index", currentDocCount);
if (rarely()) {
logger.info("--> [iteration {}] flushing index", currentDocCount);
client().admin().indices().prepareFlush(indexName).get();
} else {
logger.info("--> [iteration {}] refreshing index", currentDocCount);
client().admin().indices().prepareRefresh(indexName).get();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ initalMetadataVersion < internalCluster().client()
* After shard relocation completes, shuts down the docrep nodes and asserts remote
* index settings are applied even when the index is in YELLOW state
*/
@AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/13737")
public void testIndexSettingsUpdatedEvenForMisconfiguredReplicas() throws Exception {
internalCluster().startClusterManagerOnlyNode();

Expand Down Expand Up @@ -329,6 +330,7 @@ public void testIndexSettingsUpdatedEvenForMisconfiguredReplicas() throws Except
* After shard relocation completes, restarts the docrep node holding extra replica shard copy
* and asserts remote index settings are applied as soon as the docrep replica copy is unassigned
*/
@AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/13871")
public void testIndexSettingsUpdatedWhenDocrepNodeIsRestarted() throws Exception {
internalCluster().startClusterManagerOnlyNode();

Expand Down Expand Up @@ -469,6 +471,7 @@ public void testRemotePathMetadataAddedWithFirstPrimaryMovingToRemote() throws E
* exclude docrep nodes, assert that remote index path file exists
* when shards start relocating to the remote nodes.
*/
@AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/13939")
public void testRemoteIndexPathFileExistsAfterMigration() throws Exception {
String docrepClusterManager = internalCluster().startClusterManagerOnlyNode();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,11 @@

package org.opensearch.remotemigration;

import org.opensearch.action.DocWriteResponse;
import org.opensearch.action.admin.cluster.health.ClusterHealthRequest;
import org.opensearch.action.admin.cluster.health.ClusterHealthResponse;
import org.opensearch.action.admin.cluster.repositories.get.GetRepositoriesRequest;
import org.opensearch.action.admin.cluster.repositories.get.GetRepositoriesResponse;
import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest;
import org.opensearch.action.delete.DeleteResponse;
import org.opensearch.action.index.IndexResponse;
import org.opensearch.client.Client;
import org.opensearch.client.Requests;
import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand;
Expand Down Expand Up @@ -66,8 +63,8 @@ public void testRemotePrimaryRelocation() throws Exception {

AtomicInteger numAutoGenDocs = new AtomicInteger();
final AtomicBoolean finished = new AtomicBoolean(false);
Thread indexingThread = getIndexingThread(finished, numAutoGenDocs);

AsyncIndexingService asyncIndexingService = new AsyncIndexingService("test");
asyncIndexingService.startIndexing();
refresh("test");

// add remote node in mixed mode cluster
Expand Down Expand Up @@ -141,17 +138,19 @@ public void testRemotePrimaryRelocation() throws Exception {
logger.info("--> relocation from remote to remote complete");

finished.set(true);
indexingThread.join();
asyncIndexingService.stopIndexing();
refresh("test");
OpenSearchAssertions.assertHitCount(client().prepareSearch("test").setTrackTotalHits(true).get(), numAutoGenDocs.get());
OpenSearchAssertions.assertHitCount(
client().prepareSearch("test").setTrackTotalHits(true).get(),
asyncIndexingService.getIndexedDocs()
);
OpenSearchAssertions.assertHitCount(
client().prepareSearch("test")
.setTrackTotalHits(true)// extra paranoia ;)
.setQuery(QueryBuilders.termQuery("auto", true))
.get(),
numAutoGenDocs.get()
asyncIndexingService.getIndexedDocs()
);

}

public void testMixedModeRelocation_RemoteSeedingFail() throws Exception {
Expand All @@ -165,9 +164,8 @@ public void testMixedModeRelocation_RemoteSeedingFail() throws Exception {
client().admin().indices().prepareCreate("test").setSettings(indexSettings()).setMapping("field", "type=text").get();
ensureGreen("test");

AtomicInteger numAutoGenDocs = new AtomicInteger();
final AtomicBoolean finished = new AtomicBoolean(false);
Thread indexingThread = getIndexingThread(finished, numAutoGenDocs);
AsyncIndexingService asyncIndexingService = new AsyncIndexingService("test");
asyncIndexingService.startIndexing();

refresh("test");

Expand Down Expand Up @@ -209,27 +207,11 @@ public void testMixedModeRelocation_RemoteSeedingFail() throws Exception {
assertEquals(actionGet.getRelocatingShards(), 0);
assertEquals(docRepNode, primaryNodeName("test"));

finished.set(true);
indexingThread.join();
asyncIndexingService.stopIndexing();
client().admin()
.cluster()
.prepareUpdateSettings()
.setTransientSettings(Settings.builder().put(RecoverySettings.INDICES_INTERNAL_REMOTE_UPLOAD_TIMEOUT.getKey(), (String) null))
.get();
}

private static Thread getIndexingThread(AtomicBoolean finished, AtomicInteger numAutoGenDocs) {
Thread indexingThread = new Thread(() -> {
while (finished.get() == false && numAutoGenDocs.get() < 10_000) {
IndexResponse indexResponse = client().prepareIndex("test").setId("id").setSource("field", "value").get();
assertEquals(DocWriteResponse.Result.CREATED, indexResponse.getResult());
DeleteResponse deleteResponse = client().prepareDelete("test", "id").get();
assertEquals(DocWriteResponse.Result.DELETED, deleteResponse.getResult());
client().prepareIndex("test").setSource("auto", true).get();
numAutoGenDocs.incrementAndGet();
}
});
indexingThread.start();
return indexingThread;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,27 @@

package org.opensearch.remotemigration;

import com.carrotsearch.randomizedtesting.generators.RandomNumbers;

import org.opensearch.action.DocWriteResponse;
import org.opensearch.action.admin.cluster.health.ClusterHealthResponse;
import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest;
import org.opensearch.action.admin.indices.replication.SegmentReplicationStatsResponse;
import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest;
import org.opensearch.action.delete.DeleteResponse;
import org.opensearch.action.index.IndexResponse;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand;
import org.opensearch.common.Priority;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.unit.TimeValue;
import org.opensearch.index.SegmentReplicationPerGroupStats;
import org.opensearch.index.query.QueryBuilders;
import org.opensearch.test.OpenSearchIntegTestCase;
import org.opensearch.test.hamcrest.OpenSearchAssertions;

import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.TimeUnit;

import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING;
import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;

@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)

public class RemoteReplicaRecoveryIT extends MigrationBaseTestCase {

protected int maximumNumberOfShards() {
Expand All @@ -52,6 +47,7 @@ protected int minimumNumberOfReplicas() {
Brings up new replica copies on remote and docrep nodes, when primary is on a remote node
Live indexing is happening meanwhile
*/
@AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/13473")
public void testReplicaRecovery() throws Exception {
internalCluster().setBootstrapClusterManagerNodeIndex(0);
String primaryNode = internalCluster().startNode();
Expand All @@ -63,10 +59,8 @@ public void testReplicaRecovery() throws Exception {
client().admin().indices().prepareCreate("test").setSettings(indexSettings()).setMapping("field", "type=text").get();
String replicaNode = internalCluster().startNode();
ensureGreen("test");

AtomicInteger numAutoGenDocs = new AtomicInteger();
final AtomicBoolean finished = new AtomicBoolean(false);
Thread indexingThread = getThread(finished, numAutoGenDocs);
AsyncIndexingService asyncIndexingService = new AsyncIndexingService("test");
asyncIndexingService.startIndexing();

refresh("test");

Expand All @@ -78,12 +72,10 @@ public void testReplicaRecovery() throws Exception {
updateSettingsRequest.persistentSettings(Settings.builder().put(MIGRATION_DIRECTION_SETTING.getKey(), "remote_store"));
assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet());

String remoteNode2 = internalCluster().startNode();
internalCluster().startNode();
internalCluster().validateClusterFormed();

// identify the primary

Thread.sleep(RandomNumbers.randomIntBetween(random(), 0, 2000));
logger.info("--> relocating primary from {} to {} ", primaryNode, remoteNode);
client().admin()
.cluster()
Expand All @@ -102,7 +94,6 @@ public void testReplicaRecovery() throws Exception {

assertEquals(0, clusterHealthResponse.getRelocatingShards());
logger.info("--> relocation of primary from docrep to remote complete");
Thread.sleep(RandomNumbers.randomIntBetween(random(), 0, 2000));

logger.info("--> getting up the new replicas now to doc rep node as well as remote node ");
// Increase replica count to 3
Expand All @@ -129,52 +120,33 @@ public void testReplicaRecovery() throws Exception {
logger.info("--> replica is up now on another docrep now as well as remote node");

assertEquals(0, clusterHealthResponse.getRelocatingShards());
asyncIndexingService.stopIndexing();
refresh("test");

Thread.sleep(RandomNumbers.randomIntBetween(random(), 0, 2000));
// segrep lag should be zero
assertBusy(() -> {
SegmentReplicationStatsResponse segmentReplicationStatsResponse = dataNodeClient().admin()
.indices()
.prepareSegmentReplicationStats("test")
.setDetailed(true)
.execute()
.actionGet();
SegmentReplicationPerGroupStats perGroupStats = segmentReplicationStatsResponse.getReplicationStats().get("test").get(0);
assertEquals(segmentReplicationStatsResponse.getReplicationStats().size(), 1);
perGroupStats.getReplicaStats().stream().forEach(e -> assertEquals(e.getCurrentReplicationLagMillis(), 0));
}, 20, TimeUnit.SECONDS);

// Stop replicas on docrep now.
// ToDo : Remove once we have dual replication enabled
client().admin()
.indices()
.updateSettings(
new UpdateSettingsRequest("test").settings(
Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
.put("index.routing.allocation.exclude._name", primaryNode + "," + replicaNode)
.build()
)
)
.get();

finished.set(true);
indexingThread.join();
refresh("test");
OpenSearchAssertions.assertHitCount(client().prepareSearch("test").setTrackTotalHits(true).get(), numAutoGenDocs.get());
OpenSearchAssertions.assertHitCount(
client().prepareSearch("test").setTrackTotalHits(true).get(),
asyncIndexingService.getIndexedDocs()
);
OpenSearchAssertions.assertHitCount(
client().prepareSearch("test")
.setTrackTotalHits(true)// extra paranoia ;)
.setQuery(QueryBuilders.termQuery("auto", true))
// .setPreference("_prefer_nodes:" + (remoteNode+ "," + remoteNode2))
.get(),
numAutoGenDocs.get()
asyncIndexingService.getIndexedDocs()
);

}

private Thread getThread(AtomicBoolean finished, AtomicInteger numAutoGenDocs) {
Thread indexingThread = new Thread(() -> {
while (finished.get() == false && numAutoGenDocs.get() < 100) {
IndexResponse indexResponse = client().prepareIndex("test").setId("id").setSource("field", "value").get();
assertEquals(DocWriteResponse.Result.CREATED, indexResponse.getResult());
DeleteResponse deleteResponse = client().prepareDelete("test", "id").get();
assertEquals(DocWriteResponse.Result.DELETED, deleteResponse.getResult());
client().prepareIndex("test").setSource("auto", true).get();
numAutoGenDocs.incrementAndGet();
logger.info("Indexed {} docs here", numAutoGenDocs.get());
}
});
indexingThread.start();
return indexingThread;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,6 @@ public boolean shouldSeedRemoteStore() {
public Function<String, Boolean> isShardOnRemoteEnabledNode = nodeId -> {
DiscoveryNode node = discoveryNodes.get(nodeId);
if (node != null) {
logger.trace("Node {} has remote_enabled as {}", nodeId, node.isRemoteStoreNode());
return node.isRemoteStoreNode();
}
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ private void logReplicationFailure(SegmentReplicationState state, ReplicationFai
protected void updateVisibleCheckpoint(long replicationId, IndexShard replicaShard) {
// Update replication checkpoint on source via transport call only supported for remote store integration. For node-
// node communication, checkpoint update is piggy-backed to GET_SEGMENT_FILES transport call
if (replicaShard.indexSettings().isRemoteStoreEnabled() == false) {
if (replicaShard.indexSettings().isAssignedOnRemoteNode() == false) {
return;
}
ShardRouting primaryShard = clusterService.state().routingTable().shardRoutingTable(replicaShard.shardId()).primaryShard();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public ReplicationTarget(String name, IndexShard indexShard, ReplicationLuceneIn
// make sure the store is not released until we are done.
this.cancellableThreads = new CancellableThreads();
store.incRef();
if (indexShard.indexSettings().isRemoteStoreEnabled()) {
if (indexShard.indexSettings().isAssignedOnRemoteNode()) {
indexShard.remoteStore().incRef();
}
}
Expand Down Expand Up @@ -284,7 +284,7 @@ protected void closeInternal() {
try {
store.decRef();
} finally {
if (indexShard.indexSettings().isRemoteStoreEnabled()) {
if (indexShard.indexSettings().isAssignedOnRemoteNode()) {
indexShard.remoteStore().decRef();
}
}
Expand Down