From 9d381c0a625fb7fd006dfc9c4a7b8d4cfc5f2e1d Mon Sep 17 00:00:00 2001 From: Poojita Raj Date: Tue, 3 May 2022 09:18:57 -0700 Subject: [PATCH 01/10] RecoveryState inherits from ReplicationState + RecoveryTarget inherits from ReplicationTarget Signed-off-by: Poojita Raj --- .../opensearch/index/shard/IndexShard.java | 11 +- .../opensearch/indices/IndicesService.java | 3 +- .../cluster/IndicesClusterStateService.java | 15 +- .../recovery/PeerRecoveryTargetService.java | 18 +- .../recovery/RecoveriesCollection.java | 39 ++-- .../indices/recovery/RecoveryState.java | 35 +--- .../indices/recovery/RecoveryTarget.java | 133 ++++--------- .../common/ReplicationListener.java | 18 ++ .../replication/common/ReplicationState.java | 83 ++++++++ .../replication/common/ReplicationTarget.java | 184 ++++++++++++++++++ .../RecoveryDuringReplicationTests.java | 4 +- ...actIndicesClusterStateServiceTestCase.java | 3 +- .../indices/recovery/RecoveryTests.java | 26 +-- .../recovery/RecoveriesCollectionTests.java | 44 ++--- .../index/shard/IndexShardTestCase.java | 9 +- 15 files changed, 407 insertions(+), 218 deletions(-) create mode 100644 server/src/main/java/org/opensearch/indices/replication/common/ReplicationListener.java create mode 100644 server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java create mode 100644 server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 0f088a13d5c5a..d12f7c6505f5d 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -159,6 +159,7 @@ import org.opensearch.indices.recovery.RecoveryFailedException; import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.recovery.RecoveryTarget; +import 
org.opensearch.indices.replication.common.ReplicationListener; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.Repository; import org.opensearch.rest.RestStatus; @@ -2876,7 +2877,7 @@ protected Engine getEngineOrNull() { public void startRecovery( RecoveryState recoveryState, PeerRecoveryTargetService recoveryTargetService, - PeerRecoveryTargetService.RecoveryListener recoveryListener, + ReplicationListener recoveryListener, RepositoriesService repositoriesService, Consumer mappingUpdateConsumer, IndicesService indicesService @@ -2909,7 +2910,7 @@ public void startRecovery( recoveryTargetService.startRecovery(this, recoveryState.getSourceNode(), recoveryListener); } catch (Exception e) { failShard("corrupted preexisting index", e); - recoveryListener.onRecoveryFailure(recoveryState, new RecoveryFailedException(recoveryState, null, e), true); + recoveryListener.onFailure(recoveryState, new RecoveryFailedException(recoveryState, null, e), true); } break; case SNAPSHOT: @@ -2984,15 +2985,15 @@ public void startRecovery( private void executeRecovery( String reason, RecoveryState recoveryState, - PeerRecoveryTargetService.RecoveryListener recoveryListener, + ReplicationListener recoveryListener, CheckedConsumer, Exception> action ) { markAsRecovering(reason, recoveryState); // mark the shard as recovering on the cluster state thread threadPool.generic().execute(ActionRunnable.wrap(ActionListener.wrap(r -> { if (r) { - recoveryListener.onRecoveryDone(recoveryState); + recoveryListener.onDone(recoveryState); } - }, e -> recoveryListener.onRecoveryFailure(recoveryState, new RecoveryFailedException(recoveryState, null, e), true)), action)); + }, e -> recoveryListener.onFailure(recoveryState, new RecoveryFailedException(recoveryState, null, e), true)), action)); } /** diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index 
b5da0ae1f7688..f832afc59b469 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -137,6 +137,7 @@ import org.opensearch.indices.mapper.MapperRegistry; import org.opensearch.indices.recovery.PeerRecoveryTargetService; import org.opensearch.indices.recovery.RecoveryState; +import org.opensearch.indices.replication.common.ReplicationListener; import org.opensearch.node.Node; import org.opensearch.plugins.IndexStorePlugin; import org.opensearch.plugins.PluginsService; @@ -839,7 +840,7 @@ public synchronized void verifyIndexMetadata(IndexMetadata metadata, IndexMetada public IndexShard createShard( final ShardRouting shardRouting, final PeerRecoveryTargetService recoveryTargetService, - final PeerRecoveryTargetService.RecoveryListener recoveryListener, + final ReplicationListener recoveryListener, final RepositoriesService repositoriesService, final Consumer onShardFailure, final Consumer globalCheckpointSyncer, diff --git a/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java b/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java index 29f74f8a86d85..766deb0eed82a 100644 --- a/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java +++ b/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java @@ -35,6 +35,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.OpenSearchException; import org.opensearch.ResourceAlreadyExistsException; import org.opensearch.action.ActionListener; import org.opensearch.cluster.ClusterChangedEvent; @@ -78,8 +79,9 @@ import org.opensearch.indices.IndicesService; import org.opensearch.indices.recovery.PeerRecoverySourceService; import org.opensearch.indices.recovery.PeerRecoveryTargetService; -import 
org.opensearch.indices.recovery.RecoveryFailedException; import org.opensearch.indices.recovery.RecoveryState; +import org.opensearch.indices.replication.common.ReplicationListener; +import org.opensearch.indices.replication.common.ReplicationState; import org.opensearch.repositories.RepositoriesService; import org.opensearch.search.SearchService; import org.opensearch.snapshots.SnapshotShardsService; @@ -739,7 +741,7 @@ private static DiscoveryNode findSourceNodeForPeerRecovery( return sourceNode; } - private class RecoveryListener implements PeerRecoveryTargetService.RecoveryListener { + private class RecoveryListener implements ReplicationListener { /** * ShardRouting with which the shard was created @@ -757,12 +759,13 @@ private RecoveryListener(final ShardRouting shardRouting, final long primaryTerm } @Override - public void onRecoveryDone(final RecoveryState state) { - shardStateAction.shardStarted(shardRouting, primaryTerm, "after " + state.getRecoverySource(), SHARD_STATE_ACTION_LISTENER); + public void onDone(ReplicationState state) { + RecoveryState RecState = (RecoveryState) state; + shardStateAction.shardStarted(shardRouting, primaryTerm, "after " + RecState.getRecoverySource(), SHARD_STATE_ACTION_LISTENER); } @Override - public void onRecoveryFailure(RecoveryState state, RecoveryFailedException e, boolean sendShardFailure) { + public void onFailure(ReplicationState state, OpenSearchException e, boolean sendShardFailure) { handleRecoveryFailure(shardRouting, sendShardFailure, e); } } @@ -1004,7 +1007,7 @@ U createIndex(IndexMetadata indexMetadata, List builtInIndex T createShard( ShardRouting shardRouting, PeerRecoveryTargetService recoveryTargetService, - PeerRecoveryTargetService.RecoveryListener recoveryListener, + ReplicationListener recoveryListener, RepositoriesService repositoriesService, Consumer onShardFailure, Consumer globalCheckpointSyncer, diff --git a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java 
b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java index 4ae188abe5896..033dad7f312d2 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java +++ b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java @@ -70,6 +70,7 @@ import org.opensearch.index.translog.Translog; import org.opensearch.index.translog.TranslogCorruptedException; import org.opensearch.indices.recovery.RecoveriesCollection.RecoveryRef; +import org.opensearch.indices.replication.common.ReplicationListener; import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.indices.replication.common.ReplicationTimer; import org.opensearch.tasks.Task; @@ -189,7 +190,7 @@ public void beforeIndexShardClosed(ShardId shardId, @Nullable IndexShard indexSh } } - public void startRecovery(final IndexShard indexShard, final DiscoveryNode sourceNode, final RecoveryListener listener) { + public void startRecovery(final IndexShard indexShard, final DiscoveryNode sourceNode, final ReplicationListener listener) { // create a new recovery status, and process... 
final long recoveryId = onGoingRecoveries.startRecovery(indexShard, sourceNode, listener, recoverySettings.activityTimeout()); // we fork off quickly here and go async but this is called from the cluster state applier thread too and that can cause @@ -210,7 +211,7 @@ protected void retryRecovery(final long recoveryId, final String reason, TimeVal private void retryRecovery(final long recoveryId, final TimeValue retryAfter, final TimeValue activityTimeout) { RecoveryTarget newTarget = onGoingRecoveries.resetRecovery(recoveryId, activityTimeout); if (newTarget != null) { - threadPool.scheduleUnlessShuttingDown(retryAfter, ThreadPool.Names.GENERIC, new RecoveryRunner(newTarget.recoveryId())); + threadPool.scheduleUnlessShuttingDown(retryAfter, ThreadPool.Names.GENERIC, new RecoveryRunner(newTarget.getId())); } } @@ -339,23 +340,12 @@ public static StartRecoveryRequest getStartRecoveryRequest( localNode, metadataSnapshot, recoveryTarget.state().getPrimary(), - recoveryTarget.recoveryId(), + recoveryTarget.getId(), startingSeqNo ); return request; } - /** - * The recovery listener - * - * @opensearch.internal - */ - public interface RecoveryListener { - void onRecoveryDone(RecoveryState state); - - void onRecoveryFailure(RecoveryState state, RecoveryFailedException e, boolean sendShardFailure); - } - class PrepareForTranslogOperationsRequestHandler implements TransportRequestHandler { @Override diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveriesCollection.java b/server/src/main/java/org/opensearch/indices/recovery/RecoveriesCollection.java index 38b72dd0f7dee..9c42e3f32e166 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoveriesCollection.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoveriesCollection.java @@ -43,6 +43,7 @@ import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.IndexShardClosedException; import org.opensearch.index.shard.ShardId; +import 
org.opensearch.indices.replication.common.ReplicationListener; import org.opensearch.threadpool.ThreadPool; import java.util.ArrayList; @@ -76,28 +77,18 @@ public RecoveriesCollection(Logger logger, ThreadPool threadPool) { * * @return the id of the new recovery. */ - public long startRecovery( - IndexShard indexShard, - DiscoveryNode sourceNode, - PeerRecoveryTargetService.RecoveryListener listener, - TimeValue activityTimeout - ) { + public long startRecovery(IndexShard indexShard, DiscoveryNode sourceNode, ReplicationListener listener, TimeValue activityTimeout) { RecoveryTarget recoveryTarget = new RecoveryTarget(indexShard, sourceNode, listener); startRecoveryInternal(recoveryTarget, activityTimeout); - return recoveryTarget.recoveryId(); + return recoveryTarget.getId(); } private void startRecoveryInternal(RecoveryTarget recoveryTarget, TimeValue activityTimeout) { - RecoveryTarget existingTarget = onGoingRecoveries.putIfAbsent(recoveryTarget.recoveryId(), recoveryTarget); + RecoveryTarget existingTarget = onGoingRecoveries.putIfAbsent(recoveryTarget.getId(), recoveryTarget); assert existingTarget == null : "found two RecoveryStatus instances with the same id"; - logger.trace( - "{} started recovery from {}, id [{}]", - recoveryTarget.shardId(), - recoveryTarget.sourceNode(), - recoveryTarget.recoveryId() - ); + logger.trace("{} started recovery from {}, id [{}]", recoveryTarget.shardId(), recoveryTarget.sourceNode(), recoveryTarget.getId()); threadPool.schedule( - new RecoveryMonitor(recoveryTarget.recoveryId(), recoveryTarget.lastAccessTime(), activityTimeout), + new RecoveryMonitor(recoveryTarget.getId(), recoveryTarget.lastAccessTime(), activityTimeout), activityTimeout, ThreadPool.Names.GENERIC ); @@ -133,8 +124,8 @@ public RecoveryTarget resetRecovery(final long recoveryId, final TimeValue activ "{} restarted recovery from {}, id [{}], previous id [{}]", newRecoveryTarget.shardId(), newRecoveryTarget.sourceNode(), - newRecoveryTarget.recoveryId(), - 
oldRecoveryTarget.recoveryId() + newRecoveryTarget.getId(), + oldRecoveryTarget.getId() ); return newRecoveryTarget; } else { @@ -142,10 +133,10 @@ public RecoveryTarget resetRecovery(final long recoveryId, final TimeValue activ "{} recovery could not be reset as it is already cancelled, recovery from {}, id [{}], previous id [{}]", newRecoveryTarget.shardId(), newRecoveryTarget.sourceNode(), - newRecoveryTarget.recoveryId(), - oldRecoveryTarget.recoveryId() + newRecoveryTarget.getId(), + oldRecoveryTarget.getId() ); - cancelRecovery(newRecoveryTarget.recoveryId(), "recovery cancelled during reset"); + cancelRecovery(newRecoveryTarget.getId(), "recovery cancelled during reset"); return null; } } catch (Exception e) { @@ -193,7 +184,7 @@ public boolean cancelRecovery(long id, String reason) { "{} canceled recovery from {}, id [{}] (reason [{}])", removed.shardId(), removed.sourceNode(), - removed.recoveryId(), + removed.getId(), reason ); removed.cancel(reason); @@ -216,7 +207,7 @@ public void failRecovery(long id, RecoveryFailedException e, boolean sendShardFa "{} failing recovery from {}, id [{}]. 
Send shard failure: [{}]", removed.shardId(), removed.sourceNode(), - removed.recoveryId(), + removed.getId(), sendShardFailure ); removed.fail(e, sendShardFailure); @@ -227,7 +218,7 @@ public void failRecovery(long id, RecoveryFailedException e, boolean sendShardFa public void markRecoveryAsDone(long id) { RecoveryTarget removed = onGoingRecoveries.remove(id); if (removed != null) { - logger.trace("{} marking recovery from {} as done, id [{}]", removed.shardId(), removed.sourceNode(), removed.recoveryId()); + logger.trace("{} marking recovery from {} as done, id [{}]", removed.shardId(), removed.sourceNode(), removed.getId()); removed.markAsDone(); } } @@ -261,7 +252,7 @@ public boolean cancelRecoveriesForShard(ShardId shardId, String reason) { "{} canceled recovery from {}, id [{}] (reason [{}])", removed.shardId(), removed.sourceNode(), - removed.recoveryId(), + removed.getId(), reason ); removed.cancel(reason); diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveryState.java b/server/src/main/java/org/opensearch/indices/recovery/RecoveryState.java index 35ac5cbc12bde..d2303f950e154 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoveryState.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoveryState.java @@ -46,6 +46,7 @@ import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.ShardId; import org.opensearch.indices.replication.common.ReplicationLuceneIndex; +import org.opensearch.indices.replication.common.ReplicationState; import org.opensearch.indices.replication.common.ReplicationTimer; import java.io.IOException; @@ -56,7 +57,7 @@ * * @opensearch.internal */ -public class RecoveryState implements ToXContentFragment, Writeable { +public class RecoveryState extends ReplicationState implements ToXContentFragment, Writeable { /** * The stage of the recovery state @@ -117,10 +118,8 @@ public static Stage fromId(byte id) { private Stage stage; - private final ReplicationLuceneIndex 
index; private final Translog translog; private final VerifyIndex verifyIndex; - private final ReplicationTimer timer; private RecoverySource recoverySource; private ShardId shardId; @@ -157,13 +156,12 @@ public RecoveryState( } public RecoveryState(StreamInput in) throws IOException { - timer = new ReplicationTimer(in); + super(in); stage = Stage.fromId(in.readByte()); shardId = new ShardId(in); recoverySource = RecoverySource.readFrom(in); targetNode = new DiscoveryNode(in); sourceNode = in.readOptionalWriteable(DiscoveryNode::new); - index = new ReplicationLuceneIndex(in); translog = new Translog(in); verifyIndex = new VerifyIndex(in); primary = in.readBoolean(); @@ -171,13 +169,12 @@ public RecoveryState(StreamInput in) throws IOException { @Override public void writeTo(StreamOutput out) throws IOException { - timer.writeTo(out); + super.writeTo(out); out.writeByte(stage.id()); shardId.writeTo(out); recoverySource.writeTo(out); targetNode.writeTo(out); out.writeOptionalWriteable(sourceNode); - index.writeTo(out); translog.writeTo(out); verifyIndex.writeTo(out); out.writeBoolean(primary); @@ -247,10 +244,6 @@ public synchronized RecoveryState setStage(Stage stage) { return this; } - public ReplicationLuceneIndex getIndex() { - return index; - } - public VerifyIndex getVerifyIndex() { return this.verifyIndex; } @@ -259,10 +252,6 @@ public Translog getTranslog() { return translog; } - public ReplicationTimer getTimer() { - return timer; - } - public RecoverySource getRecoverySource() { return recoverySource; } @@ -286,16 +275,11 @@ public boolean getPrimary() { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + super.toXContent(builder, params); builder.field(Fields.ID, shardId.id()); builder.field(Fields.TYPE, recoverySource.getType()); builder.field(Fields.STAGE, stage.toString()); builder.field(Fields.PRIMARY, primary); - builder.timeField(Fields.START_TIME_IN_MILLIS, Fields.START_TIME, timer.startTime()); 
- if (timer.stopTime() > 0) { - builder.timeField(Fields.STOP_TIME_IN_MILLIS, Fields.STOP_TIME, timer.stopTime()); - } - builder.humanReadableField(Fields.TOTAL_TIME_IN_MILLIS, Fields.TOTAL_TIME, new TimeValue(timer.time())); - if (recoverySource.getType() == RecoverySource.Type.PEER) { builder.startObject(Fields.SOURCE); builder.field(Fields.ID, sourceNode.getId()); @@ -318,10 +302,6 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(Fields.NAME, targetNode.getName()); builder.endObject(); - builder.startObject(Fields.INDEX); - index.toXContent(builder, params); - builder.endObject(); - builder.startObject(Fields.TRANSLOG); translog.toXContent(builder, params); builder.endObject(); @@ -343,10 +323,6 @@ static final class Fields { static final String TYPE = "type"; static final String STAGE = "stage"; static final String PRIMARY = "primary"; - static final String START_TIME = "start_time"; - static final String START_TIME_IN_MILLIS = "start_time_in_millis"; - static final String STOP_TIME = "stop_time"; - static final String STOP_TIME_IN_MILLIS = "stop_time_in_millis"; static final String TOTAL_TIME = "total_time"; static final String TOTAL_TIME_IN_MILLIS = "total_time_in_millis"; static final String SOURCE = "source"; @@ -355,7 +331,6 @@ static final class Fields { static final String IP = "ip"; static final String NAME = "name"; static final String TARGET = "target"; - static final String INDEX = "index"; static final String TRANSLOG = "translog"; static final String TOTAL_ON_START = "total_on_start"; static final String VERIFY_INDEX = "verify_index"; diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java b/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java index bb557cc6837ab..1a1de4beb5720 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java @@ -32,7 
+32,6 @@ package org.opensearch.indices.recovery; -import org.apache.logging.log4j.Logger; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexFormatTooNewException; import org.apache.lucene.index.IndexFormatTooOldException; @@ -42,12 +41,9 @@ import org.opensearch.action.ActionListener; import org.opensearch.action.admin.indices.flush.FlushRequest; import org.opensearch.cluster.node.DiscoveryNode; -import org.opensearch.common.UUIDs; import org.opensearch.common.bytes.BytesReference; -import org.opensearch.common.logging.Loggers; import org.opensearch.common.lucene.Lucene; import org.opensearch.common.util.CancellableThreads; -import org.opensearch.common.util.concurrent.AbstractRefCounted; import org.opensearch.index.engine.Engine; import org.opensearch.index.mapper.MapperException; import org.opensearch.index.seqno.ReplicationTracker; @@ -60,14 +56,14 @@ import org.opensearch.index.store.Store; import org.opensearch.index.store.StoreFileMetadata; import org.opensearch.index.translog.Translog; +import org.opensearch.indices.replication.common.ReplicationListener; import org.opensearch.indices.replication.common.ReplicationLuceneIndex; +import org.opensearch.indices.replication.common.ReplicationTarget; import java.io.IOException; import java.nio.file.Path; import java.util.List; import java.util.concurrent.CountDownLatch; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicLong; /** * Represents a recovery where the current node is the target node of the recovery. 
To track recoveries in a central place, instances of @@ -75,30 +71,14 @@ * * @opensearch.internal */ -public class RecoveryTarget extends AbstractRefCounted implements RecoveryTargetHandler { - - private final Logger logger; - - private static final AtomicLong idGenerator = new AtomicLong(); +public class RecoveryTarget extends ReplicationTarget implements RecoveryTargetHandler { private static final String RECOVERY_PREFIX = "recovery."; private final ShardId shardId; - private final long recoveryId; - private final IndexShard indexShard; private final DiscoveryNode sourceNode; - private final MultiFileWriter multiFileWriter; - private final RecoveryRequestTracker requestTracker = new RecoveryRequestTracker(); - private final Store store; - private final PeerRecoveryTargetService.RecoveryListener listener; - - private final AtomicBoolean finished = new AtomicBoolean(); - private final CancellableThreads cancellableThreads; - // last time this status was accessed - private volatile long lastAccessTime = System.nanoTime(); - // latch that can be used to blockingly wait for RecoveryTarget to be closed private final CountDownLatch closedLatch = new CountDownLatch(1); @@ -109,26 +89,11 @@ public class RecoveryTarget extends AbstractRefCounted implements RecoveryTarget * @param sourceNode source node of the recovery where we recover from * @param listener called when recovery is completed/failed */ - public RecoveryTarget(IndexShard indexShard, DiscoveryNode sourceNode, PeerRecoveryTargetService.RecoveryListener listener) { - super("recovery_status"); + public RecoveryTarget(IndexShard indexShard, DiscoveryNode sourceNode, ReplicationListener listener) { + super("recovery_status", indexShard, indexShard.recoveryState().getIndex(), listener); + this.shardId = indexShard.shardId(); this.cancellableThreads = new CancellableThreads(); - this.recoveryId = idGenerator.incrementAndGet(); - this.listener = listener; - this.logger = Loggers.getLogger(getClass(), 
indexShard.shardId()); - this.indexShard = indexShard; this.sourceNode = sourceNode; - this.shardId = indexShard.shardId(); - final String tempFilePrefix = RECOVERY_PREFIX + UUIDs.randomBase64UUID() + "."; - this.multiFileWriter = new MultiFileWriter( - indexShard.store(), - indexShard.recoveryState().getIndex(), - tempFilePrefix, - logger, - this::ensureRefCount - ); - this.store = indexShard.store(); - // make sure the store is not released until we are done. - store.incRef(); indexShard.recoveryStats().incCurrentAsTarget(); } @@ -141,14 +106,6 @@ public RecoveryTarget retryCopy() { return new RecoveryTarget(indexShard, sourceNode, listener); } - public ActionListener markRequestReceivedAndCreateListener(long requestSeqNo, ActionListener listener) { - return requestTracker.markReceivedAndCreateListener(requestSeqNo, listener); - } - - public long recoveryId() { - return recoveryId; - } - public ShardId shardId() { return shardId; } @@ -170,16 +127,6 @@ public CancellableThreads cancellableThreads() { return cancellableThreads; } - /** return the last time this RecoveryStatus was used (based on System.nanoTime() */ - public long lastAccessTime() { - return lastAccessTime; - } - - /** sets the lasAccessTime flag to now */ - public void setLastAccessTime() { - lastAccessTime = System.nanoTime(); - } - public Store store() { ensureRefCount(); return store; @@ -190,9 +137,10 @@ public Store store() { * Returns true if resetting the recovery was successful, false if the recovery target is already cancelled / failed or marked as done. */ boolean resetRecovery(CancellableThreads newTargetCancellableThreads) throws IOException { + final long recoveryId = getId(); if (finished.compareAndSet(false, true)) { try { - logger.debug("reset of recovery with shard {} and id [{}]", shardId, recoveryId); + logger.debug("reset of recovery with shard {} and id [{}]", indexShard.shardId(), recoveryId); } finally { // release the initial reference. 
recovery files will be cleaned as soon as ref count goes to zero, potentially now. decRef(); @@ -202,7 +150,7 @@ boolean resetRecovery(CancellableThreads newTargetCancellableThreads) throws IOE } catch (CancellableThreads.ExecutionCancelledException e) { logger.trace( "new recovery target cancelled for shard {} while waiting on old recovery target with id [{}] to close", - shardId, + indexShard.shardId(), recoveryId ); return false; @@ -248,22 +196,11 @@ public void cancel(String reason) { * @param sendShardFailure indicates whether to notify the cluster-manager of the shard failure */ public void fail(RecoveryFailedException e, boolean sendShardFailure) { - if (finished.compareAndSet(false, true)) { - try { - notifyListener(e, sendShardFailure); - } finally { - try { - cancellableThreads.cancel("failed recovery [" + ExceptionsHelper.stackTrace(e) + "]"); - } finally { - // release the initial reference. recovery files will be cleaned as soon as ref count goes to zero, potentially now - decRef(); - } - } - } + super.fail(e, sendShardFailure); } public void notifyListener(RecoveryFailedException e, boolean sendShardFailure) { - listener.onRecoveryFailure(state(), e, sendShardFailure); + listener.onFailure(state(), e, sendShardFailure); } /** mark the current recovery as done */ @@ -278,17 +215,15 @@ public void markAsDone() { // release the initial reference. recovery files will be cleaned as soon as ref count goes to zero, potentially now decRef(); } - listener.onRecoveryDone(state()); + listener.onDone(state()); } } @Override protected void closeInternal() { try { - multiFileWriter.close(); + super.closeInternal(); } finally { - // free store. 
increment happens in constructor - store.decRef(); indexShard.recoveryStats().decCurrentAsTarget(); closedLatch.countDown(); } @@ -296,15 +231,30 @@ protected void closeInternal() { @Override public String toString() { - return shardId + " [" + recoveryId + "]"; + return indexShard.shardId() + " [" + getId() + "]"; } - private void ensureRefCount() { - if (refCount() <= 0) { - throw new OpenSearchException( - "RecoveryStatus is used but it's refcount is 0. Probably a mismatch between incRef/decRef " + "calls" - ); - } + @Override + protected String getPrefix() { + return RECOVERY_PREFIX; + } + + @Override + protected void onDone() { + indexShard.postRecovery("peer recovery done"); + } + + /** + * if {@link #cancellableThreads()} was used, the threads will be interrupted. + */ + @Override + protected void onCancel(String reason) { + cancellableThreads.cancel(reason); + } + + @Override + protected void onFail(OpenSearchException e, boolean sendShardFailure) { + cancellableThreads.cancel("failed recovery [" + ExceptionsHelper.stackTrace(e) + "]"); } /*** Implementation of {@link RecoveryTargetHandler } */ @@ -374,7 +324,7 @@ public void indexTranslogOperations( translog.totalOperations(totalTranslogOps); assert indexShard().recoveryState() == state(); if (indexShard().state() != IndexShardState.RECOVERING) { - throw new IndexShardNotRecoveringException(shardId, indexShard().state()); + throw new IndexShardNotRecoveringException(indexShard.shardId(), indexShard().state()); } /* * The maxSeenAutoIdTimestampOnPrimary received from the primary is at least the highest auto_id_timestamp from any operation @@ -460,7 +410,7 @@ public void cleanFiles( final String translogUUID = Translog.createEmptyTranslog( indexShard.shardPath().resolveTranslog(), globalCheckpoint, - shardId, + indexShard.shardId(), indexShard.getPendingPrimaryTerm() ); store.associateIndexWithNewTranslog(translogUUID); @@ -513,13 +463,8 @@ public void writeFileChunk( int totalTranslogOps, ActionListener 
listener ) { - try { - state().getTranslog().totalOperations(totalTranslogOps); - multiFileWriter.writeFileChunk(fileMetadata, position, content, lastChunk); - listener.onResponse(null); - } catch (Exception e) { - listener.onFailure(e); - } + state().getTranslog().totalOperations(totalTranslogOps); + this.writeFileChunk(fileMetadata, position, content, lastChunk, listener); } /** Get a temporary name for the provided file name. */ diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationListener.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationListener.java new file mode 100644 index 0000000000000..f0972176169bd --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationListener.java @@ -0,0 +1,18 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.indices.replication.common; + +import org.opensearch.OpenSearchException; + +public interface ReplicationListener { + + void onDone(ReplicationState state); + + void onFailure(ReplicationState state, OpenSearchException e, boolean sendShardFailure); +} diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java new file mode 100644 index 0000000000000..86ceff4fc9eb7 --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java @@ -0,0 +1,83 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.indices.replication.common; + +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.xcontent.ToXContentFragment; +import org.opensearch.common.xcontent.XContentBuilder; + +import java.io.IOException; + +public class ReplicationState implements ToXContentFragment, Writeable { + + protected ReplicationTimer timer; + protected ReplicationLuceneIndex index; + + protected ReplicationState() { + // Empty default constructor for subclasses + } + + protected ReplicationState(ReplicationLuceneIndex index) { + this.index = index; + timer = new ReplicationTimer(); + timer.start(); + } + + public ReplicationTimer getTimer() { + return timer; + } + + public ReplicationLuceneIndex getIndex() { + return index; + } + + public ReplicationState(StreamInput in) throws IOException { + timer = new ReplicationTimer(in); + index = new ReplicationLuceneIndex(in); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + timer.writeTo(out); + index.writeTo(out); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.timeField(ReplicationState.Fields.START_TIME_IN_MILLIS, ReplicationState.Fields.START_TIME, timer.startTime()); + if (timer.stopTime() > 0) { + builder.timeField(ReplicationState.Fields.STOP_TIME_IN_MILLIS, ReplicationState.Fields.STOP_TIME, timer.stopTime()); + } + builder.humanReadableField( + ReplicationState.Fields.TOTAL_TIME_IN_MILLIS, + ReplicationState.Fields.TOTAL_TIME, + new TimeValue(timer.time()) + ); + + builder.startObject(ReplicationState.Fields.INDEX); + index.toXContent(builder, params); + builder.endObject(); + + return builder; + } + + static final class Fields { + static final String START_TIME = "start_time"; + static final String START_TIME_IN_MILLIS = 
"start_time_in_millis"; + static final String STOP_TIME = "stop_time"; + static final String STOP_TIME_IN_MILLIS = "stop_time_in_millis"; + static final String TOTAL_TIME = "total_time"; + static final String TOTAL_TIME_IN_MILLIS = "total_time_in_millis"; + static final String INDEX = "index"; + } + +} diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java new file mode 100644 index 0000000000000..c457ee92d912f --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java @@ -0,0 +1,184 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.indices.replication.common; + +import org.apache.logging.log4j.Logger; +import org.opensearch.OpenSearchException; +import org.opensearch.action.ActionListener; +import org.opensearch.common.UUIDs; +import org.opensearch.common.bytes.BytesReference; +import org.opensearch.common.logging.Loggers; +import org.opensearch.common.util.concurrent.AbstractRefCounted; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.index.store.Store; +import org.opensearch.index.store.StoreFileMetadata; +import org.opensearch.indices.recovery.MultiFileWriter; +import org.opensearch.indices.recovery.RecoveryRequestTracker; + +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; + +public abstract class ReplicationTarget extends AbstractRefCounted { + + // TODO will this cause issues because its shared between subclasses? 
+ private static final AtomicLong ID_GENERATOR = new AtomicLong(); + + // last time the target/status was accessed + private volatile long lastAccessTime = System.nanoTime(); + private final RecoveryRequestTracker requestTracker = new RecoveryRequestTracker(); + private final long id; + + protected final AtomicBoolean finished = new AtomicBoolean(); + protected final IndexShard indexShard; + protected final Store store; + protected final ReplicationListener listener; + protected final MultiFileWriter multiFileWriter; + protected final Logger logger; + protected final ReplicationLuceneIndex recoveryStateIndex; + + protected abstract String getPrefix(); + + protected abstract void onDone(); + + protected abstract void onCancel(String reason); + + protected abstract void onFail(OpenSearchException e, boolean sendShardFailure); + + public abstract ReplicationState state(); + + public ReplicationTarget(String name, IndexShard indexShard, ReplicationLuceneIndex recoveryStateIndex, ReplicationListener listener) { + super(name); + this.logger = Loggers.getLogger(getClass(), indexShard.shardId()); + this.listener = listener; + this.id = ID_GENERATOR.incrementAndGet(); + this.recoveryStateIndex = recoveryStateIndex; + this.indexShard = indexShard; + this.store = indexShard.store(); + final String tempFilePrefix = getPrefix() + UUIDs.randomBase64UUID() + "."; + this.multiFileWriter = new MultiFileWriter(indexShard.store(), recoveryStateIndex, tempFilePrefix, logger, this::ensureRefCount); + // make sure the store is not released until we are done. 
+ store.incRef(); + } + + public long getId() { + return id; + } + + /** + * return the last time this target was used (based on System.nanoTime()) + */ + public long lastAccessTime() { + return lastAccessTime; + } + + /** + * sets the lastAccessTime flag to now + */ + public void setLastAccessTime() { + lastAccessTime = System.nanoTime(); + } + + public ActionListener markRequestReceivedAndCreateListener(long requestSeqNo, ActionListener listener) { + return requestTracker.markReceivedAndCreateListener(requestSeqNo, listener); + } + + public IndexShard indexShard() { + ensureRefCount(); + return indexShard; + } + + public Store store() { + ensureRefCount(); + return store; + } + + public void writeFileChunk( + StoreFileMetadata fileMetadata, + long position, + BytesReference content, + boolean lastChunk, + ActionListener actionListener + ) { + try { + multiFileWriter.writeFileChunk(fileMetadata, position, content, lastChunk); + actionListener.onResponse(null); + } catch (Exception e) { + actionListener.onFailure(e); + } + } + + /** + * mark the current recovery as done + */ + public void markAsDone() { + if (finished.compareAndSet(false, true)) { + try { + onDone(); + } finally { + // release the initial reference. recovery files will be cleaned as soon as ref count goes to zero, potentially now + decRef(); + } + listener.onDone(state()); + } + } + + /** + * cancel the recovery. calling this method will clean temporary files and release the store + * unless this object is in use (in which case it will be cleaned once all ongoing users call + * {@link #decRef()}) + */ + public void cancel(String reason) { + if (finished.compareAndSet(false, true)) { + try { + logger.debug("recovery canceled (reason: [{}])", reason); + onCancel(reason); + } finally { + // release the initial reference.
recovery files will be cleaned as soon as ref count goes to zero, potentially now + decRef(); + } + } + } + + /** + * fail the recovery and call listener + * + * @param e exception encapsulating the failure + * @param sendShardFailure indicates whether to notify the master of the shard failure + */ + public void fail(OpenSearchException e, boolean sendShardFailure) { + if (finished.compareAndSet(false, true)) { + try { + listener.onFailure(state(), e, sendShardFailure); + } finally { + try { + onFail(e, sendShardFailure); + } finally { + // release the initial reference. recovery files will be cleaned as soon as ref count goes to zero, potentially now + decRef(); + } + } + } + } + + protected void ensureRefCount() { + if (refCount() <= 0) { + throw new OpenSearchException("RecoveryStatus is used but it's refcount is 0. Probably a mismatch between incRef/decRef calls"); + } + } + + @Override + protected void closeInternal() { + try { + multiFileWriter.close(); + } finally { + // free store.
increment happens in constructor + store.decRef(); + } + } +} diff --git a/server/src/test/java/org/opensearch/index/replication/RecoveryDuringReplicationTests.java b/server/src/test/java/org/opensearch/index/replication/RecoveryDuringReplicationTests.java index add2ecd34e3af..509d1f52daa0d 100644 --- a/server/src/test/java/org/opensearch/index/replication/RecoveryDuringReplicationTests.java +++ b/server/src/test/java/org/opensearch/index/replication/RecoveryDuringReplicationTests.java @@ -69,9 +69,9 @@ import org.opensearch.index.shard.PrimaryReplicaSyncer; import org.opensearch.index.store.Store; import org.opensearch.index.translog.Translog; -import org.opensearch.indices.recovery.PeerRecoveryTargetService; import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.recovery.RecoveryTarget; +import org.opensearch.indices.replication.common.ReplicationListener; import java.io.IOException; import java.util.ArrayList; @@ -809,7 +809,7 @@ public BlockingTarget( CountDownLatch releaseRecovery, IndexShard shard, DiscoveryNode sourceNode, - PeerRecoveryTargetService.RecoveryListener listener, + ReplicationListener listener, Logger logger ) { super(shard, sourceNode, listener); diff --git a/server/src/test/java/org/opensearch/indices/cluster/AbstractIndicesClusterStateServiceTestCase.java b/server/src/test/java/org/opensearch/indices/cluster/AbstractIndicesClusterStateServiceTestCase.java index 9d83071c177f5..6ba75d504dab4 100644 --- a/server/src/test/java/org/opensearch/indices/cluster/AbstractIndicesClusterStateServiceTestCase.java +++ b/server/src/test/java/org/opensearch/indices/cluster/AbstractIndicesClusterStateServiceTestCase.java @@ -57,6 +57,7 @@ import org.opensearch.indices.cluster.IndicesClusterStateService.Shard; import org.opensearch.indices.recovery.PeerRecoveryTargetService; import org.opensearch.indices.recovery.RecoveryState; +import org.opensearch.indices.replication.common.ReplicationListener; import 
org.opensearch.repositories.RepositoriesService; import org.opensearch.test.OpenSearchTestCase; import org.junit.Before; @@ -253,7 +254,7 @@ public MockIndexService indexService(Index index) { public MockIndexShard createShard( final ShardRouting shardRouting, final PeerRecoveryTargetService recoveryTargetService, - final PeerRecoveryTargetService.RecoveryListener recoveryListener, + final ReplicationListener recoveryListener, final RepositoriesService repositoriesService, final Consumer onShardFailure, final Consumer globalCheckpointSyncer, diff --git a/server/src/test/java/org/opensearch/indices/recovery/RecoveryTests.java b/server/src/test/java/org/opensearch/indices/recovery/RecoveryTests.java index 5e09e0f2253df..5224a54a35e96 100644 --- a/server/src/test/java/org/opensearch/indices/recovery/RecoveryTests.java +++ b/server/src/test/java/org/opensearch/indices/recovery/RecoveryTests.java @@ -41,6 +41,7 @@ import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.store.AlreadyClosedException; import org.opensearch.ExceptionsHelper; +import org.opensearch.OpenSearchException; import org.opensearch.action.ActionListener; import org.opensearch.action.admin.indices.flush.FlushRequest; import org.opensearch.action.bulk.BulkShardRequest; @@ -68,6 +69,8 @@ import org.opensearch.index.store.Store; import org.opensearch.index.translog.SnapshotMatchers; import org.opensearch.index.translog.Translog; +import org.opensearch.indices.replication.common.ReplicationListener; +import org.opensearch.indices.replication.common.ReplicationState; import java.io.IOException; import java.util.HashMap; @@ -448,20 +451,17 @@ public long addDocument(Iterable doc) throws IOExcepti IndexShard replica = group.addReplica(); expectThrows( Exception.class, - () -> group.recoverReplica( - replica, - (shard, sourceNode) -> new RecoveryTarget(shard, sourceNode, new PeerRecoveryTargetService.RecoveryListener() { - @Override - public void onRecoveryDone(RecoveryState state) { - throw 
new AssertionError("recovery must fail"); - } + () -> group.recoverReplica(replica, (shard, sourceNode) -> new RecoveryTarget(shard, sourceNode, new ReplicationListener() { + @Override + public void onDone(ReplicationState state) { + throw new AssertionError("recovery must fail"); + } - @Override - public void onRecoveryFailure(RecoveryState state, RecoveryFailedException e, boolean sendShardFailure) { - assertThat(ExceptionsHelper.unwrap(e, IOException.class).getMessage(), equalTo("simulated")); - } - }) - ) + @Override + public void onFailure(ReplicationState state, OpenSearchException e, boolean sendShardFailure) { + assertThat(ExceptionsHelper.unwrap(e, IOException.class).getMessage(), equalTo("simulated")); + } + })) ); expectThrows(AlreadyClosedException.class, () -> replica.refresh("test")); group.removeReplica(replica); diff --git a/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java b/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java index 6a08f5115d1e2..dec9374830c0e 100644 --- a/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java +++ b/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java @@ -39,10 +39,10 @@ import org.opensearch.index.shard.ShardId; import org.opensearch.index.store.Store; import org.opensearch.indices.recovery.RecoveriesCollection; -import org.opensearch.indices.recovery.RecoveryFailedException; import org.opensearch.indices.recovery.RecoveryState; -import org.opensearch.indices.recovery.PeerRecoveryTargetService; import org.opensearch.indices.recovery.RecoveryTarget; +import org.opensearch.indices.replication.common.ReplicationListener; +import org.opensearch.indices.replication.common.ReplicationState; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; @@ -52,14 +52,14 @@ import static org.hamcrest.Matchers.lessThan; public class RecoveriesCollectionTests extends OpenSearchIndexLevelReplicationTestCase { - 
static final PeerRecoveryTargetService.RecoveryListener listener = new PeerRecoveryTargetService.RecoveryListener() { + static final ReplicationListener listener = new ReplicationListener() { @Override - public void onRecoveryDone(RecoveryState state) { + public void onDone(ReplicationState state) { } @Override - public void onRecoveryFailure(RecoveryState state, RecoveryFailedException e, boolean sendShardFailure) { + public void onFailure(ReplicationState state, OpenSearchException e, boolean sendShardFailure) { } }; @@ -86,24 +86,18 @@ public void testRecoveryTimeout() throws Exception { final RecoveriesCollection collection = new RecoveriesCollection(logger, threadPool); final AtomicBoolean failed = new AtomicBoolean(); final CountDownLatch latch = new CountDownLatch(1); - final long recoveryId = startRecovery( - collection, - shards.getPrimaryNode(), - shards.addReplica(), - new PeerRecoveryTargetService.RecoveryListener() { - @Override - public void onRecoveryDone(RecoveryState state) { - latch.countDown(); - } - - @Override - public void onRecoveryFailure(RecoveryState state, RecoveryFailedException e, boolean sendShardFailure) { - failed.set(true); - latch.countDown(); - } - }, - TimeValue.timeValueMillis(100) - ); + final long recoveryId = startRecovery(collection, shards.getPrimaryNode(), shards.addReplica(), new ReplicationListener() { + @Override + public void onDone(ReplicationState state) { + latch.countDown(); + } + + @Override + public void onFailure(ReplicationState state, OpenSearchException e, boolean sendShardFailure) { + failed.set(true); + latch.countDown(); + } + }, TimeValue.timeValueMillis(100)); try { latch.await(30, TimeUnit.SECONDS); assertTrue("recovery failed to timeout", failed.get()); @@ -145,7 +139,7 @@ public void testResetRecovery() throws Exception { Store store = recoveryTarget.store(); String tempFileName = recoveryTarget.getTempNameForFile("foobar"); RecoveryTarget resetRecovery = collection.resetRecovery(recoveryId, 
TimeValue.timeValueMinutes(60)); - final long resetRecoveryId = resetRecovery.recoveryId(); + final long resetRecoveryId = resetRecovery.getId(); assertNotSame(recoveryTarget, resetRecovery); assertNotSame(recoveryTarget.cancellableThreads(), resetRecovery.cancellableThreads()); assertSame(indexShard, resetRecovery.indexShard()); @@ -177,7 +171,7 @@ long startRecovery( RecoveriesCollection collection, DiscoveryNode sourceNode, IndexShard indexShard, - PeerRecoveryTargetService.RecoveryListener listener, + ReplicationListener listener, TimeValue timeValue ) { final DiscoveryNode rNode = getDiscoveryNode(indexShard.routingEntry().currentNodeId()); diff --git a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java index 509edfd1b9103..298fdcaea6465 100644 --- a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java @@ -34,6 +34,7 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexCommit; import org.apache.lucene.store.Directory; +import org.opensearch.OpenSearchException; import org.opensearch.Version; import org.opensearch.action.admin.indices.flush.FlushRequest; import org.opensearch.action.index.IndexRequest; @@ -93,6 +94,8 @@ import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.recovery.RecoveryTarget; import org.opensearch.indices.recovery.StartRecoveryRequest; +import org.opensearch.indices.replication.common.ReplicationListener; +import org.opensearch.indices.replication.common.ReplicationState; import org.opensearch.repositories.IndexId; import org.opensearch.repositories.Repository; import org.opensearch.repositories.blobstore.OpenSearchBlobStoreRepositoryIntegTestCase; @@ -138,14 +141,14 @@ public abstract class IndexShardTestCase extends OpenSearchTestCase { } }; - protected 
static final PeerRecoveryTargetService.RecoveryListener recoveryListener = new PeerRecoveryTargetService.RecoveryListener() { + protected static final ReplicationListener recoveryListener = new ReplicationListener() { @Override - public void onRecoveryDone(RecoveryState state) { + public void onDone(ReplicationState state) { } @Override - public void onRecoveryFailure(RecoveryState state, RecoveryFailedException e, boolean sendShardFailure) { + public void onFailure(ReplicationState state, OpenSearchException e, boolean sendShardFailure) { throw new AssertionError(e); } }; From ca0106a24d4901a42367c4eb849b5fea676b3fcc Mon Sep 17 00:00:00 2001 From: Poojita Raj Date: Tue, 10 May 2022 09:42:08 -0700 Subject: [PATCH 02/10] Refactoring: mixedClusterVersion error fix + move Stage to ReplicationState Signed-off-by: Poojita Raj --- .../indices/recovery/IndexRecoveryIT.java | 2 +- .../recovery/PeerRecoveryTargetService.java | 8 +- .../recovery/RecoveriesCollection.java | 28 +++-- .../indices/recovery/RecoveryState.java | 78 +++---------- .../indices/recovery/RecoveryTarget.java | 9 +- .../replication/common/ReplicationState.java | 103 +++++++++--------- .../replication/common/ReplicationTarget.java | 1 - .../indices/recovery/RecoveryTargetTests.java | 2 +- .../recovery/RecoveriesCollectionTests.java | 2 +- 9 files changed, 98 insertions(+), 135 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java index 09caf8f1e4358..cc0411991f317 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java @@ -101,7 +101,7 @@ import org.opensearch.indices.IndicesService; import org.opensearch.indices.NodeIndicesStats; import org.opensearch.indices.analysis.AnalysisModule; -import 
org.opensearch.indices.recovery.RecoveryState.Stage; +import org.opensearch.indices.replication.common.ReplicationState.Stage; import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.node.NodeClosedException; import org.opensearch.node.RecoverySettingsChunkSizePlugin; diff --git a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java index 033dad7f312d2..28e8d076cfc20 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java +++ b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java @@ -238,7 +238,7 @@ private void doRecovery(final long recoveryId, final StartRecoveryRequest preExi final IndexShard indexShard = recoveryTarget.indexShard(); indexShard.preRecovery(); assert recoveryTarget.sourceNode() != null : "can not do a recovery without a source node"; - logger.trace("{} preparing shard for peer recovery", recoveryTarget.shardId()); + logger.trace("{} preparing shard for peer recovery", recoveryTarget.indexShard().shardId()); indexShard.prepareForIndexRecovery(); final long startingSeqNo = indexShard.recoverLocallyUpToGlobalCheckpoint(); assert startingSeqNo == UNASSIGNED_SEQ_NO || recoveryTarget.state().getStage() == RecoveryState.Stage.TRANSLOG @@ -289,7 +289,7 @@ public static StartRecoveryRequest getStartRecoveryRequest( long startingSeqNo ) { final StartRecoveryRequest request; - logger.trace("{} collecting local files for [{}]", recoveryTarget.shardId(), recoveryTarget.sourceNode()); + logger.trace("{} collecting local files for [{}]", recoveryTarget.indexShard().shardId(), recoveryTarget.sourceNode()); Store.MetadataSnapshot metadataSnapshot; try { @@ -332,9 +332,9 @@ public static StartRecoveryRequest getStartRecoveryRequest( } metadataSnapshot = Store.MetadataSnapshot.EMPTY; } - logger.trace("{} local file count [{}]", 
recoveryTarget.shardId(), metadataSnapshot.size()); + logger.trace("{} local file count [{}]", recoveryTarget.indexShard().shardId(), metadataSnapshot.size()); request = new StartRecoveryRequest( - recoveryTarget.shardId(), + recoveryTarget.indexShard().shardId(), recoveryTarget.indexShard().routingEntry().allocationId().getId(), recoveryTarget.sourceNode(), localNode, diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveriesCollection.java b/server/src/main/java/org/opensearch/indices/recovery/RecoveriesCollection.java index 9c42e3f32e166..3e5ab94c317da 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoveriesCollection.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoveriesCollection.java @@ -86,7 +86,12 @@ public long startRecovery(IndexShard indexShard, DiscoveryNode sourceNode, Repli private void startRecoveryInternal(RecoveryTarget recoveryTarget, TimeValue activityTimeout) { RecoveryTarget existingTarget = onGoingRecoveries.putIfAbsent(recoveryTarget.getId(), recoveryTarget); assert existingTarget == null : "found two RecoveryStatus instances with the same id"; - logger.trace("{} started recovery from {}, id [{}]", recoveryTarget.shardId(), recoveryTarget.sourceNode(), recoveryTarget.getId()); + logger.trace( + "{} started recovery from {}, id [{}]", + recoveryTarget.indexShard().shardId(), + recoveryTarget.sourceNode(), + recoveryTarget.getId() + ); threadPool.schedule( new RecoveryMonitor(recoveryTarget.getId(), recoveryTarget.lastAccessTime(), activityTimeout), activityTimeout, @@ -122,7 +127,7 @@ public RecoveryTarget resetRecovery(final long recoveryId, final TimeValue activ if (successfulReset) { logger.trace( "{} restarted recovery from {}, id [{}], previous id [{}]", - newRecoveryTarget.shardId(), + newRecoveryTarget.indexShard().shardId(), newRecoveryTarget.sourceNode(), newRecoveryTarget.getId(), oldRecoveryTarget.getId() @@ -131,7 +136,7 @@ public RecoveryTarget resetRecovery(final long 
recoveryId, final TimeValue activ } else { logger.trace( "{} recovery could not be reset as it is already cancelled, recovery from {}, id [{}], previous id [{}]", - newRecoveryTarget.shardId(), + newRecoveryTarget.indexShard().shardId(), newRecoveryTarget.sourceNode(), newRecoveryTarget.getId(), oldRecoveryTarget.getId() @@ -171,7 +176,7 @@ public RecoveryRef getRecoverySafe(long id, ShardId shardId) { if (recoveryRef == null) { throw new IndexShardClosedException(shardId); } - assert recoveryRef.get().shardId().equals(shardId); + assert recoveryRef.get().indexShard().shardId().equals(shardId); return recoveryRef; } @@ -182,7 +187,7 @@ public boolean cancelRecovery(long id, String reason) { if (removed != null) { logger.trace( "{} canceled recovery from {}, id [{}] (reason [{}])", - removed.shardId(), + removed.indexShard().shardId(), removed.sourceNode(), removed.getId(), reason @@ -205,7 +210,7 @@ public void failRecovery(long id, RecoveryFailedException e, boolean sendShardFa if (removed != null) { logger.trace( "{} failing recovery from {}, id [{}]. 
Send shard failure: [{}]", - removed.shardId(), + removed.indexShard().shardId(), removed.sourceNode(), removed.getId(), sendShardFailure @@ -218,7 +223,12 @@ public void failRecovery(long id, RecoveryFailedException e, boolean sendShardFa public void markRecoveryAsDone(long id) { RecoveryTarget removed = onGoingRecoveries.remove(id); if (removed != null) { - logger.trace("{} marking recovery from {} as done, id [{}]", removed.shardId(), removed.sourceNode(), removed.getId()); + logger.trace( + "{} marking recovery from {} as done, id [{}]", + removed.indexShard().shardId(), + removed.sourceNode(), + removed.getId() + ); removed.markAsDone(); } } @@ -241,7 +251,7 @@ public boolean cancelRecoveriesForShard(ShardId shardId, String reason) { synchronized (onGoingRecoveries) { for (Iterator it = onGoingRecoveries.values().iterator(); it.hasNext();) { RecoveryTarget status = it.next(); - if (status.shardId().equals(shardId)) { + if (status.indexShard().shardId().equals(shardId)) { matchedRecoveries.add(status); it.remove(); } @@ -250,7 +260,7 @@ public boolean cancelRecoveriesForShard(ShardId shardId, String reason) { for (RecoveryTarget removed : matchedRecoveries) { logger.trace( "{} canceled recovery from {}, id [{}] (reason [{}])", - removed.shardId(), + removed.indexShard().shardId(), removed.sourceNode(), removed.getId(), reason diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveryState.java b/server/src/main/java/org/opensearch/indices/recovery/RecoveryState.java index d2303f950e154..5ae3ad9bb01df 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoveryState.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoveryState.java @@ -59,63 +59,6 @@ */ public class RecoveryState extends ReplicationState implements ToXContentFragment, Writeable { - /** - * The stage of the recovery state - * - * @opensearch.internal - */ - public enum Stage { - INIT((byte) 0), - - /** - * recovery of lucene files, either reusing 
local ones are copying new ones - */ - INDEX((byte) 1), - - /** - * potentially running check index - */ - VERIFY_INDEX((byte) 2), - - /** - * starting up the engine, replaying the translog - */ - TRANSLOG((byte) 3), - - /** - * performing final task after all translog ops have been done - */ - FINALIZE((byte) 4), - - DONE((byte) 5); - - private static final Stage[] STAGES = new Stage[Stage.values().length]; - - static { - for (Stage stage : Stage.values()) { - assert stage.id() < STAGES.length && stage.id() >= 0; - STAGES[stage.id] = stage; - } - } - - private final byte id; - - Stage(byte id) { - this.id = id; - } - - public byte id() { - return id; - } - - public static Stage fromId(byte id) { - if (id < 0 || id >= STAGES.length) { - throw new IllegalArgumentException("No mapping for id [" + id + "]"); - } - return STAGES[id]; - } - } - private Stage stage; private final Translog translog; @@ -156,12 +99,13 @@ public RecoveryState( } public RecoveryState(StreamInput in) throws IOException { - super(in); + timer = new ReplicationTimer(in); stage = Stage.fromId(in.readByte()); shardId = new ShardId(in); recoverySource = RecoverySource.readFrom(in); targetNode = new DiscoveryNode(in); sourceNode = in.readOptionalWriteable(DiscoveryNode::new); + index = new ReplicationLuceneIndex(in); translog = new Translog(in); verifyIndex = new VerifyIndex(in); primary = in.readBoolean(); @@ -169,12 +113,13 @@ public RecoveryState(StreamInput in) throws IOException { @Override public void writeTo(StreamOutput out) throws IOException { - super.writeTo(out); + timer.writeTo(out); out.writeByte(stage.id()); shardId.writeTo(out); recoverySource.writeTo(out); targetNode.writeTo(out); out.writeOptionalWriteable(sourceNode); + index.writeTo(out); translog.writeTo(out); verifyIndex.writeTo(out); out.writeBoolean(primary); @@ -275,11 +220,15 @@ public boolean getPrimary() { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - 
super.toXContent(builder, params); builder.field(Fields.ID, shardId.id()); builder.field(Fields.TYPE, recoverySource.getType()); builder.field(Fields.STAGE, stage.toString()); builder.field(Fields.PRIMARY, primary); + builder.timeField(Fields.START_TIME_IN_MILLIS, Fields.START_TIME, timer.startTime()); + if (timer.stopTime() > 0) { + builder.timeField(Fields.STOP_TIME_IN_MILLIS, Fields.STOP_TIME, timer.stopTime()); + } + builder.humanReadableField(Fields.TOTAL_TIME_IN_MILLIS, Fields.TOTAL_TIME, new TimeValue(timer.time())); if (recoverySource.getType() == RecoverySource.Type.PEER) { builder.startObject(Fields.SOURCE); builder.field(Fields.ID, sourceNode.getId()); @@ -302,6 +251,10 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(Fields.NAME, targetNode.getName()); builder.endObject(); + builder.startObject(Fields.INDEX); + index.toXContent(builder, params); + builder.endObject(); + builder.startObject(Fields.TRANSLOG); translog.toXContent(builder, params); builder.endObject(); @@ -323,6 +276,10 @@ static final class Fields { static final String TYPE = "type"; static final String STAGE = "stage"; static final String PRIMARY = "primary"; + static final String START_TIME = "start_time"; + static final String START_TIME_IN_MILLIS = "start_time_in_millis"; + static final String STOP_TIME = "stop_time"; + static final String STOP_TIME_IN_MILLIS = "stop_time_in_millis"; static final String TOTAL_TIME = "total_time"; static final String TOTAL_TIME_IN_MILLIS = "total_time_in_millis"; static final String SOURCE = "source"; @@ -331,6 +288,7 @@ static final class Fields { static final String IP = "ip"; static final String NAME = "name"; static final String TARGET = "target"; + static final String INDEX = "index"; static final String TRANSLOG = "translog"; static final String TOTAL_ON_START = "total_on_start"; static final String VERIFY_INDEX = "verify_index"; diff --git 
a/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java b/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java index 1a1de4beb5720..34b25ac97d833 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java @@ -36,8 +36,8 @@ import org.apache.lucene.index.IndexFormatTooNewException; import org.apache.lucene.index.IndexFormatTooOldException; import org.opensearch.Assertions; -import org.opensearch.OpenSearchException; import org.opensearch.ExceptionsHelper; +import org.opensearch.OpenSearchException; import org.opensearch.action.ActionListener; import org.opensearch.action.admin.indices.flush.FlushRequest; import org.opensearch.cluster.node.DiscoveryNode; @@ -52,7 +52,6 @@ import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.IndexShardNotRecoveringException; import org.opensearch.index.shard.IndexShardState; -import org.opensearch.index.shard.ShardId; import org.opensearch.index.store.Store; import org.opensearch.index.store.StoreFileMetadata; import org.opensearch.index.translog.Translog; @@ -75,7 +74,6 @@ public class RecoveryTarget extends ReplicationTarget implements RecoveryTargetH private static final String RECOVERY_PREFIX = "recovery."; - private final ShardId shardId; private final DiscoveryNode sourceNode; private final CancellableThreads cancellableThreads; @@ -91,7 +89,6 @@ public class RecoveryTarget extends ReplicationTarget implements RecoveryTargetH */ public RecoveryTarget(IndexShard indexShard, DiscoveryNode sourceNode, ReplicationListener listener) { super("recovery_status", indexShard, indexShard.recoveryState().getIndex(), listener); - this.shardId = indexShard.shardId(); this.cancellableThreads = new CancellableThreads(); this.sourceNode = sourceNode; indexShard.recoveryStats().incCurrentAsTarget(); @@ -106,10 +103,6 @@ public RecoveryTarget retryCopy() { return new 
RecoveryTarget(indexShard, sourceNode, listener); } - public ShardId shardId() { - return shardId; - } - public IndexShard indexShard() { ensureRefCount(); return indexShard; diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java index 86ceff4fc9eb7..2029dc85f2295 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java @@ -8,20 +8,63 @@ package org.opensearch.indices.replication.common; -import org.opensearch.common.io.stream.StreamInput; -import org.opensearch.common.io.stream.StreamOutput; -import org.opensearch.common.io.stream.Writeable; -import org.opensearch.common.unit.TimeValue; -import org.opensearch.common.xcontent.ToXContentFragment; -import org.opensearch.common.xcontent.XContentBuilder; - -import java.io.IOException; - -public class ReplicationState implements ToXContentFragment, Writeable { +public class ReplicationState { protected ReplicationTimer timer; protected ReplicationLuceneIndex index; + public enum Stage { + INIT((byte) 0), + + /** + * recovery of lucene files, either reusing local ones are copying new ones + */ + INDEX((byte) 1), + + /** + * potentially running check index + */ + VERIFY_INDEX((byte) 2), + + /** + * starting up the engine, replaying the translog + */ + TRANSLOG((byte) 3), + + /** + * performing final task after all translog ops have been done + */ + FINALIZE((byte) 4), + + DONE((byte) 5); + + private static final Stage[] STAGES = new Stage[Stage.values().length]; + + static { + for (Stage stage : Stage.values()) { + assert stage.id() < STAGES.length && stage.id() >= 0; + STAGES[stage.id] = stage; + } + } + + private final byte id; + + Stage(byte id) { + this.id = id; + } + + public byte id() { + return id; + } + + public static Stage fromId(byte id) { + if (id < 0 
|| id >= STAGES.length) { + throw new IllegalArgumentException("No mapping for id [" + id + "]"); + } + return STAGES[id]; + } + } + protected ReplicationState() { // Empty default constructor for subclasses } @@ -40,44 +83,4 @@ public ReplicationLuceneIndex getIndex() { return index; } - public ReplicationState(StreamInput in) throws IOException { - timer = new ReplicationTimer(in); - index = new ReplicationLuceneIndex(in); - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - timer.writeTo(out); - index.writeTo(out); - } - - @Override - public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.timeField(ReplicationState.Fields.START_TIME_IN_MILLIS, ReplicationState.Fields.START_TIME, timer.startTime()); - if (timer.stopTime() > 0) { - builder.timeField(ReplicationState.Fields.STOP_TIME_IN_MILLIS, ReplicationState.Fields.STOP_TIME, timer.stopTime()); - } - builder.humanReadableField( - ReplicationState.Fields.TOTAL_TIME_IN_MILLIS, - ReplicationState.Fields.TOTAL_TIME, - new TimeValue(timer.time()) - ); - - builder.startObject(ReplicationState.Fields.INDEX); - index.toXContent(builder, params); - builder.endObject(); - - return builder; - } - - static final class Fields { - static final String START_TIME = "start_time"; - static final String START_TIME_IN_MILLIS = "start_time_in_millis"; - static final String STOP_TIME = "stop_time"; - static final String STOP_TIME_IN_MILLIS = "stop_time_in_millis"; - static final String TOTAL_TIME = "total_time"; - static final String TOTAL_TIME_IN_MILLIS = "total_time_in_millis"; - static final String INDEX = "index"; - } - } diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java index c457ee92d912f..3698d3c39f969 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java +++ 
b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java @@ -26,7 +26,6 @@ public abstract class ReplicationTarget extends AbstractRefCounted { - // TODO will this cause issues because its shared between subclasses? private static final AtomicLong ID_GENERATOR = new AtomicLong(); // last time the target/status was accessed diff --git a/server/src/test/java/org/opensearch/indices/recovery/RecoveryTargetTests.java b/server/src/test/java/org/opensearch/indices/recovery/RecoveryTargetTests.java index bb296f32ca8bd..590b89e97fa03 100644 --- a/server/src/test/java/org/opensearch/indices/recovery/RecoveryTargetTests.java +++ b/server/src/test/java/org/opensearch/indices/recovery/RecoveryTargetTests.java @@ -43,7 +43,7 @@ import org.opensearch.index.shard.ShardId; import org.opensearch.indices.replication.common.ReplicationLuceneIndex.FileMetadata; import org.opensearch.indices.replication.common.ReplicationLuceneIndex; -import org.opensearch.indices.recovery.RecoveryState.Stage; +import org.opensearch.indices.replication.common.ReplicationState.Stage; import org.opensearch.indices.recovery.RecoveryState.Translog; import org.opensearch.indices.recovery.RecoveryState.VerifyIndex; import org.opensearch.indices.replication.common.ReplicationTimer; diff --git a/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java b/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java index dec9374830c0e..04114bda1ce2a 100644 --- a/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java +++ b/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java @@ -114,7 +114,7 @@ public void testRecoveryCancellation() throws Exception { final long recoveryId = startRecovery(collection, shards.getPrimaryNode(), shards.addReplica()); final long recoveryId2 = startRecovery(collection, shards.getPrimaryNode(), shards.addReplica()); try (RecoveriesCollection.RecoveryRef recoveryRef = 
collection.getRecovery(recoveryId)) { - ShardId shardId = recoveryRef.get().shardId(); + ShardId shardId = recoveryRef.get().indexShard().shardId(); assertTrue("failed to cancel recoveries", collection.cancelRecoveriesForShard(shardId, "test")); assertThat("all recoveries should be cancelled", collection.size(), equalTo(0)); } finally { From ba51c17bf259971a7b396e215c639dbf934c2072 Mon Sep 17 00:00:00 2001 From: Poojita Raj Date: Thu, 12 May 2022 13:05:42 -0700 Subject: [PATCH 03/10] pull ReplicationListener into a top level class + add javadocs + address review comments Signed-off-by: Poojita Raj --- .../opensearch/index/shard/IndexShard.java | 6 +- .../opensearch/indices/IndicesService.java | 4 +- .../cluster/IndicesClusterStateService.java | 51 +++++---------- .../recovery/PeerRecoveryTargetService.java | 5 +- .../indices/recovery/RecoveryListener.java | 62 +++++++++++++++++++ .../common/ReplicationListener.java | 5 ++ .../common/ReplicationRequestTracker.java} | 6 +- .../replication/common/ReplicationState.java | 7 ++- .../replication/common/ReplicationTarget.java | 26 ++++---- ...actIndicesClusterStateServiceTestCase.java | 8 +-- ...va => ReplicationRequestTrackerTests.java} | 5 +- 11 files changed, 121 insertions(+), 64 deletions(-) create mode 100644 server/src/main/java/org/opensearch/indices/recovery/RecoveryListener.java rename server/src/main/java/org/opensearch/indices/{recovery/RecoveryRequestTracker.java => replication/common/ReplicationRequestTracker.java} (96%) rename server/src/test/java/org/opensearch/indices/recovery/{RecoveryRequestTrackerTests.java => ReplicationRequestTrackerTests.java} (95%) diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index d12f7c6505f5d..8002dfe688def 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -157,9 +157,9 @@ import 
org.opensearch.indices.cluster.IndicesClusterStateService; import org.opensearch.indices.recovery.PeerRecoveryTargetService; import org.opensearch.indices.recovery.RecoveryFailedException; +import org.opensearch.indices.recovery.RecoveryListener; import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.recovery.RecoveryTarget; -import org.opensearch.indices.replication.common.ReplicationListener; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.Repository; import org.opensearch.rest.RestStatus; @@ -2877,7 +2877,7 @@ protected Engine getEngineOrNull() { public void startRecovery( RecoveryState recoveryState, PeerRecoveryTargetService recoveryTargetService, - ReplicationListener recoveryListener, + RecoveryListener recoveryListener, RepositoriesService repositoriesService, Consumer mappingUpdateConsumer, IndicesService indicesService @@ -2985,7 +2985,7 @@ public void startRecovery( private void executeRecovery( String reason, RecoveryState recoveryState, - ReplicationListener recoveryListener, + RecoveryListener recoveryListener, CheckedConsumer, Exception> action ) { markAsRecovering(reason, recoveryState); // mark the shard as recovering on the cluster state thread diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index f832afc59b469..1c7e45323813c 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -136,8 +136,8 @@ import org.opensearch.indices.fielddata.cache.IndicesFieldDataCache; import org.opensearch.indices.mapper.MapperRegistry; import org.opensearch.indices.recovery.PeerRecoveryTargetService; +import org.opensearch.indices.recovery.RecoveryListener; import org.opensearch.indices.recovery.RecoveryState; -import org.opensearch.indices.replication.common.ReplicationListener; import 
org.opensearch.node.Node; import org.opensearch.plugins.IndexStorePlugin; import org.opensearch.plugins.PluginsService; @@ -840,7 +840,7 @@ public synchronized void verifyIndexMetadata(IndexMetadata metadata, IndexMetada public IndexShard createShard( final ShardRouting shardRouting, final PeerRecoveryTargetService recoveryTargetService, - final ReplicationListener recoveryListener, + final RecoveryListener recoveryListener, final RepositoriesService repositoriesService, final Consumer onShardFailure, final Consumer globalCheckpointSyncer, diff --git a/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java b/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java index 766deb0eed82a..b2a4d93636ab1 100644 --- a/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java +++ b/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java @@ -35,7 +35,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; -import org.opensearch.OpenSearchException; import org.opensearch.ResourceAlreadyExistsException; import org.opensearch.action.ActionListener; import org.opensearch.cluster.ClusterChangedEvent; @@ -79,9 +78,8 @@ import org.opensearch.indices.IndicesService; import org.opensearch.indices.recovery.PeerRecoverySourceService; import org.opensearch.indices.recovery.PeerRecoveryTargetService; +import org.opensearch.indices.recovery.RecoveryListener; import org.opensearch.indices.recovery.RecoveryState; -import org.opensearch.indices.replication.common.ReplicationListener; -import org.opensearch.indices.replication.common.ReplicationState; import org.opensearch.repositories.RepositoriesService; import org.opensearch.search.SearchService; import org.opensearch.snapshots.SnapshotShardsService; @@ -206,6 +204,18 @@ public IndicesClusterStateService( this.sendRefreshMapping = 
settings.getAsBoolean("indices.cluster.send_refresh_mapping", true); } + public ShardStateAction getShardStateAction() { + return shardStateAction; + } + + public ClusterService getClusterService() { + return clusterService; + } + + public ActionListener getShardStateActionListener() { + return SHARD_STATE_ACTION_LISTENER; + } + @Override protected void doStart() { // Doesn't make sense to manage shards on non-master and non-data nodes @@ -626,7 +636,7 @@ private void createShard(DiscoveryNodes nodes, RoutingTable routingTable, ShardR indicesService.createShard( shardRouting, recoveryTargetService, - new RecoveryListener(shardRouting, primaryTerm), + new RecoveryListener(shardRouting, primaryTerm, this), repositoriesService, failedShardHandler, globalCheckpointSyncer, @@ -741,37 +751,8 @@ private static DiscoveryNode findSourceNodeForPeerRecovery( return sourceNode; } - private class RecoveryListener implements ReplicationListener { - - /** - * ShardRouting with which the shard was created - */ - private final ShardRouting shardRouting; - - /** - * Primary term with which the shard was created - */ - private final long primaryTerm; - - private RecoveryListener(final ShardRouting shardRouting, final long primaryTerm) { - this.shardRouting = shardRouting; - this.primaryTerm = primaryTerm; - } - - @Override - public void onDone(ReplicationState state) { - RecoveryState RecState = (RecoveryState) state; - shardStateAction.shardStarted(shardRouting, primaryTerm, "after " + RecState.getRecoverySource(), SHARD_STATE_ACTION_LISTENER); - } - - @Override - public void onFailure(ReplicationState state, OpenSearchException e, boolean sendShardFailure) { - handleRecoveryFailure(shardRouting, sendShardFailure, e); - } - } - // package-private for testing - synchronized void handleRecoveryFailure(ShardRouting shardRouting, boolean sendShardFailure, Exception failure) { + public synchronized void handleRecoveryFailure(ShardRouting shardRouting, boolean sendShardFailure, Exception 
failure) { failAndRemoveShard(shardRouting, sendShardFailure, "failed recovery", failure, clusterService.state()); } @@ -1007,7 +988,7 @@ U createIndex(IndexMetadata indexMetadata, List builtInIndex T createShard( ShardRouting shardRouting, PeerRecoveryTargetService recoveryTargetService, - ReplicationListener recoveryListener, + RecoveryListener recoveryListener, RepositoriesService repositoriesService, Consumer onShardFailure, Consumer globalCheckpointSyncer, diff --git a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java index 28e8d076cfc20..8663daf7d4698 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java +++ b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java @@ -37,10 +37,10 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.RateLimiter; +import org.opensearch.ExceptionsHelper; import org.opensearch.LegacyESVersion; import org.opensearch.OpenSearchException; import org.opensearch.OpenSearchTimeoutException; -import org.opensearch.ExceptionsHelper; import org.opensearch.action.ActionListener; import org.opensearch.action.ActionRunnable; import org.opensearch.action.support.ChannelActionListener; @@ -70,7 +70,6 @@ import org.opensearch.index.translog.Translog; import org.opensearch.index.translog.TranslogCorruptedException; import org.opensearch.indices.recovery.RecoveriesCollection.RecoveryRef; -import org.opensearch.indices.replication.common.ReplicationListener; import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.indices.replication.common.ReplicationTimer; import org.opensearch.tasks.Task; @@ -190,7 +189,7 @@ public void beforeIndexShardClosed(ShardId shardId, @Nullable IndexShard indexSh } } - public void 
startRecovery(final IndexShard indexShard, final DiscoveryNode sourceNode, final ReplicationListener listener) { + public void startRecovery(final IndexShard indexShard, final DiscoveryNode sourceNode, final RecoveryListener listener) { // create a new recovery status, and process... final long recoveryId = onGoingRecoveries.startRecovery(indexShard, sourceNode, listener, recoverySettings.activityTimeout()); // we fork off quickly here and go async but this is called from the cluster state applier thread too and that can cause diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveryListener.java b/server/src/main/java/org/opensearch/indices/recovery/RecoveryListener.java new file mode 100644 index 0000000000000..bc2fb8eec0050 --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoveryListener.java @@ -0,0 +1,62 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.indices.recovery; + +import org.opensearch.OpenSearchException; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.indices.cluster.IndicesClusterStateService; +import org.opensearch.indices.replication.common.ReplicationListener; +import org.opensearch.indices.replication.common.ReplicationState; + +/** + * Listener that runs on changes in Recovery state + * + * @opensearch.internal + */ +public class RecoveryListener implements ReplicationListener { + + /** + * ShardRouting with which the shard was created + */ + private final ShardRouting shardRouting; + + /** + * Primary term with which the shard was created + */ + private final long primaryTerm; + + private final IndicesClusterStateService indicesClusterStateService; + + public RecoveryListener( + final ShardRouting shardRouting, + final long primaryTerm, + IndicesClusterStateService indicesClusterStateService + ) { + this.shardRouting = shardRouting; + this.primaryTerm = primaryTerm; + this.indicesClusterStateService = indicesClusterStateService; + } + + @Override + public void onDone(ReplicationState state) { + RecoveryState RecState = (RecoveryState) state; + indicesClusterStateService.getShardStateAction() + .shardStarted( + shardRouting, + primaryTerm, + "after " + RecState.getRecoverySource(), + indicesClusterStateService.getShardStateActionListener() + ); + } + + @Override + public void onFailure(ReplicationState state, OpenSearchException e, boolean sendShardFailure) { + indicesClusterStateService.handleRecoveryFailure(shardRouting, sendShardFailure, e); + } +} diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationListener.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationListener.java index f0972176169bd..455061e7bb6b0 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationListener.java +++ 
b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationListener.java @@ -10,6 +10,11 @@ import org.opensearch.OpenSearchException; +/** + * Interface for listeners that run when there's a change in replication state + * + * @opensearch.internal + */ public interface ReplicationListener { void onDone(ReplicationState state); diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveryRequestTracker.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationRequestTracker.java similarity index 96% rename from server/src/main/java/org/opensearch/indices/recovery/RecoveryRequestTracker.java rename to server/src/main/java/org/opensearch/indices/replication/common/ReplicationRequestTracker.java index 71a7f2776f324..7ac31b5a49ea2 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoveryRequestTracker.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationRequestTracker.java @@ -30,7 +30,7 @@ * GitHub history for details. 
*/ -package org.opensearch.indices.recovery; +package org.opensearch.indices.replication.common; import org.opensearch.action.ActionListener; import org.opensearch.common.Nullable; @@ -45,11 +45,11 @@ import static org.opensearch.index.seqno.SequenceNumbers.NO_OPS_PERFORMED; /** - * Tracks recovery requests + * Tracks replication/recovery requests * * @opensearch.internal */ -public class RecoveryRequestTracker { +public class ReplicationRequestTracker { private final Map> ongoingRequests = Collections.synchronizedMap(new HashMap<>()); private final LocalCheckpointTracker checkpointTracker = new LocalCheckpointTracker(NO_OPS_PERFORMED, NO_OPS_PERFORMED); diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java index 2029dc85f2295..b4deb6936df5a 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java @@ -8,7 +8,12 @@ package org.opensearch.indices.replication.common; -public class ReplicationState { +/** + * Keeps track of different stages involved in replication + * + * @opensearch.internal + */ +public abstract class ReplicationState { protected ReplicationTimer timer; protected ReplicationLuceneIndex index; diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java index 3698d3c39f969..5e0ce9f10a1a4 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java @@ -19,18 +19,22 @@ import org.opensearch.index.store.Store; import org.opensearch.index.store.StoreFileMetadata; import org.opensearch.indices.recovery.MultiFileWriter; -import 
org.opensearch.indices.recovery.RecoveryRequestTracker; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; +/** + * Implemented to represent recovery or segment replication where the current node is the target of the process. + * + * @opensearch.internal + */ public abstract class ReplicationTarget extends AbstractRefCounted { private static final AtomicLong ID_GENERATOR = new AtomicLong(); // last time the target/status was accessed private volatile long lastAccessTime = System.nanoTime(); - private final RecoveryRequestTracker requestTracker = new RecoveryRequestTracker(); + private final ReplicationRequestTracker requestTracker = new ReplicationRequestTracker(); private final long id; protected final AtomicBoolean finished = new AtomicBoolean(); @@ -70,7 +74,7 @@ public long getId() { } /** - * return the last time this RecoveryStatus was used (based on System.nanoTime() + * return the last time this ReplicationStatus was used (based on System.nanoTime() */ public long lastAccessTime() { return lastAccessTime; @@ -113,14 +117,14 @@ public void writeFileChunk( } /** - * mark the current recovery as done + * mark the current replication as done */ public void markAsDone() { if (finished.compareAndSet(false, true)) { try { onDone(); } finally { - // release the initial reference. recovery files will be cleaned as soon as ref count goes to zero, potentially now + // release the initial reference. replication files will be cleaned as soon as ref count goes to zero, potentially now decRef(); } listener.onDone(state()); @@ -128,26 +132,26 @@ public void markAsDone() { } /** - * cancel the recovery. calling this method will clean temporary files and release the store + * cancel the replication. 
calling this method will clean temporary files and release the store * unless this object is in use (in which case it will be cleaned once all ongoing users call * {@link #decRef()} */ public void cancel(String reason) { if (finished.compareAndSet(false, true)) { try { - logger.debug("recovery canceled (reason: [{}])", reason); + logger.debug("replication cancelled (reason: [{}])", reason); onCancel(reason); } finally { - // release the initial reference. recovery files will be cleaned as soon as ref count goes to zero, potentially now + // release the initial reference. replication files will be cleaned as soon as ref count goes to zero, potentially now decRef(); } } } /** - * fail the recovery and call listener + * fail the replication and call listener * - * @param e exception that encapsulating the failure + * @param e exception that encapsulates the failure * @param sendShardFailure indicates whether to notify the master of the shard failure */ public void fail(OpenSearchException e, boolean sendShardFailure) { @@ -158,7 +162,7 @@ public void fail(OpenSearchException e, boolean sendShardFailure) { try { onFail(e, sendShardFailure); } finally { - // release the initial reference. recovery files will be cleaned as soon as ref count goes to zero, potentially now + // release the initial reference. 
replication files will be cleaned as soon as ref count goes to zero, potentially now decRef(); } } diff --git a/server/src/test/java/org/opensearch/indices/cluster/AbstractIndicesClusterStateServiceTestCase.java b/server/src/test/java/org/opensearch/indices/cluster/AbstractIndicesClusterStateServiceTestCase.java index 6ba75d504dab4..97cb1dc341b13 100644 --- a/server/src/test/java/org/opensearch/indices/cluster/AbstractIndicesClusterStateServiceTestCase.java +++ b/server/src/test/java/org/opensearch/indices/cluster/AbstractIndicesClusterStateServiceTestCase.java @@ -32,6 +32,7 @@ package org.opensearch.indices.cluster; +import org.junit.Before; import org.opensearch.action.ActionListener; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.metadata.IndexMetadata; @@ -56,11 +57,10 @@ import org.opensearch.indices.cluster.IndicesClusterStateService.AllocatedIndices; import org.opensearch.indices.cluster.IndicesClusterStateService.Shard; import org.opensearch.indices.recovery.PeerRecoveryTargetService; +import org.opensearch.indices.recovery.RecoveryListener; import org.opensearch.indices.recovery.RecoveryState; -import org.opensearch.indices.replication.common.ReplicationListener; import org.opensearch.repositories.RepositoriesService; import org.opensearch.test.OpenSearchTestCase; -import org.junit.Before; import java.io.IOException; import java.util.HashMap; @@ -74,9 +74,9 @@ import static java.util.Collections.emptyMap; import static java.util.Collections.unmodifiableMap; -import static org.opensearch.common.collect.MapBuilder.newMapBuilder; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.opensearch.common.collect.MapBuilder.newMapBuilder; /** * Abstract base class for tests against {@link IndicesClusterStateService} @@ -254,7 +254,7 @@ public MockIndexService indexService(Index index) { public MockIndexShard createShard( final ShardRouting shardRouting, final 
PeerRecoveryTargetService recoveryTargetService, - final ReplicationListener recoveryListener, + final RecoveryListener recoveryListener, final RepositoriesService repositoriesService, final Consumer onShardFailure, final Consumer globalCheckpointSyncer, diff --git a/server/src/test/java/org/opensearch/indices/recovery/RecoveryRequestTrackerTests.java b/server/src/test/java/org/opensearch/indices/recovery/ReplicationRequestTrackerTests.java similarity index 95% rename from server/src/test/java/org/opensearch/indices/recovery/RecoveryRequestTrackerTests.java rename to server/src/test/java/org/opensearch/indices/recovery/ReplicationRequestTrackerTests.java index 931d36f587db8..afad385deabe4 100644 --- a/server/src/test/java/org/opensearch/indices/recovery/RecoveryRequestTrackerTests.java +++ b/server/src/test/java/org/opensearch/indices/recovery/ReplicationRequestTrackerTests.java @@ -36,6 +36,7 @@ import org.opensearch.action.ActionListener; import org.opensearch.action.support.PlainActionFuture; import org.opensearch.common.util.concurrent.ConcurrentCollections; +import org.opensearch.indices.replication.common.ReplicationRequestTracker; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.TestThreadPool; @@ -44,7 +45,7 @@ import java.util.Set; import java.util.concurrent.ConcurrentMap; -public class RecoveryRequestTrackerTests extends OpenSearchTestCase { +public class ReplicationRequestTrackerTests extends OpenSearchTestCase { private TestThreadPool threadPool; @@ -64,7 +65,7 @@ public void testIdempotencyIsEnforced() { Set seqNosReturned = ConcurrentCollections.newConcurrentSet(); ConcurrentMap>> seqToResult = ConcurrentCollections.newConcurrentMap(); - RecoveryRequestTracker requestTracker = new RecoveryRequestTracker(); + ReplicationRequestTracker requestTracker = new ReplicationRequestTracker(); int numberOfRequests = randomIntBetween(100, 200); for (int j = 0; j < numberOfRequests; ++j) { From 
d7fc75667d1c83e8460318c7d664aef51eacbd37 Mon Sep 17 00:00:00 2001 From: Poojita Raj Date: Fri, 13 May 2022 09:38:00 -0700 Subject: [PATCH 04/10] fix javadoc Signed-off-by: Poojita Raj --- .../indices/replication/common/ReplicationState.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java index b4deb6936df5a..02113f71f41ab 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java @@ -18,6 +18,11 @@ public abstract class ReplicationState { protected ReplicationTimer timer; protected ReplicationLuceneIndex index; + /** + * The stage of the recovery state + * + * @opensearch.internal + */ public enum Stage { INIT((byte) 0), From b84c184853609b88a2d5f5a95a236425f5104426 Mon Sep 17 00:00:00 2001 From: Poojita Raj Date: Tue, 17 May 2022 11:11:03 -0700 Subject: [PATCH 05/10] review changes Signed-off-by: Poojita Raj --- .../cluster/IndicesClusterStateService.java | 18 ++++++------------ .../indices/recovery/RecoveriesCollection.java | 2 +- .../indices/recovery/RecoveryListener.java | 9 +-------- .../replication/common/ReplicationTarget.java | 4 +++- 4 files changed, 11 insertions(+), 22 deletions(-) diff --git a/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java b/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java index b2a4d93636ab1..d1623df156593 100644 --- a/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java +++ b/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java @@ -80,6 +80,7 @@ import org.opensearch.indices.recovery.PeerRecoveryTargetService; import org.opensearch.indices.recovery.RecoveryListener; import 
org.opensearch.indices.recovery.RecoveryState; +import org.opensearch.indices.replication.common.ReplicationState; import org.opensearch.repositories.RepositoriesService; import org.opensearch.search.SearchService; import org.opensearch.snapshots.SnapshotShardsService; @@ -204,18 +205,6 @@ public IndicesClusterStateService( this.sendRefreshMapping = settings.getAsBoolean("indices.cluster.send_refresh_mapping", true); } - public ShardStateAction getShardStateAction() { - return shardStateAction; - } - - public ClusterService getClusterService() { - return clusterService; - } - - public ActionListener getShardStateActionListener() { - return SHARD_STATE_ACTION_LISTENER; - } - @Override protected void doStart() { // Doesn't make sense to manage shards on non-master and non-data nodes @@ -756,6 +745,11 @@ public synchronized void handleRecoveryFailure(ShardRouting shardRouting, boolea failAndRemoveShard(shardRouting, sendShardFailure, "failed recovery", failure, clusterService.state()); } + public void handleRecoveryDone(ReplicationState state, ShardRouting shardRouting, long primaryTerm) { + RecoveryState RecState = (RecoveryState) state; + shardStateAction.shardStarted(shardRouting, primaryTerm, "after " + RecState.getRecoverySource(), SHARD_STATE_ACTION_LISTENER); + } + private void failAndRemoveShard( ShardRouting shardRouting, boolean sendShardFailure, diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveriesCollection.java b/server/src/main/java/org/opensearch/indices/recovery/RecoveriesCollection.java index 3e5ab94c317da..26e9edb842614 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoveriesCollection.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoveriesCollection.java @@ -85,7 +85,7 @@ public long startRecovery(IndexShard indexShard, DiscoveryNode sourceNode, Repli private void startRecoveryInternal(RecoveryTarget recoveryTarget, TimeValue activityTimeout) { RecoveryTarget existingTarget = 
onGoingRecoveries.putIfAbsent(recoveryTarget.getId(), recoveryTarget); - assert existingTarget == null : "found two RecoveryStatus instances with the same id"; + assert existingTarget == null : "found two RecoveryTarget instances with the same id"; logger.trace( "{} started recovery from {}, id [{}]", recoveryTarget.indexShard().shardId(), diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveryListener.java b/server/src/main/java/org/opensearch/indices/recovery/RecoveryListener.java index bc2fb8eec0050..b93c054ffa4bf 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoveryListener.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoveryListener.java @@ -45,14 +45,7 @@ public RecoveryListener( @Override public void onDone(ReplicationState state) { - RecoveryState RecState = (RecoveryState) state; - indicesClusterStateService.getShardStateAction() - .shardStarted( - shardRouting, - primaryTerm, - "after " + RecState.getRecoverySource(), - indicesClusterStateService.getShardStateActionListener() - ); + indicesClusterStateService.handleRecoveryDone(state, shardRouting, primaryTerm); } @Override diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java index 5e0ce9f10a1a4..1accfc0a2b6f9 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java @@ -171,7 +171,9 @@ public void fail(OpenSearchException e, boolean sendShardFailure) { protected void ensureRefCount() { if (refCount() <= 0) { - throw new OpenSearchException("RecoveryStatus is used but it's refcount is 0. Probably a mismatch between incRef/decRef calls"); + throw new OpenSearchException( + "ReplicationTarget is used but it's refcount is 0. 
Probably a mismatch between incRef/decRef calls" + ); } } From 8447624ab6f659e1f0c9bd72053a21450a6b6f34 Mon Sep 17 00:00:00 2001 From: Poojita Raj Date: Wed, 18 May 2022 10:14:19 -0700 Subject: [PATCH 06/10] Refactoring the hierarchy relationship between repl and recovery Signed-off-by: Poojita Raj --- .../gateway/RecoveryFromGatewayIT.java | 2 +- .../indices/recovery/IndexRecoveryIT.java | 4 +- .../cluster/metadata/IndexMetadata.java | 2 +- .../org/opensearch/index/IndexSettings.java | 2 +- .../opensearch/index/shard/StoreRecovery.java | 2 +- .../cluster/IndicesClusterStateService.java | 4 +- .../common/ReplicationLuceneIndex.java | 2 +- .../common/ReplicationRequestTracker.java | 2 +- .../common/ReplicationTimer.java | 2 +- .../common/ReplicationType.java | 2 +- .../ShardTarget.java} | 75 ++-- .../indices/common/ShardTargetCollection.java | 319 +++++++++++++++++ .../ShardTargetListener.java} | 8 +- .../indices/common/ShardTargetState.java | 18 + .../common/package-info.java | 2 +- .../indices/recovery/MultiFileWriter.java | 2 +- .../recovery/PeerRecoveryTargetService.java | 53 +-- .../recovery/RecoveriesCollection.java | 333 ------------------ .../indices/recovery/RecoveryListener.java | 10 +- .../indices/recovery/RecoveryState.java | 76 +++- .../indices/recovery/RecoveryTarget.java | 60 ++-- .../replication/common/ReplicationState.java | 96 ----- .../RecoveryDuringReplicationTests.java | 4 +- .../index/shard/IndexShardTests.java | 2 +- .../index/shard/StoreRecoveryTests.java | 2 +- .../recovery/RecoverySourceHandlerTests.java | 2 +- .../indices/recovery/RecoveryTargetTests.java | 8 +- .../indices/recovery/RecoveryTests.java | 10 +- .../ReplicationRequestTrackerTests.java | 2 +- .../recovery/RecoveriesCollectionTests.java | 56 +-- .../repositories/fs/FsRepositoryTests.java | 2 +- .../action/cat/RestRecoveryActionTests.java | 4 +- .../index/shard/IndexShardTestCase.java | 10 +- 33 files changed, 578 insertions(+), 600 deletions(-) rename 
server/src/main/java/org/opensearch/indices/{replication => }/common/ReplicationLuceneIndex.java (99%) rename server/src/main/java/org/opensearch/indices/{replication => }/common/ReplicationRequestTracker.java (98%) rename server/src/main/java/org/opensearch/indices/{replication => }/common/ReplicationTimer.java (98%) rename server/src/main/java/org/opensearch/indices/{replication => }/common/ReplicationType.java (94%) rename server/src/main/java/org/opensearch/indices/{replication/common/ReplicationTarget.java => common/ShardTarget.java} (70%) create mode 100644 server/src/main/java/org/opensearch/indices/common/ShardTargetCollection.java rename server/src/main/java/org/opensearch/indices/{replication/common/ReplicationListener.java => common/ShardTargetListener.java} (67%) create mode 100644 server/src/main/java/org/opensearch/indices/common/ShardTargetState.java rename server/src/main/java/org/opensearch/indices/{replication => }/common/package-info.java (83%) delete mode 100644 server/src/main/java/org/opensearch/indices/recovery/RecoveriesCollection.java delete mode 100644 server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java index 79ffe12d13129..7d43ae06cb92e 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java @@ -62,7 +62,7 @@ import org.opensearch.index.shard.ShardPath; import org.opensearch.indices.IndicesService; import org.opensearch.indices.recovery.RecoveryState; -import org.opensearch.indices.replication.common.ReplicationLuceneIndex; +import org.opensearch.indices.common.ReplicationLuceneIndex; import org.opensearch.plugins.Plugin; import org.opensearch.test.OpenSearchIntegTestCase; import 
org.opensearch.test.OpenSearchIntegTestCase.ClusterScope; diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java index cc0411991f317..7d90f2d8fac71 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java @@ -101,8 +101,8 @@ import org.opensearch.indices.IndicesService; import org.opensearch.indices.NodeIndicesStats; import org.opensearch.indices.analysis.AnalysisModule; -import org.opensearch.indices.replication.common.ReplicationState.Stage; -import org.opensearch.indices.replication.common.ReplicationLuceneIndex; +import org.opensearch.indices.common.ReplicationLuceneIndex; +import org.opensearch.indices.recovery.RecoveryState.Stage; import org.opensearch.node.NodeClosedException; import org.opensearch.node.RecoverySettingsChunkSizePlugin; import org.opensearch.plugins.AnalysisPlugin; diff --git a/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java index ec70e642ababc..696599c7f57b9 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java @@ -70,7 +70,7 @@ import org.opensearch.index.mapper.MapperService; import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.shard.ShardId; -import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.indices.common.ReplicationType; import org.opensearch.rest.RestStatus; import java.io.IOException; diff --git a/server/src/main/java/org/opensearch/index/IndexSettings.java b/server/src/main/java/org/opensearch/index/IndexSettings.java index e40acb94ee498..177e490beb3bf 100644 --- 
a/server/src/main/java/org/opensearch/index/IndexSettings.java +++ b/server/src/main/java/org/opensearch/index/IndexSettings.java @@ -46,7 +46,7 @@ import org.opensearch.common.unit.ByteSizeValue; import org.opensearch.common.unit.TimeValue; import org.opensearch.index.translog.Translog; -import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.indices.common.ReplicationType; import org.opensearch.ingest.IngestService; import org.opensearch.node.Node; diff --git a/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java b/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java index 0499140237dbd..b68727b07990f 100644 --- a/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java +++ b/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java @@ -62,7 +62,7 @@ import org.opensearch.index.store.Store; import org.opensearch.index.translog.Translog; import org.opensearch.indices.recovery.RecoveryState; -import org.opensearch.indices.replication.common.ReplicationLuceneIndex; +import org.opensearch.indices.common.ReplicationLuceneIndex; import org.opensearch.repositories.IndexId; import org.opensearch.repositories.Repository; diff --git a/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java b/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java index d1623df156593..db6f5c4fd9d21 100644 --- a/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java +++ b/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java @@ -80,7 +80,7 @@ import org.opensearch.indices.recovery.PeerRecoveryTargetService; import org.opensearch.indices.recovery.RecoveryListener; import org.opensearch.indices.recovery.RecoveryState; -import org.opensearch.indices.replication.common.ReplicationState; +import org.opensearch.indices.common.ShardTargetState; import org.opensearch.repositories.RepositoriesService; import 
org.opensearch.search.SearchService; import org.opensearch.snapshots.SnapshotShardsService; @@ -745,7 +745,7 @@ public synchronized void handleRecoveryFailure(ShardRouting shardRouting, boolea failAndRemoveShard(shardRouting, sendShardFailure, "failed recovery", failure, clusterService.state()); } - public void handleRecoveryDone(ReplicationState state, ShardRouting shardRouting, long primaryTerm) { + public void handleRecoveryDone(ShardTargetState state, ShardRouting shardRouting, long primaryTerm) { RecoveryState RecState = (RecoveryState) state; shardStateAction.shardStarted(shardRouting, primaryTerm, "after " + RecState.getRecoverySource(), SHARD_STATE_ACTION_LISTENER); } diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationLuceneIndex.java b/server/src/main/java/org/opensearch/indices/common/ReplicationLuceneIndex.java similarity index 99% rename from server/src/main/java/org/opensearch/indices/replication/common/ReplicationLuceneIndex.java rename to server/src/main/java/org/opensearch/indices/common/ReplicationLuceneIndex.java index d1066b867f982..c9566ee18b7a5 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationLuceneIndex.java +++ b/server/src/main/java/org/opensearch/indices/common/ReplicationLuceneIndex.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.indices.replication.common; +package org.opensearch.indices.common; import org.opensearch.common.Strings; import org.opensearch.common.io.stream.StreamInput; diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationRequestTracker.java b/server/src/main/java/org/opensearch/indices/common/ReplicationRequestTracker.java similarity index 98% rename from server/src/main/java/org/opensearch/indices/replication/common/ReplicationRequestTracker.java rename to server/src/main/java/org/opensearch/indices/common/ReplicationRequestTracker.java index 7ac31b5a49ea2..07fd606ee083c 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationRequestTracker.java +++ b/server/src/main/java/org/opensearch/indices/common/ReplicationRequestTracker.java @@ -30,7 +30,7 @@ * GitHub history for details. */ -package org.opensearch.indices.replication.common; +package org.opensearch.indices.common; import org.opensearch.action.ActionListener; import org.opensearch.common.Nullable; diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTimer.java b/server/src/main/java/org/opensearch/indices/common/ReplicationTimer.java similarity index 98% rename from server/src/main/java/org/opensearch/indices/replication/common/ReplicationTimer.java rename to server/src/main/java/org/opensearch/indices/common/ReplicationTimer.java index eeb0ba8bb2c00..241cb17039835 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTimer.java +++ b/server/src/main/java/org/opensearch/indices/common/ReplicationTimer.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.indices.replication.common; +package org.opensearch.indices.common; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationType.java b/server/src/main/java/org/opensearch/indices/common/ReplicationType.java similarity index 94% rename from server/src/main/java/org/opensearch/indices/replication/common/ReplicationType.java rename to server/src/main/java/org/opensearch/indices/common/ReplicationType.java index ac4840e390c0b..d0499b64a328e 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationType.java +++ b/server/src/main/java/org/opensearch/indices/common/ReplicationType.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.indices.replication.common; +package org.opensearch.indices.common; /** * Enumerates the types of replication strategies supported by OpenSearch. diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java b/server/src/main/java/org/opensearch/indices/common/ShardTarget.java similarity index 70% rename from server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java rename to server/src/main/java/org/opensearch/indices/common/ShardTarget.java index 1accfc0a2b6f9..e287a2fae489a 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java +++ b/server/src/main/java/org/opensearch/indices/common/ShardTarget.java @@ -6,29 +6,28 @@ * compatible open source license. 
*/ -package org.opensearch.indices.replication.common; +package org.opensearch.indices.common; import org.apache.logging.log4j.Logger; +import org.opensearch.ExceptionsHelper; import org.opensearch.OpenSearchException; import org.opensearch.action.ActionListener; import org.opensearch.common.UUIDs; -import org.opensearch.common.bytes.BytesReference; import org.opensearch.common.logging.Loggers; +import org.opensearch.common.util.CancellableThreads; import org.opensearch.common.util.concurrent.AbstractRefCounted; import org.opensearch.index.shard.IndexShard; -import org.opensearch.index.store.Store; -import org.opensearch.index.store.StoreFileMetadata; -import org.opensearch.indices.recovery.MultiFileWriter; +import java.io.IOException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; /** - * Implemented to represent recovery or segment replication where the current node is the target of the process. + * Represents the target of an operation performed on a shard * * @opensearch.internal */ -public abstract class ReplicationTarget extends AbstractRefCounted { +public abstract class ShardTarget extends AbstractRefCounted { private static final AtomicLong ID_GENERATOR = new AtomicLong(); @@ -39,10 +38,9 @@ public abstract class ReplicationTarget extends AbstractRefCounted { protected final AtomicBoolean finished = new AtomicBoolean(); protected final IndexShard indexShard; - protected final Store store; - protected final ReplicationListener listener; - protected final MultiFileWriter multiFileWriter; + protected final ShardTargetListener listener; protected final Logger logger; + protected final CancellableThreads cancellableThreads; protected final ReplicationLuceneIndex recoveryStateIndex; protected abstract String getPrefix(); @@ -51,28 +49,40 @@ public abstract class ReplicationTarget extends AbstractRefCounted { protected abstract void onCancel(String reason); - protected abstract void onFail(OpenSearchException e, 
boolean sendShardFailure); + public abstract ShardTargetState state(); - public abstract ReplicationState state(); + public abstract ShardTarget retryCopy(); - public ReplicationTarget(String name, IndexShard indexShard, ReplicationLuceneIndex recoveryStateIndex, ReplicationListener listener) { + public abstract String description(); + + public ShardTargetListener getListener() { + return listener; + } + + public CancellableThreads cancellableThreads() { + return cancellableThreads; + } + + public abstract void notifyListener(Exception e, boolean sendShardFailure); + + public ShardTarget(String name, IndexShard indexShard, ReplicationLuceneIndex recoveryStateIndex, ShardTargetListener listener) { super(name); this.logger = Loggers.getLogger(getClass(), indexShard.shardId()); this.listener = listener; this.id = ID_GENERATOR.incrementAndGet(); this.recoveryStateIndex = recoveryStateIndex; this.indexShard = indexShard; - this.store = indexShard.store(); final String tempFilePrefix = getPrefix() + UUIDs.randomBase64UUID() + "."; - this.multiFileWriter = new MultiFileWriter(indexShard.store(), recoveryStateIndex, tempFilePrefix, logger, this::ensureRefCount); // make sure the store is not released until we are done. 
- store.incRef(); + this.cancellableThreads = new CancellableThreads(); } public long getId() { return id; } + public abstract boolean resetRecovery(CancellableThreads newTargetCancellableThreads) throws IOException; + /** * return the last time this ReplicationStatus was used (based on System.nanoTime() */ @@ -96,26 +106,6 @@ public IndexShard indexShard() { return indexShard; } - public Store store() { - ensureRefCount(); - return store; - } - - public void writeFileChunk( - StoreFileMetadata fileMetadata, - long position, - BytesReference content, - boolean lastChunk, - ActionListener actionListener - ) { - try { - multiFileWriter.writeFileChunk(fileMetadata, position, content, lastChunk); - actionListener.onResponse(null); - } catch (Exception e) { - actionListener.onFailure(e); - } - } - /** * mark the current replication as done */ @@ -157,10 +147,10 @@ public void cancel(String reason) { public void fail(OpenSearchException e, boolean sendShardFailure) { if (finished.compareAndSet(false, true)) { try { - listener.onFailure(state(), e, sendShardFailure); + notifyListener(e, sendShardFailure); } finally { try { - onFail(e, sendShardFailure); + cancellableThreads.cancel("failed" + description() + "[" + ExceptionsHelper.stackTrace(e) + "]"); } finally { // release the initial reference. replication files will be cleaned as soon as ref count goes to zero, potentially now decRef(); @@ -177,13 +167,4 @@ protected void ensureRefCount() { } } - @Override - protected void closeInternal() { - try { - multiFileWriter.close(); - } finally { - // free store. 
increment happens in constructor - store.decRef(); - } - } } diff --git a/server/src/main/java/org/opensearch/indices/common/ShardTargetCollection.java b/server/src/main/java/org/opensearch/indices/common/ShardTargetCollection.java new file mode 100644 index 0000000000000..523155e1f0d93 --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/common/ShardTargetCollection.java @@ -0,0 +1,319 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ */ + +package org.opensearch.indices.common; + +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.OpenSearchException; +import org.opensearch.OpenSearchTimeoutException; +import org.opensearch.common.concurrent.AutoCloseableRefCounted; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.concurrent.AbstractRunnable; +import org.opensearch.common.util.concurrent.ConcurrentCollections; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.index.shard.IndexShardClosedException; +import org.opensearch.index.shard.ShardId; +import org.opensearch.threadpool.ThreadPool; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.concurrent.ConcurrentMap; + +/** + * This class holds a collection of all on going events on the current node (i.e., the node is the target node + * of those events). The class is used to guarantee concurrent semantics such that once a recovery was done/cancelled/failed + * no other thread will be able to find it. Last, the {@link ShardTargetRef} inner class verifies that recovery temporary files + * and store will only be cleared once on going usage is finished. + * + * @opensearch.internal + */ +public class ShardTargetCollection { + + /** This is the single source of truth for ongoing target events. If it's not here, it was canceled or done */ + private final ConcurrentMap onGoingTargetEvents = ConcurrentCollections.newConcurrentMap(); + + private final Logger logger; + private final ThreadPool threadPool; + + public ShardTargetCollection(Logger logger, ThreadPool threadPool) { + this.logger = logger; + this.threadPool = threadPool; + } + + /** + * Starts a new recovery for the given target + * + * @return the id of the new recovery. 
+ */ + public long start(T target, TimeValue activityTimeout) { + startInternal(target, activityTimeout); + return target.getId(); + } + + private void startInternal(T target, TimeValue activityTimeout) { + T existingTarget = onGoingTargetEvents.putIfAbsent(target.getId(), target); + assert existingTarget == null : "found two Target instances with the same id"; + logger.trace( + "started {}", + target.description() + ); + threadPool.schedule( + new ShardTargetMonitor(target.getId(), target.lastAccessTime(), activityTimeout), + activityTimeout, + ThreadPool.Names.GENERIC + ); + } + + /** + * Resets the recovery and performs a recovery restart on the currently recovering index shard + * + * @see IndexShard#performRecoveryRestart() + * @return newly created RecoveryTarget + */ + @SuppressWarnings(value = "unchecked") + public T resetRecovery(final long recoveryId, final TimeValue activityTimeout) { + T oldTarget = null; + final T newTarget; + + try { + synchronized (onGoingTargetEvents) { + // swap recovery targets in a synchronized block to ensure that the newly added recovery target is picked up by + // cancelRecoveriesForShard whenever the old recovery target is picked up + oldTarget = onGoingTargetEvents.remove(recoveryId); + if (oldTarget == null) { + return null; + } + + newTarget = (T) oldTarget.retryCopy(); + startInternal(newTarget, activityTimeout); + } + + // Closes the current recovery target + boolean successfulReset = oldTarget.resetRecovery(newTarget.cancellableThreads()); + if (successfulReset) { + logger.trace( + "restarted {}, previous id [{}]", + newTarget.description(), + oldTarget.getId() + ); + return newTarget; + } else { + logger.trace( + "{} could not be reset as it is already cancelled, previous id [{}]", + newTarget.description(), + oldTarget.getId() + ); + cancel(newTarget.getId(), "recovery cancelled during reset"); + return null; + } + } catch (Exception e) { + // fail shard to be safe + assert oldTarget != null; + 
oldTarget.notifyListener(e, true); + return null; + } + } + + public T getTarget(long id) { + return onGoingTargetEvents.get(id); + } + + /** + * gets the {@link ShardTarget } for a given id. The ShardTarget returned has it's ref count already incremented + * to make sure it's safe to use. However, you must call {@link ShardTarget#decRef()} when you are done with it, typically + * by using this method in a try-with-resources clause. + *

+ * Returns null if recovery is not found + */ + public ShardTargetRef get(long id) { + T status = onGoingTargetEvents.get(id); + if (status != null && status.tryIncRef()) { + return new ShardTargetRef(status); + } + return null; + } + + /** Similar to {@link #get(long)} but throws an exception if no target is found */ + public ShardTargetRef getSafe(long id, ShardId shardId) { + ShardTargetRef ref = get(id); + if (ref == null) { + throw new IndexShardClosedException(shardId); + } + assert ref.get().indexShard().shardId().equals(shardId); + return ref; + } + + /** cancel the target with the given id (if found) and remove it from the target collection */ + public boolean cancel(long id, String reason) { + T removed = onGoingTargetEvents.remove(id); + boolean cancelled = false; + if (removed != null) { + logger.trace( + "canceled {} (reason [{}])", + removed.description(), + reason + ); + removed.cancel(reason); + cancelled = true; + } + return cancelled; + } + + /** + * fail the target with the given id (if found) and remove it from the target collection + * + * @param id id of the target to fail + * @param e exception with reason for the failure + * @param sendShardFailure true if a shard failed message should be sent to the master + */ + public void fail(long id, OpenSearchException e, boolean sendShardFailure) { + T removed = onGoingTargetEvents.remove(id); + if (removed != null) { + logger.trace( + "failing {}. 
Send shard failure: [{}]", + removed.description(), + sendShardFailure + ); + removed.fail(e, sendShardFailure); + } + } + + /** mark the target with the given id as done (if found) */ + public void markAsDone(long id) { + T removed = onGoingTargetEvents.remove(id); + if (removed != null) { + logger.trace( + "Marking {} as done", + removed.description() + ); + removed.markAsDone(); + } + } + + /** the number of ongoing recoveries */ + public int size() { + return onGoingTargetEvents.size(); + } + + /** + * cancel all ongoing targets for the given shard + * + * @param reason reason for cancellation + * @param shardId shardId for which to cancel targets + * @return true if a target was cancelled + */ + public boolean cancelForShard(ShardId shardId, String reason) { + boolean cancelled = false; + List matchedTargets = new ArrayList<>(); + synchronized (onGoingTargetEvents) { + for (Iterator it = onGoingTargetEvents.values().iterator(); it.hasNext();) { + T status = it.next(); + if (status.indexShard().shardId().equals(shardId)) { + matchedTargets.add(status); + it.remove(); + } + } + } + for (T removed : matchedTargets) { + logger.trace( + "canceled {} (reason [{}])", + removed.description(), + reason + ); + removed.cancel(reason); + cancelled = true; + } + return cancelled; + } + + /** + * a reference to {@link ShardTarget}, which implements {@link AutoCloseable}. closing the reference + * causes {@link ShardTarget#decRef()} to be called. This makes sure that the underlying resources + * will not be freed until {@link ShardTargetRef#close()} is called. 
+ * + * @opensearch.internal + */ + public static class ShardTargetRef extends AutoCloseableRefCounted { + + /** + * Important: {@link ShardTarget#tryIncRef()} should + * be *successfully* called on status before + */ + public ShardTargetRef(T status) { + super(status); + status.setLastAccessTime(); + } + } + + private class ShardTargetMonitor extends AbstractRunnable { + private final long id; + private final TimeValue checkInterval; + + private volatile long lastSeenAccessTime; + + private ShardTargetMonitor(long id, long lastSeenAccessTime, TimeValue checkInterval) { + this.id = id; + this.checkInterval = checkInterval; + this.lastSeenAccessTime = lastSeenAccessTime; + } + + @Override + public void onFailure(Exception e) { + logger.error(() -> new ParameterizedMessage("unexpected error while monitoring [{}]", id), e); + } + + @Override + protected void doRun() throws Exception { + T status = onGoingTargetEvents.get(id); + if (status == null) { + logger.trace("[monitor] no status found for [{}], shutting down", id); + return; + } + long accessTime = status.lastAccessTime(); + if (accessTime == lastSeenAccessTime) { + String message = "no activity after [" + checkInterval + "]"; + fail( + id, + new OpenSearchTimeoutException(message), + true // to be safe, we don't know what got stuck + ); + return; + } + lastSeenAccessTime = accessTime; + logger.trace("[monitor] rescheduling check for [{}]. 
last access time is [{}]", id, lastSeenAccessTime); + threadPool.schedule(this, checkInterval, ThreadPool.Names.GENERIC); + } + } + +} diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationListener.java b/server/src/main/java/org/opensearch/indices/common/ShardTargetListener.java similarity index 67% rename from server/src/main/java/org/opensearch/indices/replication/common/ReplicationListener.java rename to server/src/main/java/org/opensearch/indices/common/ShardTargetListener.java index 455061e7bb6b0..56ee2e6271bb8 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationListener.java +++ b/server/src/main/java/org/opensearch/indices/common/ShardTargetListener.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.indices.replication.common; +package org.opensearch.indices.common; import org.opensearch.OpenSearchException; @@ -15,9 +15,9 @@ * * @opensearch.internal */ -public interface ReplicationListener { +public interface ShardTargetListener { - void onDone(ReplicationState state); + void onDone(ShardTargetState state); - void onFailure(ReplicationState state, OpenSearchException e, boolean sendShardFailure); + void onFailure(ShardTargetState state, OpenSearchException e, boolean sendShardFailure); } diff --git a/server/src/main/java/org/opensearch/indices/common/ShardTargetState.java b/server/src/main/java/org/opensearch/indices/common/ShardTargetState.java new file mode 100644 index 0000000000000..a0a70891cd143 --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/common/ShardTargetState.java @@ -0,0 +1,18 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.indices.common; + +/** + * Represents a state object used to track copying of segments from an external source + * + * @opensearch.internal + */ +public interface ShardTargetState { + +} diff --git a/server/src/main/java/org/opensearch/indices/replication/common/package-info.java b/server/src/main/java/org/opensearch/indices/common/package-info.java similarity index 83% rename from server/src/main/java/org/opensearch/indices/replication/common/package-info.java rename to server/src/main/java/org/opensearch/indices/common/package-info.java index 77a8422f83829..32da849ed9f34 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/package-info.java +++ b/server/src/main/java/org/opensearch/indices/common/package-info.java @@ -7,4 +7,4 @@ */ /** Foundation classes for Replication Implementation. */ -package org.opensearch.indices.replication.common; +package org.opensearch.indices.common; diff --git a/server/src/main/java/org/opensearch/indices/recovery/MultiFileWriter.java b/server/src/main/java/org/opensearch/indices/recovery/MultiFileWriter.java index 3509615052707..b9aeba4d45892 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/MultiFileWriter.java +++ b/server/src/main/java/org/opensearch/indices/recovery/MultiFileWriter.java @@ -44,7 +44,7 @@ import org.opensearch.common.util.concurrent.ConcurrentCollections; import org.opensearch.index.store.Store; import org.opensearch.index.store.StoreFileMetadata; -import org.opensearch.indices.replication.common.ReplicationLuceneIndex; +import org.opensearch.indices.common.ReplicationLuceneIndex; import org.opensearch.transport.Transports; import java.io.IOException; diff --git a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java index 8663daf7d4698..c78cdc25ec5cd 100644 --- 
a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java +++ b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java @@ -69,9 +69,10 @@ import org.opensearch.index.store.Store; import org.opensearch.index.translog.Translog; import org.opensearch.index.translog.TranslogCorruptedException; -import org.opensearch.indices.recovery.RecoveriesCollection.RecoveryRef; -import org.opensearch.indices.replication.common.ReplicationLuceneIndex; -import org.opensearch.indices.replication.common.ReplicationTimer; +import org.opensearch.indices.common.ShardTargetCollection; +import org.opensearch.indices.common.ShardTargetCollection.ShardTargetRef; +import org.opensearch.indices.common.ReplicationLuceneIndex; +import org.opensearch.indices.common.ReplicationTimer; import org.opensearch.tasks.Task; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.ConnectTransportException; @@ -124,7 +125,7 @@ public static class Actions { private final RecoverySettings recoverySettings; private final ClusterService clusterService; - private final RecoveriesCollection onGoingRecoveries; + private final ShardTargetCollection onGoingRecoveries; public PeerRecoveryTargetService( ThreadPool threadPool, @@ -136,7 +137,7 @@ public PeerRecoveryTargetService( this.transportService = transportService; this.recoverySettings = recoverySettings; this.clusterService = clusterService; - this.onGoingRecoveries = new RecoveriesCollection(logger, threadPool); + this.onGoingRecoveries = new ShardTargetCollection<>(logger, threadPool); transportService.registerRequestHandler( Actions.FILES_INFO, @@ -185,13 +186,13 @@ public PeerRecoveryTargetService( @Override public void beforeIndexShardClosed(ShardId shardId, @Nullable IndexShard indexShard, Settings indexSettings) { if (indexShard != null) { - onGoingRecoveries.cancelRecoveriesForShard(shardId, "shard closed"); + onGoingRecoveries.cancelForShard(shardId, "shard closed"); } } 
public void startRecovery(final IndexShard indexShard, final DiscoveryNode sourceNode, final RecoveryListener listener) { // create a new recovery status, and process... - final long recoveryId = onGoingRecoveries.startRecovery(indexShard, sourceNode, listener, recoverySettings.activityTimeout()); + final long recoveryId = onGoingRecoveries.start(new RecoveryTarget(indexShard, sourceNode, listener), recoverySettings.activityTimeout()); // we fork off quickly here and go async but this is called from the cluster state applier thread too and that can cause // assertions to trip if we executed it on the same thread hence we fork off to the generic threadpool. threadPool.generic().execute(new RecoveryRunner(recoveryId)); @@ -225,7 +226,7 @@ private void doRecovery(final long recoveryId, final StartRecoveryRequest preExi final TransportRequest requestToSend; final StartRecoveryRequest startRequest; final ReplicationTimer timer; - try (RecoveryRef recoveryRef = onGoingRecoveries.getRecovery(recoveryId)) { + try (ShardTargetCollection.ShardTargetRef recoveryRef = onGoingRecoveries.get(recoveryId)) { if (recoveryRef == null) { logger.trace("not running recovery with id [{}] - can not find it (probably finished)", recoveryId); return; @@ -248,7 +249,7 @@ private void doRecovery(final long recoveryId, final StartRecoveryRequest preExi } catch (final Exception e) { // this will be logged as warning later on... 
logger.trace("unexpected error while preparing shard for peer recovery, failing recovery", e); - onGoingRecoveries.failRecovery( + onGoingRecoveries.fail( recoveryId, new RecoveryFailedException(recoveryTarget.state(), "failed to prepare shard for recovery", e), true @@ -349,7 +350,7 @@ class PrepareForTranslogOperationsRequestHandler implements TransportRequestHand @Override public void messageReceived(RecoveryPrepareForTranslogOperationsRequest request, TransportChannel channel, Task task) { - try (RecoveryRef recoveryRef = onGoingRecoveries.getRecoverySafe(request.recoveryId(), request.shardId())) { + try (ShardTargetRef recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { final ActionListener listener = createOrFinishListener(recoveryRef, channel, Actions.PREPARE_TRANSLOG, request); if (listener == null) { return; @@ -364,7 +365,7 @@ class FinalizeRecoveryRequestHandler implements TransportRequestHandler recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { final ActionListener listener = createOrFinishListener(recoveryRef, channel, Actions.FINALIZE, request); if (listener == null) { return; @@ -380,7 +381,7 @@ class HandoffPrimaryContextRequestHandler implements TransportRequestHandler recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { recoveryRef.get().handoffPrimaryContext(request.primaryContext()); } channel.sendResponse(TransportResponse.Empty.INSTANCE); @@ -393,7 +394,7 @@ class TranslogOperationsRequestHandler implements TransportRequestHandler recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { final RecoveryTarget recoveryTarget = recoveryRef.get(); final ActionListener listener = createOrFinishListener( recoveryRef, @@ -413,7 +414,7 @@ public void messageReceived(final RecoveryTranslogOperationsRequest request, fin private void performTranslogOps( final RecoveryTranslogOperationsRequest request, final ActionListener listener, - 
final RecoveryRef recoveryRef + final ShardTargetRef recoveryRef ) { final RecoveryTarget recoveryTarget = recoveryRef.get(); @@ -428,7 +429,7 @@ private void performTranslogOps( @Override public void onNewClusterState(ClusterState state) { threadPool.generic().execute(ActionRunnable.wrap(listener, l -> { - try (RecoveryRef recoveryRef = onGoingRecoveries.getRecoverySafe(request.recoveryId(), request.shardId())) { + try (ShardTargetRef recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { performTranslogOps(request, listener, recoveryRef); } })); @@ -474,7 +475,7 @@ class FilesInfoRequestHandler implements TransportRequestHandler recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { final ActionListener listener = createOrFinishListener(recoveryRef, channel, Actions.FILES_INFO, request); if (listener == null) { return; @@ -497,7 +498,7 @@ class CleanFilesRequestHandler implements TransportRequestHandler recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { final ActionListener listener = createOrFinishListener(recoveryRef, channel, Actions.CLEAN_FILES, request); if (listener == null) { return; @@ -516,7 +517,7 @@ class FileChunkTransportRequestHandler implements TransportRequestHandler recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { final RecoveryTarget recoveryTarget = recoveryRef.get(); final ActionListener listener = createOrFinishListener(recoveryRef, channel, Actions.FILE_CHUNK, request); if (listener == null) { @@ -552,7 +553,7 @@ public void messageReceived(final RecoveryFileChunkRequest request, TransportCha } private ActionListener createOrFinishListener( - final RecoveryRef recoveryRef, + final ShardTargetRef recoveryRef, final TransportChannel channel, final String action, final RecoveryTransportRequest request @@ -561,7 +562,7 @@ private ActionListener createOrFinishListener( } private ActionListener createOrFinishListener( - 
final RecoveryRef recoveryRef, + final ShardTargetRef recoveryRef, final TransportChannel channel, final String action, final RecoveryTransportRequest request, @@ -598,10 +599,10 @@ class RecoveryRunner extends AbstractRunnable { @Override public void onFailure(Exception e) { - try (RecoveryRef recoveryRef = onGoingRecoveries.getRecovery(recoveryId)) { + try (ShardTargetRef recoveryRef = onGoingRecoveries.get(recoveryId)) { if (recoveryRef != null) { logger.error(() -> new ParameterizedMessage("unexpected error during recovery [{}], failing shard", recoveryId), e); - onGoingRecoveries.failRecovery( + onGoingRecoveries.fail( recoveryId, new RecoveryFailedException(recoveryRef.get().state(), "unexpected error", e), true // be safe @@ -637,7 +638,7 @@ private RecoveryResponseHandler(final StartRecoveryRequest request, final Replic public void handleResponse(RecoveryResponse recoveryResponse) { final TimeValue recoveryTime = new TimeValue(timer.time()); // do this through ongoing recoveries to remove it from the collection - onGoingRecoveries.markRecoveryAsDone(recoveryId); + onGoingRecoveries.markAsDone(recoveryId); if (logger.isTraceEnabled()) { StringBuilder sb = new StringBuilder(); sb.append('[') @@ -698,7 +699,7 @@ private void onException(Exception e) { Throwable cause = ExceptionsHelper.unwrapCause(e); if (cause instanceof CancellableThreads.ExecutionCancelledException) { // this can also come from the source wrapped in a RemoteTransportException - onGoingRecoveries.failRecovery( + onGoingRecoveries.fail( recoveryId, new RecoveryFailedException(request, "source has canceled the recovery", cause), false @@ -755,11 +756,11 @@ private void onException(Exception e) { } if (cause instanceof AlreadyClosedException) { - onGoingRecoveries.failRecovery(recoveryId, new RecoveryFailedException(request, "source shard is closed", cause), false); + onGoingRecoveries.fail(recoveryId, new RecoveryFailedException(request, "source shard is closed", cause), false); return; } - 
onGoingRecoveries.failRecovery(recoveryId, new RecoveryFailedException(request, e), true); + onGoingRecoveries.fail(recoveryId, new RecoveryFailedException(request, e), true); } @Override diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveriesCollection.java b/server/src/main/java/org/opensearch/indices/recovery/RecoveriesCollection.java deleted file mode 100644 index 26e9edb842614..0000000000000 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoveriesCollection.java +++ /dev/null @@ -1,333 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. 
- */ - -package org.opensearch.indices.recovery; - -import org.apache.logging.log4j.Logger; -import org.apache.logging.log4j.message.ParameterizedMessage; -import org.opensearch.OpenSearchTimeoutException; -import org.opensearch.cluster.node.DiscoveryNode; -import org.opensearch.common.concurrent.AutoCloseableRefCounted; -import org.opensearch.common.unit.TimeValue; -import org.opensearch.common.util.concurrent.AbstractRunnable; -import org.opensearch.common.util.concurrent.ConcurrentCollections; -import org.opensearch.index.shard.IndexShard; -import org.opensearch.index.shard.IndexShardClosedException; -import org.opensearch.index.shard.ShardId; -import org.opensearch.indices.replication.common.ReplicationListener; -import org.opensearch.threadpool.ThreadPool; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.concurrent.ConcurrentMap; - -/** - * This class holds a collection of all on going recoveries on the current node (i.e., the node is the target node - * of those recoveries). The class is used to guarantee concurrent semantics such that once a recoveries was done/cancelled/failed - * no other thread will be able to find it. Last, the {@link RecoveryRef} inner class verifies that recovery temporary files - * and store will only be cleared once on going usage is finished. - * - * @opensearch.internal - */ -public class RecoveriesCollection { - - /** This is the single source of truth for ongoing recoveries. If it's not here, it was canceled or done */ - private final ConcurrentMap onGoingRecoveries = ConcurrentCollections.newConcurrentMap(); - - private final Logger logger; - private final ThreadPool threadPool; - - public RecoveriesCollection(Logger logger, ThreadPool threadPool) { - this.logger = logger; - this.threadPool = threadPool; - } - - /** - * Starts are new recovery for the given shard, source node and state - * - * @return the id of the new recovery. 
- */ - public long startRecovery(IndexShard indexShard, DiscoveryNode sourceNode, ReplicationListener listener, TimeValue activityTimeout) { - RecoveryTarget recoveryTarget = new RecoveryTarget(indexShard, sourceNode, listener); - startRecoveryInternal(recoveryTarget, activityTimeout); - return recoveryTarget.getId(); - } - - private void startRecoveryInternal(RecoveryTarget recoveryTarget, TimeValue activityTimeout) { - RecoveryTarget existingTarget = onGoingRecoveries.putIfAbsent(recoveryTarget.getId(), recoveryTarget); - assert existingTarget == null : "found two RecoveryTarget instances with the same id"; - logger.trace( - "{} started recovery from {}, id [{}]", - recoveryTarget.indexShard().shardId(), - recoveryTarget.sourceNode(), - recoveryTarget.getId() - ); - threadPool.schedule( - new RecoveryMonitor(recoveryTarget.getId(), recoveryTarget.lastAccessTime(), activityTimeout), - activityTimeout, - ThreadPool.Names.GENERIC - ); - } - - /** - * Resets the recovery and performs a recovery restart on the currently recovering index shard - * - * @see IndexShard#performRecoveryRestart() - * @return newly created RecoveryTarget - */ - public RecoveryTarget resetRecovery(final long recoveryId, final TimeValue activityTimeout) { - RecoveryTarget oldRecoveryTarget = null; - final RecoveryTarget newRecoveryTarget; - - try { - synchronized (onGoingRecoveries) { - // swap recovery targets in a synchronized block to ensure that the newly added recovery target is picked up by - // cancelRecoveriesForShard whenever the old recovery target is picked up - oldRecoveryTarget = onGoingRecoveries.remove(recoveryId); - if (oldRecoveryTarget == null) { - return null; - } - - newRecoveryTarget = oldRecoveryTarget.retryCopy(); - startRecoveryInternal(newRecoveryTarget, activityTimeout); - } - - // Closes the current recovery target - boolean successfulReset = oldRecoveryTarget.resetRecovery(newRecoveryTarget.cancellableThreads()); - if (successfulReset) { - logger.trace( - "{} 
restarted recovery from {}, id [{}], previous id [{}]", - newRecoveryTarget.indexShard().shardId(), - newRecoveryTarget.sourceNode(), - newRecoveryTarget.getId(), - oldRecoveryTarget.getId() - ); - return newRecoveryTarget; - } else { - logger.trace( - "{} recovery could not be reset as it is already cancelled, recovery from {}, id [{}], previous id [{}]", - newRecoveryTarget.indexShard().shardId(), - newRecoveryTarget.sourceNode(), - newRecoveryTarget.getId(), - oldRecoveryTarget.getId() - ); - cancelRecovery(newRecoveryTarget.getId(), "recovery cancelled during reset"); - return null; - } - } catch (Exception e) { - // fail shard to be safe - oldRecoveryTarget.notifyListener(new RecoveryFailedException(oldRecoveryTarget.state(), "failed to retry recovery", e), true); - return null; - } - } - - public RecoveryTarget getRecoveryTarget(long id) { - return onGoingRecoveries.get(id); - } - - /** - * gets the {@link RecoveryTarget } for a given id. The RecoveryStatus returned has it's ref count already incremented - * to make sure it's safe to use. However, you must call {@link RecoveryTarget#decRef()} when you are done with it, typically - * by using this method in a try-with-resources clause. - *

- * Returns null if recovery is not found - */ - public RecoveryRef getRecovery(long id) { - RecoveryTarget status = onGoingRecoveries.get(id); - if (status != null && status.tryIncRef()) { - return new RecoveryRef(status); - } - return null; - } - - /** Similar to {@link #getRecovery(long)} but throws an exception if no recovery is found */ - public RecoveryRef getRecoverySafe(long id, ShardId shardId) { - RecoveryRef recoveryRef = getRecovery(id); - if (recoveryRef == null) { - throw new IndexShardClosedException(shardId); - } - assert recoveryRef.get().indexShard().shardId().equals(shardId); - return recoveryRef; - } - - /** cancel the recovery with the given id (if found) and remove it from the recovery collection */ - public boolean cancelRecovery(long id, String reason) { - RecoveryTarget removed = onGoingRecoveries.remove(id); - boolean cancelled = false; - if (removed != null) { - logger.trace( - "{} canceled recovery from {}, id [{}] (reason [{}])", - removed.indexShard().shardId(), - removed.sourceNode(), - removed.getId(), - reason - ); - removed.cancel(reason); - cancelled = true; - } - return cancelled; - } - - /** - * fail the recovery with the given id (if found) and remove it from the recovery collection - * - * @param id id of the recovery to fail - * @param e exception with reason for the failure - * @param sendShardFailure true a shard failed message should be sent to the master - */ - public void failRecovery(long id, RecoveryFailedException e, boolean sendShardFailure) { - RecoveryTarget removed = onGoingRecoveries.remove(id); - if (removed != null) { - logger.trace( - "{} failing recovery from {}, id [{}]. 
Send shard failure: [{}]", - removed.indexShard().shardId(), - removed.sourceNode(), - removed.getId(), - sendShardFailure - ); - removed.fail(e, sendShardFailure); - } - } - - /** mark the recovery with the given id as done (if found) */ - public void markRecoveryAsDone(long id) { - RecoveryTarget removed = onGoingRecoveries.remove(id); - if (removed != null) { - logger.trace( - "{} marking recovery from {} as done, id [{}]", - removed.indexShard().shardId(), - removed.sourceNode(), - removed.getId() - ); - removed.markAsDone(); - } - } - - /** the number of ongoing recoveries */ - public int size() { - return onGoingRecoveries.size(); - } - - /** - * cancel all ongoing recoveries for the given shard - * - * @param reason reason for cancellation - * @param shardId shardId for which to cancel recoveries - * @return true if a recovery was cancelled - */ - public boolean cancelRecoveriesForShard(ShardId shardId, String reason) { - boolean cancelled = false; - List matchedRecoveries = new ArrayList<>(); - synchronized (onGoingRecoveries) { - for (Iterator it = onGoingRecoveries.values().iterator(); it.hasNext();) { - RecoveryTarget status = it.next(); - if (status.indexShard().shardId().equals(shardId)) { - matchedRecoveries.add(status); - it.remove(); - } - } - } - for (RecoveryTarget removed : matchedRecoveries) { - logger.trace( - "{} canceled recovery from {}, id [{}] (reason [{}])", - removed.indexShard().shardId(), - removed.sourceNode(), - removed.getId(), - reason - ); - removed.cancel(reason); - cancelled = true; - } - return cancelled; - } - - /** - * a reference to {@link RecoveryTarget}, which implements {@link AutoCloseable}. closing the reference - * causes {@link RecoveryTarget#decRef()} to be called. This makes sure that the underlying resources - * will not be freed until {@link RecoveryRef#close()} is called. 
- * - * @opensearch.internal - */ - public static class RecoveryRef extends AutoCloseableRefCounted { - - /** - * Important: {@link RecoveryTarget#tryIncRef()} should - * be *successfully* called on status before - */ - public RecoveryRef(RecoveryTarget status) { - super(status); - status.setLastAccessTime(); - } - } - - private class RecoveryMonitor extends AbstractRunnable { - private final long recoveryId; - private final TimeValue checkInterval; - - private volatile long lastSeenAccessTime; - - private RecoveryMonitor(long recoveryId, long lastSeenAccessTime, TimeValue checkInterval) { - this.recoveryId = recoveryId; - this.checkInterval = checkInterval; - this.lastSeenAccessTime = lastSeenAccessTime; - } - - @Override - public void onFailure(Exception e) { - logger.error(() -> new ParameterizedMessage("unexpected error while monitoring recovery [{}]", recoveryId), e); - } - - @Override - protected void doRun() throws Exception { - RecoveryTarget status = onGoingRecoveries.get(recoveryId); - if (status == null) { - logger.trace("[monitor] no status found for [{}], shutting down", recoveryId); - return; - } - long accessTime = status.lastAccessTime(); - if (accessTime == lastSeenAccessTime) { - String message = "no activity after [" + checkInterval + "]"; - failRecovery( - recoveryId, - new RecoveryFailedException(status.state(), message, new OpenSearchTimeoutException(message)), - true // to be safe, we don't know what go stuck - ); - return; - } - lastSeenAccessTime = accessTime; - logger.trace("[monitor] rescheduling check for [{}]. 
last access time is [{}]", recoveryId, lastSeenAccessTime); - threadPool.schedule(this, checkInterval, ThreadPool.Names.GENERIC); - } - } - -} diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveryListener.java b/server/src/main/java/org/opensearch/indices/recovery/RecoveryListener.java index b93c054ffa4bf..50bd5de2382c9 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoveryListener.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoveryListener.java @@ -11,15 +11,15 @@ import org.opensearch.OpenSearchException; import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.indices.cluster.IndicesClusterStateService; -import org.opensearch.indices.replication.common.ReplicationListener; -import org.opensearch.indices.replication.common.ReplicationState; +import org.opensearch.indices.common.ShardTargetListener; +import org.opensearch.indices.common.ShardTargetState; /** * Listener that runs on changes in Recovery state * * @opensearch.internal */ -public class RecoveryListener implements ReplicationListener { +public class RecoveryListener implements ShardTargetListener { /** * ShardRouting with which the shard was created @@ -44,12 +44,12 @@ public RecoveryListener( } @Override - public void onDone(ReplicationState state) { + public void onDone(ShardTargetState state) { indicesClusterStateService.handleRecoveryDone(state, shardRouting, primaryTerm); } @Override - public void onFailure(ReplicationState state, OpenSearchException e, boolean sendShardFailure) { + public void onFailure(ShardTargetState state, OpenSearchException e, boolean sendShardFailure) { indicesClusterStateService.handleRecoveryFailure(shardRouting, sendShardFailure, e); } } diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveryState.java b/server/src/main/java/org/opensearch/indices/recovery/RecoveryState.java index 5ae3ad9bb01df..1ba1d23ca46b8 100644 --- 
a/server/src/main/java/org/opensearch/indices/recovery/RecoveryState.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoveryState.java @@ -45,9 +45,9 @@ import org.opensearch.common.xcontent.XContentBuilder; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.ShardId; -import org.opensearch.indices.replication.common.ReplicationLuceneIndex; -import org.opensearch.indices.replication.common.ReplicationState; -import org.opensearch.indices.replication.common.ReplicationTimer; +import org.opensearch.indices.common.ShardTargetState; +import org.opensearch.indices.common.ReplicationLuceneIndex; +import org.opensearch.indices.common.ReplicationTimer; import java.io.IOException; import java.util.Locale; @@ -57,12 +57,71 @@ * * @opensearch.internal */ -public class RecoveryState extends ReplicationState implements ToXContentFragment, Writeable { +public class RecoveryState implements ShardTargetState, ToXContentFragment, Writeable { + + /** + * The stage of the recovery state + * + * @opensearch.internal + */ + public enum Stage { + INIT((byte) 0), + + /** + * recovery of lucene files, either reusing local ones or copying new ones + */ + INDEX((byte) 1), + + /** + * potentially running check index + */ + VERIFY_INDEX((byte) 2), + + /** + * starting up the engine, replaying the translog + */ + TRANSLOG((byte) 3), + + /** + * performing final task after all translog ops have been done + */ + FINALIZE((byte) 4), + + DONE((byte) 5); + + private static final Stage[] STAGES = new Stage[Stage.values().length]; + + static { + for (Stage stage : Stage.values()) { + assert stage.id() < STAGES.length && stage.id() >= 0; + STAGES[stage.id] = stage; + } + } + + private final byte id; + + Stage(byte id) { + this.id = id; + } + + public byte id() { + return id; + } + + public static Stage fromId(byte id) { + if (id < 0 || id >= STAGES.length) { + throw new IllegalArgumentException("No mapping for id [" + id + "]"); + } + return STAGES[id]; + 
} + } private Stage stage; + private final ReplicationLuceneIndex index; private final Translog translog; private final VerifyIndex verifyIndex; + private final ReplicationTimer timer; private RecoverySource recoverySource; private ShardId shardId; @@ -189,6 +248,10 @@ public synchronized RecoveryState setStage(Stage stage) { return this; } + public ReplicationLuceneIndex getIndex() { + return index; + } + public VerifyIndex getVerifyIndex() { return this.verifyIndex; } @@ -197,6 +260,10 @@ public Translog getTranslog() { return translog; } + public ReplicationTimer getTimer() { + return timer; + } + public RecoverySource getRecoverySource() { return recoverySource; } @@ -229,6 +296,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.timeField(Fields.STOP_TIME_IN_MILLIS, Fields.STOP_TIME, timer.stopTime()); } builder.humanReadableField(Fields.TOTAL_TIME_IN_MILLIS, Fields.TOTAL_TIME, new TimeValue(timer.time())); + if (recoverySource.getType() == RecoverySource.Type.PEER) { builder.startObject(Fields.SOURCE); builder.field(Fields.ID, sourceNode.getId()); diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java b/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java index 34b25ac97d833..abb532a453db4 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java @@ -37,10 +37,10 @@ import org.apache.lucene.index.IndexFormatTooOldException; import org.opensearch.Assertions; import org.opensearch.ExceptionsHelper; -import org.opensearch.OpenSearchException; import org.opensearch.action.ActionListener; import org.opensearch.action.admin.indices.flush.FlushRequest; import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.common.UUIDs; import org.opensearch.common.bytes.BytesReference; import org.opensearch.common.lucene.Lucene; import 
org.opensearch.common.util.CancellableThreads; @@ -55,9 +55,10 @@ import org.opensearch.index.store.Store; import org.opensearch.index.store.StoreFileMetadata; import org.opensearch.index.translog.Translog; -import org.opensearch.indices.replication.common.ReplicationListener; -import org.opensearch.indices.replication.common.ReplicationLuceneIndex; -import org.opensearch.indices.replication.common.ReplicationTarget; +import org.opensearch.indices.common.ReplicationLuceneIndex; +import org.opensearch.indices.common.ShardTarget; +import org.opensearch.indices.common.ShardTargetListener; +import org.opensearch.indices.common.ShardTargetCollection; import java.io.IOException; import java.nio.file.Path; @@ -66,16 +67,18 @@ /** * Represents a recovery where the current node is the target node of the recovery. To track recoveries in a central place, instances of - this class are created through {@link RecoveriesCollection}. + this class are passed to {@link ShardTargetCollection}. * * @opensearch.internal */ -public class RecoveryTarget extends ReplicationTarget implements RecoveryTargetHandler { +public class RecoveryTarget extends ShardTarget implements RecoveryTargetHandler { private static final String RECOVERY_PREFIX = "recovery."; private final DiscoveryNode sourceNode; private final CancellableThreads cancellableThreads; + protected final MultiFileWriter multiFileWriter; + protected final Store store; // latch that can be used to blockingly wait for RecoveryTarget to be closed private final CountDownLatch closedLatch = new CountDownLatch(1); @@ -87,11 +90,15 @@ public class RecoveryTarget extends ReplicationTarget implements RecoveryTargetH * @param sourceNode source node of the recovery where we recover from * @param listener called when recovery is completed/failed */ - public RecoveryTarget(IndexShard indexShard, DiscoveryNode sourceNode, ReplicationListener listener) { + public RecoveryTarget(IndexShard indexShard, DiscoveryNode sourceNode, 
ShardTargetListener listener) { super("recovery_status", indexShard, indexShard.recoveryState().getIndex(), listener); this.cancellableThreads = new CancellableThreads(); this.sourceNode = sourceNode; indexShard.recoveryStats().incCurrentAsTarget(); + this.store = indexShard.store(); + final String tempFilePrefix = getPrefix() + UUIDs.randomBase64UUID() + "."; + this.multiFileWriter = new MultiFileWriter(indexShard.store(), recoveryStateIndex, tempFilePrefix, logger, this::ensureRefCount); + store.incRef(); } /** @@ -108,6 +115,10 @@ public IndexShard indexShard() { return indexShard; } + public String source() { + return sourceNode.toString(); + } + public DiscoveryNode sourceNode() { return this.sourceNode; } @@ -125,11 +136,20 @@ public Store store() { return store; } + public String description() { + return "recovery from " + source(); + } + + @Override + public void notifyListener(Exception e, boolean sendShardFailure) { + listener.onFailure(state(), new RecoveryFailedException(state(), e.getMessage(), e), sendShardFailure); + } + /** * Closes the current recovery target and waits up to a certain timeout for resources to be freed. * Returns true if resetting the recovery was successful, false if the recovery target is already cancelled / failed or marked as done. 
*/ - boolean resetRecovery(CancellableThreads newTargetCancellableThreads) throws IOException { + public boolean resetRecovery(CancellableThreads newTargetCancellableThreads) throws IOException { final long recoveryId = getId(); if (finished.compareAndSet(false, true)) { try { @@ -192,10 +212,6 @@ public void fail(RecoveryFailedException e, boolean sendShardFailure) { super.fail(e, sendShardFailure); } - public void notifyListener(RecoveryFailedException e, boolean sendShardFailure) { - listener.onFailure(state(), e, sendShardFailure); - } - /** mark the current recovery as done */ public void markAsDone() { if (finished.compareAndSet(false, true)) { @@ -215,8 +231,9 @@ public void markAsDone() { @Override protected void closeInternal() { try { - super.closeInternal(); + multiFileWriter.close(); } finally { + store.decRef(); indexShard.recoveryStats().decCurrentAsTarget(); closedLatch.countDown(); } @@ -234,6 +251,9 @@ protected String getPrefix() { @Override protected void onDone() { + assert multiFileWriter.tempFileNames.isEmpty() : "not all temporary files are renamed"; + // this might still throw an exception, e.g. if the shard is CLOSED due to some other event. + // it's safer to decrement the reference in a try finally here. 
indexShard.postRecovery("peer recovery done"); } @@ -245,11 +265,6 @@ protected void onCancel(String reason) { cancellableThreads.cancel(reason); } - @Override - protected void onFail(OpenSearchException e, boolean sendShardFailure) { - cancellableThreads.cancel("failed recovery [" + ExceptionsHelper.stackTrace(e) + "]"); - } - /*** Implementation of {@link RecoveryTargetHandler } */ @Override @@ -456,8 +471,13 @@ public void writeFileChunk( int totalTranslogOps, ActionListener listener ) { - state().getTranslog().totalOperations(totalTranslogOps); - this.writeFileChunk(fileMetadata, position, content, lastChunk, listener); + try { + state().getTranslog().totalOperations(totalTranslogOps); + multiFileWriter.writeFileChunk(fileMetadata, position, content, lastChunk); + listener.onResponse(null); + } catch (Exception e) { + listener.onFailure(e); + } } /** Get a temporary name for the provided file name. */ diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java deleted file mode 100644 index 02113f71f41ab..0000000000000 --- a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.indices.replication.common; - -/** - * Keeps track of different stages involved in replication - * - * @opensearch.internal - */ -public abstract class ReplicationState { - - protected ReplicationTimer timer; - protected ReplicationLuceneIndex index; - - /** - * The stage of the recovery state - * - * @opensearch.internal - */ - public enum Stage { - INIT((byte) 0), - - /** - * recovery of lucene files, either reusing local ones are copying new ones - */ - INDEX((byte) 1), - - /** - * potentially running check index - */ - VERIFY_INDEX((byte) 2), - - /** - * starting up the engine, replaying the translog - */ - TRANSLOG((byte) 3), - - /** - * performing final task after all translog ops have been done - */ - FINALIZE((byte) 4), - - DONE((byte) 5); - - private static final Stage[] STAGES = new Stage[Stage.values().length]; - - static { - for (Stage stage : Stage.values()) { - assert stage.id() < STAGES.length && stage.id() >= 0; - STAGES[stage.id] = stage; - } - } - - private final byte id; - - Stage(byte id) { - this.id = id; - } - - public byte id() { - return id; - } - - public static Stage fromId(byte id) { - if (id < 0 || id >= STAGES.length) { - throw new IllegalArgumentException("No mapping for id [" + id + "]"); - } - return STAGES[id]; - } - } - - protected ReplicationState() { - // Empty default constructor for subclasses - } - - protected ReplicationState(ReplicationLuceneIndex index) { - this.index = index; - timer = new ReplicationTimer(); - timer.start(); - } - - public ReplicationTimer getTimer() { - return timer; - } - - public ReplicationLuceneIndex getIndex() { - return index; - } - -} diff --git a/server/src/test/java/org/opensearch/index/replication/RecoveryDuringReplicationTests.java b/server/src/test/java/org/opensearch/index/replication/RecoveryDuringReplicationTests.java index 509d1f52daa0d..6a5b8fc77689e 100644 --- a/server/src/test/java/org/opensearch/index/replication/RecoveryDuringReplicationTests.java +++ 
b/server/src/test/java/org/opensearch/index/replication/RecoveryDuringReplicationTests.java @@ -71,7 +71,7 @@ import org.opensearch.index.translog.Translog; import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.recovery.RecoveryTarget; -import org.opensearch.indices.replication.common.ReplicationListener; +import org.opensearch.indices.common.ShardTargetListener; import java.io.IOException; import java.util.ArrayList; @@ -809,7 +809,7 @@ public BlockingTarget( CountDownLatch releaseRecovery, IndexShard shard, DiscoveryNode sourceNode, - ReplicationListener listener, + ShardTargetListener listener, Logger logger ) { super(shard, sourceNode, listener); diff --git a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java index e54d30c626812..f340e72c83d0f 100644 --- a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java @@ -133,7 +133,7 @@ import org.opensearch.indices.fielddata.cache.IndicesFieldDataCache; import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.recovery.RecoveryTarget; -import org.opensearch.indices.replication.common.ReplicationLuceneIndex; +import org.opensearch.indices.common.ReplicationLuceneIndex; import org.opensearch.repositories.IndexId; import org.opensearch.snapshots.Snapshot; import org.opensearch.snapshots.SnapshotId; diff --git a/server/src/test/java/org/opensearch/index/shard/StoreRecoveryTests.java b/server/src/test/java/org/opensearch/index/shard/StoreRecoveryTests.java index 64b6490ace1c7..f397822422295 100644 --- a/server/src/test/java/org/opensearch/index/shard/StoreRecoveryTests.java +++ b/server/src/test/java/org/opensearch/index/shard/StoreRecoveryTests.java @@ -61,7 +61,7 @@ import org.opensearch.index.mapper.IdFieldMapper; import org.opensearch.index.mapper.Uid; import 
org.opensearch.index.seqno.SequenceNumbers; -import org.opensearch.indices.replication.common.ReplicationLuceneIndex; +import org.opensearch.indices.common.ReplicationLuceneIndex; import org.opensearch.test.OpenSearchTestCase; import java.io.IOException; diff --git a/server/src/test/java/org/opensearch/indices/recovery/RecoverySourceHandlerTests.java b/server/src/test/java/org/opensearch/indices/recovery/RecoverySourceHandlerTests.java index 1739f546150d9..2036683048c08 100644 --- a/server/src/test/java/org/opensearch/indices/recovery/RecoverySourceHandlerTests.java +++ b/server/src/test/java/org/opensearch/indices/recovery/RecoverySourceHandlerTests.java @@ -94,7 +94,7 @@ import org.opensearch.index.store.Store; import org.opensearch.index.store.StoreFileMetadata; import org.opensearch.index.translog.Translog; -import org.opensearch.indices.replication.common.ReplicationLuceneIndex; +import org.opensearch.indices.common.ReplicationLuceneIndex; import org.opensearch.test.CorruptionUtils; import org.opensearch.test.DummyShardLock; import org.opensearch.test.IndexSettingsModule; diff --git a/server/src/test/java/org/opensearch/indices/recovery/RecoveryTargetTests.java b/server/src/test/java/org/opensearch/indices/recovery/RecoveryTargetTests.java index 590b89e97fa03..8f4ec5b6eb05f 100644 --- a/server/src/test/java/org/opensearch/indices/recovery/RecoveryTargetTests.java +++ b/server/src/test/java/org/opensearch/indices/recovery/RecoveryTargetTests.java @@ -41,12 +41,12 @@ import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.Writeable; import org.opensearch.index.shard.ShardId; -import org.opensearch.indices.replication.common.ReplicationLuceneIndex.FileMetadata; -import org.opensearch.indices.replication.common.ReplicationLuceneIndex; -import org.opensearch.indices.replication.common.ReplicationState.Stage; +import org.opensearch.indices.common.ReplicationLuceneIndex.FileMetadata; +import 
org.opensearch.indices.common.ReplicationLuceneIndex; +import org.opensearch.indices.recovery.RecoveryState.Stage; import org.opensearch.indices.recovery.RecoveryState.Translog; import org.opensearch.indices.recovery.RecoveryState.VerifyIndex; -import org.opensearch.indices.replication.common.ReplicationTimer; +import org.opensearch.indices.common.ReplicationTimer; import org.opensearch.test.OpenSearchTestCase; import java.io.IOException; diff --git a/server/src/test/java/org/opensearch/indices/recovery/RecoveryTests.java b/server/src/test/java/org/opensearch/indices/recovery/RecoveryTests.java index 5224a54a35e96..ee0a23c6a1297 100644 --- a/server/src/test/java/org/opensearch/indices/recovery/RecoveryTests.java +++ b/server/src/test/java/org/opensearch/indices/recovery/RecoveryTests.java @@ -69,8 +69,8 @@ import org.opensearch.index.store.Store; import org.opensearch.index.translog.SnapshotMatchers; import org.opensearch.index.translog.Translog; -import org.opensearch.indices.replication.common.ReplicationListener; -import org.opensearch.indices.replication.common.ReplicationState; +import org.opensearch.indices.common.ShardTargetListener; +import org.opensearch.indices.common.ShardTargetState; import java.io.IOException; import java.util.HashMap; @@ -451,14 +451,14 @@ public long addDocument(Iterable doc) throws IOExcepti IndexShard replica = group.addReplica(); expectThrows( Exception.class, - () -> group.recoverReplica(replica, (shard, sourceNode) -> new RecoveryTarget(shard, sourceNode, new ReplicationListener() { + () -> group.recoverReplica(replica, (shard, sourceNode) -> new RecoveryTarget(shard, sourceNode, new ShardTargetListener() { @Override - public void onDone(ReplicationState state) { + public void onDone(ShardTargetState state) { throw new AssertionError("recovery must fail"); } @Override - public void onFailure(ReplicationState state, OpenSearchException e, boolean sendShardFailure) { + public void onFailure(ShardTargetState state, 
OpenSearchException e, boolean sendShardFailure) { assertThat(ExceptionsHelper.unwrap(e, IOException.class).getMessage(), equalTo("simulated")); } })) diff --git a/server/src/test/java/org/opensearch/indices/recovery/ReplicationRequestTrackerTests.java b/server/src/test/java/org/opensearch/indices/recovery/ReplicationRequestTrackerTests.java index afad385deabe4..1c4823c2ed9ab 100644 --- a/server/src/test/java/org/opensearch/indices/recovery/ReplicationRequestTrackerTests.java +++ b/server/src/test/java/org/opensearch/indices/recovery/ReplicationRequestTrackerTests.java @@ -36,7 +36,7 @@ import org.opensearch.action.ActionListener; import org.opensearch.action.support.PlainActionFuture; import org.opensearch.common.util.concurrent.ConcurrentCollections; -import org.opensearch.indices.replication.common.ReplicationRequestTracker; +import org.opensearch.indices.common.ReplicationRequestTracker; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.TestThreadPool; diff --git a/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java b/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java index 04114bda1ce2a..8bfbb1744f90b 100644 --- a/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java +++ b/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java @@ -38,11 +38,11 @@ import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.ShardId; import org.opensearch.index.store.Store; -import org.opensearch.indices.recovery.RecoveriesCollection; +import org.opensearch.indices.common.ShardTargetCollection; +import org.opensearch.indices.common.ShardTargetListener; +import org.opensearch.indices.common.ShardTargetState; import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.recovery.RecoveryTarget; -import org.opensearch.indices.replication.common.ReplicationListener; -import 
org.opensearch.indices.replication.common.ReplicationState; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; @@ -52,48 +52,48 @@ import static org.hamcrest.Matchers.lessThan; public class RecoveriesCollectionTests extends OpenSearchIndexLevelReplicationTestCase { - static final ReplicationListener listener = new ReplicationListener() { + static final ShardTargetListener listener = new ShardTargetListener() { @Override - public void onDone(ReplicationState state) { + public void onDone(ShardTargetState state) { } @Override - public void onFailure(ReplicationState state, OpenSearchException e, boolean sendShardFailure) { + public void onFailure(ShardTargetState state, OpenSearchException e, boolean sendShardFailure) { } }; public void testLastAccessTimeUpdate() throws Exception { try (ReplicationGroup shards = createGroup(0)) { - final RecoveriesCollection collection = new RecoveriesCollection(logger, threadPool); + final ShardTargetCollection collection = new ShardTargetCollection<>(logger, threadPool); final long recoveryId = startRecovery(collection, shards.getPrimaryNode(), shards.addReplica()); - try (RecoveriesCollection.RecoveryRef status = collection.getRecovery(recoveryId)) { + try (ShardTargetCollection.ShardTargetRef status = collection.get(recoveryId)) { final long lastSeenTime = status.get().lastAccessTime(); assertBusy(() -> { - try (RecoveriesCollection.RecoveryRef currentStatus = collection.getRecovery(recoveryId)) { + try (ShardTargetCollection.ShardTargetRef currentStatus = collection.get(recoveryId)) { assertThat("access time failed to update", lastSeenTime, lessThan(currentStatus.get().lastAccessTime())); } }); } finally { - collection.cancelRecovery(recoveryId, "life"); + collection.cancel(recoveryId, "life"); } } } public void testRecoveryTimeout() throws Exception { try (ReplicationGroup shards = createGroup(0)) { - final RecoveriesCollection collection = new RecoveriesCollection(logger, threadPool); + final 
ShardTargetCollection collection = new ShardTargetCollection<>(logger, threadPool); final AtomicBoolean failed = new AtomicBoolean(); final CountDownLatch latch = new CountDownLatch(1); - final long recoveryId = startRecovery(collection, shards.getPrimaryNode(), shards.addReplica(), new ReplicationListener() { + final long recoveryId = startRecovery(collection, shards.getPrimaryNode(), shards.addReplica(), new ShardTargetListener() { @Override - public void onDone(ReplicationState state) { + public void onDone(ShardTargetState state) { latch.countDown(); } @Override - public void onFailure(ReplicationState state, OpenSearchException e, boolean sendShardFailure) { + public void onFailure(ShardTargetState state, OpenSearchException e, boolean sendShardFailure) { failed.set(true); latch.countDown(); } @@ -102,7 +102,7 @@ public void onFailure(ReplicationState state, OpenSearchException e, boolean sen latch.await(30, TimeUnit.SECONDS); assertTrue("recovery failed to timeout", failed.get()); } finally { - collection.cancelRecovery(recoveryId, "meh"); + collection.cancel(recoveryId, "meh"); } } @@ -110,16 +110,16 @@ public void onFailure(ReplicationState state, OpenSearchException e, boolean sen public void testRecoveryCancellation() throws Exception { try (ReplicationGroup shards = createGroup(0)) { - final RecoveriesCollection collection = new RecoveriesCollection(logger, threadPool); + final ShardTargetCollection collection = new ShardTargetCollection<>(logger, threadPool); final long recoveryId = startRecovery(collection, shards.getPrimaryNode(), shards.addReplica()); final long recoveryId2 = startRecovery(collection, shards.getPrimaryNode(), shards.addReplica()); - try (RecoveriesCollection.RecoveryRef recoveryRef = collection.getRecovery(recoveryId)) { + try (ShardTargetCollection.ShardTargetRef recoveryRef = collection.get(recoveryId)) { ShardId shardId = recoveryRef.get().indexShard().shardId(); - assertTrue("failed to cancel recoveries", 
collection.cancelRecoveriesForShard(shardId, "test")); + assertTrue("failed to cancel recoveries", collection.cancelForShard(shardId, "test")); assertThat("all recoveries should be cancelled", collection.size(), equalTo(0)); } finally { - collection.cancelRecovery(recoveryId, "meh"); - collection.cancelRecovery(recoveryId2, "meh"); + collection.cancel(recoveryId, "meh"); + collection.cancel(recoveryId2, "meh"); } } } @@ -129,10 +129,10 @@ public void testResetRecovery() throws Exception { shards.startAll(); int numDocs = randomIntBetween(1, 15); shards.indexDocs(numDocs); - final RecoveriesCollection collection = new RecoveriesCollection(logger, threadPool); + final ShardTargetCollection collection = new ShardTargetCollection<>(logger, threadPool); IndexShard shard = shards.addReplica(); final long recoveryId = startRecovery(collection, shards.getPrimaryNode(), shard); - RecoveryTarget recoveryTarget = collection.getRecoveryTarget(recoveryId); + RecoveryTarget recoveryTarget = collection.getTarget(recoveryId); final int currentAsTarget = shard.recoveryStats().currentAsTarget(); final int referencesToStore = recoveryTarget.store().refCount(); IndexShard indexShard = recoveryTarget.indexShard(); @@ -152,31 +152,31 @@ public void testResetRecovery() throws Exception { String resetTempFileName = resetRecovery.getTempNameForFile("foobar"); assertNotEquals(tempFileName, resetTempFileName); assertEquals(currentAsTarget, shard.recoveryStats().currentAsTarget()); - try (RecoveriesCollection.RecoveryRef newRecoveryRef = collection.getRecovery(resetRecoveryId)) { + try (ShardTargetCollection.ShardTargetRef newRecoveryRef = collection.get(resetRecoveryId)) { shards.recoverReplica(shard, (s, n) -> { assertSame(s, newRecoveryRef.get().indexShard()); return newRecoveryRef.get(); }, false); } shards.assertAllEqual(numDocs); - assertNull("recovery is done", collection.getRecovery(recoveryId)); + assertNull("recovery is done", collection.get(recoveryId)); } } - long 
startRecovery(RecoveriesCollection collection, DiscoveryNode sourceNode, IndexShard shard) { + long startRecovery(ShardTargetCollection collection, DiscoveryNode sourceNode, IndexShard shard) { return startRecovery(collection, sourceNode, shard, listener, TimeValue.timeValueMinutes(60)); } long startRecovery( - RecoveriesCollection collection, + ShardTargetCollection collection, DiscoveryNode sourceNode, IndexShard indexShard, - ReplicationListener listener, + ShardTargetListener listener, TimeValue timeValue ) { final DiscoveryNode rNode = getDiscoveryNode(indexShard.routingEntry().currentNodeId()); indexShard.markAsRecovering("remote", new RecoveryState(indexShard.routingEntry(), sourceNode, rNode)); indexShard.prepareForIndexRecovery(); - return collection.startRecovery(indexShard, sourceNode, listener, timeValue); + return collection.start(new RecoveryTarget(indexShard, sourceNode, listener), timeValue); } } diff --git a/server/src/test/java/org/opensearch/repositories/fs/FsRepositoryTests.java b/server/src/test/java/org/opensearch/repositories/fs/FsRepositoryTests.java index 53f124a91f0ac..599991784c43f 100644 --- a/server/src/test/java/org/opensearch/repositories/fs/FsRepositoryTests.java +++ b/server/src/test/java/org/opensearch/repositories/fs/FsRepositoryTests.java @@ -68,7 +68,7 @@ import org.opensearch.index.store.Store; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.indices.recovery.RecoveryState; -import org.opensearch.indices.replication.common.ReplicationLuceneIndex; +import org.opensearch.indices.common.ReplicationLuceneIndex; import org.opensearch.repositories.IndexId; import org.opensearch.repositories.blobstore.BlobStoreTestUtil; import org.opensearch.snapshots.Snapshot; diff --git a/server/src/test/java/org/opensearch/rest/action/cat/RestRecoveryActionTests.java b/server/src/test/java/org/opensearch/rest/action/cat/RestRecoveryActionTests.java index efd31dda92d09..517d5361bdb9f 100644 --- 
a/server/src/test/java/org/opensearch/rest/action/cat/RestRecoveryActionTests.java +++ b/server/src/test/java/org/opensearch/rest/action/cat/RestRecoveryActionTests.java @@ -45,8 +45,8 @@ import org.opensearch.index.Index; import org.opensearch.index.shard.ShardId; import org.opensearch.indices.recovery.RecoveryState; -import org.opensearch.indices.replication.common.ReplicationLuceneIndex; -import org.opensearch.indices.replication.common.ReplicationTimer; +import org.opensearch.indices.common.ReplicationLuceneIndex; +import org.opensearch.indices.common.ReplicationTimer; import org.opensearch.test.OpenSearchTestCase; import java.util.ArrayList; diff --git a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java index 298fdcaea6465..392fba45f9260 100644 --- a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java @@ -94,8 +94,8 @@ import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.recovery.RecoveryTarget; import org.opensearch.indices.recovery.StartRecoveryRequest; -import org.opensearch.indices.replication.common.ReplicationListener; -import org.opensearch.indices.replication.common.ReplicationState; +import org.opensearch.indices.common.ShardTargetListener; +import org.opensearch.indices.common.ShardTargetState; import org.opensearch.repositories.IndexId; import org.opensearch.repositories.Repository; import org.opensearch.repositories.blobstore.OpenSearchBlobStoreRepositoryIntegTestCase; @@ -141,14 +141,14 @@ public abstract class IndexShardTestCase extends OpenSearchTestCase { } }; - protected static final ReplicationListener recoveryListener = new ReplicationListener() { + protected static final ShardTargetListener recoveryListener = new ShardTargetListener() { @Override - public void onDone(ReplicationState 
state) { + public void onDone(ShardTargetState state) { } @Override - public void onFailure(ReplicationState state, OpenSearchException e, boolean sendShardFailure) { + public void onFailure(ShardTargetState state, OpenSearchException e, boolean sendShardFailure) { throw new AssertionError(e); } }; From 5edeae368a126bf9d7702349d14fe5da4f16c6f1 Mon Sep 17 00:00:00 2001 From: Poojita Raj Date: Wed, 18 May 2022 10:20:36 -0700 Subject: [PATCH 07/10] style fix Signed-off-by: Poojita Raj --- .../indices/common/ShardTarget.java | 2 +- .../indices/common/ShardTargetCollection.java | 34 ++++--------------- .../recovery/PeerRecoveryTargetService.java | 18 ++++++---- 3 files changed, 18 insertions(+), 36 deletions(-) diff --git a/server/src/main/java/org/opensearch/indices/common/ShardTarget.java b/server/src/main/java/org/opensearch/indices/common/ShardTarget.java index e287a2fae489a..373c6404c9b8a 100644 --- a/server/src/main/java/org/opensearch/indices/common/ShardTarget.java +++ b/server/src/main/java/org/opensearch/indices/common/ShardTarget.java @@ -150,7 +150,7 @@ public void fail(OpenSearchException e, boolean sendShardFailure) { notifyListener(e, sendShardFailure); } finally { try { - cancellableThreads.cancel("failed" + description() + "[" + ExceptionsHelper.stackTrace(e) + "]"); + cancellableThreads.cancel("failed" + description() + "[" + ExceptionsHelper.stackTrace(e) + "]"); } finally { // release the initial reference. 
replication files will be cleaned as soon as ref count goes to zero, potentially now decRef(); diff --git a/server/src/main/java/org/opensearch/indices/common/ShardTargetCollection.java b/server/src/main/java/org/opensearch/indices/common/ShardTargetCollection.java index 523155e1f0d93..2ab0fcd23a98c 100644 --- a/server/src/main/java/org/opensearch/indices/common/ShardTargetCollection.java +++ b/server/src/main/java/org/opensearch/indices/common/ShardTargetCollection.java @@ -84,10 +84,7 @@ public long start(T target, TimeValue activityTimeout) { private void startInternal(T target, TimeValue activityTimeout) { T existingTarget = onGoingTargetEvents.putIfAbsent(target.getId(), target); assert existingTarget == null : "found two Target instances with the same id"; - logger.trace( - "started {}", - target.description() - ); + logger.trace("started {}", target.description()); threadPool.schedule( new ShardTargetMonitor(target.getId(), target.lastAccessTime(), activityTimeout), activityTimeout, @@ -122,11 +119,7 @@ public T resetRecovery(final long recoveryId, final TimeValue activityTimeout) { // Closes the current recovery target boolean successfulReset = oldTarget.resetRecovery(newTarget.cancellableThreads()); if (successfulReset) { - logger.trace( - "restarted {}, previous id [{}]", - newTarget.description(), - oldTarget.getId() - ); + logger.trace("restarted {}, previous id [{}]", newTarget.description(), oldTarget.getId()); return newTarget; } else { logger.trace( @@ -179,11 +172,7 @@ public boolean cancel(long id, String reason) { T removed = onGoingTargetEvents.remove(id); boolean cancelled = false; if (removed != null) { - logger.trace( - "canceled {} (reason [{}])", - removed.description(), - reason - ); + logger.trace("canceled {} (reason [{}])", removed.description(), reason); removed.cancel(reason); cancelled = true; } @@ -200,11 +189,7 @@ public boolean cancel(long id, String reason) { public void fail(long id, OpenSearchException e, boolean 
sendShardFailure) { T removed = onGoingTargetEvents.remove(id); if (removed != null) { - logger.trace( - "failing {}. Send shard failure: [{}]", - removed.description(), - sendShardFailure - ); + logger.trace("failing {}. Send shard failure: [{}]", removed.description(), sendShardFailure); removed.fail(e, sendShardFailure); } } @@ -213,10 +198,7 @@ public void fail(long id, OpenSearchException e, boolean sendShardFailure) { public void markAsDone(long id) { T removed = onGoingTargetEvents.remove(id); if (removed != null) { - logger.trace( - "Marking {} as done", - removed.description() - ); + logger.trace("Marking {} as done", removed.description()); removed.markAsDone(); } } @@ -246,11 +228,7 @@ public boolean cancelForShard(ShardId shardId, String reason) { } } for (T removed : matchedTargets) { - logger.trace( - "canceled {} (reason [{}])", - removed.description(), - reason - ); + logger.trace("canceled {} (reason [{}])", removed.description(), reason); removed.cancel(reason); cancelled = true; } diff --git a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java index c78cdc25ec5cd..475f336b44c21 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java +++ b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java @@ -192,7 +192,10 @@ public void beforeIndexShardClosed(ShardId shardId, @Nullable IndexShard indexSh public void startRecovery(final IndexShard indexShard, final DiscoveryNode sourceNode, final RecoveryListener listener) { // create a new recovery status, and process... 
- final long recoveryId = onGoingRecoveries.start(new RecoveryTarget(indexShard, sourceNode, listener), recoverySettings.activityTimeout()); + final long recoveryId = onGoingRecoveries.start( + new RecoveryTarget(indexShard, sourceNode, listener), + recoverySettings.activityTimeout() + ); // we fork off quickly here and go async but this is called from the cluster state applier thread too and that can cause // assertions to trip if we executed it on the same thread hence we fork off to the generic threadpool. threadPool.generic().execute(new RecoveryRunner(recoveryId)); @@ -429,7 +432,12 @@ private void performTranslogOps( @Override public void onNewClusterState(ClusterState state) { threadPool.generic().execute(ActionRunnable.wrap(listener, l -> { - try (ShardTargetRef recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { + try ( + ShardTargetRef recoveryRef = onGoingRecoveries.getSafe( + request.recoveryId(), + request.shardId() + ) + ) { performTranslogOps(request, listener, recoveryRef); } })); @@ -699,11 +707,7 @@ private void onException(Exception e) { Throwable cause = ExceptionsHelper.unwrapCause(e); if (cause instanceof CancellableThreads.ExecutionCancelledException) { // this can also come from the source wrapped in a RemoteTransportException - onGoingRecoveries.fail( - recoveryId, - new RecoveryFailedException(request, "source has canceled the recovery", cause), - false - ); + onGoingRecoveries.fail(recoveryId, new RecoveryFailedException(request, "source has canceled the recovery", cause), false); return; } if (cause instanceof RecoveryEngineException) { From 47379df615e6d798ca0bead82d81c425db04a0fc Mon Sep 17 00:00:00 2001 From: Poojita Raj Date: Wed, 18 May 2022 10:31:13 -0700 Subject: [PATCH 08/10] move package common under replication Signed-off-by: Poojita Raj --- .../org/opensearch/gateway/RecoveryFromGatewayIT.java | 2 +- .../org/opensearch/indices/recovery/IndexRecoveryIT.java | 2 +- 
.../org/opensearch/cluster/metadata/IndexMetadata.java | 2 +- .../src/main/java/org/opensearch/index/IndexSettings.java | 2 +- .../java/org/opensearch/index/shard/StoreRecovery.java | 2 +- .../indices/cluster/IndicesClusterStateService.java | 2 +- .../org/opensearch/indices/recovery/MultiFileWriter.java | 2 +- .../indices/recovery/PeerRecoveryTargetService.java | 8 ++++---- .../org/opensearch/indices/recovery/RecoveryListener.java | 4 ++-- .../org/opensearch/indices/recovery/RecoveryState.java | 6 +++--- .../org/opensearch/indices/recovery/RecoveryTarget.java | 8 ++++---- .../{ => replication}/common/ReplicationLuceneIndex.java | 2 +- .../common/ReplicationRequestTracker.java | 2 +- .../{ => replication}/common/ReplicationTimer.java | 2 +- .../indices/{ => replication}/common/ReplicationType.java | 2 +- .../indices/{ => replication}/common/ShardTarget.java | 2 +- .../{ => replication}/common/ShardTargetCollection.java | 2 +- .../{ => replication}/common/ShardTargetListener.java | 2 +- .../{ => replication}/common/ShardTargetState.java | 2 +- .../indices/{ => replication}/common/package-info.java | 2 +- .../index/replication/RecoveryDuringReplicationTests.java | 2 +- .../java/org/opensearch/index/shard/IndexShardTests.java | 2 +- .../org/opensearch/index/shard/StoreRecoveryTests.java | 2 +- .../indices/recovery/RecoverySourceHandlerTests.java | 2 +- .../opensearch/indices/recovery/RecoveryTargetTests.java | 6 +++--- .../org/opensearch/indices/recovery/RecoveryTests.java | 4 ++-- .../indices/recovery/ReplicationRequestTrackerTests.java | 2 +- .../opensearch/recovery/RecoveriesCollectionTests.java | 6 +++--- .../org/opensearch/repositories/fs/FsRepositoryTests.java | 2 +- .../rest/action/cat/RestRecoveryActionTests.java | 4 ++-- .../org/opensearch/index/shard/IndexShardTestCase.java | 4 ++-- 31 files changed, 47 insertions(+), 47 deletions(-) rename server/src/main/java/org/opensearch/indices/{ => replication}/common/ReplicationLuceneIndex.java (99%) rename 
server/src/main/java/org/opensearch/indices/{ => replication}/common/ReplicationRequestTracker.java (98%) rename server/src/main/java/org/opensearch/indices/{ => replication}/common/ReplicationTimer.java (98%) rename server/src/main/java/org/opensearch/indices/{ => replication}/common/ReplicationType.java (94%) rename server/src/main/java/org/opensearch/indices/{ => replication}/common/ShardTarget.java (99%) rename server/src/main/java/org/opensearch/indices/{ => replication}/common/ShardTargetCollection.java (99%) rename server/src/main/java/org/opensearch/indices/{ => replication}/common/ShardTargetListener.java (91%) rename server/src/main/java/org/opensearch/indices/{ => replication}/common/ShardTargetState.java (87%) rename server/src/main/java/org/opensearch/indices/{ => replication}/common/package-info.java (83%) diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java index 7d43ae06cb92e..79ffe12d13129 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java @@ -62,7 +62,7 @@ import org.opensearch.index.shard.ShardPath; import org.opensearch.indices.IndicesService; import org.opensearch.indices.recovery.RecoveryState; -import org.opensearch.indices.common.ReplicationLuceneIndex; +import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.plugins.Plugin; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.OpenSearchIntegTestCase.ClusterScope; diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java index 7d90f2d8fac71..855cb34ae0587 100644 --- 
a/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java @@ -101,7 +101,7 @@ import org.opensearch.indices.IndicesService; import org.opensearch.indices.NodeIndicesStats; import org.opensearch.indices.analysis.AnalysisModule; -import org.opensearch.indices.common.ReplicationLuceneIndex; +import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.indices.recovery.RecoveryState.Stage; import org.opensearch.node.NodeClosedException; import org.opensearch.node.RecoverySettingsChunkSizePlugin; diff --git a/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java index 696599c7f57b9..ec70e642ababc 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java @@ -70,7 +70,7 @@ import org.opensearch.index.mapper.MapperService; import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.shard.ShardId; -import org.opensearch.indices.common.ReplicationType; +import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.rest.RestStatus; import java.io.IOException; diff --git a/server/src/main/java/org/opensearch/index/IndexSettings.java b/server/src/main/java/org/opensearch/index/IndexSettings.java index 177e490beb3bf..e40acb94ee498 100644 --- a/server/src/main/java/org/opensearch/index/IndexSettings.java +++ b/server/src/main/java/org/opensearch/index/IndexSettings.java @@ -46,7 +46,7 @@ import org.opensearch.common.unit.ByteSizeValue; import org.opensearch.common.unit.TimeValue; import org.opensearch.index.translog.Translog; -import org.opensearch.indices.common.ReplicationType; +import org.opensearch.indices.replication.common.ReplicationType; import 
org.opensearch.ingest.IngestService; import org.opensearch.node.Node; diff --git a/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java b/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java index b68727b07990f..0499140237dbd 100644 --- a/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java +++ b/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java @@ -62,7 +62,7 @@ import org.opensearch.index.store.Store; import org.opensearch.index.translog.Translog; import org.opensearch.indices.recovery.RecoveryState; -import org.opensearch.indices.common.ReplicationLuceneIndex; +import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.repositories.IndexId; import org.opensearch.repositories.Repository; diff --git a/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java b/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java index db6f5c4fd9d21..29f9511db6528 100644 --- a/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java +++ b/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java @@ -80,7 +80,7 @@ import org.opensearch.indices.recovery.PeerRecoveryTargetService; import org.opensearch.indices.recovery.RecoveryListener; import org.opensearch.indices.recovery.RecoveryState; -import org.opensearch.indices.common.ShardTargetState; +import org.opensearch.indices.replication.common.ShardTargetState; import org.opensearch.repositories.RepositoriesService; import org.opensearch.search.SearchService; import org.opensearch.snapshots.SnapshotShardsService; diff --git a/server/src/main/java/org/opensearch/indices/recovery/MultiFileWriter.java b/server/src/main/java/org/opensearch/indices/recovery/MultiFileWriter.java index b9aeba4d45892..3509615052707 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/MultiFileWriter.java +++ 
b/server/src/main/java/org/opensearch/indices/recovery/MultiFileWriter.java @@ -44,7 +44,7 @@ import org.opensearch.common.util.concurrent.ConcurrentCollections; import org.opensearch.index.store.Store; import org.opensearch.index.store.StoreFileMetadata; -import org.opensearch.indices.common.ReplicationLuceneIndex; +import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.transport.Transports; import java.io.IOException; diff --git a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java index 475f336b44c21..34c0c7204884a 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java +++ b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java @@ -69,10 +69,10 @@ import org.opensearch.index.store.Store; import org.opensearch.index.translog.Translog; import org.opensearch.index.translog.TranslogCorruptedException; -import org.opensearch.indices.common.ShardTargetCollection; -import org.opensearch.indices.common.ShardTargetCollection.ShardTargetRef; -import org.opensearch.indices.common.ReplicationLuceneIndex; -import org.opensearch.indices.common.ReplicationTimer; +import org.opensearch.indices.replication.common.ShardTargetCollection; +import org.opensearch.indices.replication.common.ShardTargetCollection.ShardTargetRef; +import org.opensearch.indices.replication.common.ReplicationLuceneIndex; +import org.opensearch.indices.replication.common.ReplicationTimer; import org.opensearch.tasks.Task; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.ConnectTransportException; diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveryListener.java b/server/src/main/java/org/opensearch/indices/recovery/RecoveryListener.java index 50bd5de2382c9..944fa35ad8210 100644 --- 
a/server/src/main/java/org/opensearch/indices/recovery/RecoveryListener.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoveryListener.java @@ -11,8 +11,8 @@ import org.opensearch.OpenSearchException; import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.indices.cluster.IndicesClusterStateService; -import org.opensearch.indices.common.ShardTargetListener; -import org.opensearch.indices.common.ShardTargetState; +import org.opensearch.indices.replication.common.ShardTargetListener; +import org.opensearch.indices.replication.common.ShardTargetState; /** * Listener that runs on changes in Recovery state diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveryState.java b/server/src/main/java/org/opensearch/indices/recovery/RecoveryState.java index 1ba1d23ca46b8..293a77e87f2f0 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoveryState.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoveryState.java @@ -45,9 +45,9 @@ import org.opensearch.common.xcontent.XContentBuilder; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.ShardId; -import org.opensearch.indices.common.ShardTargetState; -import org.opensearch.indices.common.ReplicationLuceneIndex; -import org.opensearch.indices.common.ReplicationTimer; +import org.opensearch.indices.replication.common.ShardTargetState; +import org.opensearch.indices.replication.common.ReplicationLuceneIndex; +import org.opensearch.indices.replication.common.ReplicationTimer; import java.io.IOException; import java.util.Locale; diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java b/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java index abb532a453db4..6459155e72cac 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java @@ -55,10 +55,10 @@ import 
org.opensearch.index.store.Store; import org.opensearch.index.store.StoreFileMetadata; import org.opensearch.index.translog.Translog; -import org.opensearch.indices.common.ReplicationLuceneIndex; -import org.opensearch.indices.common.ShardTarget; -import org.opensearch.indices.common.ShardTargetListener; -import org.opensearch.indices.common.ShardTargetCollection; +import org.opensearch.indices.replication.common.ReplicationLuceneIndex; +import org.opensearch.indices.replication.common.ShardTarget; +import org.opensearch.indices.replication.common.ShardTargetListener; +import org.opensearch.indices.replication.common.ShardTargetCollection; import java.io.IOException; import java.nio.file.Path; diff --git a/server/src/main/java/org/opensearch/indices/common/ReplicationLuceneIndex.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationLuceneIndex.java similarity index 99% rename from server/src/main/java/org/opensearch/indices/common/ReplicationLuceneIndex.java rename to server/src/main/java/org/opensearch/indices/replication/common/ReplicationLuceneIndex.java index c9566ee18b7a5..d1066b867f982 100644 --- a/server/src/main/java/org/opensearch/indices/common/ReplicationLuceneIndex.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationLuceneIndex.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.indices.common; +package org.opensearch.indices.replication.common; import org.opensearch.common.Strings; import org.opensearch.common.io.stream.StreamInput; diff --git a/server/src/main/java/org/opensearch/indices/common/ReplicationRequestTracker.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationRequestTracker.java similarity index 98% rename from server/src/main/java/org/opensearch/indices/common/ReplicationRequestTracker.java rename to server/src/main/java/org/opensearch/indices/replication/common/ReplicationRequestTracker.java index 07fd606ee083c..7ac31b5a49ea2 100644 --- a/server/src/main/java/org/opensearch/indices/common/ReplicationRequestTracker.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationRequestTracker.java @@ -30,7 +30,7 @@ * GitHub history for details. */ -package org.opensearch.indices.common; +package org.opensearch.indices.replication.common; import org.opensearch.action.ActionListener; import org.opensearch.common.Nullable; diff --git a/server/src/main/java/org/opensearch/indices/common/ReplicationTimer.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTimer.java similarity index 98% rename from server/src/main/java/org/opensearch/indices/common/ReplicationTimer.java rename to server/src/main/java/org/opensearch/indices/replication/common/ReplicationTimer.java index 241cb17039835..eeb0ba8bb2c00 100644 --- a/server/src/main/java/org/opensearch/indices/common/ReplicationTimer.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTimer.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.indices.common; +package org.opensearch.indices.replication.common; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; diff --git a/server/src/main/java/org/opensearch/indices/common/ReplicationType.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationType.java similarity index 94% rename from server/src/main/java/org/opensearch/indices/common/ReplicationType.java rename to server/src/main/java/org/opensearch/indices/replication/common/ReplicationType.java index d0499b64a328e..ac4840e390c0b 100644 --- a/server/src/main/java/org/opensearch/indices/common/ReplicationType.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationType.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.indices.common; +package org.opensearch.indices.replication.common; /** * Enumerates the types of replication strategies supported by OpenSearch. diff --git a/server/src/main/java/org/opensearch/indices/common/ShardTarget.java b/server/src/main/java/org/opensearch/indices/replication/common/ShardTarget.java similarity index 99% rename from server/src/main/java/org/opensearch/indices/common/ShardTarget.java rename to server/src/main/java/org/opensearch/indices/replication/common/ShardTarget.java index 373c6404c9b8a..952cbd2f1d683 100644 --- a/server/src/main/java/org/opensearch/indices/common/ShardTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ShardTarget.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.indices.common; +package org.opensearch.indices.replication.common; import org.apache.logging.log4j.Logger; import org.opensearch.ExceptionsHelper; diff --git a/server/src/main/java/org/opensearch/indices/common/ShardTargetCollection.java b/server/src/main/java/org/opensearch/indices/replication/common/ShardTargetCollection.java similarity index 99% rename from server/src/main/java/org/opensearch/indices/common/ShardTargetCollection.java rename to server/src/main/java/org/opensearch/indices/replication/common/ShardTargetCollection.java index 2ab0fcd23a98c..079a60526ea74 100644 --- a/server/src/main/java/org/opensearch/indices/common/ShardTargetCollection.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ShardTargetCollection.java @@ -30,7 +30,7 @@ * GitHub history for details. */ -package org.opensearch.indices.common; +package org.opensearch.indices.replication.common; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; diff --git a/server/src/main/java/org/opensearch/indices/common/ShardTargetListener.java b/server/src/main/java/org/opensearch/indices/replication/common/ShardTargetListener.java similarity index 91% rename from server/src/main/java/org/opensearch/indices/common/ShardTargetListener.java rename to server/src/main/java/org/opensearch/indices/replication/common/ShardTargetListener.java index 56ee2e6271bb8..7517057a16f88 100644 --- a/server/src/main/java/org/opensearch/indices/common/ShardTargetListener.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ShardTargetListener.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.indices.common; +package org.opensearch.indices.replication.common; import org.opensearch.OpenSearchException; diff --git a/server/src/main/java/org/opensearch/indices/common/ShardTargetState.java b/server/src/main/java/org/opensearch/indices/replication/common/ShardTargetState.java similarity index 87% rename from server/src/main/java/org/opensearch/indices/common/ShardTargetState.java rename to server/src/main/java/org/opensearch/indices/replication/common/ShardTargetState.java index a0a70891cd143..d66ec99e0a6db 100644 --- a/server/src/main/java/org/opensearch/indices/common/ShardTargetState.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ShardTargetState.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.indices.common; +package org.opensearch.indices.replication.common; /** * Represents a state object used to track copying of segments from an external source diff --git a/server/src/main/java/org/opensearch/indices/common/package-info.java b/server/src/main/java/org/opensearch/indices/replication/common/package-info.java similarity index 83% rename from server/src/main/java/org/opensearch/indices/common/package-info.java rename to server/src/main/java/org/opensearch/indices/replication/common/package-info.java index 32da849ed9f34..77a8422f83829 100644 --- a/server/src/main/java/org/opensearch/indices/common/package-info.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/package-info.java @@ -7,4 +7,4 @@ */ /** Foundation classes for Replication Implementation. 
*/ -package org.opensearch.indices.common; +package org.opensearch.indices.replication.common; diff --git a/server/src/test/java/org/opensearch/index/replication/RecoveryDuringReplicationTests.java b/server/src/test/java/org/opensearch/index/replication/RecoveryDuringReplicationTests.java index 6a5b8fc77689e..dcf54a0ecbfcf 100644 --- a/server/src/test/java/org/opensearch/index/replication/RecoveryDuringReplicationTests.java +++ b/server/src/test/java/org/opensearch/index/replication/RecoveryDuringReplicationTests.java @@ -71,7 +71,7 @@ import org.opensearch.index.translog.Translog; import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.recovery.RecoveryTarget; -import org.opensearch.indices.common.ShardTargetListener; +import org.opensearch.indices.replication.common.ShardTargetListener; import java.io.IOException; import java.util.ArrayList; diff --git a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java index f340e72c83d0f..e54d30c626812 100644 --- a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java @@ -133,7 +133,7 @@ import org.opensearch.indices.fielddata.cache.IndicesFieldDataCache; import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.recovery.RecoveryTarget; -import org.opensearch.indices.common.ReplicationLuceneIndex; +import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.repositories.IndexId; import org.opensearch.snapshots.Snapshot; import org.opensearch.snapshots.SnapshotId; diff --git a/server/src/test/java/org/opensearch/index/shard/StoreRecoveryTests.java b/server/src/test/java/org/opensearch/index/shard/StoreRecoveryTests.java index f397822422295..64b6490ace1c7 100644 --- a/server/src/test/java/org/opensearch/index/shard/StoreRecoveryTests.java +++ 
b/server/src/test/java/org/opensearch/index/shard/StoreRecoveryTests.java @@ -61,7 +61,7 @@ import org.opensearch.index.mapper.IdFieldMapper; import org.opensearch.index.mapper.Uid; import org.opensearch.index.seqno.SequenceNumbers; -import org.opensearch.indices.common.ReplicationLuceneIndex; +import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.test.OpenSearchTestCase; import java.io.IOException; diff --git a/server/src/test/java/org/opensearch/indices/recovery/RecoverySourceHandlerTests.java b/server/src/test/java/org/opensearch/indices/recovery/RecoverySourceHandlerTests.java index 2036683048c08..1739f546150d9 100644 --- a/server/src/test/java/org/opensearch/indices/recovery/RecoverySourceHandlerTests.java +++ b/server/src/test/java/org/opensearch/indices/recovery/RecoverySourceHandlerTests.java @@ -94,7 +94,7 @@ import org.opensearch.index.store.Store; import org.opensearch.index.store.StoreFileMetadata; import org.opensearch.index.translog.Translog; -import org.opensearch.indices.common.ReplicationLuceneIndex; +import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.test.CorruptionUtils; import org.opensearch.test.DummyShardLock; import org.opensearch.test.IndexSettingsModule; diff --git a/server/src/test/java/org/opensearch/indices/recovery/RecoveryTargetTests.java b/server/src/test/java/org/opensearch/indices/recovery/RecoveryTargetTests.java index 8f4ec5b6eb05f..bb296f32ca8bd 100644 --- a/server/src/test/java/org/opensearch/indices/recovery/RecoveryTargetTests.java +++ b/server/src/test/java/org/opensearch/indices/recovery/RecoveryTargetTests.java @@ -41,12 +41,12 @@ import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.Writeable; import org.opensearch.index.shard.ShardId; -import org.opensearch.indices.common.ReplicationLuceneIndex.FileMetadata; -import org.opensearch.indices.common.ReplicationLuceneIndex; +import 
org.opensearch.indices.replication.common.ReplicationLuceneIndex.FileMetadata; +import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.indices.recovery.RecoveryState.Stage; import org.opensearch.indices.recovery.RecoveryState.Translog; import org.opensearch.indices.recovery.RecoveryState.VerifyIndex; -import org.opensearch.indices.common.ReplicationTimer; +import org.opensearch.indices.replication.common.ReplicationTimer; import org.opensearch.test.OpenSearchTestCase; import java.io.IOException; diff --git a/server/src/test/java/org/opensearch/indices/recovery/RecoveryTests.java b/server/src/test/java/org/opensearch/indices/recovery/RecoveryTests.java index ee0a23c6a1297..6428a775a92de 100644 --- a/server/src/test/java/org/opensearch/indices/recovery/RecoveryTests.java +++ b/server/src/test/java/org/opensearch/indices/recovery/RecoveryTests.java @@ -69,8 +69,8 @@ import org.opensearch.index.store.Store; import org.opensearch.index.translog.SnapshotMatchers; import org.opensearch.index.translog.Translog; -import org.opensearch.indices.common.ShardTargetListener; -import org.opensearch.indices.common.ShardTargetState; +import org.opensearch.indices.replication.common.ShardTargetListener; +import org.opensearch.indices.replication.common.ShardTargetState; import java.io.IOException; import java.util.HashMap; diff --git a/server/src/test/java/org/opensearch/indices/recovery/ReplicationRequestTrackerTests.java b/server/src/test/java/org/opensearch/indices/recovery/ReplicationRequestTrackerTests.java index 1c4823c2ed9ab..afad385deabe4 100644 --- a/server/src/test/java/org/opensearch/indices/recovery/ReplicationRequestTrackerTests.java +++ b/server/src/test/java/org/opensearch/indices/recovery/ReplicationRequestTrackerTests.java @@ -36,7 +36,7 @@ import org.opensearch.action.ActionListener; import org.opensearch.action.support.PlainActionFuture; import org.opensearch.common.util.concurrent.ConcurrentCollections; -import 
org.opensearch.indices.common.ReplicationRequestTracker; +import org.opensearch.indices.replication.common.ReplicationRequestTracker; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.TestThreadPool; diff --git a/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java b/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java index 8bfbb1744f90b..e77b0753d5497 100644 --- a/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java +++ b/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java @@ -38,9 +38,9 @@ import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.ShardId; import org.opensearch.index.store.Store; -import org.opensearch.indices.common.ShardTargetCollection; -import org.opensearch.indices.common.ShardTargetListener; -import org.opensearch.indices.common.ShardTargetState; +import org.opensearch.indices.replication.common.ShardTargetCollection; +import org.opensearch.indices.replication.common.ShardTargetListener; +import org.opensearch.indices.replication.common.ShardTargetState; import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.recovery.RecoveryTarget; diff --git a/server/src/test/java/org/opensearch/repositories/fs/FsRepositoryTests.java b/server/src/test/java/org/opensearch/repositories/fs/FsRepositoryTests.java index 599991784c43f..53f124a91f0ac 100644 --- a/server/src/test/java/org/opensearch/repositories/fs/FsRepositoryTests.java +++ b/server/src/test/java/org/opensearch/repositories/fs/FsRepositoryTests.java @@ -68,7 +68,7 @@ import org.opensearch.index.store.Store; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.indices.recovery.RecoveryState; -import org.opensearch.indices.common.ReplicationLuceneIndex; +import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.repositories.IndexId; import 
org.opensearch.repositories.blobstore.BlobStoreTestUtil; import org.opensearch.snapshots.Snapshot; diff --git a/server/src/test/java/org/opensearch/rest/action/cat/RestRecoveryActionTests.java b/server/src/test/java/org/opensearch/rest/action/cat/RestRecoveryActionTests.java index 517d5361bdb9f..efd31dda92d09 100644 --- a/server/src/test/java/org/opensearch/rest/action/cat/RestRecoveryActionTests.java +++ b/server/src/test/java/org/opensearch/rest/action/cat/RestRecoveryActionTests.java @@ -45,8 +45,8 @@ import org.opensearch.index.Index; import org.opensearch.index.shard.ShardId; import org.opensearch.indices.recovery.RecoveryState; -import org.opensearch.indices.common.ReplicationLuceneIndex; -import org.opensearch.indices.common.ReplicationTimer; +import org.opensearch.indices.replication.common.ReplicationLuceneIndex; +import org.opensearch.indices.replication.common.ReplicationTimer; import org.opensearch.test.OpenSearchTestCase; import java.util.ArrayList; diff --git a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java index 392fba45f9260..36c73963b9c26 100644 --- a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java @@ -94,8 +94,8 @@ import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.recovery.RecoveryTarget; import org.opensearch.indices.recovery.StartRecoveryRequest; -import org.opensearch.indices.common.ShardTargetListener; -import org.opensearch.indices.common.ShardTargetState; +import org.opensearch.indices.replication.common.ShardTargetListener; +import org.opensearch.indices.replication.common.ShardTargetState; import org.opensearch.repositories.IndexId; import org.opensearch.repositories.Repository; import org.opensearch.repositories.blobstore.OpenSearchBlobStoreRepositoryIntegTestCase; From 
233acbcd4044ba9695b0816419a8df766e8e162f Mon Sep 17 00:00:00 2001 From: Poojita Raj Date: Wed, 18 May 2022 15:17:42 -0700 Subject: [PATCH 09/10] rename to replication Signed-off-by: Poojita Raj --- .../cluster/IndicesClusterStateService.java | 4 +- .../recovery/PeerRecoveryTargetService.java | 34 ++++++++-------- .../indices/recovery/RecoveryListener.java | 10 ++--- .../indices/recovery/RecoveryState.java | 4 +- .../indices/recovery/RecoveryTarget.java | 12 +++--- ...ection.java => ReplicationCollection.java} | 40 +++++++++---------- ...Listener.java => ReplicationListener.java} | 8 ++-- .../common/ReplicationRequestTracker.java | 2 +- ...TargetState.java => ReplicationState.java} | 2 +- ...hardTarget.java => ReplicationTarget.java} | 14 +++---- .../RecoveryDuringReplicationTests.java | 4 +- .../indices/recovery/RecoveryTests.java | 10 ++--- .../recovery/RecoveriesCollectionTests.java | 40 +++++++++---------- .../index/shard/IndexShardTestCase.java | 10 ++--- 14 files changed, 97 insertions(+), 97 deletions(-) rename server/src/main/java/org/opensearch/indices/replication/common/{ShardTargetCollection.java => ReplicationCollection.java} (86%) rename server/src/main/java/org/opensearch/indices/replication/common/{ShardTargetListener.java => ReplicationListener.java} (61%) rename server/src/main/java/org/opensearch/indices/replication/common/{ShardTargetState.java => ReplicationState.java} (91%) rename server/src/main/java/org/opensearch/indices/replication/common/{ShardTarget.java => ReplicationTarget.java} (91%) diff --git a/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java b/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java index 29f9511db6528..d1623df156593 100644 --- a/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java +++ b/server/src/main/java/org/opensearch/indices/cluster/IndicesClusterStateService.java @@ -80,7 +80,7 @@ import 
org.opensearch.indices.recovery.PeerRecoveryTargetService; import org.opensearch.indices.recovery.RecoveryListener; import org.opensearch.indices.recovery.RecoveryState; -import org.opensearch.indices.replication.common.ShardTargetState; +import org.opensearch.indices.replication.common.ReplicationState; import org.opensearch.repositories.RepositoriesService; import org.opensearch.search.SearchService; import org.opensearch.snapshots.SnapshotShardsService; @@ -745,7 +745,7 @@ public synchronized void handleRecoveryFailure(ShardRouting shardRouting, boolea failAndRemoveShard(shardRouting, sendShardFailure, "failed recovery", failure, clusterService.state()); } - public void handleRecoveryDone(ShardTargetState state, ShardRouting shardRouting, long primaryTerm) { + public void handleRecoveryDone(ReplicationState state, ShardRouting shardRouting, long primaryTerm) { RecoveryState RecState = (RecoveryState) state; shardStateAction.shardStarted(shardRouting, primaryTerm, "after " + RecState.getRecoverySource(), SHARD_STATE_ACTION_LISTENER); } diff --git a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java index 34c0c7204884a..37b16212a35ee 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java +++ b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java @@ -69,8 +69,8 @@ import org.opensearch.index.store.Store; import org.opensearch.index.translog.Translog; import org.opensearch.index.translog.TranslogCorruptedException; -import org.opensearch.indices.replication.common.ShardTargetCollection; -import org.opensearch.indices.replication.common.ShardTargetCollection.ShardTargetRef; +import org.opensearch.indices.replication.common.ReplicationCollection; +import org.opensearch.indices.replication.common.ReplicationCollection.ReplicationRef; import 
org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.indices.replication.common.ReplicationTimer; import org.opensearch.tasks.Task; @@ -125,7 +125,7 @@ public static class Actions { private final RecoverySettings recoverySettings; private final ClusterService clusterService; - private final ShardTargetCollection onGoingRecoveries; + private final ReplicationCollection onGoingRecoveries; public PeerRecoveryTargetService( ThreadPool threadPool, @@ -137,7 +137,7 @@ public PeerRecoveryTargetService( this.transportService = transportService; this.recoverySettings = recoverySettings; this.clusterService = clusterService; - this.onGoingRecoveries = new ShardTargetCollection<>(logger, threadPool); + this.onGoingRecoveries = new ReplicationCollection<>(logger, threadPool); transportService.registerRequestHandler( Actions.FILES_INFO, @@ -229,7 +229,7 @@ private void doRecovery(final long recoveryId, final StartRecoveryRequest preExi final TransportRequest requestToSend; final StartRecoveryRequest startRequest; final ReplicationTimer timer; - try (ShardTargetCollection.ShardTargetRef recoveryRef = onGoingRecoveries.get(recoveryId)) { + try (ReplicationRef recoveryRef = onGoingRecoveries.get(recoveryId)) { if (recoveryRef == null) { logger.trace("not running recovery with id [{}] - can not find it (probably finished)", recoveryId); return; @@ -353,7 +353,7 @@ class PrepareForTranslogOperationsRequestHandler implements TransportRequestHand @Override public void messageReceived(RecoveryPrepareForTranslogOperationsRequest request, TransportChannel channel, Task task) { - try (ShardTargetRef recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { + try (ReplicationRef recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { final ActionListener listener = createOrFinishListener(recoveryRef, channel, Actions.PREPARE_TRANSLOG, request); if (listener == null) { return; @@ -368,7 +368,7 @@ class 
FinalizeRecoveryRequestHandler implements TransportRequestHandler recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { + try (ReplicationRef recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { final ActionListener listener = createOrFinishListener(recoveryRef, channel, Actions.FINALIZE, request); if (listener == null) { return; @@ -384,7 +384,7 @@ class HandoffPrimaryContextRequestHandler implements TransportRequestHandler recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { + try (ReplicationRef recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { recoveryRef.get().handoffPrimaryContext(request.primaryContext()); } channel.sendResponse(TransportResponse.Empty.INSTANCE); @@ -397,7 +397,7 @@ class TranslogOperationsRequestHandler implements TransportRequestHandler recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { + try (ReplicationRef recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { final RecoveryTarget recoveryTarget = recoveryRef.get(); final ActionListener listener = createOrFinishListener( recoveryRef, @@ -417,7 +417,7 @@ public void messageReceived(final RecoveryTranslogOperationsRequest request, fin private void performTranslogOps( final RecoveryTranslogOperationsRequest request, final ActionListener listener, - final ShardTargetRef recoveryRef + final ReplicationRef recoveryRef ) { final RecoveryTarget recoveryTarget = recoveryRef.get(); @@ -433,7 +433,7 @@ private void performTranslogOps( public void onNewClusterState(ClusterState state) { threadPool.generic().execute(ActionRunnable.wrap(listener, l -> { try ( - ShardTargetRef recoveryRef = onGoingRecoveries.getSafe( + ReplicationRef recoveryRef = onGoingRecoveries.getSafe( request.recoveryId(), request.shardId() ) @@ -483,7 +483,7 @@ class FilesInfoRequestHandler implements TransportRequestHandler recoveryRef = 
onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { + try (ReplicationRef recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { final ActionListener listener = createOrFinishListener(recoveryRef, channel, Actions.FILES_INFO, request); if (listener == null) { return; @@ -506,7 +506,7 @@ class CleanFilesRequestHandler implements TransportRequestHandler recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { + try (ReplicationRef recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { final ActionListener listener = createOrFinishListener(recoveryRef, channel, Actions.CLEAN_FILES, request); if (listener == null) { return; @@ -525,7 +525,7 @@ class FileChunkTransportRequestHandler implements TransportRequestHandler recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { + try (ReplicationRef recoveryRef = onGoingRecoveries.getSafe(request.recoveryId(), request.shardId())) { final RecoveryTarget recoveryTarget = recoveryRef.get(); final ActionListener listener = createOrFinishListener(recoveryRef, channel, Actions.FILE_CHUNK, request); if (listener == null) { @@ -561,7 +561,7 @@ public void messageReceived(final RecoveryFileChunkRequest request, TransportCha } private ActionListener createOrFinishListener( - final ShardTargetRef recoveryRef, + final ReplicationRef recoveryRef, final TransportChannel channel, final String action, final RecoveryTransportRequest request @@ -570,7 +570,7 @@ private ActionListener createOrFinishListener( } private ActionListener createOrFinishListener( - final ShardTargetRef recoveryRef, + final ReplicationRef recoveryRef, final TransportChannel channel, final String action, final RecoveryTransportRequest request, @@ -607,7 +607,7 @@ class RecoveryRunner extends AbstractRunnable { @Override public void onFailure(Exception e) { - try (ShardTargetRef recoveryRef = onGoingRecoveries.get(recoveryId)) { + try 
(ReplicationRef recoveryRef = onGoingRecoveries.get(recoveryId)) { if (recoveryRef != null) { logger.error(() -> new ParameterizedMessage("unexpected error during recovery [{}], failing shard", recoveryId), e); onGoingRecoveries.fail( diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveryListener.java b/server/src/main/java/org/opensearch/indices/recovery/RecoveryListener.java index 944fa35ad8210..b93c054ffa4bf 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoveryListener.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoveryListener.java @@ -11,15 +11,15 @@ import org.opensearch.OpenSearchException; import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.indices.cluster.IndicesClusterStateService; -import org.opensearch.indices.replication.common.ShardTargetListener; -import org.opensearch.indices.replication.common.ShardTargetState; +import org.opensearch.indices.replication.common.ReplicationListener; +import org.opensearch.indices.replication.common.ReplicationState; /** * Listener that runs on changes in Recovery state * * @opensearch.internal */ -public class RecoveryListener implements ShardTargetListener { +public class RecoveryListener implements ReplicationListener { /** * ShardRouting with which the shard was created @@ -44,12 +44,12 @@ public RecoveryListener( } @Override - public void onDone(ShardTargetState state) { + public void onDone(ReplicationState state) { indicesClusterStateService.handleRecoveryDone(state, shardRouting, primaryTerm); } @Override - public void onFailure(ShardTargetState state, OpenSearchException e, boolean sendShardFailure) { + public void onFailure(ReplicationState state, OpenSearchException e, boolean sendShardFailure) { indicesClusterStateService.handleRecoveryFailure(shardRouting, sendShardFailure, e); } } diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveryState.java 
b/server/src/main/java/org/opensearch/indices/recovery/RecoveryState.java index 293a77e87f2f0..a3c7adb755145 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoveryState.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoveryState.java @@ -45,7 +45,7 @@ import org.opensearch.common.xcontent.XContentBuilder; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.ShardId; -import org.opensearch.indices.replication.common.ShardTargetState; +import org.opensearch.indices.replication.common.ReplicationState; import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.indices.replication.common.ReplicationTimer; @@ -57,7 +57,7 @@ * * @opensearch.internal */ -public class RecoveryState implements ShardTargetState, ToXContentFragment, Writeable { +public class RecoveryState implements ReplicationState, ToXContentFragment, Writeable { /** * The stage of the recovery state diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java b/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java index 6459155e72cac..052ae035db30a 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java @@ -56,9 +56,9 @@ import org.opensearch.index.store.StoreFileMetadata; import org.opensearch.index.translog.Translog; import org.opensearch.indices.replication.common.ReplicationLuceneIndex; -import org.opensearch.indices.replication.common.ShardTarget; -import org.opensearch.indices.replication.common.ShardTargetListener; -import org.opensearch.indices.replication.common.ShardTargetCollection; +import org.opensearch.indices.replication.common.ReplicationTarget; +import org.opensearch.indices.replication.common.ReplicationListener; +import org.opensearch.indices.replication.common.ReplicationCollection; import java.io.IOException; import java.nio.file.Path; 
@@ -67,11 +67,11 @@ /** * Represents a recovery where the current node is the target node of the recovery. To track recoveries in a central place, instances of - * this class are created through {@link ShardTargetCollection}. + * this class are created through {@link ReplicationCollection}. * * @opensearch.internal */ -public class RecoveryTarget extends ShardTarget implements RecoveryTargetHandler { +public class RecoveryTarget extends ReplicationTarget implements RecoveryTargetHandler { private static final String RECOVERY_PREFIX = "recovery."; @@ -90,7 +90,7 @@ public class RecoveryTarget extends ShardTarget implements RecoveryTargetHandler * @param sourceNode source node of the recovery where we recover from * @param listener called when recovery is completed/failed */ - public RecoveryTarget(IndexShard indexShard, DiscoveryNode sourceNode, ShardTargetListener listener) { + public RecoveryTarget(IndexShard indexShard, DiscoveryNode sourceNode, ReplicationListener listener) { super("recovery_status", indexShard, indexShard.recoveryState().getIndex(), listener); this.cancellableThreads = new CancellableThreads(); this.sourceNode = sourceNode; diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ShardTargetCollection.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationCollection.java similarity index 86% rename from server/src/main/java/org/opensearch/indices/replication/common/ShardTargetCollection.java rename to server/src/main/java/org/opensearch/indices/replication/common/ReplicationCollection.java index 079a60526ea74..e0ac90ef12ba2 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ShardTargetCollection.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationCollection.java @@ -51,14 +51,14 @@ import java.util.concurrent.ConcurrentMap; /** - * This class holds a collection of all on going events on the current node (i.e., the node is the target node - * 
of those events). The class is used to guarantee concurrent semantics such that once a recoveries was done/cancelled/failed - * no other thread will be able to find it. Last, the {@link ShardTargetRef} inner class verifies that recovery temporary files + * This class holds a collection of all on going replication events on the current node (i.e., the node is the target node + * of those events). The class is used to guarantee concurrent semantics such that once an event was done/cancelled/failed + * no other thread will be able to find it. Last, the {@link ReplicationRef} inner class verifies that temporary files * and store will only be cleared once on going usage is finished. * * @opensearch.internal */ -public class ShardTargetCollection { +public class ReplicationCollection { /** This is the single source of truth for ongoing target events. If it's not here, it was canceled or done */ private final ConcurrentMap onGoingTargetEvents = ConcurrentCollections.newConcurrentMap(); @@ -66,7 +66,7 @@ public class ShardTargetCollection { private final Logger logger; private final ThreadPool threadPool; - public ShardTargetCollection(Logger logger, ThreadPool threadPool) { + public ReplicationCollection(Logger logger, ThreadPool threadPool) { this.logger = logger; this.threadPool = threadPool; } @@ -86,7 +86,7 @@ private void startInternal(T target, TimeValue activityTimeout) { assert existingTarget == null : "found two Target instances with the same id"; logger.trace("started {}", target.description()); threadPool.schedule( - new ShardTargetMonitor(target.getId(), target.lastAccessTime(), activityTimeout), + new ReplicationMonitor(target.getId(), target.lastAccessTime(), activityTimeout), activityTimeout, ThreadPool.Names.GENERIC ); @@ -143,23 +143,23 @@ public T getTarget(long id) { } /** - * gets the {@link ShardTarget } for a given id. The ShardTarget returned has it's ref count already incremented - * to make sure it's safe to use. 
However, you must call {@link ShardTarget#decRef()} when you are done with it, typically + * gets the {@link ReplicationTarget } for a given id. The ReplicationTarget returned has its ref count already incremented + * to make sure it's safe to use. However, you must call {@link ReplicationTarget#decRef()} when you are done with it, typically * by using this method in a try-with-resources clause. *

* Returns null if recovery is not found */ - public ShardTargetRef get(long id) { + public ReplicationRef get(long id) { T status = onGoingTargetEvents.get(id); if (status != null && status.tryIncRef()) { - return new ShardTargetRef(status); + return new ReplicationRef(status); } return null; } /** Similar to {@link #get(long)} but throws an exception if no target is found */ - public ShardTargetRef getSafe(long id, ShardId shardId) { - ShardTargetRef ref = get(id); + public ReplicationRef getSafe(long id, ShardId shardId) { + ReplicationRef ref = get(id); if (ref == null) { throw new IndexShardClosedException(shardId); } @@ -236,31 +236,31 @@ public boolean cancelForShard(ShardId shardId, String reason) { } /** - * a reference to {@link ShardTarget}, which implements {@link AutoCloseable}. closing the reference - * causes {@link ShardTarget#decRef()} to be called. This makes sure that the underlying resources - * will not be freed until {@link ShardTargetRef#close()} is called. + * a reference to {@link ReplicationTarget}, which implements {@link AutoCloseable}. closing the reference + * causes {@link ReplicationTarget#decRef()} to be called. This makes sure that the underlying resources + * will not be freed until {@link ReplicationRef#close()} is called. 
* * @opensearch.internal */ - public static class ShardTargetRef extends AutoCloseableRefCounted { + public static class ReplicationRef extends AutoCloseableRefCounted { /** - * Important: {@link ShardTarget#tryIncRef()} should + * Important: {@link ReplicationTarget#tryIncRef()} should * be *successfully* called on status before */ - public ShardTargetRef(T status) { + public ReplicationRef(T status) { super(status); status.setLastAccessTime(); } } - private class ShardTargetMonitor extends AbstractRunnable { + private class ReplicationMonitor extends AbstractRunnable { private final long id; private final TimeValue checkInterval; private volatile long lastSeenAccessTime; - private ShardTargetMonitor(long id, long lastSeenAccessTime, TimeValue checkInterval) { + private ReplicationMonitor(long id, long lastSeenAccessTime, TimeValue checkInterval) { this.id = id; this.checkInterval = checkInterval; this.lastSeenAccessTime = lastSeenAccessTime; diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ShardTargetListener.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationListener.java similarity index 61% rename from server/src/main/java/org/opensearch/indices/replication/common/ShardTargetListener.java rename to server/src/main/java/org/opensearch/indices/replication/common/ReplicationListener.java index 7517057a16f88..0666f475d496a 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ShardTargetListener.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationListener.java @@ -11,13 +11,13 @@ import org.opensearch.OpenSearchException; /** - * Interface for listeners that run when there's a change in replication state + * Interface for listeners that run when there's a change in {@link ReplicationState} * * @opensearch.internal */ -public interface ShardTargetListener { +public interface ReplicationListener { - void onDone(ShardTargetState state); + void 
onDone(ReplicationState state); - void onFailure(ShardTargetState state, OpenSearchException e, boolean sendShardFailure); + void onFailure(ReplicationState state, OpenSearchException e, boolean sendShardFailure); } diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationRequestTracker.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationRequestTracker.java index 7ac31b5a49ea2..0b0d20fc9f17e 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationRequestTracker.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationRequestTracker.java @@ -45,7 +45,7 @@ import static org.opensearch.index.seqno.SequenceNumbers.NO_OPS_PERFORMED; /** - * Tracks replication/recovery requests + * Tracks replication requests * * @opensearch.internal */ diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ShardTargetState.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java similarity index 91% rename from server/src/main/java/org/opensearch/indices/replication/common/ShardTargetState.java rename to server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java index d66ec99e0a6db..7942fa8938dd0 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ShardTargetState.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationState.java @@ -13,6 +13,6 @@ * * @opensearch.internal */ -public interface ShardTargetState { +public interface ReplicationState { } diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ShardTarget.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java similarity index 91% rename from server/src/main/java/org/opensearch/indices/replication/common/ShardTarget.java rename to server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java index 
952cbd2f1d683..a8c97a731ac75 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ShardTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java @@ -23,11 +23,11 @@ import java.util.concurrent.atomic.AtomicLong; /** - * Represents the target of an operation performed on a shard + * Represents the target of a replication operation performed on a shard * * @opensearch.internal */ -public abstract class ShardTarget extends AbstractRefCounted { +public abstract class ReplicationTarget extends AbstractRefCounted { private static final AtomicLong ID_GENERATOR = new AtomicLong(); @@ -38,7 +38,7 @@ public abstract class ShardTarget extends AbstractRefCounted { protected final AtomicBoolean finished = new AtomicBoolean(); protected final IndexShard indexShard; - protected final ShardTargetListener listener; + protected final ReplicationListener listener; protected final Logger logger; protected final CancellableThreads cancellableThreads; protected final ReplicationLuceneIndex recoveryStateIndex; @@ -49,13 +49,13 @@ public abstract class ShardTarget extends AbstractRefCounted { protected abstract void onCancel(String reason); - public abstract ShardTargetState state(); + public abstract ReplicationState state(); - public abstract ShardTarget retryCopy(); + public abstract ReplicationTarget retryCopy(); public abstract String description(); - public ShardTargetListener getListener() { + public ReplicationListener getListener() { return listener; } @@ -65,7 +65,7 @@ public CancellableThreads cancellableThreads() { public abstract void notifyListener(Exception e, boolean sendShardFailure); - public ShardTarget(String name, IndexShard indexShard, ReplicationLuceneIndex recoveryStateIndex, ShardTargetListener listener) { + public ReplicationTarget(String name, IndexShard indexShard, ReplicationLuceneIndex recoveryStateIndex, ReplicationListener listener) { super(name); this.logger = 
Loggers.getLogger(getClass(), indexShard.shardId()); this.listener = listener; diff --git a/server/src/test/java/org/opensearch/index/replication/RecoveryDuringReplicationTests.java b/server/src/test/java/org/opensearch/index/replication/RecoveryDuringReplicationTests.java index dcf54a0ecbfcf..509d1f52daa0d 100644 --- a/server/src/test/java/org/opensearch/index/replication/RecoveryDuringReplicationTests.java +++ b/server/src/test/java/org/opensearch/index/replication/RecoveryDuringReplicationTests.java @@ -71,7 +71,7 @@ import org.opensearch.index.translog.Translog; import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.recovery.RecoveryTarget; -import org.opensearch.indices.replication.common.ShardTargetListener; +import org.opensearch.indices.replication.common.ReplicationListener; import java.io.IOException; import java.util.ArrayList; @@ -809,7 +809,7 @@ public BlockingTarget( CountDownLatch releaseRecovery, IndexShard shard, DiscoveryNode sourceNode, - ShardTargetListener listener, + ReplicationListener listener, Logger logger ) { super(shard, sourceNode, listener); diff --git a/server/src/test/java/org/opensearch/indices/recovery/RecoveryTests.java b/server/src/test/java/org/opensearch/indices/recovery/RecoveryTests.java index 6428a775a92de..5224a54a35e96 100644 --- a/server/src/test/java/org/opensearch/indices/recovery/RecoveryTests.java +++ b/server/src/test/java/org/opensearch/indices/recovery/RecoveryTests.java @@ -69,8 +69,8 @@ import org.opensearch.index.store.Store; import org.opensearch.index.translog.SnapshotMatchers; import org.opensearch.index.translog.Translog; -import org.opensearch.indices.replication.common.ShardTargetListener; -import org.opensearch.indices.replication.common.ShardTargetState; +import org.opensearch.indices.replication.common.ReplicationListener; +import org.opensearch.indices.replication.common.ReplicationState; import java.io.IOException; import java.util.HashMap; @@ -451,14 +451,14 @@ public 
long addDocument(Iterable doc) throws IOExcepti IndexShard replica = group.addReplica(); expectThrows( Exception.class, - () -> group.recoverReplica(replica, (shard, sourceNode) -> new RecoveryTarget(shard, sourceNode, new ShardTargetListener() { + () -> group.recoverReplica(replica, (shard, sourceNode) -> new RecoveryTarget(shard, sourceNode, new ReplicationListener() { @Override - public void onDone(ShardTargetState state) { + public void onDone(ReplicationState state) { throw new AssertionError("recovery must fail"); } @Override - public void onFailure(ShardTargetState state, OpenSearchException e, boolean sendShardFailure) { + public void onFailure(ReplicationState state, OpenSearchException e, boolean sendShardFailure) { assertThat(ExceptionsHelper.unwrap(e, IOException.class).getMessage(), equalTo("simulated")); } })) diff --git a/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java b/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java index e77b0753d5497..e341dfd086ea4 100644 --- a/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java +++ b/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java @@ -38,9 +38,9 @@ import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.ShardId; import org.opensearch.index.store.Store; -import org.opensearch.indices.replication.common.ShardTargetCollection; -import org.opensearch.indices.replication.common.ShardTargetListener; -import org.opensearch.indices.replication.common.ShardTargetState; +import org.opensearch.indices.replication.common.ReplicationCollection; +import org.opensearch.indices.replication.common.ReplicationListener; +import org.opensearch.indices.replication.common.ReplicationState; import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.recovery.RecoveryTarget; @@ -52,26 +52,26 @@ import static org.hamcrest.Matchers.lessThan; public class RecoveriesCollectionTests 
extends OpenSearchIndexLevelReplicationTestCase { - static final ShardTargetListener listener = new ShardTargetListener() { + static final ReplicationListener listener = new ReplicationListener() { @Override - public void onDone(ShardTargetState state) { + public void onDone(ReplicationState state) { } @Override - public void onFailure(ShardTargetState state, OpenSearchException e, boolean sendShardFailure) { + public void onFailure(ReplicationState state, OpenSearchException e, boolean sendShardFailure) { } }; public void testLastAccessTimeUpdate() throws Exception { try (ReplicationGroup shards = createGroup(0)) { - final ShardTargetCollection collection = new ShardTargetCollection<>(logger, threadPool); + final ReplicationCollection collection = new ReplicationCollection<>(logger, threadPool); final long recoveryId = startRecovery(collection, shards.getPrimaryNode(), shards.addReplica()); - try (ShardTargetCollection.ShardTargetRef status = collection.get(recoveryId)) { + try (ReplicationCollection.ReplicationRef status = collection.get(recoveryId)) { final long lastSeenTime = status.get().lastAccessTime(); assertBusy(() -> { - try (ShardTargetCollection.ShardTargetRef currentStatus = collection.get(recoveryId)) { + try (ReplicationCollection.ReplicationRef currentStatus = collection.get(recoveryId)) { assertThat("access time failed to update", lastSeenTime, lessThan(currentStatus.get().lastAccessTime())); } }); @@ -83,17 +83,17 @@ public void testLastAccessTimeUpdate() throws Exception { public void testRecoveryTimeout() throws Exception { try (ReplicationGroup shards = createGroup(0)) { - final ShardTargetCollection collection = new ShardTargetCollection<>(logger, threadPool); + final ReplicationCollection collection = new ReplicationCollection<>(logger, threadPool); final AtomicBoolean failed = new AtomicBoolean(); final CountDownLatch latch = new CountDownLatch(1); - final long recoveryId = startRecovery(collection, shards.getPrimaryNode(), 
shards.addReplica(), new ShardTargetListener() { + final long recoveryId = startRecovery(collection, shards.getPrimaryNode(), shards.addReplica(), new ReplicationListener() { @Override - public void onDone(ShardTargetState state) { + public void onDone(ReplicationState state) { latch.countDown(); } @Override - public void onFailure(ShardTargetState state, OpenSearchException e, boolean sendShardFailure) { + public void onFailure(ReplicationState state, OpenSearchException e, boolean sendShardFailure) { failed.set(true); latch.countDown(); } @@ -110,10 +110,10 @@ public void onFailure(ShardTargetState state, OpenSearchException e, boolean sen public void testRecoveryCancellation() throws Exception { try (ReplicationGroup shards = createGroup(0)) { - final ShardTargetCollection collection = new ShardTargetCollection<>(logger, threadPool); + final ReplicationCollection collection = new ReplicationCollection<>(logger, threadPool); final long recoveryId = startRecovery(collection, shards.getPrimaryNode(), shards.addReplica()); final long recoveryId2 = startRecovery(collection, shards.getPrimaryNode(), shards.addReplica()); - try (ShardTargetCollection.ShardTargetRef recoveryRef = collection.get(recoveryId)) { + try (ReplicationCollection.ReplicationRef recoveryRef = collection.get(recoveryId)) { ShardId shardId = recoveryRef.get().indexShard().shardId(); assertTrue("failed to cancel recoveries", collection.cancelForShard(shardId, "test")); assertThat("all recoveries should be cancelled", collection.size(), equalTo(0)); @@ -129,7 +129,7 @@ public void testResetRecovery() throws Exception { shards.startAll(); int numDocs = randomIntBetween(1, 15); shards.indexDocs(numDocs); - final ShardTargetCollection collection = new ShardTargetCollection<>(logger, threadPool); + final ReplicationCollection collection = new ReplicationCollection<>(logger, threadPool); IndexShard shard = shards.addReplica(); final long recoveryId = startRecovery(collection, shards.getPrimaryNode(), 
shard); RecoveryTarget recoveryTarget = collection.getTarget(recoveryId); @@ -152,7 +152,7 @@ public void testResetRecovery() throws Exception { String resetTempFileName = resetRecovery.getTempNameForFile("foobar"); assertNotEquals(tempFileName, resetTempFileName); assertEquals(currentAsTarget, shard.recoveryStats().currentAsTarget()); - try (ShardTargetCollection.ShardTargetRef newRecoveryRef = collection.get(resetRecoveryId)) { + try (ReplicationCollection.ReplicationRef newRecoveryRef = collection.get(resetRecoveryId)) { shards.recoverReplica(shard, (s, n) -> { assertSame(s, newRecoveryRef.get().indexShard()); return newRecoveryRef.get(); @@ -163,15 +163,15 @@ public void testResetRecovery() throws Exception { } } - long startRecovery(ShardTargetCollection collection, DiscoveryNode sourceNode, IndexShard shard) { + long startRecovery(ReplicationCollection collection, DiscoveryNode sourceNode, IndexShard shard) { return startRecovery(collection, sourceNode, shard, listener, TimeValue.timeValueMinutes(60)); } long startRecovery( - ShardTargetCollection collection, + ReplicationCollection collection, DiscoveryNode sourceNode, IndexShard indexShard, - ShardTargetListener listener, + ReplicationListener listener, TimeValue timeValue ) { final DiscoveryNode rNode = getDiscoveryNode(indexShard.routingEntry().currentNodeId()); diff --git a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java index 36c73963b9c26..298fdcaea6465 100644 --- a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java @@ -94,8 +94,8 @@ import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.recovery.RecoveryTarget; import org.opensearch.indices.recovery.StartRecoveryRequest; -import org.opensearch.indices.replication.common.ShardTargetListener; -import 
org.opensearch.indices.replication.common.ShardTargetState; +import org.opensearch.indices.replication.common.ReplicationListener; +import org.opensearch.indices.replication.common.ReplicationState; import org.opensearch.repositories.IndexId; import org.opensearch.repositories.Repository; import org.opensearch.repositories.blobstore.OpenSearchBlobStoreRepositoryIntegTestCase; @@ -141,14 +141,14 @@ public abstract class IndexShardTestCase extends OpenSearchTestCase { } }; - protected static final ShardTargetListener recoveryListener = new ShardTargetListener() { + protected static final ReplicationListener recoveryListener = new ReplicationListener() { @Override - public void onDone(ShardTargetState state) { + public void onDone(ReplicationState state) { } @Override - public void onFailure(ShardTargetState state, OpenSearchException e, boolean sendShardFailure) { + public void onFailure(ReplicationState state, OpenSearchException e, boolean sendShardFailure) { throw new AssertionError(e); } }; From 50c4e6ecb14c48f50ab0188b5994b3f0aed142ff Mon Sep 17 00:00:00 2001 From: Poojita Raj Date: Thu, 19 May 2022 12:53:25 -0700 Subject: [PATCH 10/10] rename and doc changes Signed-off-by: Poojita Raj --- .../recovery/PeerRecoveryTargetService.java | 10 +++---- .../indices/recovery/RecoveryTarget.java | 14 +++++----- .../common/ReplicationCollection.java | 26 +++++++++---------- .../replication/common/ReplicationTarget.java | 17 +++++++----- ...s.java => ReplicationCollectionTests.java} | 4 +-- 5 files changed, 38 insertions(+), 33 deletions(-) rename server/src/test/java/org/opensearch/recovery/{RecoveriesCollectionTests.java => ReplicationCollectionTests.java} (97%) diff --git a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java index 37b16212a35ee..e13022afa81ba 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java 
+++ b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java @@ -212,7 +212,7 @@ protected void retryRecovery(final long recoveryId, final String reason, TimeVal } private void retryRecovery(final long recoveryId, final TimeValue retryAfter, final TimeValue activityTimeout) { - RecoveryTarget newTarget = onGoingRecoveries.resetRecovery(recoveryId, activityTimeout); + RecoveryTarget newTarget = onGoingRecoveries.reset(recoveryId, activityTimeout); if (newTarget != null) { threadPool.scheduleUnlessShuttingDown(retryAfter, ThreadPool.Names.GENERIC, new RecoveryRunner(newTarget.getId())); } @@ -241,7 +241,7 @@ private void doRecovery(final long recoveryId, final StartRecoveryRequest preExi final IndexShard indexShard = recoveryTarget.indexShard(); indexShard.preRecovery(); assert recoveryTarget.sourceNode() != null : "can not do a recovery without a source node"; - logger.trace("{} preparing shard for peer recovery", recoveryTarget.indexShard().shardId()); + logger.trace("{} preparing shard for peer recovery", recoveryTarget.shardId()); indexShard.prepareForIndexRecovery(); final long startingSeqNo = indexShard.recoverLocallyUpToGlobalCheckpoint(); assert startingSeqNo == UNASSIGNED_SEQ_NO || recoveryTarget.state().getStage() == RecoveryState.Stage.TRANSLOG @@ -292,7 +292,7 @@ public static StartRecoveryRequest getStartRecoveryRequest( long startingSeqNo ) { final StartRecoveryRequest request; - logger.trace("{} collecting local files for [{}]", recoveryTarget.indexShard().shardId(), recoveryTarget.sourceNode()); + logger.trace("{} collecting local files for [{}]", recoveryTarget.shardId(), recoveryTarget.sourceNode()); Store.MetadataSnapshot metadataSnapshot; try { @@ -335,9 +335,9 @@ public static StartRecoveryRequest getStartRecoveryRequest( } metadataSnapshot = Store.MetadataSnapshot.EMPTY; } - logger.trace("{} local file count [{}]", recoveryTarget.indexShard().shardId(), metadataSnapshot.size()); + logger.trace("{} local file count 
[{}]", recoveryTarget.shardId(), metadataSnapshot.size()); request = new StartRecoveryRequest( - recoveryTarget.indexShard().shardId(), + recoveryTarget.shardId(), recoveryTarget.indexShard().routingEntry().allocationId().getId(), recoveryTarget.sourceNode(), localNode, diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java b/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java index 052ae035db30a..92897ab19ad64 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java @@ -97,7 +97,7 @@ public RecoveryTarget(IndexShard indexShard, DiscoveryNode sourceNode, Replicati indexShard.recoveryStats().incCurrentAsTarget(); this.store = indexShard.store(); final String tempFilePrefix = getPrefix() + UUIDs.randomBase64UUID() + "."; - this.multiFileWriter = new MultiFileWriter(indexShard.store(), recoveryStateIndex, tempFilePrefix, logger, this::ensureRefCount); + this.multiFileWriter = new MultiFileWriter(indexShard.store(), stateIndex, tempFilePrefix, logger, this::ensureRefCount); store.incRef(); } @@ -149,11 +149,11 @@ public void notifyListener(Exception e, boolean sendShardFailure) { * Closes the current recovery target and waits up to a certain timeout for resources to be freed. * Returns true if resetting the recovery was successful, false if the recovery target is already cancelled / failed or marked as done. */ - public boolean resetRecovery(CancellableThreads newTargetCancellableThreads) throws IOException { + public boolean reset(CancellableThreads newTargetCancellableThreads) throws IOException { final long recoveryId = getId(); if (finished.compareAndSet(false, true)) { try { - logger.debug("reset of recovery with shard {} and id [{}]", indexShard.shardId(), recoveryId); + logger.debug("reset of recovery with shard {} and id [{}]", shardId(), recoveryId); } finally { // release the initial reference. 
recovery files will be cleaned as soon as ref count goes to zero, potentially now. decRef(); @@ -163,7 +163,7 @@ public boolean resetRecovery(CancellableThreads newTargetCancellableThreads) thr } catch (CancellableThreads.ExecutionCancelledException e) { logger.trace( "new recovery target cancelled for shard {} while waiting on old recovery target with id [{}] to close", - indexShard.shardId(), + shardId(), recoveryId ); return false; @@ -241,7 +241,7 @@ protected void closeInternal() { @Override public String toString() { - return indexShard.shardId() + " [" + getId() + "]"; + return shardId() + " [" + getId() + "]"; } @Override @@ -332,7 +332,7 @@ public void indexTranslogOperations( translog.totalOperations(totalTranslogOps); assert indexShard().recoveryState() == state(); if (indexShard().state() != IndexShardState.RECOVERING) { - throw new IndexShardNotRecoveringException(indexShard.shardId(), indexShard().state()); + throw new IndexShardNotRecoveringException(shardId(), indexShard().state()); } /* * The maxSeenAutoIdTimestampOnPrimary received from the primary is at least the highest auto_id_timestamp from any operation @@ -418,7 +418,7 @@ public void cleanFiles( final String translogUUID = Translog.createEmptyTranslog( indexShard.shardPath().resolveTranslog(), globalCheckpoint, - indexShard.shardId(), + shardId(), indexShard.getPendingPrimaryTerm() ); store.associateIndexWithNewTranslog(translogUUID); diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationCollection.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationCollection.java index e0ac90ef12ba2..609825eb5227b 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationCollection.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationCollection.java @@ -72,9 +72,9 @@ public ReplicationCollection(Logger logger, ThreadPool threadPool) { } /** - * Starts are new recovery for the given 
shard, source node and state + * Starts a new target event for the given shard, source node and state * - * @return the id of the new recovery. + * @return the id of the new target event. */ public long start(T target, TimeValue activityTimeout) { startInternal(target, activityTimeout); @@ -93,21 +93,21 @@ private void startInternal(T target, TimeValue activityTimeout) { } /** - * Resets the recovery and performs a recovery restart on the currently recovering index shard + * Resets the target event and performs a restart on the current index shard * * @see IndexShard#performRecoveryRestart() - * @return newly created RecoveryTarget + * @return newly created Target */ @SuppressWarnings(value = "unchecked") - public T resetRecovery(final long recoveryId, final TimeValue activityTimeout) { + public T reset(final long id, final TimeValue activityTimeout) { T oldTarget = null; final T newTarget; try { synchronized (onGoingTargetEvents) { - // swap recovery targets in a synchronized block to ensure that the newly added recovery target is picked up by - // cancelRecoveriesForShard whenever the old recovery target is picked up - oldTarget = onGoingTargetEvents.remove(recoveryId); + // swap targets in a synchronized block to ensure that the newly added target is picked up by + // cancelForShard whenever the old target is picked up + oldTarget = onGoingTargetEvents.remove(id); if (oldTarget == null) { return null; } @@ -116,8 +116,8 @@ public T resetRecovery(final long recoveryId, final TimeValue activityTimeout) { startInternal(newTarget, activityTimeout); } - // Closes the current recovery target - boolean successfulReset = oldTarget.resetRecovery(newTarget.cancellableThreads()); + // Closes the current target + boolean successfulReset = oldTarget.reset(newTarget.cancellableThreads()); if (successfulReset) { logger.trace("restarted {}, previous id [{}]", newTarget.description(), oldTarget.getId()); return newTarget; @@ -127,7 +127,7 @@ public T resetRecovery(final long 
recoveryId, final TimeValue activityTimeout) { newTarget.description(), oldTarget.getId() ); - cancel(newTarget.getId(), "recovery cancelled during reset"); + cancel(newTarget.getId(), "cancelled during reset"); return null; } } catch (Exception e) { @@ -147,7 +147,7 @@ public T getTarget(long id) { * to make sure it's safe to use. However, you must call {@link ReplicationTarget#decRef()} when you are done with it, typically * by using this method in a try-with-resources clause. *

- * Returns null if recovery is not found + * Returns null if target event is not found */ public ReplicationRef get(long id) { T status = onGoingTargetEvents.get(id); @@ -203,7 +203,7 @@ public void markAsDone(long id) { } } - /** the number of ongoing recoveries */ + /** the number of ongoing target events */ public int size() { return onGoingTargetEvents.size(); } diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java index a8c97a731ac75..0192270907fd2 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java @@ -12,11 +12,11 @@ import org.opensearch.ExceptionsHelper; import org.opensearch.OpenSearchException; import org.opensearch.action.ActionListener; -import org.opensearch.common.UUIDs; import org.opensearch.common.logging.Loggers; import org.opensearch.common.util.CancellableThreads; import org.opensearch.common.util.concurrent.AbstractRefCounted; import org.opensearch.index.shard.IndexShard; +import org.opensearch.index.shard.ShardId; import java.io.IOException; import java.util.concurrent.atomic.AtomicBoolean; @@ -37,11 +37,12 @@ public abstract class ReplicationTarget extends AbstractRefCounted { private final long id; protected final AtomicBoolean finished = new AtomicBoolean(); + private final ShardId shardId; protected final IndexShard indexShard; protected final ReplicationListener listener; protected final Logger logger; protected final CancellableThreads cancellableThreads; - protected final ReplicationLuceneIndex recoveryStateIndex; + protected final ReplicationLuceneIndex stateIndex; protected abstract String getPrefix(); @@ -65,14 +66,14 @@ public CancellableThreads cancellableThreads() { public abstract void notifyListener(Exception e, boolean sendShardFailure); - public 
ReplicationTarget(String name, IndexShard indexShard, ReplicationLuceneIndex recoveryStateIndex, ReplicationListener listener) { + public ReplicationTarget(String name, IndexShard indexShard, ReplicationLuceneIndex stateIndex, ReplicationListener listener) { super(name); this.logger = Loggers.getLogger(getClass(), indexShard.shardId()); this.listener = listener; this.id = ID_GENERATOR.incrementAndGet(); - this.recoveryStateIndex = recoveryStateIndex; + this.stateIndex = stateIndex; this.indexShard = indexShard; - final String tempFilePrefix = getPrefix() + UUIDs.randomBase64UUID() + "."; + this.shardId = indexShard.shardId(); // make sure the store is not released until we are done. this.cancellableThreads = new CancellableThreads(); } @@ -81,7 +82,7 @@ public long getId() { return id; } - public abstract boolean resetRecovery(CancellableThreads newTargetCancellableThreads) throws IOException; + public abstract boolean reset(CancellableThreads newTargetCancellableThreads) throws IOException; /** * return the last time this ReplicationStatus was used (based on System.nanoTime() @@ -106,6 +107,10 @@ public IndexShard indexShard() { return indexShard; } + public ShardId shardId() { + return shardId; + } + /** * mark the current replication as done */ diff --git a/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java b/server/src/test/java/org/opensearch/recovery/ReplicationCollectionTests.java similarity index 97% rename from server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java rename to server/src/test/java/org/opensearch/recovery/ReplicationCollectionTests.java index e341dfd086ea4..7587f48503625 100644 --- a/server/src/test/java/org/opensearch/recovery/RecoveriesCollectionTests.java +++ b/server/src/test/java/org/opensearch/recovery/ReplicationCollectionTests.java @@ -51,7 +51,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.lessThan; -public class RecoveriesCollectionTests extends 
OpenSearchIndexLevelReplicationTestCase { +public class ReplicationCollectionTests extends OpenSearchIndexLevelReplicationTestCase { static final ReplicationListener listener = new ReplicationListener() { @Override public void onDone(ReplicationState state) { @@ -138,7 +138,7 @@ public void testResetRecovery() throws Exception { IndexShard indexShard = recoveryTarget.indexShard(); Store store = recoveryTarget.store(); String tempFileName = recoveryTarget.getTempNameForFile("foobar"); - RecoveryTarget resetRecovery = collection.resetRecovery(recoveryId, TimeValue.timeValueMinutes(60)); + RecoveryTarget resetRecovery = collection.reset(recoveryId, TimeValue.timeValueMinutes(60)); final long resetRecoveryId = resetRecovery.getId(); assertNotSame(recoveryTarget, resetRecovery); assertNotSame(recoveryTarget.cancellableThreads(), resetRecovery.cancellableThreads());