Skip to content

Commit 804bef4

Browse files
authored
Fix flaky test SegmentReplicationIT.testScrollWithOngoingSegmentReplication (#7572)
This test fails intermittently for two reasons: 1. It fetches the list of temporary files on disk (those whose names start with ".replication") before the replica has had time to flush received chunks to disk. Fixed by wrapping the assertion that a tmp file exists in assertBusy. 2. It asserts that the count of tmp files is exactly the same before and after the scroll request is cleared. However, additional tmp files may have been written to disk concurrently, causing a count mismatch. Fixed by removing the size assertion: for the purposes of this test we don't care whether extra tmp files appear, as long as the tmp files originally fetched remain after the scroll query is cleared. Signed-off-by: Marc Handalian <handalm@amazon.com>
1 parent af78ef9 commit 804bef4

1 file changed

Lines changed: 13 additions & 9 deletions

File tree

server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,19 +1045,24 @@ public void testScrollWithOngoingSegmentReplication() throws Exception {
10451045
// wait for segrep to start and copy temporary files
10461046
waitForFileCopy.await();
10471047

1048-
// verify replica contains temporary files
1049-
IndexShard replicaShard = getIndexShard(replica, INDEX_NAME);
1050-
List<String> temporaryFiles = Arrays.stream(replicaShard.store().directory().listAll())
1051-
.filter(fileName -> fileName.startsWith(REPLICATION_PREFIX))
1052-
.collect(Collectors.toList());
1053-
logger.info("--> temporaryFiles {}", temporaryFiles);
1054-
assertTrue(temporaryFiles.size() > 0);
1048+
final IndexShard replicaShard = getIndexShard(replica, INDEX_NAME);
1049+
// Wait until replica has written a tmp file to disk.
1050+
List<String> temporaryFiles = new ArrayList<>();
1051+
assertBusy(() -> {
1052+
// verify replica contains temporary files
1053+
temporaryFiles.addAll(
1054+
Arrays.stream(replicaShard.store().directory().listAll())
1055+
.filter(fileName -> fileName.startsWith(REPLICATION_PREFIX))
1056+
.collect(Collectors.toList())
1057+
);
1058+
logger.info("--> temporaryFiles {}", temporaryFiles);
1059+
assertTrue(temporaryFiles.size() > 0);
1060+
});
10551061

10561062
// Clear scroll query, this should clean up files on replica
10571063
client(replica).prepareClearScroll().addScrollId(searchResponse.getScrollId()).get();
10581064

10591065
// verify temporary files still exist
1060-
replicaShard = getIndexShard(replica, INDEX_NAME);
10611066
List<String> temporaryFilesPostClear = Arrays.stream(replicaShard.store().directory().listAll())
10621067
.filter(fileName -> fileName.startsWith(REPLICATION_PREFIX))
10631068
.collect(Collectors.toList());
@@ -1066,7 +1071,6 @@ public void testScrollWithOngoingSegmentReplication() throws Exception {
10661071
// Unblock segment replication
10671072
blockFileCopy.countDown();
10681073

1069-
assertEquals(temporaryFiles.size(), temporaryFilesPostClear.size());
10701074
assertTrue(temporaryFilesPostClear.containsAll(temporaryFiles));
10711075

10721076
// wait for replica to catch up and verify doc count

0 commit comments

Comments
 (0)