Skip to content

Commit 40fc116

Browse files
committed
[HUDI-3805] Delete existing corrupted requested rollback plan during rollback
1 parent d43b4cd commit 40fc116

3 files changed

Lines changed: 140 additions & 11 deletions

File tree

hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818

1919
package org.apache.hudi.client;
2020

21-
import org.apache.hadoop.fs.Path;
2221
import org.apache.hudi.async.AsyncArchiveService;
2322
import org.apache.hudi.async.AsyncCleanerService;
2423
import org.apache.hudi.avro.HoodieAvroUtils;
@@ -72,7 +71,6 @@
7271
import org.apache.hudi.exception.HoodieRollbackException;
7372
import org.apache.hudi.exception.HoodieSavepointException;
7473
import org.apache.hudi.index.HoodieIndex;
75-
import org.apache.hudi.metadata.HoodieTableMetadata;
7674
import org.apache.hudi.internal.schema.InternalSchema;
7775
import org.apache.hudi.internal.schema.Type;
7876
import org.apache.hudi.internal.schema.action.InternalSchemaChangeApplier;
@@ -82,6 +80,7 @@
8280
import org.apache.hudi.internal.schema.utils.AvroSchemaEvolutionUtils;
8381
import org.apache.hudi.internal.schema.utils.InternalSchemaUtils;
8482
import org.apache.hudi.internal.schema.utils.SerDeHelper;
83+
import org.apache.hudi.metadata.HoodieTableMetadata;
8584
import org.apache.hudi.metadata.HoodieTableMetadataWriter;
8685
import org.apache.hudi.metadata.MetadataPartitionType;
8786
import org.apache.hudi.metrics.HoodieMetrics;
@@ -95,8 +94,9 @@
9594
import org.apache.hudi.table.upgrade.UpgradeDowngrade;
9695

9796
import com.codahale.metrics.Timer;
98-
import org.apache.hadoop.conf.Configuration;
9997
import org.apache.avro.Schema;
98+
import org.apache.hadoop.conf.Configuration;
99+
import org.apache.hadoop.fs.Path;
100100
import org.apache.log4j.LogManager;
101101
import org.apache.log4j.Logger;
102102

@@ -105,11 +105,11 @@
105105
import java.text.ParseException;
106106
import java.util.Collection;
107107
import java.util.Collections;
108-
import java.util.List;
109-
import java.util.Set;
110108
import java.util.HashMap;
111109
import java.util.LinkedHashMap;
110+
import java.util.List;
112111
import java.util.Map;
112+
import java.util.Set;
113113
import java.util.stream.Collectors;
114114
import java.util.stream.Stream;
115115

@@ -1113,9 +1113,28 @@ protected Map<String, Option<HoodiePendingRollbackInfo>> getPendingRollbackInfos
11131113
protected Map<String, Option<HoodiePendingRollbackInfo>> getPendingRollbackInfos(HoodieTableMetaClient metaClient, boolean ignoreCompactionAndClusteringInstants) {
11141114
List<HoodieInstant> instants = metaClient.getActiveTimeline().filterPendingRollbackTimeline().getInstants().collect(Collectors.toList());
11151115
Map<String, Option<HoodiePendingRollbackInfo>> infoMap = new HashMap<>();
1116-
for (HoodieInstant instant : instants) {
1116+
for (HoodieInstant rollbackInstant : instants) {
1117+
HoodieRollbackPlan rollbackPlan;
1118+
try {
1119+
rollbackPlan = RollbackUtils.getRollbackPlan(metaClient, rollbackInstant);
1120+
} catch (IOException e) {
1121+
if (rollbackInstant.isRequested()) {
1122+
LOG.warn("Fetching rollback plan failed for " + rollbackInstant + ", deleting the plan since it's in REQUESTED state", e);
1123+
try {
1124+
metaClient.getActiveTimeline().deletePending(rollbackInstant);
1125+
} catch (HoodieIOException he) {
1126+
LOG.warn("Cannot delete " + rollbackInstant, he);
1127+
continue;
1128+
}
1129+
} else {
1130+
// Here we assume that if the rollback is inflight, the rollback plan is intact
1131+
// in instant.rollback.requested. The exception here can be due to other reasons.
1132+
LOG.warn("Fetching rollback plan failed for " + rollbackInstant + ", skip the plan", e);
1133+
}
1134+
continue;
1135+
}
1136+
11171137
try {
1118-
HoodieRollbackPlan rollbackPlan = RollbackUtils.getRollbackPlan(metaClient, instant);
11191138
String action = rollbackPlan.getInstantToRollback().getAction();
11201139
if (ignoreCompactionAndClusteringInstants) {
11211140
if (!HoodieTimeline.COMPACTION_ACTION.equals(action)) {
@@ -1124,14 +1143,14 @@ protected Map<String, Option<HoodiePendingRollbackInfo>> getPendingRollbackInfos
11241143
rollbackPlan.getInstantToRollback().getCommitTime())).isPresent();
11251144
if (!isClustering) {
11261145
String instantToRollback = rollbackPlan.getInstantToRollback().getCommitTime();
1127-
infoMap.putIfAbsent(instantToRollback, Option.of(new HoodiePendingRollbackInfo(instant, rollbackPlan)));
1146+
infoMap.putIfAbsent(instantToRollback, Option.of(new HoodiePendingRollbackInfo(rollbackInstant, rollbackPlan)));
11281147
}
11291148
}
11301149
} else {
1131-
infoMap.putIfAbsent(rollbackPlan.getInstantToRollback().getCommitTime(), Option.of(new HoodiePendingRollbackInfo(instant, rollbackPlan)));
1150+
infoMap.putIfAbsent(rollbackPlan.getInstantToRollback().getCommitTime(), Option.of(new HoodiePendingRollbackInfo(rollbackInstant, rollbackPlan)));
11321151
}
1133-
} catch (IOException e) {
1134-
LOG.warn("Fetching rollback plan failed for " + infoMap + ", skip the plan", e);
1152+
} catch (Exception e) {
1153+
LOG.warn("Processing rollback plan failed for " + rollbackInstant + ", skip the plan", e);
11351154
}
11361155
}
11371156
return infoMap;

hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestClientRollback.java

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import org.apache.hudi.avro.model.HoodieInstantInfo;
2222
import org.apache.hudi.avro.model.HoodieRollbackPlan;
23+
import org.apache.hudi.avro.model.HoodieRollbackRequest;
2324
import org.apache.hudi.common.config.HoodieMetadataConfig;
2425
import org.apache.hudi.common.fs.FSUtils;
2526
import org.apache.hudi.common.model.HoodieBaseFile;
@@ -61,9 +62,11 @@
6162
import java.util.stream.Collectors;
6263
import java.util.stream.Stream;
6364

65+
import static org.apache.hudi.common.util.StringUtils.EMPTY_STRING;
6466
import static org.apache.hudi.testutils.Assertions.assertNoWriteErrors;
6567
import static org.junit.jupiter.api.Assertions.assertEquals;
6668
import static org.junit.jupiter.api.Assertions.assertFalse;
69+
import static org.junit.jupiter.api.Assertions.assertNotEquals;
6770
import static org.junit.jupiter.api.Assertions.assertThrows;
6871
import static org.junit.jupiter.api.Assertions.assertTrue;
6972

@@ -481,4 +484,107 @@ public void testAutoRollbackInflightCommit() throws Exception {
481484
assertFalse(testTable.baseFilesExist(partitionAndFileId3, commitTime3));
482485
}
483486
}
487+
488+
private static Stream<Arguments> testRollbackWithRequestedRollbackPlanParams() {
489+
return Arrays.stream(new Boolean[][] {
490+
{true, true}, {true, false}, {false, true}, {false, false},
491+
}).map(Arguments::of);
492+
}
493+
494+
@ParameterizedTest
495+
@MethodSource("testRollbackWithRequestedRollbackPlanParams")
496+
public void testRollbackWithRequestedRollbackPlan(boolean enableMetadataTable, boolean isRollbackPlanCorrupted) throws Exception {
497+
// Let's create some commit files and base files
498+
final String p1 = "2022/04/05";
499+
final String p2 = "2022/04/06";
500+
final String commitTime1 = "20220406010101002";
501+
final String commitTime2 = "20220406020601002";
502+
final String commitTime3 = "20220406030611002";
503+
final String rollbackInstantTime = "20220406040611002";
504+
Map<String, String> partitionAndFileId1 = new HashMap<String, String>() {
505+
{
506+
put(p1, "id11");
507+
put(p2, "id12");
508+
}
509+
};
510+
Map<String, String> partitionAndFileId2 = new HashMap<String, String>() {
511+
{
512+
put(p1, "id21");
513+
put(p2, "id22");
514+
}
515+
};
516+
Map<String, String> partitionAndFileId3 = new HashMap<String, String>() {
517+
{
518+
put(p1, "id31");
519+
put(p2, "id32");
520+
}
521+
};
522+
523+
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
524+
.withRollbackUsingMarkers(false)
525+
.withMetadataConfig(
526+
HoodieMetadataConfig.newBuilder()
527+
// Column Stats Index is disabled, since these tests construct tables which are
528+
// not valid (empty commit metadata, invalid parquet files)
529+
.withMetadataIndexColumnStats(false)
530+
.enable(enableMetadataTable)
531+
.build()
532+
)
533+
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
534+
.withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY).build())
535+
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build()).build();
536+
537+
HoodieTestTable testTable = enableMetadataTable
538+
? HoodieMetadataTestTable.of(metaClient, SparkHoodieBackedTableMetadataWriter.create(
539+
metaClient.getHadoopConf(), config, context))
540+
: HoodieTestTable.of(metaClient);
541+
542+
testTable.withPartitionMetaFiles(p1, p2)
543+
.addCommit(commitTime1)
544+
.withBaseFilesInPartitions(partitionAndFileId1)
545+
.addCommit(commitTime2)
546+
.withBaseFilesInPartitions(partitionAndFileId2)
547+
.addInflightCommit(commitTime3)
548+
.withBaseFilesInPartitions(partitionAndFileId3);
549+
550+
try (SparkRDDWriteClient client = getHoodieWriteClient(config)) {
551+
if (isRollbackPlanCorrupted) {
552+
// Add a corrupted requested rollback plan
553+
FileCreateUtils.createRequestedRollbackFile(metaClient.getBasePath(), rollbackInstantTime, new byte[] {0, 1, 2});
554+
} else {
555+
// Add a valid requested rollback plan to roll back commitTime3
556+
HoodieRollbackPlan rollbackPlan = new HoodieRollbackPlan();
557+
List<HoodieRollbackRequest> rollbackRequestList = partitionAndFileId3.keySet().stream()
558+
.map(partition -> new HoodieRollbackRequest(partition, EMPTY_STRING, EMPTY_STRING,
559+
Collections.singletonList(metaClient.getBasePath() + "/" + partition + "/"
560+
+ FileCreateUtils.baseFileName(commitTime3, partitionAndFileId3.get(p1))),
561+
Collections.emptyMap()))
562+
.collect(Collectors.toList());
563+
rollbackPlan.setRollbackRequests(rollbackRequestList);
564+
rollbackPlan.setInstantToRollback(new HoodieInstantInfo(commitTime3, HoodieTimeline.COMMIT_ACTION));
565+
FileCreateUtils.createRequestedRollbackFile(metaClient.getBasePath(), rollbackInstantTime, rollbackPlan);
566+
}
567+
568+
// Rollback commit3
569+
client.rollback(commitTime3);
570+
assertFalse(testTable.inflightCommitExists(commitTime3));
571+
assertFalse(testTable.baseFilesExist(partitionAndFileId3, commitTime3));
572+
assertTrue(testTable.baseFilesExist(partitionAndFileId2, commitTime2));
573+
574+
metaClient.reloadActiveTimeline();
575+
List<HoodieInstant> rollbackInstants = metaClient.getActiveTimeline().getRollbackTimeline().getInstants().collect(Collectors.toList());
576+
// Corrupted requested rollback plan should be deleted before scheduling a new one
577+
assertEquals(rollbackInstants.size(), 1);
578+
HoodieInstant rollbackInstant = rollbackInstants.get(0);
579+
assertTrue(rollbackInstant.isCompleted());
580+
581+
if (isRollbackPlanCorrupted) {
582+
// Should create a new rollback instant
583+
assertNotEquals(rollbackInstantTime, rollbackInstant.getTimestamp());
584+
} else {
585+
// Should reuse the rollback instant
586+
assertEquals(rollbackInstantTime, rollbackInstant.getTimestamp());
587+
}
588+
}
589+
}
484590
}

hudi-common/src/test/java/org/apache/hudi/common/testutils/FileCreateUtils.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,10 @@ public static void createRequestedRollbackFile(String basePath, String instantTi
244244
createMetaFile(basePath, instantTime, HoodieTimeline.REQUESTED_ROLLBACK_EXTENSION, serializeRollbackPlan(plan).get());
245245
}
246246

247+
public static void createRequestedRollbackFile(String basePath, String instantTime, byte[] content) throws IOException {
248+
createMetaFile(basePath, instantTime, HoodieTimeline.REQUESTED_ROLLBACK_EXTENSION, content);
249+
}
250+
247251
public static void createInflightRollbackFile(String basePath, String instantTime) throws IOException {
248252
createMetaFile(basePath, instantTime, HoodieTimeline.INFLIGHT_ROLLBACK_EXTENSION);
249253
}

0 commit comments

Comments (0)