Skip to content

Commit 089748b

Browse files
committed
[HUDI-3900] [UBER] Support log compaction action for MOR tables
1 parent 6456bd3 commit 089748b

81 files changed

Lines changed: 4105 additions & 232 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java

Lines changed: 95 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
140140
protected transient Timer.Context writeTimer = null;
141141
protected transient Timer.Context compactionTimer;
142142
protected transient Timer.Context clusteringTimer;
143+
protected transient Timer.Context logCompactionTimer;
143144

144145
protected transient AsyncCleanerService asyncCleanerService;
145146
protected transient AsyncArchiveService asyncArchiveService;
@@ -362,7 +363,7 @@ public void bootstrap(Option<Map<String, String>> extraMetadata) {
362363
protected void rollbackFailedBootstrap() {
363364
LOG.info("Rolling back pending bootstrap if present");
364365
HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
365-
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
366+
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingMajorAndMinorCompaction();
366367
Option<String> instant = Option.fromJavaOptional(
367368
inflightTimeline.getReverseOrderedInstants().map(HoodieInstant::getTimestamp).findFirst());
368369
if (instant.isPresent() && HoodieTimeline.compareTimestamps(instant.get(), HoodieTimeline.LESSER_THAN_OR_EQUALS,
@@ -558,6 +559,15 @@ protected void runTableServicesInline(HoodieTable table, HoodieCommitMetadata me
558559
inlineScheduleCompaction(extraMetadata);
559560
}
560561

562+
// Do an inline log compaction if enabled
563+
if (config.inlineLogCompactionEnabled()) {
564+
runAnyPendingLogCompactions(table);
565+
metadata.addMetadata(HoodieCompactionConfig.INLINE_LOG_COMPACT.key(), "true");
566+
inlineLogCompact(extraMetadata);
567+
} else {
568+
metadata.addMetadata(HoodieCompactionConfig.INLINE_LOG_COMPACT.key(), "false");
569+
}
570+
561571
// Do an inline clustering if enabled
562572
if (config.inlineClusteringEnabled()) {
563573
runAnyPendingClustering(table);
@@ -585,6 +595,14 @@ protected void runAnyPendingCompactions(HoodieTable table) {
585595
});
586596
}
587597

598+
protected void runAnyPendingLogCompactions(HoodieTable table) {
599+
table.getActiveTimeline().getWriteTimeline().filterPendingLogCompactionTimeline().getInstants()
600+
.forEach(instant -> {
601+
LOG.info("Running previously failed inflight log compaction at instant " + instant);
602+
logCompact(instant.getTimestamp(), true);
603+
});
604+
}
605+
588606
protected void runAnyPendingClustering(HoodieTable table) {
589607
table.getActiveTimeline().filterPendingReplaceTimeline().getInstants().forEach(instant -> {
590608
Option<Pair<HoodieInstant, HoodieClusteringPlan>> instantPlan = ClusteringUtils.getClusteringPlan(table.getMetaClient(), instant);
@@ -1071,13 +1089,60 @@ public abstract void commitCompaction(String compactionInstantTime, HoodieCommit
10711089
*/
10721090
protected abstract void completeCompaction(HoodieCommitMetadata metadata, HoodieTable table, String compactionCommitTime);
10731091

1092+
/**
1093+
* Schedules a new log compaction instant.
1094+
* @param extraMetadata Extra Metadata to be stored
1095+
*/
1096+
public Option<String> scheduleLogCompaction(Option<Map<String, String>> extraMetadata) throws HoodieIOException {
1097+
String instantTime = HoodieActiveTimeline.createNewInstantTime();
1098+
return scheduleLogCompactionAtInstant(instantTime, extraMetadata) ? Option.of(instantTime) : Option.empty();
1099+
}
1100+
1101+
/**
1102+
* Schedules a new log compaction instant with passed-in instant time.
1103+
* @param instantTime Log Compaction Instant Time
1104+
* @param extraMetadata Extra Metadata to be stored
1105+
*/
1106+
public boolean scheduleLogCompactionAtInstant(String instantTime, Option<Map<String, String>> extraMetadata) throws HoodieIOException {
1107+
return scheduleTableService(instantTime, extraMetadata, TableServiceType.LOG_COMPACT).isPresent();
1108+
}
1109+
1110+
/**
1111+
* Performs Log Compaction for the workload stored in instant-time.
1112+
*
1113+
* @param logCompactionInstantTime Log Compaction Instant Time
1114+
* @return Collection of WriteStatus to inspect errors and counts
1115+
*/
1116+
public HoodieWriteMetadata<O> logCompact(String logCompactionInstantTime) {
1117+
return logCompact(logCompactionInstantTime, config.shouldAutoCommit());
1118+
}
1119+
1120+
/**
1121+
* Commit a log compaction operation. Allow passing additional meta-data to be stored in commit instant file.
1122+
*
1123+
* @param logCompactionInstantTime Log Compaction Instant Time
1124+
* @param metadata All the metadata that gets stored along with a commit
1125+
* @param extraMetadata Extra Metadata to be stored
1126+
*/
1127+
public void commitLogCompaction(String logCompactionInstantTime, HoodieCommitMetadata metadata,
1128+
Option<Map<String, String>> extraMetadata) {
1129+
throw new UnsupportedOperationException("Log compaction is not supported yet.");
1130+
}
1131+
1132+
/**
1133+
* Commit Log Compaction and track metrics.
1134+
*/
1135+
protected void completeLogCompaction(HoodieCommitMetadata metadata, HoodieTable<T, I, K, O> table, String logCompactionCommitTime) {
1136+
throw new UnsupportedOperationException("Log compaction is not supported yet.");
1137+
}
1138+
10741139
/**
10751140
* Get inflight time line exclude compaction and clustering.
10761141
* @param metaClient
10771142
* @return
10781143
*/
10791144
private HoodieTimeline getInflightTimelineExcludeCompactionAndClustering(HoodieTableMetaClient metaClient) {
1080-
HoodieTimeline inflightTimelineWithReplaceCommit = metaClient.getCommitsTimeline().filterPendingExcludingCompaction();
1145+
HoodieTimeline inflightTimelineWithReplaceCommit = metaClient.getCommitsTimeline().filterPendingExcludingMajorAndMinorCompaction();
10811146
HoodieTimeline inflightTimelineExcludeClusteringCommit = inflightTimelineWithReplaceCommit.filter(instant -> {
10821147
if (instant.getAction().equals(HoodieTimeline.REPLACE_COMMIT_ACTION)) {
10831148
Option<Pair<HoodieInstant, HoodieClusteringPlan>> instantPlan = ClusteringUtils.getClusteringPlan(metaClient, instant);
@@ -1133,7 +1198,7 @@ protected Map<String, Option<HoodiePendingRollbackInfo>> getPendingRollbackInfos
11331198
try {
11341199
String action = rollbackPlan.getInstantToRollback().getAction();
11351200
if (ignoreCompactionAndClusteringInstants) {
1136-
if (!HoodieTimeline.COMPACTION_ACTION.equals(action)) {
1201+
if (!HoodieTimeline.COMPACTION_ACTION.equals(action) && !HoodieTimeline.LOG_COMPACTION_ACTION.equals(action)) {
11371202
boolean isClustering = HoodieTimeline.REPLACE_COMMIT_ACTION.equals(action)
11381203
&& ClusteringUtils.getClusteringPlan(metaClient, new HoodieInstant(true, rollbackPlan.getInstantToRollback().getAction(),
11391204
rollbackPlan.getInstantToRollback().getCommitTime())).isPresent();
@@ -1247,6 +1312,28 @@ protected Option<String> inlineScheduleCompaction(Option<Map<String, String>> ex
12471312
return scheduleCompaction(extraMetadata);
12481313
}
12491314

1315+
/**
1316+
* Ensures compaction instant is in expected state and performs Log Compaction for the workload stored in instant-time.s
1317+
*
1318+
* @param compactionInstantTime Compaction Instant Time
1319+
* @return Collection of Write Status
1320+
*/
1321+
protected HoodieWriteMetadata<O> logCompact(String compactionInstantTime, boolean shouldComplete) {
1322+
throw new UnsupportedOperationException("Log compaction is not supported yet.");
1323+
}
1324+
1325+
/**
1326+
* Performs a log compaction operation on a table, serially before or after an insert/upsert action.
1327+
*/
1328+
protected Option<String> inlineLogCompact(Option<Map<String, String>> extraMetadata) {
1329+
Option<String> logCompactionInstantTimeOpt = scheduleLogCompaction(extraMetadata);
1330+
logCompactionInstantTimeOpt.ifPresent(logCompactInstantTime -> {
1331+
// inline log compaction should auto commit as the user is never given control
1332+
logCompact(logCompactInstantTime, true);
1333+
});
1334+
return logCompactionInstantTimeOpt;
1335+
}
1336+
12501337
/**
12511338
* Schedules a new clustering instant.
12521339
* @param extraMetadata Extra Metadata to be stored
@@ -1342,6 +1429,11 @@ private Option<String> scheduleTableServiceInternal(String instantTime, Option<M
13421429
Option<HoodieCompactionPlan> compactionPlan = createTable(config, hadoopConf)
13431430
.scheduleCompaction(context, instantTime, extraMetadata);
13441431
return compactionPlan.isPresent() ? Option.of(instantTime) : Option.empty();
1432+
case LOG_COMPACT:
1433+
LOG.info("Scheduling log compaction at instant time :" + instantTime);
1434+
Option<HoodieCompactionPlan> logCompactionPlan = createTable(config, hadoopConf)
1435+
.scheduleLogCompaction(context, instantTime, extraMetadata);
1436+
return logCompactionPlan.isPresent() ? Option.of(instantTime) : Option.empty();
13451437
case CLEAN:
13461438
LOG.info("Scheduling cleaning at instant time :" + instantTime);
13471439
Option<HoodieCleanerPlan> cleanerPlan = createTable(config, hadoopConf)

hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/MetadataConversionUtils.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,12 @@ public static HoodieArchivedMetaEntry createMetaWrapper(HoodieInstant hoodieInst
118118
archivedMetaWrapper.setActionType(ActionType.compaction.name());
119119
break;
120120
}
121+
case HoodieTimeline.LOG_COMPACTION_ACTION: {
122+
HoodieCompactionPlan plan = CompactionUtils.getLogCompactionPlan(metaClient, hoodieInstant.getTimestamp());
123+
archivedMetaWrapper.setHoodieCompactionPlan(plan);
124+
archivedMetaWrapper.setActionType(ActionType.logcompaction.name());
125+
break;
126+
}
121127
default: {
122128
throw new UnsupportedOperationException("Action not fully supported yet");
123129
}

hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,12 @@ public class HoodieCompactionConfig extends HoodieConfig {
114114
+ "but users are expected to trigger async job for execution. If `hoodie.compact.inline` is set to true, regular writers will do both scheduling and "
115115
+ "execution inline for compaction");
116116

117+
public static final ConfigProperty<String> INLINE_LOG_COMPACT = ConfigProperty
118+
.key("hoodie.log.compaction.inline")
119+
.defaultValue("false")
120+
.withDocumentation("When set to true, logcompaction service is triggered after each write. While being "
121+
+ " simpler operationally, this adds extra latency on the write path.");
122+
117123
public static final ConfigProperty<String> INLINE_COMPACT_NUM_DELTA_COMMITS = ConfigProperty
118124
.key("hoodie.compact.inline.max.delta.commits")
119125
.defaultValue("5")
@@ -266,6 +272,20 @@ public class HoodieCompactionConfig extends HoodieConfig {
266272
.sinceVersion("0.11.0")
267273
.withDocumentation("When rewriting data, preserves existing hoodie_commit_time");
268274

275+
public static final ConfigProperty<String> PARTITIONS_FOR_COMPACTION = ConfigProperty
276+
.key("hoodie.compaction.target.partitions")
277+
.defaultValue("")
278+
.withDocumentation("Used by org.apache.hudi.table.action.compact.strategy.SpecificPartitionCompactionStrategy "
279+
+ "to filter the required partitions to compact. This takes a string value with partitions separated by comma. "
280+
+ "Empty value implies no filtering so all the partitions are selected.");
281+
282+
public static final ConfigProperty<String> PARTITIONS_FOR_LOG_COMPACTION = ConfigProperty
283+
.key("hoodie.log.compaction.target.partitions")
284+
.defaultValue("")
285+
.withDocumentation("Used by org.apache.hudi.table.action.compact.plan.generators.HoodieLogCompactionPlanGenerator "
286+
+ "to filter the required partitions to log compact. This takes a string value with partitions separated by comma. "
287+
+ "Empty value implies no filtering so all the partitions are selected.");
288+
269289
/**
270290
* Configs related to specific table types.
271291
*/
@@ -314,6 +334,11 @@ public class HoodieCompactionConfig extends HoodieConfig {
314334
.withDocumentation("When enable, hoodie will auto merge several small archive files into larger one. It's"
315335
+ " useful when storage scheme doesn't support append operation.");
316336

337+
public static final ConfigProperty<String> LOG_COMPACTION_BLOCKS_THRESHOLD = ConfigProperty
338+
.key("hoodie.log.compaction.blocks.threshold")
339+
.defaultValue("5")
340+
.withDocumentation("Log compaction can be scheduled if the no. of log blocks crosses this threshold value.");
341+
317342
/** @deprecated Use {@link #CLEANER_POLICY} and its methods instead */
318343
@Deprecated
319344
public static final String CLEANER_POLICY_PROP = CLEANER_POLICY.key();
@@ -590,6 +615,11 @@ public Builder withScheduleInlineCompaction(Boolean scheduleAsyncCompaction) {
590615
return this;
591616
}
592617

618+
public Builder withInlineLogCompaction(Boolean inlineLogCompaction) {
619+
compactionConfig.setValue(INLINE_LOG_COMPACT, String.valueOf(inlineLogCompaction));
620+
return this;
621+
}
622+
593623
public Builder withInlineCompactionTriggerStrategy(CompactionTriggerStrategy compactionTriggerStrategy) {
594624
compactionConfig.setValue(INLINE_COMPACT_TRIGGER_STRATEGY, compactionTriggerStrategy.name());
595625
return this;
@@ -751,6 +781,21 @@ public Builder withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy f
751781
return this;
752782
}
753783

784+
public Builder withPartitionsForCompaction(String className) {
785+
compactionConfig.setValue(PARTITIONS_FOR_COMPACTION, className);
786+
return this;
787+
}
788+
789+
public Builder withPartitionsForLogCompaction(String className) {
790+
compactionConfig.setValue(PARTITIONS_FOR_LOG_COMPACTION, className);
791+
return this;
792+
}
793+
794+
public Builder withLogCompactionBlocksThreshold(String className) {
795+
compactionConfig.setValue(LOG_COMPACTION_BLOCKS_THRESHOLD, className);
796+
return this;
797+
}
798+
754799
public HoodieCompactionConfig build() {
755800
compactionConfig.setDefaults(HoodieCompactionConfig.class.getName());
756801
// validation

hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1147,6 +1147,11 @@ public boolean populateMetaFields() {
11471147
/**
11481148
* compaction properties.
11491149
*/
1150+
1151+
public int getLogCompactionBlocksThreshold() {
1152+
return getInt(HoodieCompactionConfig.LOG_COMPACTION_BLOCKS_THRESHOLD);
1153+
}
1154+
11501155
public HoodieCleaningPolicy getCleanerPolicy() {
11511156
return HoodieCleaningPolicy.valueOf(getString(HoodieCompactionConfig.CLEANER_POLICY));
11521157
}
@@ -1247,6 +1252,10 @@ public boolean scheduleInlineCompaction() {
12471252
return getBoolean(HoodieCompactionConfig.SCHEDULE_INLINE_COMPACT);
12481253
}
12491254

1255+
public boolean inlineLogCompactionEnabled() {
1256+
return getBoolean(HoodieCompactionConfig.INLINE_LOG_COMPACT);
1257+
}
1258+
12501259
public CompactionTriggerStrategy getInlineCompactTriggerStrategy() {
12511260
return CompactionTriggerStrategy.valueOf(getString(HoodieCompactionConfig.INLINE_COMPACT_TRIGGER_STRATEGY));
12521261
}
@@ -1328,6 +1337,14 @@ public int getTargetPartitionsPerDayBasedCompaction() {
13281337
return getInt(HoodieCompactionConfig.TARGET_PARTITIONS_PER_DAYBASED_COMPACTION);
13291338
}
13301339

1340+
public String getPartitionsForLogCompaction() {
1341+
return getString(HoodieCompactionConfig.PARTITIONS_FOR_LOG_COMPACTION);
1342+
}
1343+
1344+
public String getPartitionsForCompaction() {
1345+
return getString(HoodieCompactionConfig.PARTITIONS_FOR_COMPACTION);
1346+
}
1347+
13311348
public int getCommitArchivalBatchSize() {
13321349
return getInt(HoodieCompactionConfig.COMMITS_ARCHIVAL_BATCH_SIZE);
13331350
}
@@ -2040,7 +2057,7 @@ public WriteConcurrencyMode getWriteConcurrencyMode() {
20402057
*/
20412058
public Boolean areAnyTableServicesExecutedInline() {
20422059
return areTableServicesEnabled()
2043-
&& (inlineClusteringEnabled() || inlineCompactionEnabled()
2060+
&& (inlineClusteringEnabled() || inlineCompactionEnabled() || inlineLogCompactionEnabled()
20442061
|| (isAutoClean() && !isAsyncClean()) || (isAutoArchive() && !isAsyncArchive()));
20452062
}
20462063

0 commit comments

Comments
 (0)