@@ -69,6 +69,7 @@ public class ITTestRepairsCommand extends HoodieCLIIntegrationTestBase {
6969 private String duplicatedPartitionPath ;
7070 private String duplicatedPartitionPathWithUpdates ;
7171 private String duplicatedPartitionPathWithUpserts ;
72+ private String duplicatedNoPartitionPath ;
7273 private String repairedOutputPath ;
7374
7475 private HoodieFileFormat fileFormat ;
@@ -78,6 +79,7 @@ public void init() throws Exception {
7879 duplicatedPartitionPath = HoodieTestDataGenerator .DEFAULT_FIRST_PARTITION_PATH ;
7980 duplicatedPartitionPathWithUpdates = HoodieTestDataGenerator .DEFAULT_SECOND_PARTITION_PATH ;
8081 duplicatedPartitionPathWithUpserts = HoodieTestDataGenerator .DEFAULT_THIRD_PARTITION_PATH ;
82+ duplicatedNoPartitionPath = HoodieTestDataGenerator .NO_PARTITION_PATH ;
8183 repairedOutputPath = Paths .get (basePath , "tmp" ).toString ();
8284
8385 HoodieCLI .conf = jsc .hadoopConfiguration ();
@@ -135,6 +137,23 @@ public void init() throws Exception {
135137 .withInserts (HoodieTestDataGenerator .DEFAULT_THIRD_PARTITION_PATH , "7" , dupRecords )
136138 .withInserts (HoodieTestDataGenerator .DEFAULT_THIRD_PARTITION_PATH , "8" , dupRecords );
137139
140+ // init cow table for non-partitioned table tests
141+ String cowNonPartitionedTablePath = Paths .get (basePath , "cow_table_non_partitioned" ).toString ();
142+
143+ // Create cow table and connect
144+ new TableCommand ().createTable (
145+ cowNonPartitionedTablePath , "cow_table_non_partitioned" , HoodieTableType .COPY_ON_WRITE .name (),
146+ "" , TimelineLayoutVersion .VERSION_1 , "org.apache.hudi.common.model.HoodieAvroPayload" );
147+
148+ HoodieSparkWriteableTestTable cowNonPartitionedTable = HoodieSparkWriteableTestTable .of (HoodieCLI .getTableMetaClient (), schema );
149+
150+ cowNonPartitionedTable .addCommit ("20160401010101" )
151+ .withInserts (HoodieTestDataGenerator .NO_PARTITION_PATH , "1" , hoodieRecords1 )
152+ .getFileIdWithLogFile (HoodieTestDataGenerator .NO_PARTITION_PATH );
153+
154+ cowNonPartitionedTable .addCommit ("20160401010202" )
155+ .withInserts (HoodieTestDataGenerator .NO_PARTITION_PATH , "2" , dupRecords );
156+
138157 fileFormat = metaClient .getTableConfig ().getBaseFileFormat ();
139158 }
140159
@@ -232,6 +251,39 @@ public void testDeduplicateWithUpserts(HoodieTableType tableType) throws IOExcep
232251 assertEquals (100 , result .count ());
233252 }
234253
254+ /**
255+ * Test case dry run deduplicate for non-partitioned dataset.
256+ */
257+ @ ParameterizedTest
258+ @ EnumSource (value = HoodieTableType .class )
259+ public void testDeduplicateNoPartitionWithInserts (HoodieTableType tableType ) throws IOException {
260+ String tablePath = Paths .get (basePath , "cow_table_non_partitioned" ).toString ();
261+ connectTableAndReloadMetaClient (tablePath );
262+ HoodieTableFileSystemView fsView = new HoodieTableFileSystemView (metaClient ,
263+ metaClient .getActiveTimeline ().getCommitsTimeline ().filterCompletedInstants (),
264+ fs .listStatus (new Path (Paths .get (tablePath , duplicatedNoPartitionPath ).toString ())));
265+ List <String > filteredStatuses = fsView .getLatestBaseFiles ().map (HoodieBaseFile ::getPath ).collect (Collectors .toList ());
266+ assertEquals (2 , filteredStatuses .size (), "There should be 2 files." );
267+
268+ // Before deduplicate, all files contain 110 records
269+ String [] files = filteredStatuses .toArray (new String [0 ]);
270+ Dataset df = readFiles (files );
271+ assertEquals (110 , df .count ());
272+
273+ // use default value without specifying duplicatedPartitionPath
274+ String cmdStr = String .format ("repair deduplicate --repairedOutputPath %s --sparkMaster %s" ,
275+ repairedOutputPath , "local" );
276+ Object resultForCmd = shell .evaluate (() -> cmdStr );
277+ assertTrue (ShellEvaluationResultUtil .isSuccess (resultForCmd ));
278+ assertEquals (RepairsCommand .DEDUPLICATE_RETURN_PREFIX + repairedOutputPath , resultForCmd .toString ());
279+
280+ // After deduplicate, there are 100 records
281+ FileStatus [] fileStatus = fs .listStatus (new Path (repairedOutputPath ));
282+ files = Arrays .stream (fileStatus ).map (status -> status .getPath ().toString ()).toArray (String []::new );
283+ Dataset result = readFiles (files );
284+ assertEquals (100 , result .count ());
285+ }
286+
235287 /**
236288 * Test case for real run deduplicate.
237289 */
0 commit comments