@@ -1466,6 +1466,131 @@ mod tests {
14661466 }
14671467 }
14681468
1469+ #[ tokio:: test]
1470+ async fn test_splitting_with_custom_blocksize ( ) {
1471+ let tmp_dir = tempfile:: tempdir ( ) . unwrap ( ) ;
1472+ let storage = Storage :: Local ( LocalStorage :: new ( tmp_dir. path ( ) . to_str ( ) . unwrap ( ) ) ) ;
1473+ let block_cache = new_cache_for_test ( ) ;
1474+ let sparse_index_cache = new_cache_for_test ( ) ;
1475+ let blockfile_provider = ArrowBlockfileProvider :: new (
1476+ storage,
1477+ TEST_MAX_BLOCK_SIZE_BYTES ,
1478+ block_cache,
1479+ sparse_index_cache,
1480+ ) ;
1481+ let prefix_path = String :: from ( "" ) ;
1482+ let custom_block_size = 100 * 1024 * 1024 ; // 100 MiB
1483+ let writer = blockfile_provider
1484+ . write :: < & str , Vec < u32 > > (
1485+ BlockfileWriterOptions :: new ( prefix_path. clone ( ) )
1486+ . max_block_size_bytes ( custom_block_size) ,
1487+ )
1488+ . await
1489+ . unwrap ( ) ;
1490+ let id_1 = writer. id ( ) ;
1491+
1492+ let n = 1200 ;
1493+ for i in 0 ..n {
1494+ let key = format ! ( "{:04}" , i) ;
1495+ let value = vec ! [ i] ;
1496+ writer. set ( "key" , key. as_str ( ) , value) . await . unwrap ( ) ;
1497+ }
1498+
1499+ let flusher = writer. commit :: < & str , Vec < u32 > > ( ) . await . unwrap ( ) ;
1500+ flusher. flush :: < & str , Vec < u32 > > ( ) . await . unwrap ( ) ;
1501+
1502+ let read_options = BlockfileReaderOptions :: new ( id_1, prefix_path. clone ( ) ) ;
1503+ let reader = blockfile_provider
1504+ . read :: < & str , & [ u32 ] > ( read_options)
1505+ . await
1506+ . unwrap ( ) ;
1507+
1508+ for i in 0 ..n {
1509+ let key = format ! ( "{:04}" , i) ;
1510+ let value = reader. get ( "key" , & key) . await . unwrap ( ) . unwrap ( ) ;
1511+ assert_eq ! ( value, [ i] ) ;
1512+ }
1513+
1514+ // Sparse index should have 1 block
1515+ match & reader {
1516+ crate :: BlockfileReader :: ArrowBlockfileReader ( reader) => {
1517+ assert_eq ! ( reader. root. sparse_index. len( ) , 1 ) ;
1518+ assert ! ( reader. root. sparse_index. is_valid( ) ) ;
1519+ }
1520+ _ => panic ! ( "Unexpected reader type" ) ,
1521+ }
1522+
1523+ // Add 5 new entries to the first block
1524+ let writer = blockfile_provider
1525+ . write :: < & str , Vec < u32 > > ( BlockfileWriterOptions :: new ( prefix_path. clone ( ) ) . fork ( id_1) )
1526+ . await
1527+ . unwrap ( ) ;
1528+ let id_2 = writer. id ( ) ;
1529+ for i in 0 ..5 {
1530+ let key = format ! ( "{:05}" , i) ;
1531+ let value = vec ! [ i] ;
1532+ writer. set ( "key" , key. as_str ( ) , value) . await . unwrap ( ) ;
1533+ }
1534+
1535+ let flusher = writer. commit :: < & str , Vec < u32 > > ( ) . await . unwrap ( ) ;
1536+ flusher. flush :: < & str , Vec < u32 > > ( ) . await . unwrap ( ) ;
1537+
1538+ let read_options = BlockfileReaderOptions :: new ( id_2, prefix_path. clone ( ) ) ;
1539+ let reader = blockfile_provider
1540+ . read :: < & str , & [ u32 ] > ( read_options)
1541+ . await
1542+ . unwrap ( ) ;
1543+ for i in 0 ..5 {
1544+ let key = format ! ( "{:05}" , i) ;
1545+ println ! ( "Getting key: {}" , key) ;
1546+ let value = reader. get ( "key" , & key) . await . unwrap ( ) . unwrap ( ) ;
1547+ assert_eq ! ( value, [ i] ) ;
1548+ }
1549+
1550+ // Sparse index should still have 1 block
1551+ match & reader {
1552+ crate :: BlockfileReader :: ArrowBlockfileReader ( reader) => {
1553+ assert_eq ! ( reader. root. sparse_index. len( ) , 1 ) ;
1554+ assert ! ( reader. root. sparse_index. is_valid( ) ) ;
1555+ }
1556+ _ => panic ! ( "Unexpected reader type" ) ,
1557+ }
1558+
1559+ // Add 1200 more entries, still 1 block
1560+ let writer = blockfile_provider
1561+ . write :: < & str , Vec < u32 > > ( BlockfileWriterOptions :: new ( prefix_path. clone ( ) ) . fork ( id_2) )
1562+ . await
1563+ . unwrap ( ) ;
1564+ let id_3 = writer. id ( ) ;
1565+ for i in n..n * 2 {
1566+ let key = format ! ( "{:04}" , i) ;
1567+ let value = vec ! [ i] ;
1568+ writer. set ( "key" , key. as_str ( ) , value) . await . unwrap ( ) ;
1569+ }
1570+ let flusher = writer. commit :: < & str , Vec < u32 > > ( ) . await . unwrap ( ) ;
1571+ flusher. flush :: < & str , Vec < u32 > > ( ) . await . unwrap ( ) ;
1572+
1573+ let read_options = BlockfileReaderOptions :: new ( id_3, prefix_path) ;
1574+ let reader = blockfile_provider
1575+ . read :: < & str , & [ u32 ] > ( read_options)
1576+ . await
1577+ . unwrap ( ) ;
1578+ for i in n..n * 2 {
1579+ let key = format ! ( "{:04}" , i) ;
1580+ let value = reader. get ( "key" , & key) . await . unwrap ( ) . unwrap ( ) ;
1581+ assert_eq ! ( value, [ i] ) ;
1582+ }
1583+
1584+ // Sparse index should have 1 block
1585+ match & reader {
1586+ crate :: BlockfileReader :: ArrowBlockfileReader ( reader) => {
1587+ assert_eq ! ( reader. root. sparse_index. len( ) , 1 ) ;
1588+ assert ! ( reader. root. sparse_index. is_valid( ) ) ;
1589+ }
1590+ _ => panic ! ( "Unexpected reader type" ) ,
1591+ }
1592+ }
1593+
14691594 #[ tokio:: test]
14701595 async fn test_splitting_boundary ( ) {
14711596 let tmp_dir = tempfile:: tempdir ( ) . unwrap ( ) ;
@@ -2058,7 +2183,7 @@ mod tests {
20582183 let block_cache = new_cache_for_test ( ) ;
20592184 let root_cache = new_cache_for_test ( ) ;
20602185 let root_manager = RootManager :: new ( storage. clone ( ) , root_cache) ;
2061- let block_manager = BlockManager :: new ( storage. clone ( ) , 8 * 1024 * 1024 , block_cache) ;
2186+ let block_manager = BlockManager :: new ( storage. clone ( ) , 16384 , block_cache) ;
20622187
20632188 // Manually create a v1 blockfile with no counts
20642189 let initial_block = block_manager. create :: < & str , String , UnorderedBlockDelta > ( ) ;
0 commit comments