@@ -23,6 +23,7 @@ use crate::{
2323 GeneralBigDecimalParseResult , GlobalSettings , Line , SortMode , numeric_str_cmp:: NumInfo ,
2424} ;
2525
26+ const MAYBE_L1_CACHE_SIZE : usize = 64 * 1024 ;
2627const MAX_TOKEN_BUFFER_BYTES : usize = 4 * 1024 * 1024 ;
2728const MAX_TOKEN_BUFFER_ELEMS : usize = MAX_TOKEN_BUFFER_BYTES / size_of :: < Range < usize > > ( ) ;
2829
@@ -180,7 +181,13 @@ pub fn read<T: Read>(
180181 mut buffer,
181182 } = recycled_chunk;
182183 if buffer. len ( ) < carry_over. len ( ) {
183- buffer. resize ( carry_over. len ( ) + 10 * 1024 , 0 ) ;
184+ // Zero-fill only the bytes needed to hold `carry_over`, keeping the fill cost minimal,
185+ // but reserve extra capacity up front so that later growth avoids another allocation.
186+ buffer. resize ( carry_over. len ( ) , 0 ) ;
187+ let new_len = ( carry_over. len ( ) * 2 )
188+ . max ( MAYBE_L1_CACHE_SIZE )
189+ . min ( carry_over. len ( ) + 16 * 1024 * 1024 ) ;
190+ buffer. reserve ( new_len - buffer. len ( ) ) ;
184191 }
185192 buffer[ ..carry_over. len ( ) ] . copy_from_slice ( carry_over) ;
186193 let ( read, should_continue) = read_to_buffer (
@@ -252,9 +259,6 @@ fn parse_lines<'a>(
252259 assert ! ( line_data. parsed_floats. is_empty( ) ) ;
253260 assert ! ( line_data. line_num_floats. is_empty( ) ) ;
254261 token_buffer. clear ( ) ;
255- if token_buffer. capacity ( ) > MAX_TOKEN_BUFFER_ELEMS {
256- token_buffer. shrink_to ( MAX_TOKEN_BUFFER_ELEMS ) ;
257- }
258262 const SMALL_CHUNK_BYTES : usize = 64 * 1024 ;
259263 let mut estimated = ( * line_count_hint) . max ( 1 ) ;
260264 let mut exact_line_count = None ;
@@ -267,8 +271,8 @@ fn parse_lines<'a>(
267271 exact_line_count = Some ( count) ;
268272 estimated = count;
269273 } else if estimated == 1 {
270- const LINE_LEN_HINT : usize = 32 ;
271- estimated = ( read. len ( ) / LINE_LEN_HINT ) . max ( 1 ) ;
274+ const LINE_LEN_HINT : usize = 128 ;
275+ estimated = ( read. len ( ) / LINE_LEN_HINT ) . clamp ( 1 , 1024 ) ;
272276 }
273277 lines. reserve ( estimated) ;
274278 if settings. precomputed . selections_per_line > 0 {
@@ -349,12 +353,9 @@ fn read_to_buffer<T: Read>(
349353 if max_buffer_size > buffer. len ( ) {
350354 // we can grow the buffer
351355 let prev_len = buffer. len ( ) ;
352- let target = if buffer. len ( ) < max_buffer_size / 2 {
353- buffer. len ( ) . saturating_mul ( 2 )
354- } else {
355- max_buffer_size
356- } ;
357- buffer. resize ( target. min ( max_buffer_size) , 0 ) ;
356+ let grow_by = ( max_buffer_size - prev_len) . min ( MAYBE_L1_CACHE_SIZE ) ;
357+ buffer. reserve ( grow_by) ;
358+ buffer. resize ( prev_len + MAYBE_L1_CACHE_SIZE , 0 ) ;
358359 read_target = & mut buffer[ prev_len..] ;
359360 continue ;
360361 }
@@ -374,8 +375,9 @@ fn read_to_buffer<T: Read>(
374375
375376 // We need to read more lines
376377 let len = buffer. len ( ) ;
377- let grow_by = ( len / 2 ) . max ( 1024 * 1024 ) ;
378- buffer. resize ( len + grow_by, 0 ) ;
378+ let grow_by = len. clamp ( MAYBE_L1_CACHE_SIZE , 16 * 1024 * 1024 ) ;
379+ buffer. reserve ( grow_by) ;
380+ buffer. resize ( len + MAYBE_L1_CACHE_SIZE , 0 ) ;
379381 read_target = & mut buffer[ len..] ;
380382 } else {
381383 // This file has been fully read.
0 commit comments