Skip to content

Commit 85c1353

Browse files
committed
sort: Replace malloc and 0 fill with huge reserve & min 0 fill
sort: Save RAM usage
1 parent 8c98b15 commit 85c1353

File tree

1 file changed

+16
-14
lines changed

1 file changed

+16
-14
lines changed

src/uu/sort/src/chunks.rs

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ use crate::{
2323
GeneralBigDecimalParseResult, GlobalSettings, Line, SortMode, numeric_str_cmp::NumInfo,
2424
};
2525

26+
/// Buffer growth granularity in bytes (64 KiB).
///
/// `read` uses it as the lower bound for the capacity it reserves, and
/// `read_to_buffer` uses it as the number of zero-filled bytes appended per
/// growth step.
/// NOTE(review): the name suggests this approximates a typical L1 cache size;
/// that is an assumption, not something the code checks — confirm.
const MAYBE_L1_CACHE_SIZE: usize = 64 * 1024;
2627
/// Byte budget (4 MiB) from which `MAX_TOKEN_BUFFER_ELEMS` is derived.
const MAX_TOKEN_BUFFER_BYTES: usize = 4 * 1024 * 1024;
2728
/// Number of `Range<usize>` values that fit within `MAX_TOKEN_BUFFER_BYTES`
/// (integer division, so any remainder is dropped).
const MAX_TOKEN_BUFFER_ELEMS: usize = MAX_TOKEN_BUFFER_BYTES / size_of::<Range<usize>>();
2829

@@ -180,7 +181,13 @@ pub fn read<T: Read>(
180181
mut buffer,
181182
} = recycled_chunk;
182183
if buffer.len() < carry_over.len() {
183-
buffer.resize(carry_over.len() + 10 * 1024, 0);
184+
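// Zero-fill only the bytes needed to hold carry_over, keeping the fill cost minimal,
185+
// but also reserve a much larger capacity up front so later growth avoids the cost of another allocation.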
186+
buffer.resize(carry_over.len(), 0);
187+
let new_len = (carry_over.len() * 2)
188+
.max(MAYBE_L1_CACHE_SIZE)
189+
.min(carry_over.len() + 16 * 1024 * 1024);
190+
buffer.reserve(new_len - buffer.len());
184191
}
185192
buffer[..carry_over.len()].copy_from_slice(carry_over);
186193
let (read, should_continue) = read_to_buffer(
@@ -252,9 +259,6 @@ fn parse_lines<'a>(
252259
assert!(line_data.parsed_floats.is_empty());
253260
assert!(line_data.line_num_floats.is_empty());
254261
token_buffer.clear();
255-
if token_buffer.capacity() > MAX_TOKEN_BUFFER_ELEMS {
256-
token_buffer.shrink_to(MAX_TOKEN_BUFFER_ELEMS);
257-
}
258262
const SMALL_CHUNK_BYTES: usize = 64 * 1024;
259263
let mut estimated = (*line_count_hint).max(1);
260264
let mut exact_line_count = None;
@@ -267,8 +271,8 @@ fn parse_lines<'a>(
267271
exact_line_count = Some(count);
268272
estimated = count;
269273
} else if estimated == 1 {
270-
const LINE_LEN_HINT: usize = 32;
271-
estimated = (read.len() / LINE_LEN_HINT).max(1);
274+
const LINE_LEN_HINT: usize = 128;
275+
estimated = (read.len() / LINE_LEN_HINT).clamp(1, 1024);
272276
}
273277
lines.reserve(estimated);
274278
if settings.precomputed.selections_per_line > 0 {
@@ -349,12 +353,9 @@ fn read_to_buffer<T: Read>(
349353
if max_buffer_size > buffer.len() {
350354
// we can grow the buffer
351355
let prev_len = buffer.len();
352-
let target = if buffer.len() < max_buffer_size / 2 {
353-
buffer.len().saturating_mul(2)
354-
} else {
355-
max_buffer_size
356-
};
357-
buffer.resize(target.min(max_buffer_size), 0);
356+
let grow_by = (max_buffer_size - prev_len).min(MAYBE_L1_CACHE_SIZE);
357+
buffer.reserve(grow_by);
358+
buffer.resize(prev_len + MAYBE_L1_CACHE_SIZE, 0);
358359
read_target = &mut buffer[prev_len..];
359360
continue;
360361
}
@@ -374,8 +375,9 @@ fn read_to_buffer<T: Read>(
374375

375376
// We need to read more lines
376377
let len = buffer.len();
377-
let grow_by = (len / 2).max(1024 * 1024);
378-
buffer.resize(len + grow_by, 0);
378+
let grow_by = len.clamp(MAYBE_L1_CACHE_SIZE, 16 * 1024 * 1024);
379+
buffer.reserve(grow_by);
380+
buffer.resize(len + MAYBE_L1_CACHE_SIZE, 0);
379381
read_target = &mut buffer[len..];
380382
} else {
381383
// This file has been fully read.

0 commit comments

Comments
 (0)