33// For the full copyright and license information, please view the LICENSE
44// file that was distributed with this source code.
55
6- // spell-checker:ignore (ToDO) delim sourcefiles
6+ // spell-checker:ignore (ToDO) delim sourcefiles undelimited
77
88use bstr:: io:: BufReadExt ;
99use clap:: { Arg , ArgAction , ArgMatches , Command , builder:: ValueParser } ;
@@ -254,35 +254,131 @@ fn cut_fields_implicit_out_delim<R: Read, W: Write, M: Matcher>(
254254 Ok ( ( ) )
255255}
256256
257- /// The input delimiter is identical to `newline_char`
257+ /// Streams and filters fields where the record terminator and
258+ /// field delimiter are the same character (specified by `newline_char`)
///
/// Fields are numbered from 1 in input order. Each field whose index falls in
/// one of `ranges` is written to `out`, with `out_delim` between consecutive
/// written fields. A single `newline_char` is appended at the end whenever any
/// input was read and the output was not suppressed.
///
/// * `reader` - input stream; wrapped in a `BufReader` and consumed field by field.
/// * `out` - destination for the selected fields.
/// * `ranges` - inclusive, 1-based `low`/`high` field ranges.
///   NOTE(review): the single forward-moving `range_idx` cursor presumes the
///   ranges are sorted and non-overlapping - confirm against the caller.
/// * `only_delimited` - when true, input that consists of one field with no
///   trailing `newline_char` produces no output at all.
/// * `newline_char` - byte acting as both field delimiter and record terminator.
/// * `out_delim` - bytes written between selected fields.
///
/// # Errors
/// Propagates any I/O error raised while reading `reader` or writing to `out`.
258259fn cut_fields_newline_char_delim < R : Read , W : Write > (
259260 reader : R ,
260261 out : & mut W ,
261262 ranges : & [ Range ] ,
263+ only_delimited : bool ,
262264 newline_char : u8 ,
263265 out_delim : & [ u8 ] ,
264266) -> UResult < ( ) > {
265- let buf_in = BufReader :: new ( reader) ;
267+ let mut reader = BufReader :: new ( reader) ;
268+ let mut line = Vec :: new ( ) ; // reusable buffer holding the bytes of the current field
266269
267- let segments: Vec < _ > = buf_in. split ( newline_char) . filter_map ( Result :: ok) . collect ( ) ;
268- let mut print_delim = false ;
270+ // Field counter; starts at 1 because 'cut' field indexing is 1-based
271+ let mut current_field_idx = 1 ;
272+ let mut first_field_printed = false ; // true once a field was written, so later fields get `out_delim` first
273+ let mut has_data = false ; // true once at least one byte has been read from the input
274+ let mut suppressed = false ; // set when `only_delimited` drops the sole, unterminated field
269275
270- for & Range { low, high } in ranges {
271- for i in low..=high {
272- // "- 1" is necessary because fields start from 1 whereas a Vec starts from 0
273- if let Some ( segment) = segments. get ( i - 1 ) {
274- if print_delim {
275- out. write_all ( out_delim) ?;
276+ let mut range_idx = 0 ; // index of the range currently being matched against
277+
278+ loop {
279+ line. clear ( ) ;
280+
// Decide whether this field's bytes must be kept. Only `low` is checked here,
// so the first field past a range's `high` is still copied before `range_idx`
// advances further down. Field 1 is always copied because the single-field
// check below inspects its bytes.
281+ let is_selected = range_idx < ranges. len ( ) && current_field_idx >= ranges[ range_idx] . low ;
282+ let needs_data = is_selected || current_field_idx == 1 ;
283+
284+ let mut bytes_processed = 0 ; // bytes consumed for this field, terminator included; 0 means EOF
285+
286+ if needs_data {
287+ // Copying read: appends bytes to `line` up to and including the next `newline_char`, or up to EOF
288+ loop {
289+ let buf = reader. fill_buf ( ) ?;
290+ if buf. is_empty ( ) {
291+ break ; // EOF
292+ }
293+
294+ if let Some ( pos) = memchr:: memchr ( newline_char, buf) {
295+ let amt = pos + 1 ; // include the terminator itself
296+ line. extend_from_slice ( & buf[ ..amt] ) ;
297+ reader. consume ( amt) ;
298+ bytes_processed += amt;
299+ break ;
300+ }
// No terminator in this buffer: take all of it and refill on the next pass
301+ let len = buf. len ( ) ;
302+ line. extend_from_slice ( buf) ;
303+ reader. consume ( len) ;
304+ bytes_processed += len;
305+ }
306+ } else {
307+ // Skip without copying: scans the buffer and advances the reader past the field, leaving `line` empty
308+ loop {
309+ let buf = reader. fill_buf ( ) ?;
310+ if buf. is_empty ( ) {
311+ break ; // EOF
312+ }
313+
314+ if let Some ( pos) = memchr:: memchr ( newline_char, buf) {
315+ let bytes_to_consume = pos + 1 ; // include the terminator itself
316+ reader. consume ( bytes_to_consume) ;
317+ bytes_processed += bytes_to_consume;
318+ break ;
319+ }
320+
321+ let len = buf. len ( ) ;
322+ reader. consume ( len) ;
323+ bytes_processed += len;
324+ }
325+ }
326+
// Nothing was consumed, so the input is exhausted
327+ if bytes_processed == 0 {
328+ break ;
329+ }
330+ has_data = true ;
331+
332+ // To comply with -s when the stream consists of only a single field: the first field reaches EOF without a terminator.
333+ if current_field_idx == 1 {
334+ let is_eof_next = reader. fill_buf ( ) ?. is_empty ( ) ; // peek only; nothing is consumed here
335+
336+ if is_eof_next && line. last ( ) != Some ( & newline_char) {
337+ if only_delimited {
338+ suppressed = true ;
276339 } else {
277- print_delim = true ;
340+ // GNU cut prints the whole line if no delimiter is found.
341+ out. write_all ( & line) ?;
278342 }
279- out. write_all ( segment. as_slice ( ) ) ?;
280- } else {
281343 break ;
282344 }
283345 }
346+
// The current field lies past the end of the active range: move to the next range
347+ if range_idx < ranges. len ( ) && current_field_idx > ranges[ range_idx] . high {
348+ range_idx += 1 ;
349+
350+ // EARLY EXIT: If we've exhausted all ranges, stop reading the stream entirely.
351+ if range_idx >= ranges. len ( ) {
352+ break ;
353+ }
354+ }
355+
356+ // Re-check whether the current field falls inside the active range, now that `range_idx` may have advanced
357+ let is_selected = range_idx < ranges. len ( ) && current_field_idx >= ranges[ range_idx] . low ;
358+
359+ if is_selected {
360+ if first_field_printed {
361+ out. write_all ( out_delim) ?;
362+ }
363+
// Strip the trailing terminator before writing; the last field may lack one at EOF
364+ let has_newline = line. last ( ) == Some ( & newline_char) ;
365+ let content = if has_newline {
366+ & line[ ..line. len ( ) - 1 ]
367+ } else {
368+ & line[ ..]
369+ } ;
370+
371+ out. write_all ( content) ?;
372+ first_field_printed = true ;
373+ }
374+
375+ current_field_idx += 1 ;
376+ }
377+
// Terminate the output unless nothing was read or the output was suppressed
378+ if has_data && !suppressed {
379+ out. write_all ( & [ newline_char] ) ?;
284380 }
285- out . write_all ( & [ newline_char ] ) ? ;
381+
286382 Ok ( ( ) )
287383}
288384
@@ -297,7 +393,14 @@ fn cut_fields<R: Read, W: Write>(
297393 match field_opts. delimiter {
298394 Delimiter :: Slice ( delim) if delim == [ newline_char] => {
299395 let out_delim = opts. out_delimiter . unwrap_or ( delim) ;
300- cut_fields_newline_char_delim ( reader, out, ranges, newline_char, out_delim)
396+ cut_fields_newline_char_delim (
397+ reader,
398+ out,
399+ ranges,
400+ field_opts. only_delimited ,
401+ newline_char,
402+ out_delim,
403+ )
301404 }
302405 Delimiter :: Slice ( delim) => {
303406 let matcher = ExactMatcher :: new ( delim) ;
0 commit comments