33// For the full copyright and license information, please view the LICENSE
44// file that was distributed with this source code.
55
6- // spell-checker:ignore (ToDO) delim sourcefiles
6+ // spell-checker:ignore (ToDO) delim sourcefiles undelimited
77
88use bstr:: io:: BufReadExt ;
99use clap:: { Arg , ArgAction , ArgMatches , Command , builder:: ValueParser } ;
@@ -254,35 +254,132 @@ fn cut_fields_implicit_out_delim<R: Read, W: Write, M: Matcher>(
254254 Ok ( ( ) )
255255}
256256
257- /// The input delimiter is identical to `newline_char`
257+ /// Streams and filters fields where the record terminator and
258+ /// field delimiter are the same character (specified by `newline_char`)
258259fn cut_fields_newline_char_delim < R : Read , W : Write > (
259260 reader : R ,
260261 out : & mut W ,
261262 ranges : & [ Range ] ,
263+ only_delimited : bool ,
262264 newline_char : u8 ,
263265 out_delim : & [ u8 ] ,
264266) -> UResult < ( ) > {
265- let buf_in = BufReader :: new ( reader) ;
267+ let mut reader = BufReader :: new ( reader) ;
268+ let mut line = Vec :: new ( ) ;
266269
267- let segments: Vec < _ > = buf_in. split ( newline_char) . filter_map ( Result :: ok) . collect ( ) ;
268- let mut print_delim = false ;
270+ // We start at 1 because 'cut' field indexing is 1-based
271+ let mut current_field_idx = 1 ;
272+ let mut first_field_printed = false ;
273+ let mut has_data = false ;
274+ let mut suppressed = false ;
269275
270- for & Range { low, high } in ranges {
271- for i in low..=high {
272- // "- 1" is necessary because fields start from 1 whereas a Vec starts from 0
273- if let Some ( segment) = segments. get ( i - 1 ) {
274- if print_delim {
275- out. write_all ( out_delim) ?;
276+ let mut range_idx = 0 ;
277+
278+ loop {
279+ line. clear ( ) ;
280+
281+ let is_selected = range_idx < ranges. len ( ) && current_field_idx >= ranges[ range_idx] . low ;
282+ let needs_data = is_selected || current_field_idx == 1 ;
283+
284+ let mut has_processed_data = false ;
285+
286+ if needs_data {
287+ // Standard read: copies bytes into `line`
288+ loop {
289+ let buf = reader. fill_buf ( ) ?;
290+ if buf. is_empty ( ) {
291+ break ;
292+ }
293+
294+ has_processed_data = true ;
295+
296+ if let Some ( pos) = memchr:: memchr ( newline_char, buf) {
297+ let amt = pos + 1 ;
298+ line. extend_from_slice ( & buf[ ..amt] ) ;
299+ reader. consume ( amt) ;
300+
301+ break ;
302+ }
303+ let len = buf. len ( ) ;
304+ line. extend_from_slice ( buf) ;
305+ reader. consume ( len) ;
306+ }
307+ } else {
308+ // Zero-allocation skip: scans the buffer and advances the cursor without copying
309+ loop {
310+ let buf = reader. fill_buf ( ) ?;
311+ if buf. is_empty ( ) {
312+ break ; // EOF
313+ }
314+
315+ has_processed_data = true ;
316+
317+ if let Some ( pos) = memchr:: memchr ( newline_char, buf) {
318+ let bytes_to_consume = pos + 1 ;
319+ reader. consume ( bytes_to_consume) ;
320+ break ;
321+ }
322+
323+ let len = buf. len ( ) ;
324+ reader. consume ( len) ;
325+ }
326+ }
327+
328+ if !has_processed_data {
329+ break ;
330+ }
331+ has_data = true ;
332+
333+ // To comply with -s when the stream consists of only a single field.
334+ if current_field_idx == 1 {
335+ let is_eof_next = reader. fill_buf ( ) ?. is_empty ( ) ;
336+
337+ if is_eof_next && line. last ( ) != Some ( & newline_char) {
338+ if only_delimited {
339+ suppressed = true ;
276340 } else {
277- print_delim = true ;
341+ // GNU cut prints the whole line if no delimiter is found.
342+ out. write_all ( & line) ?;
278343 }
279- out. write_all ( segment. as_slice ( ) ) ?;
280- } else {
281344 break ;
282345 }
283346 }
347+
348+ if range_idx < ranges. len ( ) && current_field_idx > ranges[ range_idx] . high {
349+ range_idx += 1 ;
350+
351+ // EARLY EXIT: If we've exhausted all ranges, stop reading the stream entirely.
352+ if range_idx == ranges. len ( ) {
353+ break ;
354+ }
355+ }
356+
357+ // Check if the current field falls inside the current active range
358+ let is_selected = range_idx < ranges. len ( ) && current_field_idx >= ranges[ range_idx] . low ;
359+
360+ if is_selected {
361+ if first_field_printed {
362+ out. write_all ( out_delim) ?;
363+ }
364+
365+ let has_newline = line. last ( ) == Some ( & newline_char) ;
366+ let content = if has_newline {
367+ & line[ ..line. len ( ) - 1 ]
368+ } else {
369+ & line[ ..]
370+ } ;
371+
372+ out. write_all ( content) ?;
373+ first_field_printed = true ;
374+ }
375+
376+ current_field_idx += 1 ;
284377 }
285- out. write_all ( & [ newline_char] ) ?;
378+
379+ if has_data && !suppressed {
380+ out. write_all ( & [ newline_char] ) ?;
381+ }
382+
286383 Ok ( ( ) )
287384}
288385
@@ -297,7 +394,14 @@ fn cut_fields<R: Read, W: Write>(
297394 match field_opts. delimiter {
298395 Delimiter :: Slice ( delim) if delim == [ newline_char] => {
299396 let out_delim = opts. out_delimiter . unwrap_or ( delim) ;
300- cut_fields_newline_char_delim ( reader, out, ranges, newline_char, out_delim)
397+ cut_fields_newline_char_delim (
398+ reader,
399+ out,
400+ ranges,
401+ field_opts. only_delimited ,
402+ newline_char,
403+ out_delim,
404+ )
301405 }
302406 Delimiter :: Slice ( delim) => {
303407 let matcher = ExactMatcher :: new ( delim) ;
0 commit comments