Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 174 additions & 1 deletion src/uu/tac/src/tac.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,11 +223,99 @@ fn buffer_tac(data: &[u8], before: bool, separator: &str) -> std::io::Result<()>
Ok(())
}

/// Make the regex flavor compatible with `regex` crate
///
/// In the input flavor `\(`, `\)`, `\|`, `\{` and `\}` are operators and the
/// bare characters are literals; the `regex` crate uses the opposite
/// convention, so the escaping of those characters is flipped.
///
/// Concretely:
/// - Toggle escaping of (), |, {}
/// - Keep `^` as an anchor only at the very start of the pattern or directly
///   after a `\(` or `\|` operator; escape it everywhere else
/// - Keep `$` as an anchor only at the very end of the pattern or directly
///   before a `\)` or `\|` operator; escape it everywhere else
/// - Leave expressions inside [] unchanged; an escaped `\[` does not open a
///   bracket expression, and a bracket expression ends at the first `]`
///
/// Returns the translated pattern. The result is not validated here; a
/// malformed pattern (for example a trailing backslash) is passed through
/// as-is.
fn translate_regex_flavor(regex: &str) -> String {
    let mut result = String::with_capacity(regex.len());
    let mut chars = regex.chars().peekable();
    let mut inside_brackets = false;
    let mut prev_was_backslash = false;
    // True only at the start of the pattern and right after an emitted group
    // opener or alternation operator. A literal `(` or `|` must not set it.
    let mut caret_is_anchor = true;

    while let Some(c) = chars.next() {
        let is_escaped = prev_was_backslash;
        prev_was_backslash = false;
        // Capture the anchor state for this character and clear it; only the
        // operator branch below turns it back on.
        let caret_anchors_here = caret_is_anchor;
        caret_is_anchor = false;

        match c {
            // Unescape escaped (), |, {} when not inside brackets
            '\\' if !inside_brackets && !is_escaped => {
                if let Some(op) =
                    chars.next_if(|&next| matches!(next, '(' | ')' | '|' | '{' | '}'))
                {
                    result.push(op);
                    caret_is_anchor = matches!(op, '(' | '|');
                } else {
                    // Any other escape is kept verbatim; remember the
                    // backslash so the next character is treated as escaped.
                    result.push('\\');
                    prev_was_backslash = true;
                }
            }
            // Bracket tracking: `\[` is a literal bracket, not an opener
            '[' if !inside_brackets && !is_escaped => {
                inside_brackets = true;
                result.push(c);
            }
            ']' => {
                inside_brackets = false;
                result.push(c);
            }
            // Escape (), |, {} when not escaped and outside brackets
            '(' | ')' | '|' | '{' | '}' if !inside_brackets && !is_escaped => {
                result.push('\\');
                result.push(c);
            }
            '^' if !inside_brackets && !is_escaped => {
                if !caret_anchors_here {
                    result.push('\\');
                }
                result.push(c);
            }
            '$' if !inside_brackets && !is_escaped => {
                // Look ahead without consuming: `$` anchors at the end of the
                // pattern or right before the operators `\)` and `\|`.
                let mut lookahead = chars.clone();
                let is_anchor = match lookahead.next() {
                    None => true,
                    Some('\\') => matches!(lookahead.next(), Some(')' | '|')),
                    Some(_) => false,
                };
                if !is_anchor {
                    result.push('\\');
                }
                result.push(c);
            }
            _ => result.push(c),
        }
    }

    result
}

#[allow(clippy::cognitive_complexity)]
fn tac(filenames: &[OsString], before: bool, regex: bool, separator: &str) -> UResult<()> {
// Compile the regular expression pattern if it is provided.
let maybe_pattern = if regex {
match regex::bytes::Regex::new(separator) {
match regex::bytes::RegexBuilder::new(&translate_regex_flavor(separator))
.multi_line(true)
.build()
{
Ok(p) => Some(p),
Err(e) => return Err(TacError::InvalidRegex(e).into()),
}
Expand Down Expand Up @@ -359,3 +447,88 @@ fn try_mmap_path(path: &Path) -> Option<Mmap> {

Some(mmap)
}

#[cfg(test)]
mod tests_hybrid_flavor {
    use super::translate_regex_flavor;

    /// Check each `(input, expected)` pair against `translate_regex_flavor`.
    fn assert_translations(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(translate_regex_flavor(input), expected);
        }
    }

    /// Escaping of grouping parentheses and the alternation bar is flipped.
    #[test]
    fn test_grouping_and_alternation() {
        assert_translations(&[
            (r"\(abc\)", r"(abc)"),
            (r"(abc)", r"\(abc\)"),
            (r"a\|b", r"a|b"),
            (r"a|b", r"a\|b"),
        ]);
    }

    /// `+`, `*` and `?` pass through untouched, escaped or not.
    #[test]
    fn test_quantifiers() {
        assert_translations(&[
            ("a+", "a+"),
            ("a*", "a*"),
            ("a?", "a?"),
            (r"a\+", r"a\+"),
            (r"a\*", r"a\*"),
            (r"a\?", r"a\?"),
        ]);
    }

    /// Escaping of interval braces is flipped.
    #[test]
    fn test_intervals() {
        assert_translations(&[
            (r"a\{1,3\}", r"a{1,3}"),
            (r"a{1,3}", r"a\{1,3\}"),
        ]);
    }

    /// `^` and `$` stay anchors at the edges and become literals in the middle.
    #[test]
    fn test_anchors_context() {
        assert_translations(&[
            (r"^abc$", r"^abc$"),
            (r"a^b", r"a\^b"),
            (r"a$b", r"a\$b"),
            // Anchors inside groups (reset by \(...\) regardless of position)
            (r"\(^abc\)", r"(^abc)"),
            (r"z\(^abc\)", r"z(^abc)"),
            (r"\(abc$\)", r"(abc$)"),
            (r"\(abc$\)z", r"(abc$)z"),
            // Anchors inside alternation (reset by \| regardless of position)
            (r"^a\|^b", r"^a|^b"),
            (r"x\|^b", r"x|^b"),
            (r"a$\|b$", r"a$|b$"),
        ]);
    }

    /// Bracket expressions come out exactly as they went in.
    #[test]
    fn test_character_classes() {
        assert_translations(&[
            (r"[a-z]", r"[a-z]"),
            (r"[.]", r"[.]"),
            (r"[+]", r"[+]"),
            (r"[]abc]", r"[]abc]"),
            (r"[^]abc]", r"[^]abc]"),
        ]);
    }

    /// Several rules combined in one pattern.
    #[test]
    fn test_complex_strings() {
        assert_translations(&[
            (r"(\d+)[+*]", r"\(\d+\)[+*]"),
            (r"\(\d+\)\{2\}", r"(\d+){2}"),
        ]);
    }

    /// Trailing backslash, escaped backslash and escaped caret are preserved.
    #[test]
    fn test_edge_cases() {
        assert_translations(&[
            (r"abc\", r"abc\"),
            (r"\\", r"\\"),
            (r"\^", r"\^"),
        ]);
    }
}
71 changes: 70 additions & 1 deletion tests/by-util/test_tac.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore axxbxx bxxaxx axxx axxxx xxaxx xxax xxxxa axyz zyax zyxa
// spell-checker:ignore axxbxx bxxaxx axxx axxxx xxaxx xxax xxxxa axyz zyax zyxa bbaaa aaabc bcdddd cddddaaabc xyzabc abcxyzabc nbbaaa
#[cfg(target_os = "linux")]
use uutests::at_and_ucmd;
use uutests::new_ucmd;
Expand Down Expand Up @@ -347,3 +347,72 @@ fn test_stdin_bad_tmpdir_fallback() {
.succeeds()
.stdout_is("c\nb\na\n");
}

/// With `-r -s '[^x]\|x'`, the input `abc` must be printed as `cba`.
#[test]
fn test_regex_or_operator() {
    let separator = r"[^x]\|x";
    let input = "abc";
    let expected = "cba";

    new_ucmd!()
        .args(&["-r", "-s", separator])
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}

/// An unescaped `^` or `$` in the middle of the separator is matched as a
/// plain character of the input.
#[test]
fn test_unescaped_middle_anchor() {
    // (separator, piped input, expected stdout)
    let cases = [
        (r"1^2", "111^222", "22111^2"),
        (r"a$b", "aaa$bbb", "bbaaa$b"),
    ];

    for &(separator, input, expected) in &cases {
        new_ucmd!()
            .args(&["-r", "-s", separator])
            .pipe_in(input)
            .succeeds()
            .stdout_is(expected);
    }
}

/// An escaped `\^` or `\$` in the middle of the separator is matched as a
/// plain character of the input.
#[test]
fn test_escaped_middle_anchor() {
    // (separator, piped input, expected stdout)
    let cases = [
        (r"c\^b", "aaabc^bcdddd", "cddddaaabc^b"),
        (r"c\$b", "aaabc$bcdddd", "cddddaaabc$b"),
    ];

    for &(separator, input, expected) in &cases {
        new_ucmd!()
            .args(&["-r", "-s", separator])
            .pipe_in(input)
            .succeeds()
            .stdout_is(expected);
    }
}

/// A leading `^` in the separator: the first input is expected back
/// unchanged, the second is expected to be split in front of `bbb`'s first `b`.
#[test]
fn test_regular_start_anchor() {
    // (separator, piped input, expected stdout)
    let cases = [
        (r"^abc", "xyzabc123abc", "xyzabc123abc"),
        (r"^b", "aaa\nbbb\nccc\n", "bb\nccc\naaa\nb"),
    ];

    for &(separator, input, expected) in &cases {
        new_ucmd!()
            .args(&["-r", "-s", separator])
            .pipe_in(input)
            .succeeds()
            .stdout_is(expected);
    }
}

/// A trailing `$` in the separator: the first input is expected back
/// unchanged, the second is expected to be split after `bbb`'s last `b`.
#[test]
fn test_regular_end_anchor() {
    // (separator, piped input, expected stdout)
    let cases = [
        (r"abc$", "123abcxyzabc", "123abcxyzabc"),
        (r"b$", "aaa\nbbb\nccc\n", "\nccc\nbbaaa\nb"),
    ];

    for &(separator, input, expected) in &cases {
        new_ucmd!()
            .args(&["-r", "-s", separator])
            .pipe_in(input)
            .succeeds()
            .stdout_is(expected);
    }
}
Loading