Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/ansi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,9 @@ pub fn ansi_preserving_index(s: &str, i: usize) -> Option<usize> {
None
}

fn ansi_strings_iterator(s: &str) -> impl Iterator<Item = (&str, bool)> {
/// Iterate over `s` and yield `(substring, true)` for all ansi control sequences,
/// and `(.., false)` for other substrings (possibly affected by ansi styling).
pub fn ansi_strings_iterator(s: &str) -> impl Iterator<Item = (&str, bool)> {
AnsiElementIterator::new(s).map(move |el| match el {
Element::Sgr(_, i, j) => (&s[i..j], true),
Element::Csi(i, j) => (&s[i..j], true),
Expand Down
35 changes: 29 additions & 6 deletions src/handlers/grep.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use crate::delta::{State, StateMachine};
use crate::handlers::{self, ripgrep_json};
use crate::paint::{self, BgShouldFill, StyleSectionSpecifier};
use crate::style::Style;
use crate::utils::tabs::TabCfg;
use crate::utils::{process, tabs};

use super::hunk_header::HunkHeaderIncludeHunkLabel;
Expand All @@ -28,10 +29,11 @@ pub struct GrepLine<'b> {
}

impl GrepLine<'_> {
fn expand_tabs(&mut self, tab_cfg: &tabs::TabCfg) {
fn expand_tabs(&mut self, raw_line: &mut String, tab_cfg: &tabs::TabCfg) {
let old_len = self.code.len();
self.code = tabs::expand(&self.code, tab_cfg).into();
tab_expand_in_code_and_raw(&mut self.code, raw_line, tab_cfg);
let shift = self.code.len().saturating_sub(old_len);

// HACK: it is not necessarily the case that all submatch coordinates
// should be shifted in this way. It should be true in a common case of:
// (a) the only tabs were at the beginning of the line, and (b) the user
Expand Down Expand Up @@ -191,7 +193,7 @@ impl StateMachine<'_> {
// (At the time of writing, we are in this
// arm iff we are handling `ripgrep --json`
// output.)
grep_line.expand_tabs(&self.config.tab_cfg);
grep_line.expand_tabs(&mut self.raw_line, &self.config.tab_cfg);
make_style_sections(
&grep_line.code,
&grep_line.submatches.unwrap(),
Expand All @@ -206,7 +208,12 @@ impl StateMachine<'_> {
// enough. But at this point it is guaranteed
// that this handler is going to handle this
// line, so mutating it is acceptable.
self.raw_line = tabs::expand(&self.raw_line, &self.config.tab_cfg);
tab_expand_in_code_and_raw(
&mut grep_line.code,
&mut self.raw_line,
&self.config.tab_cfg,
);

get_code_style_sections(
&self.raw_line,
self.config.grep_match_word_style,
Expand Down Expand Up @@ -332,7 +339,7 @@ impl StateMachine<'_> {
// (At the time of writing, we are in this
// arm iff we are handling `ripgrep --json`
// output.)
grep_line.expand_tabs(&self.config.tab_cfg);
grep_line.expand_tabs(&mut self.raw_line, &self.config.tab_cfg);
make_style_sections(
&grep_line.code,
&grep_line.submatches.unwrap(),
Expand All @@ -347,7 +354,12 @@ impl StateMachine<'_> {
// enough. But at the point it is guaranteed
// that this handler is going to handle this
// line, so mutating it is acceptable.
self.raw_line = tabs::expand(&self.raw_line, &self.config.tab_cfg);
tab_expand_in_code_and_raw(
&mut grep_line.code,
&mut self.raw_line,
&self.config.tab_cfg,
);

get_code_style_sections(
&self.raw_line,
self.config.grep_match_word_style,
Expand Down Expand Up @@ -729,6 +741,17 @@ pub fn _parse_grep_line<'b>(regex: &Regex, line: &'b str) -> Option<GrepLine<'b>
})
}

/// Expand tabs in `code` and `raw_line` in lockstep, using the fixed-width
/// replacement of `tabs::expand_fixed`.
///
/// Both strings are rewritten only when `code` contains a tab; if `code` has
/// none, `raw_line` is left untouched as well.
fn tab_expand_in_code_and_raw(
    code: &mut Cow<'_, str>,
    raw_line: &mut String,
    tab_cfg: &TabCfg,
) {
    if !tabs::has_tab(code) {
        return;
    }

    let expanded_code = tabs::expand_fixed(code, tab_cfg);
    let expanded_raw = tabs::expand_fixed(raw_line, tab_cfg);
    *code = Cow::Owned(expanded_code);
    *raw_line = expanded_raw;
}

#[cfg(test)]
mod tests {
use crate::handlers::grep::{
Expand Down
5 changes: 3 additions & 2 deletions src/paint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -598,9 +598,10 @@ pub fn prepare(line: &str, prefix_length: usize, config: &config::Config) -> Str
// Remove initial -/+ characters, expand tabs as spaces, retaining ANSI sequences. Terminate with
// newline character.
//
// NOTE(review): this listing shows the pre-change and the post-change statements of the function
// side by side. In the post-change statements the prefix is cut with `ansi::ansi_preserving_slice`
// first and the tabs are expanded afterwards with `tabs::expand_raw`, so the expansion starts
// counting columns at the first character after the removed prefix.
pub fn prepare_raw_line(raw_line: &str, prefix_length: usize, config: &config::Config) -> String {
// Pre-change: expand tabs on the full line, prefix still included.
let mut line = tabs::expand(raw_line, &config.tab_cfg);
// Post-change: drop the prefix (keeping ANSI sequences), then expand tabs.
let cut_line = ansi::ansi_preserving_slice(raw_line, prefix_length);
let mut line = tabs::expand_raw(&cut_line, &config.tab_cfg);
line.push('\n');
// Pre-change return value: the prefix was removed only after the expansion.
ansi::ansi_preserving_slice(&line, prefix_length)
// Post-change return value: `line` already has the prefix removed.
line
}

pub fn paint_minus_and_plus_lines(
Expand Down
138 changes: 131 additions & 7 deletions src/utils/tabs.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthStr;

use crate::ansi::ansi_strings_iterator;

/// Returns `true` if `line` contains at least one tab character.
///
/// The check runs over the raw bytes of the string: a tab is the single
/// ASCII byte `0x09`, which never occurs inside a multi-byte UTF-8 sequence.
pub fn has_tab(line: &str) -> bool {
    line.as_bytes().contains(&b'\t')
}

#[derive(Debug, Clone)]
pub struct TabCfg {
Expand All @@ -17,34 +24,98 @@ impl TabCfg {
/// Returns `true` when tab replacement is enabled, i.e. when the configured
/// `replacement` string is not empty.
pub fn replace(&self) -> bool {
!self.replacement.is_empty()
}
/// Returns the leading `upto` bytes of the replacement string.
///
/// # Panics
///
/// Panics if `upto` is larger than the byte length of `replacement`, or if it
/// does not fall on a UTF-8 character boundary.
// NOTE(review): presumably `replacement` consists of `width()` single-byte
// characters, which would make every `upto` in `1..=width()` valid -- confirm
// against `TabCfg::new`.
fn replacement_str(&self, upto: usize) -> &str {
&self.replacement[..upto]
}
}

/// Expand tabs as spaces.
pub fn expand(line: &str, tab_cfg: &TabCfg) -> String {
if tab_cfg.replace() && line.as_bytes().iter().any(|c| *c == b'\t') {
itertools::join(line.split('\t'), &tab_cfg.replacement)
/// Expand tabs as spaces, always using a fixed number of replacement chars.
pub fn expand_fixed(line: &str, tabs: &TabCfg) -> String {
if tabs.replace() && line.as_bytes().iter().any(|c| *c == b'\t') {
itertools::join(line.split('\t'), &tabs.replacement)
} else {
line.to_string()
}
}

/// Expand tabs as spaces, taking tabstops into account.
///
/// Each tab is replaced by the part of the replacement string needed to reach
/// the next multiple of `tabs.width()`; the column is tracked by `expand_text`
/// using the display width of the preceding text. If replacement is disabled
/// or `line` contains no tab, an unmodified copy of `line` is returned.
pub fn expand(line: &str, tabs: &TabCfg) -> String {
expand_impl(line, tabs, expand_text)
}

/// Expand tabs as spaces, but don't count ansi escape codes as visible.
///
/// Works like `expand()`, except that the substrings which
/// `ansi_strings_iterator` reports as ansi sequences are copied to the output
/// without advancing the column used for the tabstop computation. If
/// replacement is disabled or `line` contains no tab, an unmodified copy of
/// `line` is returned.
pub fn expand_raw(line: &str, tabs: &TabCfg) -> String {
expand_impl(line, tabs, expand_ansi)
}

/// Remove `prefix` chars from `line`, then call `tabs::expand()`.
///
/// When the first `prefix` bytes of `line` are all ascii they are sliced off
/// directly; otherwise (including a `line` shorter than `prefix` bytes) the
/// first `prefix` grapheme clusters are skipped. The expansion runs on the
/// remainder only, so columns are counted from the first character after the
/// removed prefix.
// NOTE(review): this listing shows the pre-change (`tab_cfg`) and post-change
// (`tabs`) spelling of the last parameter side by side.
pub fn remove_prefix_and_expand(prefix: usize, line: &str, tab_cfg: &TabCfg) -> String {
pub fn remove_prefix_and_expand(prefix: usize, line: &str, tabs: &TabCfg) -> String {
let line_bytes = line.as_bytes();
// The to-be-removed prefixes are almost always ascii +/- (or ++/ +/.. for merges) for
// which grapheme clusters are not required.
if line_bytes.len() >= prefix && line_bytes[..prefix].is_ascii() {
// Safety: slicing into the utf-8 line-str is ok, upto `prefix` only ascii was present.
expand(&line[prefix..], tab_cfg)
expand(&line[prefix..], tabs)
} else {
// Slow path: count the prefix in grapheme clusters and rebuild the remainder.
let cut_line = line.graphemes(true).skip(prefix).collect::<String>();
expand(&cut_line, tab_cfg)
expand(&cut_line, tabs)
}
}

#[inline]
/// Append `line` to `expanded`, replacing each tab with the padding that
/// reaches the next tabstop.
///
/// `position` is the running column counter; it is shared with the caller so
/// that several text fragments of one line can be processed one after another.
fn expand_text(position: &mut usize, expanded: &mut String, line: &str, tabs: &TabCfg) {
    for grapheme in line.graphemes(true) {
        match grapheme {
            "\t" => {
                // Number of cells missing up to the next multiple of the tab width.
                let fill = tabs.width() - (*position % tabs.width());
                expanded.push_str(tabs.replacement_str(fill));
                // The output now sits exactly on a tabstop, so counting restarts.
                *position = 0;
            }
            visible => {
                expanded.push_str(visible);
                // see 54e1ee79c7cefe - some chars take up more than one cell
                *position += visible.width();
            }
        }
    }
}

#[inline]
/// Like `expand_text`, but for a `line` that may contain ansi sequences.
///
/// Ansi sequences are copied to `expanded` verbatim and leave `position`
/// untouched; every other substring is handed to `expand_text`.
fn expand_ansi(position: &mut usize, expanded: &mut String, line: &str, tabs: &TabCfg) {
    for (segment, is_escape) in ansi_strings_iterator(line) {
        match is_escape {
            // Escape sequences occupy no cells: the column counter stays as it is.
            true => expanded.push_str(segment),
            false => expand_text(position, expanded, segment, tabs),
        }
    }
}

#[inline]
/// Shared driver of the tabstop-aware expansion functions.
///
/// Returns a plain copy of `line` when replacement is disabled or `line` has
/// no tab. Otherwise `tab_expander` is called once with a column counter
/// starting at zero and an empty output buffer, and the filled buffer is
/// returned.
fn expand_impl<F>(line: &str, tabs: &TabCfg, tab_expander: F) -> String
where
    F: Fn(&mut usize, &mut String, &str, &TabCfg),
{
    let nothing_to_expand = !(tabs.replace() && has_tab(line));
    if nothing_to_expand {
        return line.to_string();
    }

    let mut column = 0;
    let mut output = String::new();
    tab_expander(&mut column, &mut output, line, tabs);
    output
}

#[cfg(test)]
pub mod tests {
use super::*;
use crate::ansi::strip_ansi_codes;
use crate::tests::integration_test_utils::*;

pub const TABSTOP_DIFF: &str = "\
--- a/a
+++ b/b
@@ -1 +1 @@
-1 1. 1.. 1..4 1..4. 1..4.. 1..4... 1..4...8 x
+1 1. 1.. 1..4 1..4. 1..4.. 1..4... 1..4...8 y
";

#[test]
fn test_remove_prefix_and_expand() {
Expand All @@ -61,4 +132,57 @@ pub mod tests {
// ensure non-ascii chars were removed:
assert!(utf8_prefix.len() - result.len() > n);
}

#[test]
fn test_tabstops() {
    // With a tab width of 4 every tab pads up to the next multiple of four
    // columns, so the padding shrinks as the preceding text grows:
    // "1234" -> 4 cells, "1" -> 3 cells, "12" -> 2 cells, "123" -> 1 cell.
    let line = "1234\t1\t12\t123\tZ";
    let result = expand(line, &TabCfg::new(4));
    assert_eq!(result, "1234    1   12  123 Z");
}

#[test]
fn test_expand_raw() {
    // The ansi sequences must not advance the column counter, so the raw line
    // and its ansi-stripped text have to expand to the same visible result.
    let raw_line = "\x1b[32m+\x1b[m\x1b[32mpub\tfn\tfoo() -> bool {\x1b[m";
    // Tab width 7: "+pub" ends in column 4 -> 3 cells of padding,
    // "fn" ends in column 2 -> 5 cells of padding.
    let expected = "+pub   fn     foo() -> bool {";
    let text_line = strip_ansi_codes(raw_line);
    let raw_result = expand_raw(raw_line, &TabCfg::new(7));
    let raw_result_noansi = strip_ansi_codes(&raw_result);
    let text_result = expand(&text_line, &TabCfg::new(7));
    let text_via_ansi = expand_raw(&text_line, &TabCfg::new(7));
    // A tab width of 0 is expected to leave the line untouched.
    let raw_no_expansion = expand_raw(raw_line, &TabCfg::new(0));
    assert_eq!(expected, raw_result_noansi);
    assert_eq!(expected, text_result);
    assert_eq!(expected, text_via_ansi);
    assert_eq!(raw_line, raw_no_expansion);
}

#[test]
fn test_tabs_expansion() {
// End-to-end check: run delta over `TABSTOP_DIFF` and compare the two output lines
// that follow the first `HEADER_LEN` lines, with ansi codes stripped.
// NOTE(review): the whitespace inside the expected literals below (and inside
// `TABSTOP_DIFF`) looks collapsed to single spaces in this listing; presumably the
// fields are separated by tabs in the fixture and padded to the next tabstop in the
// expectations -- verify against the repository before relying on these literals.
let config = make_config_from_args(&["--tabs", "8"]);
let output = run_delta(TABSTOP_DIFF, &config);
let mut lines = output.lines().skip(crate::config::HEADER_LEN);
let (line_1, line_2) = (lines.next().unwrap(), lines.next().unwrap());
assert_eq!(
"1 1. 1.. 1..4 1..4. 1..4.. 1..4... 1..4...8 x",
strip_ansi_codes(line_1)
);
assert_eq!(
"1 1. 1.. 1..4 1..4. 1..4.. 1..4... 1..4...8 y",
strip_ansi_codes(line_2)
);

// the +/- shifts everything, but tab counting remains identical
let config = make_config_from_args(&["--tabs", "4", "--keep-plus-minus-markers"]);
let output = run_delta(TABSTOP_DIFF, &config);
let mut lines = output.lines().skip(crate::config::HEADER_LEN);
let (line_1, line_2) = (lines.next().unwrap(), lines.next().unwrap());
assert_eq!(
"-1 1. 1.. 1..4 1..4. 1..4.. 1..4... 1..4...8 x",
strip_ansi_codes(line_1)
);
assert_eq!(
"+1 1. 1.. 1..4 1..4. 1..4.. 1..4... 1..4...8 y",
strip_ansi_codes(line_2)
);
}
}