Skip to content

Commit 1c764d1

Browse files
committed
micha's suggestion for normalize_whitespace
1 parent 8147494 commit 1c764d1

1 file changed

Lines changed: 32 additions & 26 deletions

File tree

crates/ruff_annotate_snippets/src/renderer/display_list.rs

Lines changed: 32 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1849,36 +1849,42 @@ fn is_annotation_empty(annotation: &Annotation<'_>) -> bool {
18491849
.all(|fragment| fragment.content.is_empty())
18501850
}
18511851

1852-
// We replace some characters so the CLI output is always consistent and underlines aligned.
1853-
const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
1854-
('\t', " "), // We do our own tab replacement
1855-
('\u{200D}', ""), // Replace ZWJ with nothing for consistent terminal output of grapheme clusters.
1856-
('\u{202A}', ""), // The following unicode text flow control characters are inconsistently
1857-
('\u{202B}', ""), // supported across CLIs and can cause confusion due to the bytes on disk
1858-
('\u{202D}', ""), // not corresponding to the visible source code, so we replace them always.
1859-
('\u{202E}', ""),
1860-
('\u{2066}', ""),
1861-
('\u{2067}', ""),
1862-
('\u{2068}', ""),
1863-
('\u{202C}', ""),
1864-
('\u{2069}', ""),
1865-
];
1866-
18671852
/// Replaces characters that render inconsistently in terminals so that CLI output is stable and
/// underlines stay aligned.
///
/// Tabs are replaced with a fixed string, and the zero-width joiner plus the Unicode
/// bidirectional text-flow control characters are removed entirely.
///
/// Returns `Cow::Borrowed` (no allocation) when `str` contains none of the affected characters,
/// and `Cow::Owned` with the normalized text otherwise.
fn normalize_whitespace(str: &str) -> Cow<'_, str> {
    // `None` until the first affected character is seen. Whether a replacement happened is
    // tracked separately from the buffer contents: most replacements are the empty string, so an
    // input that starts with (or consists only of) stripped characters leaves the buffer empty
    // even though the text has changed, and must still be returned as `Cow::Owned`.
    let mut output: Option<String> = None;
    // Byte offset just past the most recently replaced character.
    let mut last_index = 0usize;

    for (index, c) in str.char_indices() {
        let replacement = match c {
            // We do our own tab replacement.
            // NOTE(review): the replacement literal is reproduced exactly as it appears in the
            // reviewed text; confirm its width against the canonical source.
            '\t' => " ",
            // Replace ZWJ with nothing for consistent terminal output of grapheme clusters.
            '\u{200D}' => "",
            // The following Unicode text-flow control characters are inconsistently supported
            // across CLIs and can cause confusion because the bytes on disk do not correspond to
            // the visible source code, so they are always removed.
            '\u{202A}' | '\u{202B}' | '\u{202C}' | '\u{202D}' | '\u{202E}' | '\u{2066}'
            | '\u{2067}' | '\u{2068}' | '\u{2069}' => "",
            _ => continue,
        };

        // Allocate lazily, once; the result is never longer than the input as long as every
        // replacement is at most as long as the character it replaces.
        let buffer = output.get_or_insert_with(|| String::with_capacity(str.len()));
        // Copy the untouched text since the previous replacement, then the replacement itself.
        buffer.push_str(&str[last_index..index]);
        buffer.push_str(replacement);
        last_index = index + c.len_utf8();
    }

    match output {
        None => Cow::Borrowed(str),
        Some(mut buffer) => {
            // Copy whatever follows the last replaced character.
            buffer.push_str(&str[last_index..]);
            Cow::Owned(buffer)
        }
    }
}
18831889

18841890
fn overlaps(

0 commit comments

Comments
 (0)