@@ -1849,36 +1849,42 @@ fn is_annotation_empty(annotation: &Annotation<'_>) -> bool {
18491849 . all ( |fragment| fragment. content . is_empty ( ) )
18501850}
18511851
1852- // We replace some characters so the CLI output is always consistent and underlines aligned.
1853- const OUTPUT_REPLACEMENTS : & [ ( char , & str ) ] = & [
1854- ( '\t' , " " ) , // We do our own tab replacement
1855- ( '\u{200D}' , "" ) , // Replace ZWJ with nothing for consistent terminal output of grapheme clusters.
1856- ( '\u{202A}' , "" ) , // The following unicode text flow control characters are inconsistently
1857- ( '\u{202B}' , "" ) , // supported across CLIs and can cause confusion due to the bytes on disk
1858- ( '\u{202D}' , "" ) , // not corresponding to the visible source code, so we replace them always.
1859- ( '\u{202E}' , "" ) ,
1860- ( '\u{2066}' , "" ) ,
1861- ( '\u{2067}' , "" ) ,
1862- ( '\u{2068}' , "" ) ,
1863- ( '\u{202C}' , "" ) ,
1864- ( '\u{2069}' , "" ) ,
1865- ] ;
1866-
/// Replaces characters that render inconsistently in terminals so the CLI
/// output is always consistent and underlines stay aligned.
///
/// Tabs are substituted with a fixed replacement string, and the zero-width
/// joiner plus the Unicode text-flow control characters are removed.
///
/// Returns `Cow::Borrowed(str)` when the input contains none of these
/// characters (no allocation), and `Cow::Owned` with the rewritten text
/// otherwise.
fn normalize_whitespace(str: &str) -> Cow<'_, str> {
    // `None` means "no replaceable character seen yet". Buffer emptiness must
    // not be used for that purpose: most replacements are the empty string, so
    // an input that starts with (or consists solely of) such characters leaves
    // the buffer empty even though the input has to be rewritten.
    let mut output: Option<String> = None;
    // Byte offset just past the most recently replaced character.
    let mut last_index = 0usize;

    for (index, c) in str.char_indices() {
        let replacement = match c {
            // We do our own tab replacement.
            // NOTE(review): whitespace in the reviewed view appears collapsed;
            // confirm the intended width of this replacement string.
            '\t' => " ",
            // Replace ZWJ with nothing for consistent terminal output of grapheme clusters.
            '\u{200D}' => "",
            // The following unicode text flow control characters are inconsistently
            // supported across CLIs and can cause confusion due to the bytes on disk
            // not corresponding to the visible source code, so we replace them always.
            '\u{202A}' | '\u{202B}' | '\u{202D}' | '\u{202E}' | '\u{2066}' | '\u{2067}'
            | '\u{2068}' | '\u{202C}' | '\u{2069}' => "",
            _ => continue,
        };

        // Allocate lazily, once, on the first character that needs replacing.
        let buffer = output.get_or_insert_with(|| String::with_capacity(str.len()));
        // Copy the untouched run preceding this character, then its replacement.
        buffer.push_str(&str[last_index..index]);
        buffer.push_str(replacement);
        last_index = index + c.len_utf8();
    }

    match output {
        None => Cow::Borrowed(str),
        Some(mut buffer) => {
            // Append whatever follows the last replaced character.
            buffer.push_str(&str[last_index..]);
            Cow::Owned(buffer)
        }
    }
}
18831889
18841890fn overlaps (
0 commit comments