Skip to content

Commit 99ed1b8

Browse files
committed
Make convert_while_ascii unsafe
1 parent 12f35ad commit 99ed1b8

2 files changed

Lines changed: 13 additions & 4 deletions

File tree

library/alloc/src/str.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,9 @@ impl str {
378378
without modifying the original"]
379379
#[stable(feature = "unicode_case_mapping", since = "1.2.0")]
380380
pub fn to_lowercase(&self) -> String {
381-
let (mut s, rest) = convert_while_ascii(self, u8::to_ascii_lowercase);
381+
// SAFETY: `to_ascii_lowercase` maps every ASCII byte to an ASCII byte, so the
382+
// converted prefix remains valid UTF-8.
383+
let (mut s, rest) = unsafe { convert_while_ascii(self, u8::to_ascii_lowercase) };
382384

383385
let prefix_len = s.len();
384386

@@ -463,7 +465,9 @@ impl str {
463465
without modifying the original"]
464466
#[stable(feature = "unicode_case_mapping", since = "1.2.0")]
465467
pub fn to_uppercase(&self) -> String {
466-
let (mut s, rest) = convert_while_ascii(self, u8::to_ascii_uppercase);
468+
// SAFETY: `to_ascii_uppercase` maps every ASCII byte to an ASCII byte, so the
469+
// converted prefix remains valid UTF-8.
470+
let (mut s, rest) = unsafe { convert_while_ascii(self, u8::to_ascii_uppercase) };
467471

468472
for c in rest.chars() {
469473
match conversions::to_upper(c) {
@@ -626,11 +630,15 @@ pub unsafe fn from_boxed_utf8_unchecked(v: Box<[u8]>) -> Box<str> {
626630
///
627631
/// This function is only public so that it can be verified in a codegen test,
628632
/// see `issue-123712-str-to-lower-autovectorization.rs`.
633+
///
634+
/// # Safety
635+
///
636+
/// `convert` must return an ASCII byte for every ASCII input byte.
629637
#[unstable(feature = "str_internals", issue = "none")]
630638
#[doc(hidden)]
631639
#[inline]
632640
#[cfg(not(no_global_oom_handling))]
633-
pub fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) {
641+
pub unsafe fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) {
634642
// Process the input in chunks of 16 bytes to enable auto-vectorization.
635643
// Previously the chunk size depended on the size of `usize`,
636644
// but a 16-byte chunk is also the better choice on 32-bit platforms with sse or neon.

tests/codegen-llvm/issues/issue-123712-str-to-lower-autovectorization.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,6 @@ extern crate alloc;
1919
// CHECK-NEXT: [[C:%[0-9]]] = bitcast <16 x i1> [[B]] to i16
2020
#[no_mangle]
2121
pub fn lower_while_ascii(s: &str) -> (alloc::string::String, &str) {
22-
alloc::str::convert_while_ascii(s, u8::to_ascii_lowercase)
22+
// SAFETY: `to_ascii_lowercase` returns an ASCII byte for every ASCII input byte.
23+
unsafe { alloc::str::convert_while_ascii(s, u8::to_ascii_lowercase) }
2324
}

0 commit comments

Comments
 (0)