fix: handle invalid UTF-8 in Ruby and Vue preprocessors (#19588)

khasinski · RobinMalfait · web-flow · commit d42b34abbfa2 · 2026-06-04T10:35:47.000Z
## Summary

This PR fixes a panic that occurs when the Ruby or Vue preprocessors
encounter files with invalid UTF-8 bytes.

**The issue:**
- `ruby.rs:37` and `vue.rs:18` used
`std::str::from_utf8(content).unwrap()`
- This panics when processing files containing invalid UTF-8 bytes

**Error message:**
```
thread panicked at crates/oxide/src/extractor/pre_processors/ruby.rs:37:59:
called `Result::unwrap()` on an `Err` value: Utf8Error { valid_up_to: 45, error_len: Some(1) }
```

**The fix:**
- Wrap UTF-8 conversion in `if let Ok(...)` to gracefully handle invalid
UTF-8
- Skip regex-based template extraction when UTF-8 conversion fails
- Allow byte-level processing to continue (in Ruby's case)

This can happen in Rails projects when:
- Binary files are inadvertently scanned
- Files contain non-UTF-8 encodings  
- Files are truncated in the middle of a multi-byte character during
parallel processing

## Test plan

- [x] Added `test_invalid_utf8_does_not_panic` test for Ruby
preprocessor
- [x] Added `test_valid_utf8_with_multibyte_chars` test for Ruby
preprocessor
- [x] Added `test_invalid_utf8_does_not_panic` test for Vue preprocessor
- [x] All existing tests pass (`cargo test pre_processors` - 43 tests)

---------

Co-authored-by: Robin Malfait <malfait.robin@gmail.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Ensure `@tailwindcss/cli` in `--watch` mode recovers when a tracked dependency is deleted and restored ([#20137](https://github.com/tailwindlabs/tailwindcss/pull/20137))
 - Ensure standalone `@tailwindcss/cli` binaries are ignored when scanning for class candidates ([#20139](https://github.com/tailwindlabs/tailwindcss/pull/20139))
 - Ensure class candidates are extracted from Twig `addClass(…)` and `removeClass(…)` calls ([#20198](https://github.com/tailwindlabs/tailwindcss/pull/20198))
+- Don't crash in the Ruby or Vue preprocessors when scanning files containing invalid UTF-8 bytes ([#19588](https://github.com/tailwindlabs/tailwindcss/pull/19588))
 
 ### Changed
 
diff --git a/crates/oxide/src/extractor/pre_processors/ruby.rs b/crates/oxide/src/extractor/pre_processors/ruby.rs
@@ -34,45 +34,47 @@ impl PreProcessor for Ruby {
 
         // Extract embedded template languages
         // https://viewcomponent.org/guide/templates.html#interpolations
-        let content_as_str = std::str::from_utf8(content).unwrap();
-
-        let starts = TEMPLATE_START_REGEX
-            .captures_iter(content_as_str)
-            .collect::<Vec<_>>();
-        let ends = TEMPLATE_END_REGEX
-            .captures_iter(content_as_str)
-            .collect::<Vec<_>>();
-
-        for start in starts.iter() {
-            // The language for this block
-            let lang = start.get(1).unwrap().as_str();
-
-            // The HEREDOC delimiter
-            let delimiter_start = start.get(2).unwrap().as_str();
-
-            // Where the "body" starts for the HEREDOC block
-            let body_start = start.get(0).unwrap().end();
-
-            // Look through all of the ends to find a matching language
-            for end in ends.iter() {
-                // 1. This must appear after the start
-                let body_end = end.get(0).unwrap().start();
-                if body_end < body_start {
-                    continue;
-                }
+        // Only process if content is valid UTF-8, otherwise skip HEREDOC extraction
+        // but still perform the byte-level Ruby processing below
+        if let Ok(content_as_str) = std::str::from_utf8(content) {
+            let starts = TEMPLATE_START_REGEX
+                .captures_iter(content_as_str)
+                .collect::<Vec<_>>();
+            let ends = TEMPLATE_END_REGEX
+                .captures_iter(content_as_str)
+                .collect::<Vec<_>>();
+
+            for start in starts.iter() {
+                // The language for this block
+                let lang = start.get(1).unwrap().as_str();
+
+                // The HEREDOC delimiter
+                let delimiter_start = start.get(2).unwrap().as_str();
+
+                // Where the "body" starts for the HEREDOC block
+                let body_start = start.get(0).unwrap().end();
+
+                // Look through all of the ends to find a matching language
+                for end in ends.iter() {
+                    // 1. This must appear after the start
+                    let body_end = end.get(0).unwrap().start();
+                    if body_end < body_start {
+                        continue;
+                    }
 
-                // The languages must match otherwise we haven't found the end
-                let delimiter_end = end.get(1).unwrap().as_str();
-                if delimiter_end != delimiter_start {
-                    continue;
-                }
+                    // The languages must match otherwise we haven't found the end
+                    let delimiter_end = end.get(1).unwrap().as_str();
+                    if delimiter_end != delimiter_start {
+                        continue;
+                    }
 
-                let body = &content_as_str[body_start..body_end];
-                let replaced =
-                    pre_process_input(body.as_bytes().to_vec(), &lang.to_ascii_lowercase());
+                    let body = &content_as_str[body_start..body_end];
+                    let replaced =
+                        pre_process_input(body.as_bytes().to_vec(), &lang.to_ascii_lowercase());
 
-                result.replace_range(body_start..body_end, replaced);
-                break;
+                    result.replace_range(body_start..body_end, replaced);
+                    break;
+                }
             }
         }
 
@@ -444,4 +446,24 @@ mod tests {
             vec!["text-amber-600", "text-sky-500", "text-green-500"],
         );
     }
+
+    #[test]
+    fn test_invalid_utf8_does_not_panic() {
+        // Invalid UTF-8 sequence: 0x80 is a continuation byte without a leading byte
+        let invalid_utf8: &[u8] = &[0x80, 0x81, 0x82];
+
+        let processor = Ruby::default();
+
+        // Should not panic, just return the input unchanged
+        let result = processor.process(invalid_utf8);
+        assert_eq!(result, invalid_utf8);
+    }
+
+    #[test]
+    fn test_valid_utf8_with_multibyte_chars() {
+        // Test that valid UTF-8 with multi-byte characters (like em-dashes) works
+        let input = "# Comment with em—dash\n%w[flex px-2.5]";
+
+        Ruby::test_extract_contains(input, vec!["flex", "px-2.5"]);
+    }
 }
diff --git a/crates/oxide/src/extractor/pre_processors/vue.rs b/crates/oxide/src/extractor/pre_processors/vue.rs
@@ -14,13 +14,16 @@ pub struct Vue;
 impl PreProcessor for Vue {
     fn process(&self, content: &[u8]) -> Vec<u8> {
         let mut result = content.to_vec();
-        let content_as_str = std::str::from_utf8(content).unwrap();
-        for (_, [lang, body]) in TEMPLATE_REGEX
-            .captures_iter(content_as_str)
-            .map(|c| c.extract())
-        {
-            let replaced = pre_process_input(body.as_bytes().to_vec(), lang);
-            result = result.replace(body, replaced);
+
+        // Only process template tags if content is valid UTF-8
+        if let Ok(content_as_str) = std::str::from_utf8(content) {
+            for (_, [lang, body]) in TEMPLATE_REGEX
+                .captures_iter(content_as_str)
+                .map(|c| c.extract())
+            {
+                let replaced = pre_process_input(body.as_bytes().to_vec(), lang);
+                result = result.replace(body, replaced);
+            }
         }
 
         result
@@ -42,4 +45,16 @@ mod tests {
 
         Vue::test_extract_contains(input, vec!["bg-neutral-900", "text-red-500"]);
     }
+
+    #[test]
+    fn test_invalid_utf8_does_not_panic() {
+        // Invalid UTF-8 sequence: 0x80 is a continuation byte without a leading byte
+        let invalid_utf8: &[u8] = &[0x80, 0x81, 0x82];
+
+        let processor = Vue::default();
+
+        // Should not panic, just return the input unchanged
+        let result = processor.process(invalid_utf8);
+        assert_eq!(result, invalid_utf8);
+    }
 }