|
11 | 11 | */ |
12 | 12 | const quoteMeta = (str) => str.replace(/[-[\]\\/{}()*+?.^$|]/g, "\\$&"); |
13 | 13 |
|
/**
 * Escapes a single character so that it is taken literally when placed
 * inside a regular expression character class (`[...]`).
 * @param {string} char character to escape for use in character class
 * @returns {string} escaped character
 */
const quoteMetaInCharClass = (char) => {
	// In character class, only these need escaping: ] \ ^ -
	if (char === "]" || char === "\\" || char === "^" || char === "-") {
		return `\\${char}`;
	}
	return char;
};

/**
 * Formats an inclusive run of consecutive char codes for use in a character class.
 * Declared before charsToCharClassContent so that it is never referenced
 * ahead of its `const` declaration.
 * @param {string} start start character
 * @param {string} end end character (its char code must not be lower than the one of `start`)
 * @returns {string} formatted range
 */
const formatRange = (start, end) => {
	const length = end.charCodeAt(0) - start.charCodeAt(0) + 1;
	const escapedStart = quoteMetaInCharClass(start);

	if (length === 1) {
		return escapedStart;
	}

	const escapedEnd = quoteMetaInCharClass(end);

	if (length === 2) {
		// For 2 chars, just list them: "ab" is one character shorter than "a-b"
		return escapedStart + escapedEnd;
	}
	// For 3+ chars, use range notation
	return `${escapedStart}-${escapedEnd}`;
};

/**
 * Converts an array of single characters into an optimized character class string
 * using ranges where possible. E.g., ["1","2","3","4","a"] => "1-4a"
 * The input may be in any order and may contain duplicates: a deduplicated copy
 * is sorted by char code, the passed array itself is left untouched.
 * @param {string[]} chars array of single characters
 * @returns {string} optimized character class content (without the brackets)
 */
const charsToCharClassContent = (chars) => {
	// Dedupe first: a repeated character would otherwise break the run and be emitted twice
	const sorted = [...new Set(chars)].sort(
		(a, b) => a.charCodeAt(0) - b.charCodeAt(0)
	);
	if (sorted.length === 0) return "";

	/** @type {string[]} */
	const parts = [];
	let rangeStart = sorted[0];
	let rangeEnd = sorted[0];

	for (let i = 1; i < sorted.length; i++) {
		const char = sorted[i];

		if (char.charCodeAt(0) === rangeEnd.charCodeAt(0) + 1) {
			// Directly follows the current run: extend it
			rangeEnd = char;
		} else {
			// Gap in char codes: flush the current run and start a new one
			parts.push(formatRange(rangeStart, rangeEnd));
			rangeStart = char;
			rangeEnd = char;
		}
	}
	// Flush the last run (covers the single-character input as well)
	parts.push(formatRange(rangeStart, rangeEnd));

	return parts.join("");
};
| 86 | + |
14 | 87 | /** |
15 | 88 | * @param {string} str string |
16 | 89 | * @returns {string} string |
@@ -148,19 +221,20 @@ const itemsToRegexp = (itemsArr) => { |
148 | 221 | } |
149 | 222 | // special case for only single char items |
150 | 223 | if (countOfSingleCharItems === itemsArr.length) { |
151 | | - return `[${quoteMeta(itemsArr.sort().join(""))}]`; |
| 224 | + return `[${charsToCharClassContent(itemsArr)}]`; |
152 | 225 | } |
153 | 226 | /** @type {Set<string>} */ |
154 | 227 | const items = new Set(itemsArr.sort()); |
155 | 228 | if (countOfSingleCharItems > 2) { |
156 | | - let singleCharItems = ""; |
| 229 | + /** @type {string[]} */ |
| 230 | + const singleCharItems = []; |
157 | 231 | for (const item of items) { |
158 | 232 | if (item.length === 1) { |
159 | | - singleCharItems += item; |
| 233 | + singleCharItems.push(item); |
160 | 234 | items.delete(item); |
161 | 235 | } |
162 | 236 | } |
163 | | - finishedItems.push(`[${quoteMeta(singleCharItems)}]`); |
| 237 | + finishedItems.push(`[${charsToCharClassContent(singleCharItems)}]`); |
164 | 238 | } |
165 | 239 |
|
166 | 240 | // special case for 2 items with common prefix/suffix |
@@ -227,8 +301,6 @@ const itemsToRegexp = (itemsArr) => { |
227 | 301 | ); |
228 | 302 | } |
229 | 303 |
|
230 | | - // TODO further optimize regexp, i. e. |
231 | | - // use ranges: (1|2|3|4|a) => [1-4a] |
232 | 304 | /** @type {string[]} */ |
233 | 305 | const conditional = [...finishedItems, ...Array.from(items, quoteMeta)]; |
234 | 306 | if (conditional.length === 1) return conditional[0]; |
|
0 commit comments