fix(accept): replace regex split to mitigate ReDoS#4758
fix(accept): replace regex split to mitigate ReDoS#4758yusukebe merged 5 commits intohonojs:mainfrom
Conversation
Codecov Report❌ Patch coverage is
Additional details and impacted files@@ Coverage Diff @@
## main #4758 +/- ##
==========================================
+ Coverage 91.48% 91.55% +0.07%
==========================================
Files 177 177
Lines 11551 11736 +185
Branches 3353 3432 +79
==========================================
+ Hits 10567 10745 +178
- Misses 983 990 +7
Partials 1 1 ☔ View full report in Codecov by Sentry. 🚀 New features to boost your workflow:
|
|
Hi @EdamAme-x However, while this module can parse standard headers without problems, I believe the following current test is buggy because the expected value is incorrect. (b should be I think the PR's proposed fix was made to pass the current (buggy) tests, but I'm hesitant about whether we should leave it as-is. With the current code, it can't handle patterns where there's a comma inside double quotes either (even though this is a valid header)... I know this is asking a lot, but could we refactor the whole thing? |
|
sure |
|
Hi @EdamAme-x Thank you for the update! While this strays from the ReDoS fixes, I'd like to take this opportunity to handle the entire parsing in one pass. (It will require a complete rewrite, but) I've created a PR against your branch. Would you mind pulling it in? benchmarkaccept.bench.tsimport { bench, describe } from 'vitest'
// ---------------------------------------------------------------------------
// main (regex-based)
// ---------------------------------------------------------------------------
const Main = (() => {
interface Accept {
type: string
params: Record<string, string>
q: number
}
const parseAcceptValueRegex = /;(?=(?:(?:[^"]*"){2})*[^"]*$)/
const parseAcceptValue = ({ value, index }: { value: string; index: number }) => {
const parts = value
.trim()
.split(parseAcceptValueRegex)
.map((s) => s.trim())
const type = parts[0]
if (!type) {
return null
}
const params = parseParams(parts.slice(1))
const q = parseQuality(params.q)
return { type, params, q, index }
}
const parseParams = (paramParts: string[]): Record<string, string> => {
return paramParts.reduce<Record<string, string>>((acc, param) => {
const [key, val] = param.split('=').map((s) => s.trim())
if (key && val) {
acc[key] = val
}
return acc
}, {})
}
const parseQuality = (qVal?: string): number => {
if (qVal === undefined) {
return 1
}
if (qVal === '') {
return 1
}
if (qVal === 'NaN') {
return 0
}
const num = Number(qVal)
if (num === Infinity) {
return 1
}
if (num === -Infinity) {
return 0
}
if (Number.isNaN(num)) {
return 1
}
if (num < 0 || num > 1) {
return 1
}
return num
}
const sortByQualityAndIndex = (a: Accept & { index: number }, b: Accept & { index: number }) => {
const qDiff = b.q - a.q
if (qDiff !== 0) {
return qDiff
}
return a.index - b.index
}
const parseAccept = (acceptHeader: string): Accept[] => {
if (!acceptHeader) {
return []
}
const acceptValues = acceptHeader.split(',').map((value, index) => ({ value, index }))
return acceptValues
.map(parseAcceptValue)
.filter((item): item is Accept & { index: number } => Boolean(item))
.sort(sortByQualityAndIndex)
.map(({ type, params, q }) => ({ type, params, q }))
}
return { parseAccept }
})()
// ---------------------------------------------------------------------------
// 947b22f4 (splitByDelimiter)
// ---------------------------------------------------------------------------
const C947b22f4 = (() => {
interface Accept {
type: string
params: Record<string, string>
q: number
}
const splitByDelimiterOutsideQuotes = (value: string, delimiter: string): string[] => {
const parts: string[] = []
let current = ''
let inQuotes = false
let escaped = false
for (let i = 0; i < value.length; i++) {
const char = value[i]
if (escaped) {
current += char
escaped = false
continue
}
if (char === '\\' && inQuotes) {
current += char
escaped = true
continue
}
if (char === '"') {
inQuotes = !inQuotes
current += char
continue
}
if (char === delimiter && !inQuotes) {
parts.push(current)
current = ''
continue
}
current += char
}
parts.push(current)
return parts
}
const parseAcceptValue = ({ value, index }: { value: string; index: number }) => {
const parts = splitByDelimiterOutsideQuotes(value.trim(), ';').map((s) => s.trim())
const type = parts[0]
if (!type) {
return null
}
const params = parseParams(parts.slice(1))
const q = parseQuality(params.q)
return { type, params, q, index }
}
const parseParams = (paramParts: string[]): Record<string, string> => {
return paramParts.reduce<Record<string, string>>((acc, param) => {
const separatorIndex = param.indexOf('=')
if (separatorIndex <= 0) {
return acc
}
const key = param.slice(0, separatorIndex).trim()
const rawVal = param.slice(separatorIndex + 1).trim()
if (!key || !rawVal) {
return acc
}
const val = parseParamValue(rawVal)
if (val !== null) {
acc[key] = val
}
return acc
}, {})
}
const parseParamValue = (value: string): string | null => {
if (value[0] !== '"') {
return value.includes('=') ? null : value
}
return parseQuotedString(value)
}
const parseQuotedString = (value: string): string | null => {
let unescaped = ''
let escaped = false
for (let i = 1; i < value.length; i++) {
const char = value[i]
if (escaped) {
unescaped += char
escaped = false
continue
}
if (char === '\\') {
escaped = true
continue
}
if (char === '"') {
return value.slice(i + 1).trim() ? null : unescaped
}
unescaped += char
}
return null
}
const parseQuality = (qVal?: string): number => {
if (qVal === undefined) {
return 1
}
if (qVal === '') {
return 1
}
if (qVal === 'NaN') {
return 0
}
const num = Number(qVal)
if (num === Infinity) {
return 1
}
if (num === -Infinity) {
return 0
}
if (Number.isNaN(num)) {
return 1
}
if (num < 0 || num > 1) {
return 1
}
return num
}
const sortByQualityAndIndex = (a: Accept & { index: number }, b: Accept & { index: number }) => {
const qDiff = b.q - a.q
if (qDiff !== 0) {
return qDiff
}
return a.index - b.index
}
const parseAccept = (acceptHeader: string): Accept[] => {
if (!acceptHeader) {
return []
}
const acceptValues = splitByDelimiterOutsideQuotes(acceptHeader, ',').map((value, index) => ({
value,
index,
}))
return acceptValues
.map(parseAcceptValue)
.filter((item): item is Accept & { index: number } => Boolean(item))
.sort(sortByQualityAndIndex)
.map(({ type, params, q }) => ({ type, params, q }))
}
return { parseAccept }
})()
// ---------------------------------------------------------------------------
// c829099 (single-pass)
// ---------------------------------------------------------------------------
const Cc829099 = (() => {
interface Accept {
type: string
params: Record<string, string>
q: number
}
const isWhitespace = (char: number): boolean =>
char === 32 || char === 9 || char === 10 || char === 13
const consumeWhitespace = (acceptHeader: string, startIndex: number): number => {
while (startIndex < acceptHeader.length) {
if (!isWhitespace(acceptHeader.charCodeAt(startIndex))) {
break
}
startIndex++
}
return startIndex
}
const ignoreTrailingWhitespace = (acceptHeader: string, startIndex: number): number => {
while (startIndex > 0) {
if (!isWhitespace(acceptHeader.charCodeAt(startIndex - 1))) {
break
}
startIndex--
}
return startIndex
}
const skipInvalidParam = (acceptHeader: string, startIndex: number): [number, boolean] => {
while (startIndex < acceptHeader.length) {
const char = acceptHeader.charCodeAt(startIndex)
if (char === 59) {
return [startIndex + 1, true]
}
if (char === 44) {
return [startIndex + 1, false]
}
startIndex++
}
return [startIndex, false]
}
const skipInvalidAcceptValue = (acceptHeader: string, startIndex: number): number => {
let i = startIndex
let inQuotes = false
while (i < acceptHeader.length) {
const char = acceptHeader.charCodeAt(i)
if (inQuotes && char === 92) {
i++
} else if (char === 34) {
inQuotes = !inQuotes
} else if (!inQuotes && char === 44) {
return i + 1
}
i++
}
return i
}
const getNextParam = (
acceptHeader: string,
startIndex: number
): [number, string | undefined, string | undefined, boolean] => {
startIndex = consumeWhitespace(acceptHeader, startIndex)
let i = startIndex
let key: string | undefined
let value: string | undefined
let hasNext = false
while (i < acceptHeader.length) {
const char = acceptHeader.charCodeAt(i)
if (char === 61) {
key = acceptHeader.slice(startIndex, ignoreTrailingWhitespace(acceptHeader, i))
i++
break
}
if (char === 59) {
return [i + 1, undefined, undefined, true]
}
if (char === 44) {
return [i + 1, undefined, undefined, false]
}
i++
}
if (key === undefined) {
return [i, undefined, undefined, false]
}
i = consumeWhitespace(acceptHeader, i)
if (acceptHeader.charCodeAt(i) === 61) {
const skipResult = skipInvalidParam(acceptHeader, i + 1)
return [skipResult[0], key, undefined, skipResult[1]]
}
let inQuotes = false
const paramStartIndex = i
while (i < acceptHeader.length) {
const char = acceptHeader.charCodeAt(i)
if (inQuotes && char === 92) {
i++
} else if (char === 34) {
if (inQuotes) {
let nextIndex = consumeWhitespace(acceptHeader, i + 1)
const nextChar = acceptHeader.charCodeAt(nextIndex)
if (nextIndex < acceptHeader.length && !(nextChar === 59 || nextChar === 44)) {
const skipResult = skipInvalidParam(acceptHeader, nextIndex)
return [skipResult[0], key, undefined, skipResult[1]]
}
value = acceptHeader.slice(paramStartIndex + 1, i)
if (value.includes('\\')) {
value = value.replace(/\\(.)/g, '$1')
}
if (nextChar === 44) {
return [nextIndex + 1, key, value, false]
}
if (nextChar === 59) {
hasNext = true
nextIndex++
}
i = nextIndex
break
}
inQuotes = true
} else if (!inQuotes && (char === 59 || char === 44)) {
value = acceptHeader.slice(paramStartIndex, ignoreTrailingWhitespace(acceptHeader, i))
if (char === 59) {
hasNext = true
}
i++
break
}
i++
}
return [
i,
key,
value ?? acceptHeader.slice(paramStartIndex, ignoreTrailingWhitespace(acceptHeader, i)),
hasNext,
]
}
const getNextAcceptValue = (
acceptHeader: string,
startIndex: number
): [number, Accept | undefined] => {
const accept: Accept = { type: '', params: {}, q: 1 }
startIndex = consumeWhitespace(acceptHeader, startIndex)
let i = startIndex
while (i < acceptHeader.length) {
const char = acceptHeader.charCodeAt(i)
if (char === 59 || char === 44) {
accept.type = acceptHeader.slice(startIndex, ignoreTrailingWhitespace(acceptHeader, i))
i++
if (char === 44) {
return [i, accept.type ? accept : undefined]
}
if (!accept.type) {
return [skipInvalidAcceptValue(acceptHeader, i), undefined]
}
break
}
i++
}
if (!accept.type) {
accept.type = acceptHeader.slice(
startIndex,
ignoreTrailingWhitespace(acceptHeader, acceptHeader.length)
)
return [acceptHeader.length, accept.type ? accept : undefined]
}
let param: string | undefined
let value: string | undefined
let hasNext: boolean
while (i < acceptHeader.length) {
;[i, param, value, hasNext] = getNextParam(acceptHeader, i)
if (param && value) {
accept.params[param] = value
}
if (!hasNext) {
break
}
}
return [i, accept] as [number, Accept]
}
const parseQuality = (qVal?: string): number => {
if (qVal === undefined) {
return 1
}
if (qVal === '') {
return 1
}
if (qVal === 'NaN') {
return 0
}
const num = Number(qVal)
if (num === Infinity) {
return 1
}
if (num === -Infinity) {
return 0
}
if (Number.isNaN(num)) {
return 1
}
if (num < 0 || num > 1) {
return 1
}
return num
}
const parseAccept = (acceptHeader: string): Accept[] => {
if (!acceptHeader) {
return []
}
const values: Accept[] = []
let i = 0
let accept: Accept | undefined
let requiresSort = false
let lastAccept: Accept | undefined
while (i < acceptHeader.length) {
;[i, accept] = getNextAcceptValue(acceptHeader, i)
if (accept) {
accept.q = parseQuality(accept.params.q)
values.push(accept)
if (lastAccept && lastAccept.q < accept.q) {
requiresSort = true
}
lastAccept = accept
}
}
if (requiresSort) {
values.sort((a, b) => b.q - a.q)
}
return values
}
return { parseAccept }
})()
// ---------------------------------------------------------------------------
// Benchmark
// ---------------------------------------------------------------------------
const inputs = {
'simple: text/html': 'text/html',
'standard: 3 types with q': 'text/html, application/json;q=0.9, */*;q=0.8',
'many: 10 types':
'text/html, application/xhtml+xml, application/xml;q=0.9, text/plain;q=0.8, image/webp, image/apng, */*;q=0.7, application/signed-exchange;v=b3;q=0.6, text/css;q=0.5, application/json;q=0.4',
'quoted params': 'text/html;param="value with spaces";q=0.9, application/json;charset="utf-8"',
'ReDoS pattern': 'a' + ']'.repeat(40),
} as const
for (const [label, input] of Object.entries(inputs)) {
describe(label, () => {
bench('main (regex)', () => {
Main.parseAccept(input)
})
bench('947b22f4 (splitByDelimiter)', () => {
C947b22f4.parseAccept(input)
})
bench('c829099 (single-pass)', () => {
Cc829099.parseAccept(input)
})
})
} |
refactor: improve accept header parsing
|
Sorry! I missed it. I've just merged it now! |
|
Thank you! |
|
Hi @yusukebe |
|
@EdamAme-x @usualoma Thank you! Merging. |
Summary
In proxy configurations without header size limits, this can cause real impact.
Proof
[ {"segments": 4000, "time": 12.8}, {"segments": 8000, "time": 51.07}, {"segments": 16000, "time": 203.52} ]The author should do the following, if applicable
bun run format:fix && bun run lint:fixto format the code