Skip to content

Commit 408b538

Browse files
committed
[mv3] Add support to convert header= option to DNR rules
Related issue: uBlockOrigin/uBOL-home#157 The `header=` option will be converted into DNR's `responseHeaders` condition. There will be an attempt to convert regex-based values into DNR-compatible syntax. Not all regex-based patterns can be converted to use DNR's patterns with `*` and `?` special characters. The implementation of the `header=` option in uBO has been revisited to improve compatibility with DNR syntax, to minimize the burden for list maintainers when creating `header=` filters compatible with both uBO and uBOL. The changes: - Header names are now case-insensitive by default - Occurrences of `*` in non-regex-based header values now mean "matches any number of characters" - Occurrences of `?` in non-regex-based header values now mean "matches zero or one character" At the time of commit, and as per MDN, only Chromium-based browsers currently support filtering on response headers: https://developer.mozilla.org/docs/Mozilla/Add-ons/WebExtensions/API/declarativeNetRequest/HeaderInfo Also as per MDN, Chromium 121-127 silently ignore the `responseHeaders` condition, potentially causing undue blocking of network requests. Currently uBOL supports Chromium 122 and later, meaning we need to mind potential false positives in Chromium 122-127 for filters using the `header=` option.
1 parent c44f043 commit 408b538

File tree

4 files changed

+125
-17
lines changed

4 files changed

+125
-17
lines changed

platform/mv3/make-rulesets.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,10 @@ function patchRuleset(ruleset) {
333333
log(`Safari's incomplete API: ${JSON.stringify(rule)}`, true);
334334
continue;
335335
}
336+
if ( Array.isArray(rule.condition.responseHeaders) ) {
337+
log(`Safari's incomplete API: ${JSON.stringify(rule)}`, true);
338+
continue;
339+
}
336340
if ( Array.isArray(condition.requestMethods) ) {
337341
log(`Safari's incomplete API: ${JSON.stringify(rule)}`, true);
338342
continue;

src/js/regex-analyzer.js

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ function _isRE2(node) {
6161
return true;
6262
}
6363

64+
/******************************************************************************/
65+
6466
function _literalStrFromRegex(reStr) {
6567
if ( RegexAnalyzer === null ) { return ''; }
6668
let s = '';
@@ -200,3 +202,55 @@ function tokenizableStrFromNode(node) {
200202
}
201203
return '\x01';
202204
}
205+
206+
/******************************************************************************/
207+
208+
/**
 * Try to convert the source of a regular expression into a header value
 * pattern which only uses `*` and `?` as special characters.
 *
 * @param {string} reStr - Regex source, passed as-is to `RegexAnalyzer`.
 * @returns {string|undefined} The converted pattern, or `undefined` when
 *   `RegexAnalyzer` is null, when analyzing the regex throws, or when
 *   `_toHeaderPattern` reports that the regex cannot be converted.
 */
export function toHeaderPattern(reStr) {
    if ( RegexAnalyzer === null ) { return; }
    try {
        return _toHeaderPattern(RegexAnalyzer(reStr, false).tree());
    } catch {
        // Deliberate best-effort: any exception thrown while analyzing or
        // converting is treated the same as "cannot be converted", and the
        // function falls through to return `undefined`.
    }
}
215+
216+
/**
 * Recursively convert a regex syntax tree node into a header value pattern
 * in which `*` and `?` are the only special characters.
 *
 * Literal text coming from the regex is escaped so that a literal `*`, `?`
 * or `\` is not mistaken for a pattern special character in the output.
 *
 * @param {object} branch - Syntax tree node; `type`, `val` and `flags` are
 *   the only properties read.
 * @param {number} [depth=0] - Nesting depth; only the top-level sequence
 *   (depth 0) gets leading/trailing `*` added for missing anchors.
 * @returns {string|undefined} The pattern for this node, or `undefined` if
 *   the node (or any of its descendants) cannot be converted.
 */
function _toHeaderPattern(branch, depth = 0) {
    // Backslash-escape characters which are special in the output pattern.
    // Non-string input is returned untouched so that a missing value still
    // propagates as "cannot convert".
    const escapeLiteral = s =>
        typeof s === 'string' ? s.replace(/[*?\\]/g, '\\$&') : s;

    switch ( branch.type ) {
    case 1: /* T_SEQUENCE, 'Sequence' */ {
        let s = '';
        for ( const node of branch.val ) {
            const t = _toHeaderPattern(node, depth+1);
            // A single unconvertible node makes the whole sequence
            // unconvertible.
            if ( t === undefined ) { return; }
            s += t;
        }
        if ( depth === 0 && branch.val.length !== 0 ) {
            // An unanchored regex matches anywhere in the value, which is
            // expressed with a leading and/or trailing `*` wildcard.
            const first = branch.val[0];
            if ( first.type !== 128 || first.val !== '^' ) { s = `*${s}`; }
            const last = branch.val.at(-1);
            if ( last.type !== 128 || last.val !== '$' ) { s = `${s}*`; }
        }
        return s;
    }
    case 4: /* T_GROUP, 'Group' */ {
        // Negative assertions cannot be expressed with `*` and `?`.
        if (
            branch.flags.NegativeLookAhead === 1 ||
            branch.flags.NegativeLookBehind === 1
        ) {
            return;
        }
        return _toHeaderPattern(branch.val, depth+1);
    }
    case 64: /* T_HEXCHAR, 'HexChar' */
        return escapeLiteral(branch.flags.Char);
    case 128: /* T_SPECIAL, 'Special' */ {
        // Anchors contribute no characters; they are accounted for by the
        // top-level sequence above. Any other special is unsupported.
        if ( branch.val === '^' ) { return ''; }
        if ( branch.val === '$' ) { return ''; }
        return;
    }
    case 1024: /* T_STRING, 'String' */
        return escapeLiteral(branch.val);
    case 2048: /* T_COMMENT, 'Comment' */
        return '';
    default:
        // Every other node type is unsupported: fall through and return
        // `undefined`.
        break;
    }
}

src/js/static-filtering-parser.js

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,9 @@ const exCharCodeAt = (s, i) => {
630630
return pos >= 0 ? s.charCodeAt(pos) : -1;
631631
};
632632

633+
// Backslash-escape every character of `s` which is special in a regular
// expression, so that the result can be embedded in a regex source string
// and match `s` literally.
const escapeForRegex = s =>
    s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
635+
633636
/******************************************************************************/
634637

635638
class AstWalker {
@@ -3024,25 +3027,44 @@ export function parseHeaderValue(arg) {
30243027
const out = { };
30253028
let pos = s.indexOf(':');
30263029
if ( pos === -1 ) { pos = s.length; }
3027-
out.name = s.slice(0, pos);
3030+
out.name = s.slice(0, pos).toLowerCase();
30283031
out.bad = out.name === '';
30293032
s = s.slice(pos + 1);
30303033
out.not = s.charCodeAt(0) === 0x7E /* '~' */;
30313034
if ( out.not ) { s = s.slice(1); }
30323035
out.value = s;
3036+
if ( s === '' ) { return out; }
30333037
const match = /^\/(.+)\/(i)?$/.exec(s);
3034-
if ( match !== null ) {
3035-
try {
3036-
out.re = new RegExp(match[1], match[2] || '');
3037-
}
3038-
catch {
3039-
out.bad = true;
3040-
}
3038+
out.isRegex = match !== null;
3039+
if ( out.isRegex ) {
3040+
out.reStr = match[1];
3041+
out.reFlags = match[2] || '';
3042+
try { new RegExp(out.reStr, out.reFlags); }
3043+
catch { out.bad = true; }
3044+
return out;
3045+
}
3046+
out.reFlags = 'i';
3047+
if ( /[*?]/.test(s) === false ) {
3048+
out.reStr = escapeForRegex(s);
3049+
return out;
30413050
}
3051+
const reConstruct = /(?<!\\)[*?]/g;
3052+
const reParts = [];
3053+
let beg = 0;
3054+
for (;;) {
3055+
const match = reConstruct.exec(s);
3056+
if ( match === null ) { break; }
3057+
reParts.push(
3058+
escapeForRegex(s.slice(beg, match.index)),
3059+
match[0] === '*' ? '.*' : '.?',
3060+
);
3061+
beg = reConstruct.lastIndex;
3062+
}
3063+
reParts.push(escapeForRegex(s.slice(beg)));
3064+
out.reStr = reParts.join('');
30423065
return out;
30433066
}
30443067

3045-
30463068
// https://adguard.com/kb/general/ad-filtering/create-own-filters/#replace-modifier
30473069

30483070
export function parseReplaceByRegexValue(s) {
@@ -3194,7 +3216,6 @@ class ExtSelectorCompiler {
31943216
// /^(?:[A-Za-z_][\w-]*(?:[.#][A-Za-z_][\w-]*)*(?:\[[A-Za-z_][\w-]*(?:[*^$]?="[^"\]\\]+")?\])*|[.#][A-Za-z_][\w-]*(?:[.#][A-Za-z_][\w-]*)*(?:\[[A-Za-z_][\w-]*(?:[*^$]?="[^"\]\\]+")?\])*|\[[A-Za-z_][\w-]*(?:[*^$]?="[^"\]\\]+")?\](?:\[[A-Za-z_][\w-]*(?:[*^$]?="[^"\]\\]+")?\])*)(?:(?:\s+|\s*[>+~]\s*)(?:[A-Za-z_][\w-]*(?:[.#][A-Za-z_][\w-]*)*(?:\[[A-Za-z_][\w-]*(?:[*^$]?="[^"\]\\]+")?\])*|[.#][A-Za-z_][\w-]*(?:[.#][A-Za-z_][\w-]*)*(?:\[[A-Za-z_][\w-]*(?:[*^$]?="[^"\]\\]+")?\])*|\[[A-Za-z_][\w-]*(?:[*^$]?="[^"\]\\]+")?\](?:\[[A-Za-z_][\w-]*(?:[*^$]?="[^"\]\\]+")?\])*))*$/
31953217

31963218
this.reEatBackslashes = /\\([()])/g;
3197-
this.reEscapeRegex = /[.*+?^${}()|[\]\\]/g;
31983219
// https://developer.mozilla.org/en-US/docs/Web/CSS/Pseudo-classes
31993220
this.knownPseudoClasses = new Set([
32003221
'active', 'any-link', 'autofill',
@@ -4043,7 +4064,7 @@ class ExtSelectorCompiler {
40434064
regexDetails = [ regexDetails, match[2] ];
40444065
}
40454066
} else {
4046-
regexDetails = '^' + value.replace(this.reEscapeRegex, '\\$&') + '$';
4067+
regexDetails = `^${escapeForRegex(value)}$`;
40474068
}
40484069
return { name, pseudo, value: regexDetails };
40494070
}

src/js/static-net-filtering.js

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import * as sfp from './static-filtering-parser.js';
2323

2424
import { domainFromHostname, hostnameFromNetworkURL } from './uri-utils.js';
2525
import { dropTask, queueTask } from './tasks.js';
26-
import { isRE2, tokenizableStrFromRegex } from './regex-analyzer.js';
26+
import { isRE2, toHeaderPattern, tokenizableStrFromRegex } from './regex-analyzer.js';
2727

2828
import BidiTrieContainer from './biditrie.js';
2929
import { CompiledListReader } from './static-filtering-io.js';
@@ -2929,18 +2929,26 @@ class FilterOnHeaders {
29292929
if ( refs.$parsed === null ) {
29302930
refs.$parsed = sfp.parseHeaderValue(refs.headerOpt);
29312931
}
2932-
const { bad, name, not, re, value } = refs.$parsed;
2932+
const { bad, name, not, value } = refs.$parsed;
29332933
if ( bad ) { return false; }
29342934
const headerValue = $httpHeaders.lookup(name);
29352935
if ( headerValue === undefined ) { return false; }
29362936
if ( value === '' ) { return true; }
2937-
return re === undefined
2938-
? (headerValue === value) !== not
2939-
: re.test(headerValue) !== not;
2937+
let { re } = refs.$parsed;
2938+
if ( re === undefined ) {
2939+
re = new RegExp(refs.$parsed.reStr, refs.$parsed.reFlags);
2940+
refs.$parsed.re = re;
2941+
}
2942+
return re.test(headerValue) !== not;
29402943
}
29412944

29422945
static compile(details) {
2943-
return [ FilterOnHeaders.fid, details.optionValues.get('header') ];
2946+
const parsed = sfp.parseHeaderValue(details.optionValues.get('header'));
2947+
let normalized = parsed.name;
2948+
if ( parsed.value !== '' ) {
2949+
normalized += `:${parsed.value}`;
2950+
}
2951+
return [ FilterOnHeaders.fid, normalized ];
29442952
}
29452953

29462954
static fromCompiled(args) {
@@ -2954,6 +2962,27 @@ class FilterOnHeaders {
29542962
}
29552963

29562964
static dnrFromCompiled(args, rule) {
2965+
rule.condition ||= {};
2966+
const parsed = sfp.parseHeaderValue(args[1]);
2967+
if ( parsed.bad !== true ) {
2968+
const value = parsed.isRegex
2969+
? toHeaderPattern(parsed.reStr)
2970+
: parsed.value;
2971+
if ( value !== undefined ) {
2972+
const prop = parsed.not
2973+
? 'excludedResponseHeaders'
2974+
: 'responseHeaders';
2975+
rule.condition[prop] ||= [];
2976+
const details = {
2977+
header: parsed.name,
2978+
};
2979+
if ( value !== '' ) {
2980+
details.values = [ value ];
2981+
}
2982+
rule.condition[prop].push(details);
2983+
return;
2984+
}
2985+
}
29572986
dnrAddRuleError(rule, `header="${args[1]}" not supported`);
29582987
}
29592988

0 commit comments

Comments
 (0)