mirror of https://github.com/gorhill/uBlock.git
Add ability to parse `removeparam=` as `queryprune=`
Related issue:
- https://github.com/uBlockOrigin/uBlock-issues/issues/1356
Related commit:
- bde3164eb4
It is not possible to achieve perfect compatibility at this
point, but reasonable compatibility should be achieved for
a majority of instances of `removeparam=`.
Notable differences:
--------------------
uBO always matches in a case insensitive manner, there is
no need to ask for case-insensitivity, and no need to use
uppercase characters in `queryprune=` values.
uBO does not escape special regex characters since the
`queryprune=` values are always assumed to be literal
regular expressions (leaving out the documented special
characters). This means `removeparam=` with characters
which are special regex characters won't be properly
translated and are unlikely to work properly in uBO.
For example, the `queryprune` value of a filter such as
`$removeparam=__xts__[0]` internally becomes the literal
regex `/__xts__[0]/`, and consequently would not match
a query parameter such as `...?__xts__[0]=...`.
Notes:
------
Additionally, for performance reasons, when uBO encounters
a pattern-less `queryprune=` (or `removeparam=`) filter,
it will try to extract a valid pattern from the
`queryprune=` value. For instance, the following filter:
$queryprune=utm_campaign
Will be translated internally into:
utm_campaign$queryprune=utm_campaign
The logger will reflect this internal translation.
This commit is contained in:
parent
80413dff83
commit
6ac09a2856
|
@ -2092,6 +2092,7 @@ const netOptionTokenDescriptors = new Map([
|
|||
[ 'popunder', OPTTokenPopunder | OPTNonNetworkType | OPTNonCspableType | OPTNonRedirectableType ],
|
||||
[ 'popup', OPTTokenPopup | OPTNonNetworkType | OPTCanNegate | OPTNonCspableType | OPTNonRedirectableType ],
|
||||
[ 'queryprune', OPTTokenQueryprune | OPTMustAssign | OPTAllowMayAssign | OPTModifierType | OPTNonCspableType | OPTNonRedirectableType ],
|
||||
[ 'removeparam', OPTTokenQueryprune | OPTMustAssign | OPTAllowMayAssign | OPTModifierType | OPTNonCspableType | OPTNonRedirectableType ],
|
||||
[ 'redirect', OPTTokenRedirect | OPTMustAssign | OPTAllowMayAssign | OPTModifierType ],
|
||||
[ 'redirect-rule', OPTTokenRedirectRule | OPTMustAssign | OPTAllowMayAssign | OPTModifierType | OPTNonCspableType ],
|
||||
[ 'script', OPTTokenScript | OPTCanNegate | OPTNetworkType | OPTModifiableType | OPTRedirectableType | OPTNonCspableType ],
|
||||
|
@ -2147,6 +2148,7 @@ Parser.netOptionTokenIds = new Map([
|
|||
[ 'popunder', OPTTokenPopunder ],
|
||||
[ 'popup', OPTTokenPopup ],
|
||||
[ 'queryprune', OPTTokenQueryprune ],
|
||||
[ 'removeparam', OPTTokenQueryprune ],
|
||||
[ 'redirect', OPTTokenRedirect ],
|
||||
[ 'redirect-rule', OPTTokenRedirectRule ],
|
||||
[ 'script', OPTTokenScript ],
|
||||
|
|
|
@ -2628,7 +2628,7 @@ const FilterParser = class {
|
|||
this.noTokenHash = urlTokenizer.noTokenHash;
|
||||
this.reIsolateHostname = /^(\*?\.)?([^\x00-\x24\x26-\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+)(.*)/;
|
||||
this.reBadCSP = /(?:=|;)\s*report-(?:to|uri)\b/;
|
||||
this.reRegexToken = /[%0-9A-Za-z]+/g;
|
||||
this.reToken = /[%0-9A-Za-z]+/g;
|
||||
this.reRegexTokenAbort = /[\(\)\[\]]/;
|
||||
this.reRegexBadPrefix = /(^|[^\\]\.|\\[%SDWsdw]|[^\\][()*+?[\\\]{}])$/;
|
||||
this.reRegexBadSuffix = /^([^\\]\.|\\[%SDWsdw]|[()*+?[\]{}]|$)/;
|
||||
|
@ -3110,34 +3110,48 @@ const FilterParser = class {
|
|||
// i.e. very common with a high probability of ending up as a miss,
|
||||
// are not good. Avoid if possible. This has a significant positive
|
||||
// impact on performance.
|
||||
//
|
||||
// For pattern-less queryprune filters, try to derive a pattern from
|
||||
// the queryprune value.
|
||||
|
||||
makeToken() {
|
||||
if ( this.pattern === '*' ) { return; }
|
||||
if ( this.pattern === '*' ) {
|
||||
if (
|
||||
this.modifyType !== this.parser.OPTTokenQueryprune ||
|
||||
this.makePatternFromQuerypruneValue() === false
|
||||
) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
if ( this.isRegex ) {
|
||||
return this.extractTokenFromRegex();
|
||||
}
|
||||
const match = this.extractTokenFromPattern();
|
||||
if ( match === null ) { return; }
|
||||
this.token = match.token;
|
||||
this.tokenHash = urlTokenizer.tokenHashFromString(this.token);
|
||||
this.tokenBeg = match.pos;
|
||||
this.extractTokenFromPattern();
|
||||
}
|
||||
|
||||
// Note: a one-char token is better than a documented bad token.
|
||||
extractTokenFromPattern() {
|
||||
this.reToken.lastIndex = 0;
|
||||
const pattern = this.pattern;
|
||||
let bestMatch = null;
|
||||
let bestBadness = 0x7FFFFFFF;
|
||||
for ( const match of this.parser.patternTokens() ) {
|
||||
const badness = match.token.length > 1
|
||||
? this.badTokens.get(match.token) || 0
|
||||
for (;;) {
|
||||
const match = this.reToken.exec(pattern);
|
||||
if ( match === null ) { break; }
|
||||
const badness = match[0].length > 1
|
||||
? this.badTokens.get(match[0]) || 0
|
||||
: 1;
|
||||
if ( badness === 0 ) { return match; }
|
||||
if ( badness < bestBadness ) {
|
||||
bestMatch = match;
|
||||
if ( badness === 0 ) { break; }
|
||||
bestBadness = badness;
|
||||
}
|
||||
}
|
||||
return bestMatch;
|
||||
if ( bestMatch !== null ) {
|
||||
this.token = bestMatch[0];
|
||||
this.tokenHash = urlTokenizer.tokenHashFromString(this.token);
|
||||
this.tokenBeg = bestMatch.index;
|
||||
}
|
||||
}
|
||||
|
||||
// https://github.com/gorhill/uBlock/issues/2781
|
||||
|
@ -3147,15 +3161,16 @@ const FilterParser = class {
|
|||
// Mind `\b` directives: `/\bads\b/` should result in token being `ads`,
|
||||
// not `bads`.
|
||||
extractTokenFromRegex() {
|
||||
this.reRegexToken.lastIndex = 0;
|
||||
const s = this.pattern;
|
||||
this.reToken.lastIndex = 0;
|
||||
const pattern = this.pattern;
|
||||
let bestToken;
|
||||
let bestBadness = 0x7FFFFFFF;
|
||||
for (;;) {
|
||||
const matches = this.reRegexToken.exec(s);
|
||||
const matches = this.reToken.exec(pattern);
|
||||
if ( matches === null ) { break; }
|
||||
let token = matches[0];
|
||||
let prefix = s.slice(0, matches.index);
|
||||
let suffix = s.slice(this.reRegexToken.lastIndex);
|
||||
let prefix = pattern.slice(0, matches.index);
|
||||
let suffix = pattern.slice(this.reToken.lastIndex);
|
||||
if (
|
||||
this.reRegexTokenAbort.test(prefix) &&
|
||||
this.reRegexTokenAbort.test(suffix)
|
||||
|
@ -3181,13 +3196,47 @@ const FilterParser = class {
|
|||
? this.badTokens.get(token) || 0
|
||||
: 1;
|
||||
if ( badness < bestBadness ) {
|
||||
this.token = token.toLowerCase();
|
||||
this.tokenHash = urlTokenizer.tokenHashFromString(this.token);
|
||||
this.tokenBeg = matches.index;
|
||||
bestToken = token;
|
||||
if ( badness === 0 ) { break; }
|
||||
bestBadness = badness;
|
||||
}
|
||||
}
|
||||
if ( bestToken !== undefined ) {
|
||||
this.token = bestToken.toLowerCase();
|
||||
this.tokenHash = urlTokenizer.tokenHashFromString(this.token);
|
||||
}
|
||||
}
|
||||
|
||||
makePatternFromQuerypruneValue() {
|
||||
let pattern = this.modifyValue;
|
||||
if ( pattern === '*' || pattern.charCodeAt(0) === 0x21 /* '!' */ ) {
|
||||
return false;
|
||||
}
|
||||
if ( /^\w+$/.test(pattern) ) {
|
||||
this.pattern = `${pattern}=`;
|
||||
return true;
|
||||
}
|
||||
const reRegex = /^\/(.+)\/i?$/;
|
||||
if ( reRegex.test(pattern) ) {
|
||||
pattern = reRegex.exec(pattern)[1];
|
||||
} else {
|
||||
let prefix = '', suffix = '';
|
||||
if ( pattern.startsWith('|') ) {
|
||||
pattern = pattern.slice(1);
|
||||
prefix = '\\b';
|
||||
}
|
||||
if ( pattern.endsWith('|') ) {
|
||||
pattern = pattern.slice(0, -1);
|
||||
suffix = '\\b';
|
||||
}
|
||||
if ( pattern.indexOf('|') !== -1 ) {
|
||||
pattern = `(?:${pattern})`;
|
||||
}
|
||||
pattern = prefix + pattern + suffix;
|
||||
}
|
||||
this.pattern = pattern;
|
||||
this.isRegex = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
hasNoOptionUnits() {
|
||||
|
@ -4288,6 +4337,7 @@ FilterContainer.prototype.filterQuery = function(fctxt) {
|
|||
|
||||
FilterContainer.prototype.parseFilterPruneValue = function(modifier) {
|
||||
const cache = {};
|
||||
const reRegex = /^\/(.+)\/i?$/;
|
||||
let retext = modifier.value;
|
||||
if ( retext === '*' ) {
|
||||
cache.all = true;
|
||||
|
@ -4296,6 +4346,8 @@ FilterContainer.prototype.parseFilterPruneValue = function(modifier) {
|
|||
if ( cache.not ) { retext = retext.slice(1); }
|
||||
if ( /^\w+$/.test(retext) ) {
|
||||
retext = `^${retext}=`;
|
||||
} else if ( reRegex.test(retext) ) {
|
||||
retext = reRegex.exec(retext)[1];
|
||||
} else {
|
||||
if ( retext.startsWith('|') ) { retext = `^${retext.slice(1)}`; }
|
||||
if ( retext.endsWith('|') ) { retext = `${retext.slice(0,-1)}$`; }
|
||||
|
|
Loading…
Reference in New Issue