mirror of https://github.com/gorhill/uBlock.git
Improve validation of hostname in `domain=` and `denyallow` options
Related issue: https://github.com/uBlockOrigin/uBlock-issues/issues/1249. For "exotic" hostname values, the browser's own URL API is ultimately used to validate and normalize the hostname.
This commit is contained in:
parent
7e906b33c5
commit
3f299ef623
|
@ -108,6 +108,10 @@ const Parser = class {
|
|||
this.reHostsSource = /^[^\x00-\x24\x26-\x29\x2B\x2C\x2F\x3A-\x40\x5B-\x5E\x60\x7B-\x7F]+$/;
|
||||
this.reUnicodeChar = /[^\x00-\x7F]/;
|
||||
this.reUnicodeChars = /[^\x00-\x7F]/g;
|
||||
this.reHostnameLabel = /[^.]+/g;
|
||||
this.rePlainHostname = /^(?:[\w-]+\.)*[a-z]+$/;
|
||||
this.rePlainEntity = /^(?:[\w-]+\.)+\*$/;
|
||||
this.reEntity = /^[^*]+\.\*$/;
|
||||
this.punycoder = new URL(self.location);
|
||||
this.selectorCompiler = new this.SelectorCompiler(this);
|
||||
// TODO: reuse for network filtering analysis
|
||||
|
@ -313,7 +317,7 @@ const Parser = class {
|
|||
analyzeExtExtra() {
|
||||
if ( this.hasOptions() ) {
|
||||
const { i, len } = this.optionsSpan;
|
||||
this.analyzeDomainList(i, i + len, BITComma, 0b11);
|
||||
this.analyzeDomainList(i, i + len, BITComma, 0b1110);
|
||||
}
|
||||
if ( hasBits(this.flavorBits, BITFlavorUnsupported) ) {
|
||||
this.markSpan(this.patternSpan, BITError);
|
||||
|
@ -668,66 +672,62 @@ const Parser = class {
|
|||
}
|
||||
}
|
||||
|
||||
// bits:
|
||||
// 0: can use entity-based hostnames
|
||||
// 1: can use single wildcard
|
||||
analyzeDomain(from, to, optionBits) {
|
||||
const { slices } = this;
|
||||
let len = to - from;
|
||||
if ( len === 0 ) { return false; }
|
||||
const not = hasBits(slices[from], BITTilde);
|
||||
if ( not ) {
|
||||
if ( (optionBits & 0b01) === 0 || slices[from+2] > 1 ) { return false; }
|
||||
from += 3;
|
||||
len -= 3;
|
||||
analyzeDomain(from, to, modeBits) {
|
||||
if ( to === from ) { return false; }
|
||||
return this.normalizeHostnameValue(
|
||||
this.strFromSlices(from, to - 3),
|
||||
modeBits
|
||||
) !== undefined;
|
||||
}
|
||||
|
||||
// Ultimately, let the browser API do the hostname normalization, after
|
||||
// making some other trivial checks.
|
||||
//
|
||||
// modeBits:
|
||||
// 0: can use wildcard at any position
|
||||
// 1: can use entity-based hostnames
|
||||
// 2: can use single wildcard
|
||||
// 3: can be negated
|
||||
normalizeHostnameValue(s, modeBits = 0b0000) {
|
||||
const not = s.charCodeAt(0) === 0x7E /* '~' */;
|
||||
if ( not && (modeBits & 0b1000) === 0 ) { return; }
|
||||
let hn = not === false ? s : s.slice(1);
|
||||
if ( this.rePlainHostname.test(hn) ) { return s; }
|
||||
const hasWildcard = hn.lastIndexOf('*') !== -1;
|
||||
if ( hasWildcard ) {
|
||||
if ( modeBits === 0 ) { return; }
|
||||
if ( hn.length === 1 ) {
|
||||
if ( not || (modeBits & 0b0100) === 0 ) { return; }
|
||||
return s;
|
||||
}
|
||||
if ( (modeBits & 0b0010) !== 0 ) {
|
||||
if ( this.rePlainEntity.test(hn) ) { return s; }
|
||||
if ( this.reEntity.test(hn) === false ) { return; }
|
||||
} else if ( (modeBits & 0b0001) === 0 ) {
|
||||
return;
|
||||
}
|
||||
hn = hn.replace(/\*/g, '__asterisk__');
|
||||
}
|
||||
this.punycoder.hostname = '_';
|
||||
try {
|
||||
this.punycoder.hostname = hn;
|
||||
hn = this.punycoder.hostname;
|
||||
} catch (_) {
|
||||
return;
|
||||
}
|
||||
if ( hn === '_' || hn === '' ) { return; }
|
||||
if ( hasWildcard ) {
|
||||
hn = this.punycoder.hostname.replace(/__asterisk__/g, '*');
|
||||
}
|
||||
if ( len === 0 ) { return false; }
|
||||
// One slice only, check for single asterisk
|
||||
if (
|
||||
len === 3 &&
|
||||
not === false &&
|
||||
(optionBits & 0b10) !== 0 &&
|
||||
hasBits(slices[from], BITAsterisk)
|
||||
(modeBits & 0b0001) === 0 && (
|
||||
hn.charCodeAt(0) === 0x2E /* '.' */ ||
|
||||
hn.charCodeAt(hn.length - 1) === 0x2E /* '.' */
|
||||
)
|
||||
) {
|
||||
return slices[from+2] === 1;
|
||||
return;
|
||||
}
|
||||
// First slice must be regex-equivalent of `\w`
|
||||
if ( hasNoBits(slices[from], BITRegexWord | BITUnicode) ) { return false; }
|
||||
// Last slice
|
||||
if ( len > 3 ) {
|
||||
const last = to - 3;
|
||||
if ( hasBits(slices[last], BITAsterisk) ) {
|
||||
if (
|
||||
(optionBits & 0b01) === 0 ||
|
||||
len < 9 ||
|
||||
slices[last+2] > 1 ||
|
||||
hasNoBits(slices[last-3], BITPeriod)
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
} else if ( hasNoBits(slices[to-3], BITAlphaNum | BITUnicode) ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Middle slices
|
||||
if ( len > 6 ) {
|
||||
for ( let i = from + 3; i < to - 3; i += 3 ) {
|
||||
const bits = slices[i];
|
||||
if ( hasNoBits(bits, BITHostname) ) { return false; }
|
||||
if ( hasBits(bits, BITPeriod) && slices[i+2] > 1 ) {
|
||||
return false;
|
||||
}
|
||||
if (
|
||||
hasBits(bits, BITDash) && (
|
||||
hasNoBits(slices[i-3], BITRegexWord | BITUnicode) ||
|
||||
hasNoBits(slices[i+3], BITRegexWord | BITUnicode)
|
||||
)
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
return not ? '~' + hn : hn;
|
||||
}
|
||||
|
||||
slice(raw) {
|
||||
|
@ -1081,6 +1081,8 @@ const Parser = class {
|
|||
// Be ready to deal with non-punycode-able Unicode characters.
|
||||
// https://github.com/uBlockOrigin/uBlock-issues/issues/772
|
||||
// Encode Unicode characters beyond the hostname part.
|
||||
// Prepend with '*' character to prevent the browser API from refusing to
|
||||
// punycode -- this occurs when the extracted label starts with a dash.
|
||||
toASCII(dryrun = false) {
|
||||
if ( this.patternHasUnicode() === false ) { return true; }
|
||||
const { i, len } = this.patternSpan;
|
||||
|
@ -1090,16 +1092,14 @@ const Parser = class {
|
|||
// Punycode hostname part of the pattern.
|
||||
if ( patternIsRegex === false ) {
|
||||
const match = this.reHostname.exec(pattern);
|
||||
if ( match === null ) { return true; }
|
||||
try {
|
||||
this.punycoder.hostname = match[0].replace(/\*/g, '__asterisk__');
|
||||
} catch(ex) {
|
||||
return false;
|
||||
if ( match !== null ) {
|
||||
const hn = match[0].replace(this.reHostnameLabel, s => {
|
||||
if ( this.reUnicodeChar.test(s) === false ) { return s; }
|
||||
if ( s.charCodeAt(0) === 0x2D /* '-' */ ) { s = '*' + s; }
|
||||
return this.normalizeHostnameValue(s, 0b0001) || s;
|
||||
});
|
||||
pattern = hn + pattern.slice(match.index + match[0].length);
|
||||
}
|
||||
const hn = this.punycoder.hostname;
|
||||
if ( hn === '' ) { return false; }
|
||||
const punycoded = hn.replace(/__asterisk__/g, '*');
|
||||
pattern = punycoded + pattern.slice(match.index + match[0].length);
|
||||
}
|
||||
// Percent-encode remaining Unicode characters.
|
||||
if ( this.reUnicodeChar.test(pattern) ) {
|
||||
|
@ -1755,7 +1755,6 @@ const BITError = 1 << 31;
|
|||
|
||||
const BITAll = 0xFFFFFFFF;
|
||||
const BITAlphaNum = BITNum | BITAlpha;
|
||||
const BITRegexWord = BITAlphaNum | BITUnderscore;
|
||||
const BITHostname = BITNum | BITAlpha | BITUppercase | BITDash | BITPeriod | BITUnderscore | BITUnicode;
|
||||
const BITPatternToken = BITNum | BITAlpha | BITPercent;
|
||||
const BITLineComment = BITExclamation | BITHash | BITSquareBracket;
|
||||
|
@ -2226,7 +2225,7 @@ const NetOptionsIterator = class {
|
|||
if ( this.interactive && hasBits(descriptor, OPTDomainList) ) {
|
||||
this.parser.analyzeDomainList(
|
||||
lval + 3, i, BITPipe,
|
||||
(descriptor & 0xFF) === OPTTokenDomain ? 0b01 : 0b00
|
||||
(descriptor & 0xFF) === OPTTokenDomain ? 0b1010 : 0b0000
|
||||
);
|
||||
}
|
||||
} else {
|
||||
|
|
|
@ -20,7 +20,6 @@
|
|||
*/
|
||||
|
||||
/* jshint bitwise: false */
|
||||
/* global punycode */
|
||||
|
||||
'use strict';
|
||||
|
||||
|
@ -1120,12 +1119,12 @@ const filterOrigin = (( ) => {
|
|||
this.trieContainer = new µb.HNTrieContainer();
|
||||
}
|
||||
|
||||
compile(domainOpt, prepend, units) {
|
||||
compile(domainOptList, prepend, units) {
|
||||
const hostnameHits = [];
|
||||
const hostnameMisses = [];
|
||||
const entityHits = [];
|
||||
const entityMisses = [];
|
||||
for ( const s of FilterParser.domainOptIterator(domainOpt) ) {
|
||||
for ( const s of domainOptList ) {
|
||||
const len = s.length;
|
||||
const beg = len > 1 && s.charCodeAt(0) === 0x7E ? 1 : 0;
|
||||
const end = len > 2 &&
|
||||
|
@ -1770,7 +1769,7 @@ const FilterDenyAllow = class {
|
|||
}
|
||||
|
||||
static compile(details) {
|
||||
return [ FilterDenyAllow.fid, details.denyallow ];
|
||||
return [ FilterDenyAllow.fid, details.denyallowOpt ];
|
||||
}
|
||||
|
||||
static fromCompiled(args) {
|
||||
|
@ -2074,17 +2073,15 @@ const FILTER_SEQUENCES_MIN = filterSequenceWritePtr;
|
|||
const FilterParser = class {
|
||||
constructor(parser) {
|
||||
this.cantWebsocket = vAPI.cantWebsocket;
|
||||
this.domainOpt = '';
|
||||
this.noTokenHash = urlTokenizer.noTokenHash;
|
||||
this.reBadDomainOptChars = /[+?^${}()[\]\\]/;
|
||||
this.reIsolateHostname = /^(\*?\.)?([^\x00-\x24\x26-\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+)(.*)/;
|
||||
this.reHasUnicode = /[^\x00-\x7F]/;
|
||||
this.reBadCSP = /(?:=|;)\s*report-(?:to|uri)\b/;
|
||||
this.reRegexToken = /[%0-9A-Za-z]{2,}/g;
|
||||
this.reRegexTokenAbort = /[([]/;
|
||||
this.reRegexBadPrefix = /(^|[^\\]\.|[*?{}\\])$/;
|
||||
this.reRegexBadSuffix = /^([^\\]\.|\\[dw]|[([{}?*.]|$)/;
|
||||
this.reGoodToken = /[%0-9a-z]{1,}/g;
|
||||
this.domainOptList = [];
|
||||
this.tokenIdToNormalizedType = new Map([
|
||||
[ parser.OPTTokenCname, bitFromType('cname') ],
|
||||
[ parser.OPTTokenCss, bitFromType('stylesheet') ],
|
||||
|
@ -2237,7 +2234,7 @@ const FilterParser = class {
|
|||
this.thirdParty = false;
|
||||
this.party = AnyParty;
|
||||
this.domainOpt = '';
|
||||
this.denyallow = '';
|
||||
this.denyallowOpt = '';
|
||||
this.isPureHostname = false;
|
||||
this.isRegex = false;
|
||||
this.redirect = 0;
|
||||
|
@ -2291,20 +2288,24 @@ const FilterParser = class {
|
|||
}
|
||||
}
|
||||
|
||||
parseHostnameList(parser, s) {
|
||||
if ( parser.optionHasUnicode() ) {
|
||||
const hostnames = s.split('|');
|
||||
let i = hostnames.length;
|
||||
while ( i-- ) {
|
||||
if ( this.reHasUnicode.test(hostnames[i]) ) {
|
||||
hostnames[i] = punycode.toASCII(hostnames[i]);
|
||||
}
|
||||
parseHostnameList(parser, s, modeBits, out = []) {
|
||||
let beg = 0;
|
||||
let slen = s.length;
|
||||
let i = 0;
|
||||
while ( beg < slen ) {
|
||||
let end = s.indexOf('|', beg);
|
||||
if ( end === -1 ) { end = slen; }
|
||||
const hn = parser.normalizeHostnameValue(
|
||||
s.slice(beg, end),
|
||||
modeBits
|
||||
);
|
||||
if ( hn !== undefined ) {
|
||||
out[i] = hn; i += 1;
|
||||
}
|
||||
s = hostnames.join('|');
|
||||
beg = end + 1;
|
||||
}
|
||||
// TODO: revisit
|
||||
if ( this.reBadDomainOptChars.test(s) ) { return ''; }
|
||||
return s;
|
||||
out.length = i;
|
||||
return i === 1 ? out[0] : out.join('|');
|
||||
}
|
||||
|
||||
parseOptions(parser) {
|
||||
|
@ -2337,12 +2338,17 @@ const FilterParser = class {
|
|||
// Detect and discard filter if domain option contains nonsensical
|
||||
// characters.
|
||||
case parser.OPTTokenDomain:
|
||||
this.domainOpt = this.parseHostnameList(parser, val);
|
||||
this.domainOpt = this.parseHostnameList(
|
||||
parser,
|
||||
val,
|
||||
0b1010,
|
||||
this.domainOptList
|
||||
);
|
||||
if ( this.domainOpt === '' ) { return false; }
|
||||
break;
|
||||
case parser.OPTTokenDenyAllow:
|
||||
this.denyallow = this.parseHostnameList(parser, val);
|
||||
if ( this.denyallow === '' ) { return false; }
|
||||
this.denyallowOpt = this.parseHostnameList(parser, val, 0b0000);
|
||||
if ( this.denyallowOpt === '' ) { return false; }
|
||||
break;
|
||||
// https://www.reddit.com/r/uBlockOrigin/comments/d6vxzj/
|
||||
// Add support for `elemhide`. Rarely used but it happens.
|
||||
|
@ -2559,7 +2565,7 @@ const FilterParser = class {
|
|||
isJustOrigin() {
|
||||
return this.isRegex === false &&
|
||||
this.dataType === undefined &&
|
||||
this.denyallow === '' &&
|
||||
this.denyallowOpt === '' &&
|
||||
this.domainOpt !== '' && (
|
||||
this.pattern === '*' || (
|
||||
this.anchor === 0b010 &&
|
||||
|
@ -2961,7 +2967,7 @@ FilterContainer.prototype.compile = function(parser, writer) {
|
|||
if (
|
||||
parsed.isPureHostname &&
|
||||
parsed.domainOpt === '' &&
|
||||
parsed.denyallow === '' &&
|
||||
parsed.denyallowOpt === '' &&
|
||||
parsed.dataType === undefined
|
||||
) {
|
||||
parsed.tokenHash = this.dotTokenHash;
|
||||
|
@ -2990,7 +2996,7 @@ FilterContainer.prototype.compile = function(parser, writer) {
|
|||
parsed.tokenHash = this.anyHTTPTokenHash;
|
||||
}
|
||||
const entities = [];
|
||||
for ( const hn of FilterParser.domainOptIterator(parsed.domainOpt) ) {
|
||||
for ( const hn of parsed.domainOptList ) {
|
||||
if ( parsed.domainIsEntity(hn) === false ) {
|
||||
this.compileToAtomicFilter(parsed, hn, writer);
|
||||
} else {
|
||||
|
@ -3004,7 +3010,7 @@ FilterContainer.prototype.compile = function(parser, writer) {
|
|||
const units = [];
|
||||
filterPattern.compile(parsed, units);
|
||||
if ( leftAnchored ) { units.push(FilterAnchorLeft.compile()); }
|
||||
filterOrigin.compile(entity, true, units);
|
||||
filterOrigin.compile([ entity ], true, units);
|
||||
this.compileToAtomicFilter(
|
||||
parsed, FilterCompositeAll.compile(units), writer
|
||||
);
|
||||
|
@ -3034,14 +3040,14 @@ FilterContainer.prototype.compile = function(parser, writer) {
|
|||
// Origin
|
||||
if ( parsed.domainOpt !== '' ) {
|
||||
filterOrigin.compile(
|
||||
parsed.domainOpt,
|
||||
parsed.domainOptList,
|
||||
units.length !== 0 && filterClasses[units[0][0]].isSlow === true,
|
||||
units
|
||||
);
|
||||
}
|
||||
|
||||
// Deny-allow
|
||||
if ( parsed.denyallow !== '' ) {
|
||||
if ( parsed.denyallowOpt !== '' ) {
|
||||
units.push(FilterDenyAllow.compile(parsed));
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue