mirror of https://github.com/gorhill/uBlock.git
this fixes #235
This commit is contained in:
parent
8744dece44
commit
51bb22097c
|
@ -89,7 +89,7 @@ return {
|
|||
firstUpdateAfter: 5 * oneMinute,
|
||||
nextUpdateAfter: 7 * oneHour,
|
||||
|
||||
selfieMagic: 'ccolmudazpvm',
|
||||
selfieMagic: 'rniacaqskjwz',
|
||||
selfieAfter: 7 * oneMinute,
|
||||
|
||||
pageStores: {},
|
||||
|
|
|
@ -36,7 +36,7 @@ var LiquidDict = function() {
|
|||
|
||||
// Somewhat arbitrary: I need to come up with hard data to know at which
|
||||
// point binary search is better than indexOf.
|
||||
this.cutoff = 500;
|
||||
this.cutoff = 256;
|
||||
};
|
||||
|
||||
/******************************************************************************/
|
||||
|
@ -127,7 +127,7 @@ LiquidDict.prototype.test = function(word) {
|
|||
return bucket[word] !== undefined;
|
||||
}
|
||||
if ( bucket.charAt(0) === ' ' ) {
|
||||
return bucket.indexOf(' ' + word + ' ') >= 0;
|
||||
return bucket.indexOf(' ' + word + ' ') !== -1;
|
||||
}
|
||||
// binary search
|
||||
var len = word.length;
|
||||
|
|
|
@ -80,6 +80,7 @@ var reIgnoreComment = /^\[|^!/;
|
|||
var reHostnameRule = /^[0-9a-z][0-9a-z.-]+[0-9a-z]$/;
|
||||
var reHostnameToken = /^[0-9a-z]+/g;
|
||||
var reGoodToken = /[%0-9a-z]{2,}/g;
|
||||
var reURLPostHostnameAnchors = /[\/?#]/;
|
||||
|
||||
var typeNameToTypeValue = {
|
||||
'stylesheet': 2 << 9,
|
||||
|
@ -166,6 +167,9 @@ Filters family tree:
|
|||
- anchored at end
|
||||
- no hostname
|
||||
- specific hostname
|
||||
- anchored within hostname
|
||||
- no hostname
|
||||
- specific hostname (not implemented)
|
||||
|
||||
- one wildcard
|
||||
- anywhere
|
||||
|
@ -177,6 +181,9 @@ Filters family tree:
|
|||
- anchored at end
|
||||
- no hostname
|
||||
- specific hostname
|
||||
- anchored within hostname
|
||||
- no hostname (not implemented)
|
||||
- specific hostname (not implemented)
|
||||
|
||||
- more than one wildcard
|
||||
- anywhere
|
||||
|
@ -188,6 +195,9 @@ Filters family tree:
|
|||
- anchored at end
|
||||
- no hostname
|
||||
- specific hostname
|
||||
- anchored within hostname
|
||||
- no hostname (not implemented)
|
||||
- specific hostname (not implemented)
|
||||
|
||||
*/
|
||||
|
||||
|
@ -458,6 +468,41 @@ FilterPlainRightAnchoredHostname.fromSelfie = function(s) {
|
|||
|
||||
/******************************************************************************/
|
||||
|
||||
// https://github.com/gorhill/uBlock/issues/235
|
||||
// The filter is left-anchored somewhere within the hostname part of the URL.
|
||||
|
||||
// Filter for a plain string `s` that must start somewhere inside the hostname
// part of the URL (see match() below for the exact test).
// `s` is stored as-is on the instance.
var FilterPlainHnAnchored = function(s) {
|
||||
this.s = s;
|
||||
};
|
||||
|
||||
// Returns true when `url` contains `this.s` starting exactly at offset
// `tokenBeg` AND everything between the end of '://' and `tokenBeg` is free
// of '/', '?' and '#' (the characters matched by reURLPostHostnameAnchors).
// Returns false otherwise, including when `url` has no '://'.
FilterPlainHnAnchored.prototype.match = function(url, tokenBeg) {
|
||||
// Cheap rejection first: the filter text must be present at tokenBeg.
if ( url.substr(tokenBeg, this.s.length) !== this.s ) {
|
||||
return false;
|
||||
}
|
||||
// Valid only if hostname-valid characters to the left of token
|
||||
var pos = url.indexOf('://');
|
||||
// NOTE(review): if tokenBeg is at or before pos + 3, slice() yields an empty
// string and the test below passes -- confirm callers never pass an offset
// located inside the scheme.
return pos !== -1 &&
|
||||
reURLPostHostnameAnchors.test(url.slice(pos + 3, tokenBeg)) === false;
|
||||
};
|
||||
|
||||
// Class identifier; the same 'h|a' key maps to this class in the table shown
// in FilterContainer.prototype.fromSelfie.
FilterPlainHnAnchored.prototype.fid = 'h|a';
|
||||
|
||||
// Textual form of the filter: the stored string prefixed with '||'.
FilterPlainHnAnchored.prototype.toString = function() {
|
||||
return '||' + this.s;
|
||||
};
|
||||
|
||||
// Serialized form is just the stored string; fromSelfie() is its inverse.
FilterPlainHnAnchored.prototype.toSelfie = function() {
|
||||
return this.s;
|
||||
};
|
||||
|
||||
// Rebuilds an instance from the string produced by toSelfie().
FilterPlainHnAnchored.fromSelfie = function(s) {
|
||||
return new FilterPlainHnAnchored(s);
|
||||
};
|
||||
|
||||
// https://www.youtube.com/watch?v=71YS6xDB-E4
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
// With a single wildcard, regex is not optimal.
|
||||
// See:
|
||||
// http://jsperf.com/regexp-vs-indexof-abp-miss/3
|
||||
|
@ -770,6 +815,24 @@ FilterManyWildcardsHostname.fromSelfie = function(s) {
|
|||
|
||||
/******************************************************************************/
|
||||
|
||||
// TODO: Some buckets may grow quite large (see histogram excerpt below).
|
||||
// Evaluate the gain from having an internal dictionary for such large
|
||||
// buckets: the key would be created by concatenating the char preceding and
|
||||
// following the token. The dict would contain smaller buckets, and there
|
||||
// would be a special bucket for those filters for which a prefix, suffix, or
|
||||
// both is missing.
|
||||
// I used to do this, but at a higher level, during tokenization, and in the
|
||||
// end I found out the overhead was too much. I believe it will be a gain
|
||||
// here because the special treatment would be only for a few specific tokens,
|
||||
// not systematically done for all tokens.
|
||||
|
||||
// key=Ȁ ad count=655
|
||||
// key=Ȁ ads count=432
|
||||
// key=̀ doubleclick count= 94
|
||||
// key=Ȁ adv count= 89
|
||||
// key=Ȁ google count= 67
|
||||
// key=Ȁ banner count= 55
|
||||
|
||||
var FilterBucket = function(a, b) {
|
||||
this.f = null;
|
||||
this.filters = [];
|
||||
|
@ -842,6 +905,9 @@ var makeFilter = function(details, tokenBeg) {
|
|||
if ( details.anchor > 0 ) {
|
||||
return new FilterPlainRightAnchored(s);
|
||||
}
|
||||
if ( details.hostnameAnchored ) {
|
||||
return new FilterPlainHnAnchored(s);
|
||||
}
|
||||
if ( tokenBeg === 0 ) {
|
||||
return new FilterPlainPrefix0(s);
|
||||
}
|
||||
|
@ -983,7 +1049,8 @@ FilterParser.prototype.reset = function() {
|
|||
this.f = '';
|
||||
this.firstParty = false;
|
||||
this.fopts = '';
|
||||
this.hostname = false;
|
||||
this.hostnameAnchored = false;
|
||||
this.hostnamePure = false;
|
||||
this.hostnames.length = 0;
|
||||
this.notHostname = false;
|
||||
this.thirdParty = false;
|
||||
|
@ -1060,6 +1127,12 @@ FilterParser.prototype.parse = function(s) {
|
|||
// important!
|
||||
this.reset();
|
||||
|
||||
if ( reHostnameRule.test(s) ) {
|
||||
this.f = s;
|
||||
this.hostnamePure = this.hostnameAnchored = true;
|
||||
return this;
|
||||
}
|
||||
|
||||
// element hiding filter?
|
||||
if ( s.indexOf('##') >= 0 || s.indexOf('#@') >= 0 ) {
|
||||
this.elemHiding = true;
|
||||
|
@ -1087,7 +1160,7 @@ FilterParser.prototype.parse = function(s) {
|
|||
|
||||
// hostname anchoring
|
||||
if ( s.slice(0, 2) === '||' ) {
|
||||
this.hostname = true;
|
||||
this.hostnameAnchored = true;
|
||||
s = s.slice(2);
|
||||
}
|
||||
|
||||
|
@ -1110,7 +1183,12 @@ FilterParser.prototype.parse = function(s) {
|
|||
s = s.replace(/\*\*+/g, '*');
|
||||
|
||||
// remove leading and trailing wildcards
|
||||
this.f = trimChar(s, '*');
|
||||
s = trimChar(s, '*');
|
||||
|
||||
// pure hostname-based?
|
||||
this.hostnamePure = this.hostnameAnchored && reHostnameRule.test(s);
|
||||
|
||||
this.f = s;
|
||||
|
||||
if ( !this.fopts ) {
|
||||
return this;
|
||||
|
@ -1274,6 +1352,7 @@ FilterContainer.prototype.fromSelfie = function(selfie) {
|
|||
'|ah': FilterPlainLeftAnchoredHostname,
|
||||
'a|': FilterPlainRightAnchored,
|
||||
'a|h': FilterPlainRightAnchoredHostname,
|
||||
'h|a': FilterPlainHnAnchored,
|
||||
'*': FilterSingleWildcard,
|
||||
'*h': FilterSingleWildcardHostname,
|
||||
'0*': FilterSingleWildcardPrefix0,
|
||||
|
@ -1345,30 +1424,6 @@ FilterContainer.prototype.makeCategoryKey = function(category) {
|
|||
|
||||
/******************************************************************************/
|
||||
|
||||
FilterContainer.prototype.addAnyPartyHostname = function(hostname) {
|
||||
if ( this.blockedAnyPartyHostnames.add(hostname) ) {
|
||||
this.acceptedCount++;
|
||||
this.blockFilterCount++;
|
||||
return true;
|
||||
}
|
||||
this.duplicateCount++;
|
||||
return false;
|
||||
};
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
FilterContainer.prototype.add3rdPartyHostname = function(hostname) {
|
||||
if ( this.blocked3rdPartyHostnames.add(hostname) ) {
|
||||
this.acceptedCount++;
|
||||
this.blockFilterCount++;
|
||||
return true;
|
||||
}
|
||||
this.duplicateCount++;
|
||||
return false;
|
||||
};
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
FilterContainer.prototype.add = function(s) {
|
||||
// ORDER OF TESTS IS IMPORTANT!
|
||||
|
||||
|
@ -1396,31 +1451,35 @@ FilterContainer.prototype.add = function(s) {
|
|||
return false;
|
||||
}
|
||||
|
||||
this.processedFilterCount += 1;
|
||||
this.acceptedCount += 1;
|
||||
|
||||
// Pure hostnames, use more efficient liquid dict
|
||||
if ( parsed.hostnamePure && parsed.action === BlockAction ) {
|
||||
if ( parsed.fopts === '' ) {
|
||||
if ( this.blockedAnyPartyHostnames.add(parsed.f) ) {
|
||||
this.blockFilterCount++;
|
||||
} else {
|
||||
this.duplicateCount++;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if ( parsed.fopts === 'third-party' ) {
|
||||
if ( this.blocked3rdPartyHostnames.add(parsed.f) ) {
|
||||
this.blockFilterCount++;
|
||||
} else {
|
||||
this.duplicateCount++;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if ( this.duplicates[s] ) {
|
||||
this.duplicateCount++;
|
||||
return false;
|
||||
}
|
||||
this.duplicates[s] = true;
|
||||
|
||||
this.processedFilterCount += 1;
|
||||
|
||||
// Ignore optionless hostname rules, these will be taken care of by µBlock.
|
||||
if ( parsed.hostname && parsed.fopts === '' && parsed.action === BlockAction && reHostnameRule.test(parsed.f) ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
this.acceptedCount += 1;
|
||||
|
||||
// Pure third-party hostnames, use more efficient liquid dict
|
||||
if ( reHostnameRule.test(parsed.f) && parsed.hostname && parsed.action === BlockAction ) {
|
||||
if ( parsed.fopts === 'third-party' ) {
|
||||
return this.blocked3rdPartyHostnames.add(parsed.f);
|
||||
}
|
||||
if ( parsed.fopts === '' ) {
|
||||
return this.blockedAnyPartyHostnames.add(parsed.f);
|
||||
}
|
||||
}
|
||||
|
||||
var r = this.addFilter(parsed);
|
||||
if ( r === false ) {
|
||||
return false;
|
||||
|
@ -1439,16 +1498,22 @@ FilterContainer.prototype.add = function(s) {
|
|||
FilterContainer.prototype.addFilter = function(parsed) {
|
||||
// TODO: avoid duplicates
|
||||
|
||||
var matches = parsed.hostname ? findHostnameToken(parsed.f) : findFirstGoodToken(parsed.f);
|
||||
var matches = parsed.hostnameAnchored ?
|
||||
findHostnameToken(parsed.f) :
|
||||
findFirstGoodToken(parsed.f);
|
||||
if ( !matches || !matches[0].length ) {
|
||||
return false;
|
||||
}
|
||||
var tokenBeg = matches.index;
|
||||
var tokenEnd = parsed.hostname ? reHostnameToken.lastIndex : reGoodToken.lastIndex;
|
||||
var tokenEnd = parsed.hostnameAnchored ?
|
||||
reHostnameToken.lastIndex :
|
||||
reGoodToken.lastIndex;
|
||||
var filter;
|
||||
|
||||
var i = parsed.hostnames.length;
|
||||
|
||||
// Applies to specific domains
|
||||
|
||||
if ( i !== 0 && !parsed.notHostname ) {
|
||||
while ( i-- ) {
|
||||
filter = makeHostnameFilter(parsed, tokenBeg, parsed.hostnames[i]);
|
||||
|
@ -1466,6 +1531,8 @@ FilterContainer.prototype.addFilter = function(parsed) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Applies to all domains, with exception(s)
|
||||
|
||||
// https://github.com/gorhill/uBlock/issues/191
|
||||
// Invert the purpose of the filter for negated hostnames
|
||||
if ( i !== 0 && parsed.notHostname ) {
|
||||
|
@ -1498,6 +1565,8 @@ FilterContainer.prototype.addFilter = function(parsed) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Applies to all domains without exceptions
|
||||
|
||||
filter = makeFilter(parsed, tokenBeg);
|
||||
if ( !filter ) {
|
||||
return false;
|
||||
|
@ -1630,18 +1699,15 @@ FilterContainer.prototype.matchTokens = function(url) {
|
|||
// specialized to deal with other complex filters.
|
||||
|
||||
FilterContainer.prototype.matchAnyPartyHostname = function(requestHostname) {
|
||||
// Quick test first
|
||||
if ( this.blockedAnyPartyHostnames.test(requestHostname) ) {
|
||||
return '||' + requestHostname + '^';
|
||||
}
|
||||
// Check parent hostnames if quick test failed
|
||||
var hostnames = µb.URI.parentHostnamesFromHostname(requestHostname);
|
||||
for ( var i = 0, n = hostnames.length; i < n; i++ ) {
|
||||
if ( this.blockedAnyPartyHostnames.test(hostnames[i]) ) {
|
||||
return '||' + hostnames[i] + '^';
|
||||
var pos;
|
||||
while ( this.blockedAnyPartyHostnames.test(requestHostname) !== true ) {
|
||||
pos = requestHostname.indexOf('.');
|
||||
if ( pos === -1 ) {
|
||||
return false;
|
||||
}
|
||||
requestHostname = requestHostname.slice(pos + 1);
|
||||
}
|
||||
return false;
|
||||
return '||' + requestHostname + '^';
|
||||
};
|
||||
|
||||
/******************************************************************************/
|
||||
|
@ -1655,18 +1721,15 @@ FilterContainer.prototype.matchAnyPartyHostname = function(requestHostname) {
|
|||
// specialized to deal with other complex filters.
|
||||
|
||||
FilterContainer.prototype.match3rdPartyHostname = function(requestHostname) {
|
||||
// Quick test first
|
||||
if ( this.blocked3rdPartyHostnames.test(requestHostname) ) {
|
||||
return '||' + requestHostname + '^$third-party';
|
||||
}
|
||||
// Check parent hostnames if quick test failed
|
||||
var hostnames = µb.URI.parentHostnamesFromHostname(requestHostname);
|
||||
for ( var i = 0, n = hostnames.length; i < n; i++ ) {
|
||||
if ( this.blocked3rdPartyHostnames.test(hostnames[i]) ) {
|
||||
return '||' + hostnames[i] + '^$third-party';
|
||||
var pos;
|
||||
while ( this.blocked3rdPartyHostnames.test(requestHostname) !== true ) {
|
||||
pos = requestHostname.indexOf('.');
|
||||
if ( pos === -1 ) {
|
||||
return false;
|
||||
}
|
||||
requestHostname = requestHostname.slice(pos + 1);
|
||||
}
|
||||
return false;
|
||||
return '||' + requestHostname + '^$third-party';
|
||||
};
|
||||
|
||||
/******************************************************************************/
|
||||
|
|
|
@ -339,9 +339,7 @@
|
|||
var parseCosmeticFilters = this.userSettings.parseAllABPHideFilters;
|
||||
var duplicateCount = netFilteringEngine.duplicateCount + cosmeticFilteringEngine.duplicateCount;
|
||||
var acceptedCount = netFilteringEngine.acceptedCount + cosmeticFilteringEngine.acceptedCount;
|
||||
var reLocalhost = /(^|\s)(localhost\.localdomain|localhost|local|broadcasthost|0\.0\.0\.0|127\.0\.0\.1|::1|fe80::1%lo0)(?=\s|$)/g;
|
||||
var reAdblockFilter = /^[^a-z0-9:]|[^a-z0-9]$|[^a-z0-9_:.-]/;
|
||||
var reAdblockHostFilter = /^\|\|([a-z0-9.-]+[a-z0-9])\^?$/;
|
||||
var reLocalhost = /(?:^|\s)(?:localhost\.localdomain|localhost|local|broadcasthost|0\.0\.0\.0|127\.0\.0\.1|::1|fe80::1%lo0)(?=\s|$)/g;
|
||||
var reAsciiSegment = /^[\x21-\x7e]+$/;
|
||||
var matches;
|
||||
var lineBeg = 0, lineEnd, currentLineBeg;
|
||||
|
@ -392,7 +390,8 @@
|
|||
// The filter is whatever sequence of printable ASCII characters without
|
||||
// whitespace
|
||||
matches = reAsciiSegment.exec(line);
|
||||
if ( !matches || matches.length === 0 ) {
|
||||
if ( matches === null ) {
|
||||
//console.debug('µBlock.mergeUbiquitousBlacklist(): skipping "%s"', lineRaw);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -404,27 +403,7 @@
|
|||
continue;
|
||||
}
|
||||
|
||||
line = matches[0];
|
||||
|
||||
// Likely an ABP net filter?
|
||||
if ( reAdblockFilter.test(line) ) {
|
||||
if ( netFilteringEngine.add(line) ) {
|
||||
continue;
|
||||
}
|
||||
// rhill 2014-01-22: Transpose possible Adblock Plus-filter syntax
|
||||
// into a plain hostname if possible.
|
||||
matches = reAdblockHostFilter.exec(line);
|
||||
if ( !matches || matches.length < 2 ) {
|
||||
continue;
|
||||
}
|
||||
line = matches[1];
|
||||
}
|
||||
|
||||
if ( line === '' ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
netFilteringEngine.addAnyPartyHostname(line);
|
||||
netFilteringEngine.add(matches[0]);
|
||||
}
|
||||
|
||||
// For convenience, store the number of entries for this
|
||||
|
|
Loading…
Reference in New Issue