code review for static extended filtering, notably:

- use domain-derived integer hash to store filters

- remove code meant for firefox/legacy

- properly handle subdomains of entity-based filters
This commit is contained in:
Raymond Hill 2018-09-09 08:10:09 -04:00
parent 4682a33121
commit 06fe7e6871
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
5 changed files with 160 additions and 179 deletions

View File

@ -139,7 +139,7 @@ var µBlock = (function() { // jshint ignore:line
// Read-only
systemSettings: {
compiledMagic: 4, // Increase when compiled format changes
compiledMagic: 5, // Increase when compiled format changes
selfieMagic: 4 // Increase when selfie format changes
},

View File

@ -338,51 +338,6 @@ SelectorCacheEntry.prototype = {
/******************************************************************************/
/******************************************************************************/
// HHHHHHHHHHHH0000
// | |
// | |
// | +-- bit 3-0: reserved: 0=exception
// | 1=procedural
// +------ bit 15-4: FNV
let makeHash = function(token) {
    // Sampled variant of FNV32a:
    // http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-reference-source
    // Instead of hashing every character, eight characters picked at fixed
    // fractions of the token's length are mixed in. That sampling is custom,
    // suited for uBlock.
    const len = token.length;
    const half = len >> 1;
    const quarter = len >> 2;
    const eighth = len >> 3;
    const samplePositions = [
        0,
        eighth,
        quarter,
        quarter + eighth,
        half,
        half + eighth,
        half + quarter,
        len - 1
    ];
    let hash = 0x811c9dc5;
    for ( const pos of samplePositions ) {
        hash ^= token.charCodeAt(pos);
        // Shift-and-add multiplication by 0x01000193 (the 32-bit FNV prime),
        // then wrap the result back into the unsigned 32-bit range.
        hash += (hash<<1) + (hash<<4) + (hash<<7) + (hash<<8) + (hash<<24);
        hash >>>= 0;
    }
    // Keep bits 15-4 only; bits 3-0 are left cleared for the caller to use.
    return hash & 0xFFF0;
};
/******************************************************************************/
/******************************************************************************/
// Cosmetic filter family tree:
//
// Generic
@ -769,25 +724,19 @@ FilterContainer.prototype.compileSpecificSelector = function(
let compiled = µb.staticExtFilteringEngine.compileSelector(parsed.suffix);
if ( compiled === undefined ) { return; }
// https://github.com/chrisaljoudi/uBlock/issues/188
// If not a real domain as per PSL, assign a synthetic one
let hash;
if ( hostname.endsWith('.*') === false ) {
let domain = this.µburi.domainFromHostnameNoCache(hostname);
hash = domain !== '' ? makeHash(domain) : 0;
} else {
hash = makeHash(hostname);
}
let hash = µb.staticExtFilteringEngine.compileHostnameToHash(hostname);
// Exception?
if ( unhide === 1 ) {
hash |= 0b01;
hash |= 0b0001;
}
writer.push([
8,
compiled.charCodeAt(0) !== 0x7B ? hash : hash | 0b10,
hostname,
compiled
]);
// Procedural?
if ( compiled.charCodeAt(0) === 0x7B ) {
hash |= 0b0010;
}
writer.push([ 8, hash, hostname, compiled ]);
};
/******************************************************************************/
@ -1268,8 +1217,9 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
if ( options.noCosmeticFiltering !== true ) {
let entity = request.entity,
domainHash = makeHash(request.domain),
entityHash = entity !== '' ? makeHash(entity) : undefined;
domainHash = µb.staticExtFilteringEngine.makeHash(request.domain),
entityHash = µb.staticExtFilteringEngine.makeHash(entity),
bucket;
// Exception cosmetic filters: prime with generic exception filters.
let exceptionSet = this.setRegister0;
@ -1278,32 +1228,34 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
exceptionSet.add(exception);
}
// Specific exception cosmetic filters.
let bucket = this.specificFilters.get(domainHash | 0b01);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, exceptionSet);
}
bucket = this.specificFilters.get(domainHash | 0b11);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, exceptionSet);
if ( domainHash !== 0 ) {
bucket = this.specificFilters.get(domainHash | 0b0001);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, exceptionSet);
}
bucket = this.specificFilters.get(domainHash | 0b0011);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, exceptionSet);
}
}
// Specific entity-based exception cosmetic filters.
if ( entityHash !== undefined ) {
bucket = this.specificFilters.get(entityHash | 0b01);
if ( entityHash !== 0 ) {
bucket = this.specificFilters.get(entityHash | 0b0001);
if ( bucket !== undefined ) {
bucket.retrieve(entity, exceptionSet);
}
bucket = this.specificFilters.get(entityHash | 0b11);
bucket = this.specificFilters.get(entityHash | 0b0011);
if ( bucket !== undefined ) {
bucket.retrieve(entity, exceptionSet);
}
}
// Special bucket for those filters without a valid
// domain name as per PSL.
bucket = this.specificFilters.get(0b01);
bucket = this.specificFilters.get(0 | 0b0001);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, exceptionSet);
}
bucket = this.specificFilters.get(0b11);
bucket = this.specificFilters.get(0 | 0b0011);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, exceptionSet);
}
@ -1317,20 +1269,23 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
// slightly content script code.
let specificSet = this.setRegister1;
// Specific cosmetic filters.
bucket = this.specificFilters.get(domainHash | 0b00);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, specificSet);
if ( domainHash !== 0 ) {
bucket = this.specificFilters.get(domainHash | 0b0000);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, specificSet);
}
}
// Specific entity-based cosmetic filters.
if ( entityHash !== undefined ) {
bucket = this.specificFilters.get(entityHash | 0b00);
if ( entityHash !== 0 ) {
bucket = this.specificFilters.get(entityHash | 0b0000);
if ( bucket !== undefined ) {
bucket.retrieve(entity, specificSet);
}
}
// https://github.com/chrisaljoudi/uBlock/issues/188
// Special bucket for those filters without a valid domain name as per PSL
bucket = this.specificFilters.get(0b00);
// Special bucket for those filters without a valid domain name
// as per PSL
bucket = this.specificFilters.get(0 | 0b0000);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, specificSet);
}
@ -1346,20 +1301,23 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
// Procedural cosmetic filters.
let proceduralSet = this.setRegister2;
// Specific cosmetic filters.
bucket = this.specificFilters.get(domainHash | 0b10);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, proceduralSet);
if ( domainHash !== 0 ) {
bucket = this.specificFilters.get(domainHash | 0b0010);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, proceduralSet);
}
}
// Specific entity-based cosmetic filters.
if ( entityHash !== undefined ) {
bucket = this.specificFilters.get(entityHash | 0b10);
if ( entityHash !== 0 ) {
bucket = this.specificFilters.get(entityHash | 0b0010);
if ( bucket !== undefined ) {
bucket.retrieve(entity, proceduralSet);
}
}
// https://github.com/chrisaljoudi/uBlock/issues/188
// Special bucket for those filters without a valid domain name as per PSL
bucket = this.specificFilters.get(0b10);
// Special bucket for those filters without a valid domain name
// as per PSL
bucket = this.specificFilters.get(0 | 0b0010);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, proceduralSet);
}

View File

@ -226,7 +226,7 @@
};
api.compile = function(parsed, writer) {
var selector = parsed.suffix.slice(1).trim(),
let selector = parsed.suffix.slice(1).trim(),
compiled = µb.staticExtFilteringEngine.compileSelector(selector);
if ( compiled === undefined ) { return; }
@ -235,13 +235,16 @@
// TODO: Mind negated hostnames, they are currently discarded.
for ( var hostname of parsed.hostnames ) {
if ( hostname.charCodeAt(0) === 0x7E /* '~' */ ) { continue; }
var domain = µb.URI.domainFromHostname(hostname);
for ( let hn of parsed.hostnames ) {
if ( hn.charCodeAt(0) === 0x7E /* '~' */ ) { continue; }
let hash = µb.staticExtFilteringEngine.compileHostnameToHash(hn);
if ( parsed.exception ) {
hash |= 0b0001;
}
writer.push([
compiled.charCodeAt(0) !== 0x7B /* '{' */ ? 64 : 65,
parsed.exception ? '!' + domain : domain,
hostname,
hash,
hn,
compiled
]);
}
@ -249,7 +252,7 @@
api.fromCompiledContent = function(reader) {
// Don't bother loading filters if stream filtering is not supported.
//if ( µb.canFilterResponseBody === false ) { return; }
if ( µb.canFilterResponseBody === false ) { return; }
// 1002 = html filtering
reader.select(1002);
@ -272,7 +275,7 @@
};
api.retrieve = function(request) {
var hostname = request.hostname;
let hostname = request.hostname;
// https://github.com/gorhill/uBlock/issues/2835
// Do not filter if the site is under an `allow` rule.
@ -283,12 +286,16 @@
return;
}
var out = [];
if ( request.domain !== '' ) {
filterDB.retrieve(request.domain, hostname, out);
filterDB.retrieve(request.entity, request.entity, out);
let out = [];
let domainHash = µb.staticExtFilteringEngine.makeHash(request.domain);
if ( domainHash !== 0 ) {
filterDB.retrieve(domainHash, hostname, out);
}
filterDB.retrieve('', hostname, out);
let entityHash = µb.staticExtFilteringEngine.makeHash(request.entity);
if ( entityHash !== 0 ) {
filterDB.retrieve(entityHash, request.entity, out);
}
filterDB.retrieve(0, hostname, out);
// TODO: handle exceptions.
@ -326,53 +333,6 @@
pselectors.clear();
};
// TODO: The following methods are useful only to legacy Firefox. They can be
// removed once support for legacy Firefox is dropped. The only care
// at this point is for the code to work, not to be efficient.
// Only `script:has-text` selectors are considered.
api.retrieveScriptTagHostnames = function() {
    // Collect the hostnames of every type-65 entry in `filterDB` whose parsed
    // selector consists of exactly one task, that task being a two-element
    // `:has-text` task.
    // Returns an array of unique hostnames, or `undefined` when none matched.
    const hostnames = new Set();
    for ( const entry of filterDB ) {
        if ( entry.type !== 65 ) { continue; }
        const tasks = JSON.parse(entry.selector).tasks;
        if ( tasks.length !== 1 ) { continue; }
        const task = tasks[0];
        if ( task.length !== 2 || task[0] !== ':has-text' ) { continue; }
        hostnames.add(entry.hostname);
    }
    return hostnames.size !== 0 ? Array.from(hostnames) : undefined;
};
api.retrieveScriptTagRegex = function(domain, hostname) {
    // Look up the entries applying to `hostname` through `api.retrieve`, keep
    // the type-65 ones whose parsed selector is a single two-element
    // `:has-text` task, and join the distinct arguments of those tasks with
    // `|`.
    // Returns the joined string, or `undefined` when `api.retrieve` returned
    // nothing or when no entry qualified.
    const entity = µb.URI.entityFromDomain(domain);
    const entries = api.retrieve({ hostname, domain, entity });
    if ( entries === undefined ) { return; }
    const needles = new Set();
    for ( const entry of entries ) {
        if ( entry.type !== 65 ) { continue; }
        const tasks = JSON.parse(entry.selector).tasks;
        if ( tasks.length !== 1 ) { continue; }
        const task = tasks[0];
        if ( task.length !== 2 || task[0] !== ':has-text' ) { continue; }
        needles.add(task[1]);
    }
    return needles.size !== 0 ? Array.from(needles).join('|') : undefined;
};
Object.defineProperties(api, {
acceptedCount: {
get: function() {

View File

@ -244,7 +244,7 @@
if ( parsed.hostnames.length === 0 ) {
if ( parsed.exception ) {
writer.push([ 32, '!', '', parsed.suffix ]);
writer.push([ 32, 0 | 0b0001, '', parsed.suffix ]);
}
return;
}
@ -253,21 +253,19 @@
// Ignore instances of exception filter with negated hostnames,
// because there is no way to create an exception to an exception.
let µburi = µb.URI;
for ( let hostname of parsed.hostnames ) {
let negated = hostname.charCodeAt(0) === 0x7E /* '~' */;
for ( let hn of parsed.hostnames ) {
let negated = hn.charCodeAt(0) === 0x7E /* '~' */;
if ( negated ) {
hostname = hostname.slice(1);
hn = hn.slice(1);
}
let hash = µburi.domainFromHostname(hostname);
let hash = µb.staticExtFilteringEngine.compileHostnameToHash(hn);
if ( parsed.exception ) {
if ( negated ) { continue; }
hash = '!' + hash;
hash |= 0b0001;
} else if ( negated ) {
hash = '!' + hash;
hash |= 0b0001;
}
writer.push([ 32, hash, hostname, parsed.suffix ]);
writer.push([ 32, hash, hn, parsed.suffix ]);
}
};
@ -301,10 +299,10 @@
if ( scriptletDB.size === 0 ) { return; }
if ( µb.hiddenSettings.ignoreScriptInjectFilters ) { return; }
var reng = µb.redirectEngine;
let reng = µb.redirectEngine;
if ( !reng ) { return; }
var hostname = request.hostname;
let hostname = request.hostname;
// https://github.com/gorhill/uBlock/issues/2835
// Do not inject scriptlets if the site is under an `allow` rule.
@ -320,7 +318,7 @@
// https://github.com/gorhill/uBlock/issues/1954
// Implicit
var hn = hostname;
let hn = hostname;
for (;;) {
lookupScriptlet(hn + '.js', reng, scriptletsRegister);
if ( hn === domain ) { break; }
@ -334,11 +332,15 @@
// Explicit
let entries = [];
if ( domain !== '' ) {
scriptletDB.retrieve(domain, hostname, entries);
scriptletDB.retrieve(entity, entity, entries);
let domainHash = µb.staticExtFilteringEngine.makeHash(domain);
if ( domainHash !== 0 ) {
scriptletDB.retrieve(domainHash, hostname, entries);
}
scriptletDB.retrieve('', hostname, entries);
let entityHash = µb.staticExtFilteringEngine.makeHash(entity);
if ( entityHash !== 0 ) {
scriptletDB.retrieve(entityHash, entity, entries);
}
scriptletDB.retrieve(0, hostname, entries);
for ( let entry of entries ) {
lookupScriptlet(entry.token, reng, scriptletsRegister);
}
@ -347,11 +349,13 @@
// Collect exception filters.
entries = [];
if ( domain !== '' ) {
scriptletDB.retrieve('!' + domain, hostname, entries);
scriptletDB.retrieve('!' + entity, entity, entries);
if ( domainHash !== 0 ) {
scriptletDB.retrieve(domainHash | 0b0001, hostname, entries);
}
scriptletDB.retrieve('!', hostname, entries);
if ( entityHash !== 0 ) {
scriptletDB.retrieve(entityHash | 0b0001, entity, entries);
}
scriptletDB.retrieve(0 | 0b0001, hostname, entries);
for ( let entry of entries ) {
exceptionsRegister.add(entry.token);
}

View File

@ -405,7 +405,7 @@
api.HostnameBasedDB.prototype = {
add: function(hash, entry) {
var bucket = this.db.get(hash);
let bucket = this.db.get(hash);
if ( bucket === undefined ) {
this.db.set(hash, entry);
} else if ( Array.isArray(bucket) ) {
@ -420,16 +420,21 @@
this.size = 0;
},
retrieve: function(hash, hostname, out) {
var bucket = this.db.get(hash);
let bucket = this.db.get(hash);
if ( bucket === undefined ) { return; }
if ( Array.isArray(bucket) === false ) {
if ( hostname.endsWith(bucket.hostname) ) { out.push(bucket); }
return;
bucket = [ bucket ];
}
var i = bucket.length;
while ( i-- ) {
var entry = bucket[i];
if ( hostname.endsWith(entry.hostname) ) { out.push(entry); }
for ( let entry of bucket ) {
if ( hostname.endsWith(entry.hostname) === false ) { continue; }
let i = hostname.length - entry.hostname.length;
if (
i === 0 ||
i === hostname.length ||
hostname.charCodeAt(i-1) === 0x2E /* '.' */
) {
out.push(entry);
}
}
},
toSelfie: function() {
@ -484,6 +489,60 @@
resetParsed(parsed);
};
// HHHHHHHHHHHH0000
// | |
// | |
// | +-- bit 3-0: reserved
// +------ bit 15-4: FNV
api.makeHash = function(token) {
    // Sampled variant of FNV32a:
    // http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-reference-source
    // Instead of hashing every character, eight characters picked at fixed
    // fractions of the token's length are mixed in. That sampling is custom,
    // suited for uBlock.
    const len = token.length;
    // The empty string always hashes to 0.
    if ( len === 0 ) { return 0; }
    const half = len >> 1;
    const quarter = len >> 2;
    const eighth = len >> 3;
    const samplePositions = [
        0,
        eighth,
        quarter,
        quarter + eighth,
        half,
        half + eighth,
        half + quarter,
        len - 1
    ];
    let hash = 0x811c9dc5;
    for ( const pos of samplePositions ) {
        hash ^= token.charCodeAt(pos);
        // Shift-and-add multiplication by 0x01000193 (the 32-bit FNV prime),
        // then wrap the result back into the unsigned 32-bit range.
        hash += (hash<<1) + (hash<<4) + (hash<<7) + (hash<<8) + (hash<<24);
        hash >>>= 0;
    }
    // Keep bits 15-4 only; bits 3-0 stay cleared so that callers can OR
    // their own flag bits into the result.
    const masked = hash & 0xFFF0;
    // Can't return 0, it's reserved for empty string.
    return masked === 0 ? 0xfff0 : masked;
};
api.compileHostnameToHash = function(hostname) {
    // Plain hostname: hash whatever `domainFromHostnameNoCache` returns
    // for it.
    if ( hostname.endsWith('.*') === false ) {
        return api.makeHash(µb.URI.domainFromHostnameNoCache(hostname));
    }
    // Hostname ending with `.*`: hash only the last label together with the
    // trailing `.*` (e.g. `www.example.*` is hashed as `example.*`). When
    // there is no dot before that label, the hostname is hashed as is.
    const dotPos = hostname.lastIndexOf('.', hostname.length - 3);
    return api.makeHash(dotPos === -1 ? hostname : hostname.slice(dotPos + 1));
};
// https://github.com/chrisaljoudi/uBlock/issues/1004
// Detect and report invalid CSS selectors.