Refactor runtime storage of specific cosmetic filters

This was a TODO item:
- 07cbae66a4/src/js/cosmetic-filtering.js (L375)

µBlock.staticExtFilteringEngine.HostnameBasedDB has been
refactored to accommodate the storage of specific cosmetic
filters.

As a result of this refactoring:

- Memory usage has been further decreased
- Performance of selector retrieval marginally
  improved
- New internal representation opens the door
  to use a specialized version of HNTrie, which
  should further improve performance/memory
  usage
This commit is contained in:
Raymond Hill 2019-05-14 08:52:34 -04:00
parent 8a312b9bbb
commit 93f80eedfa
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
6 changed files with 271 additions and 624 deletions

View File

@ -137,8 +137,8 @@ const µBlock = (function() { // jshint ignore:line
// Read-only
systemSettings: {
compiledMagic: 14, // Increase when compiled format changes
selfieMagic: 14 // Increase when selfie format changes
compiledMagic: 15, // Increase when compiled format changes
selfieMagic: 15 // Increase when selfie format changes
},
restoreBackupSettings: {

View File

@ -27,8 +27,8 @@
/******************************************************************************/
let µb = µBlock;
let cosmeticSurveyingMissCountMax =
const µb = µBlock;
const cosmeticSurveyingMissCountMax =
parseInt(vAPI.localStorage.getItem('cosmeticSurveyingMissCountMax'), 10) ||
15;
@ -38,212 +38,30 @@ window.addEventListener('webextFlavor', function() {
supportsUserStylesheets = vAPI.webextFlavor.soup.has('user_stylesheet');
}, { once: true });
/*******************************************************************************
Each filter class will register itself in the map.
IMPORTANT: any change which modifies the mapping will have to be
reflected with µBlock.systemSettings.compiledMagic.
**/
let filterClasses = [];
let registerFilterClass = function(ctor) {
filterClasses[ctor.prototype.fid] = ctor;
};
let filterFromCompiledData = function(args) {
return filterClasses[args[0]].load(args);
};
/******************************************************************************/
/******************************************************************************/
// One hostname => one selector
const FilterOneOne = function(hostname, selector) {
this.hostname = hostname;
this.selector = selector;
};
FilterOneOne.prototype = {
fid: 8,
// Since this class can hold only one single selector, adding a new
// hostname-selector requires to morph the filter instance into a
// better-suited class.
add: function(hostname, selector) {
if ( hostname === this.hostname ) {
return new FilterOneMany(
this.hostname,
[ this.selector, selector ]
);
}
return new FilterManyAny([
[ this.hostname, this.selector ],
[ hostname, selector ]
]);
},
retrieve: function(target, out) {
if ( target.endsWith(this.hostname) === false ) { return; }
const i = target.length - this.hostname.length;
if ( i !== 0 && target.charCodeAt(i-1) !== 0x2E /* '.' */ ) { return; }
out.add(this.selector);
},
compile: function() {
return [ this.fid, this.hostname, this.selector ];
const SelectorCacheEntry = class {
constructor() {
this.reset();
}
};
FilterOneOne.load = function(data) {
return new FilterOneOne(data[1], data[2]);
};
registerFilterClass(FilterOneOne);
/******************************************************************************/
// One hostname => many selectors
const FilterOneMany = function(hostname, selectors) {
this.hostname = hostname;
this.selectors = selectors;
};
FilterOneMany.prototype = {
fid: 9,
// Since this class can hold selectors for only one specific hostname,
// adding a new hostname will require to morph the filter instance into a
// better-suited class.
add: function(hostname, selector) {
if ( hostname === this.hostname ) {
this.selectors.push(selector);
return this;
}
return new FilterManyAny([
[ this.hostname, this.selectors ],
[ hostname, selector ]
]);
},
retrieve: function(target, out) {
if ( target.endsWith(this.hostname) === false ) { return; }
const i = target.length - this.hostname.length;
if ( i !== 0 && target.charCodeAt(i-1) !== 0x2E /* '.' */ ) { return; }
for ( let selector of this.selectors ) {
out.add(selector);
}
},
compile: function() {
return [ this.fid, this.hostname, this.selectors ];
}
};
FilterOneMany.load = function(data) {
return new FilterOneMany(data[1], data[2]);
};
registerFilterClass(FilterOneMany);
/******************************************************************************/
// Many hostnames => one or many selectors
let FilterManyAny = function(entries) {
this.entries = new Map(entries);
};
FilterManyAny.prototype = {
fid: 10,
add: function(hostname, selector) {
const selectors = this.entries.get(hostname);
if ( selectors === undefined ) {
this.entries.set(hostname, selector);
} else if ( typeof selectors === 'string' ) {
this.entries.set(hostname, [ selectors, selector ]);
} else {
selectors.push(selector);
}
},
retrieve: function(target, out) {
for ( const entry of this.entries ) {
const hostname = entry[0];
if ( target.endsWith(hostname) === false ) { continue; }
const i = target.length - hostname.length;
if ( i !== 0 && target.charCodeAt(i-1) !== 0x2E /* '.' */ ) {
continue;
}
const selectors = entry[1];
if ( typeof selectors === 'string' ) {
out.add(selectors);
continue;
}
for ( const selector of selectors ) {
out.add(selector);
}
}
},
compile: function() {
return [ this.fid, Array.from(this.entries) ];
}
};
FilterManyAny.load = function(data) {
return new FilterManyAny(data[1]);
};
registerFilterClass(FilterManyAny);
/******************************************************************************/
/******************************************************************************/
let SelectorCacheEntry = function() {
this.reset();
};
/******************************************************************************/
SelectorCacheEntry.junkyard = [];
SelectorCacheEntry.factory = function() {
let entry = SelectorCacheEntry.junkyard.pop();
if ( entry ) {
return entry.reset();
}
return new SelectorCacheEntry();
};
/******************************************************************************/
let netSelectorCacheLowWaterMark = 20;
let netSelectorCacheHighWaterMark = 30;
/******************************************************************************/
SelectorCacheEntry.prototype = {
reset: function() {
reset() {
this.cosmetic = new Set();
this.cosmeticSurveyingMissCount = 0;
this.net = new Map();
this.lastAccessTime = Date.now();
return this;
},
}
dispose: function() {
dispose() {
this.cosmetic = this.net = null;
if ( SelectorCacheEntry.junkyard.length < 25 ) {
SelectorCacheEntry.junkyard.push(this);
}
},
}
addCosmetic: function(details) {
addCosmetic(details) {
let selectors = details.selectors,
i = selectors.length || 0;
// https://github.com/gorhill/uBlock/issues/2011
@ -258,50 +76,52 @@ SelectorCacheEntry.prototype = {
while ( i-- ) {
this.cosmetic.add(selectors[i]);
}
},
}
addNet: function(selectors) {
addNet(selectors) {
if ( typeof selectors === 'string' ) {
this.addNetOne(selectors, Date.now());
} else {
this.addNetMany(selectors, Date.now());
}
// Net request-derived selectors: I limit the number of cached selectors,
// as I expect cases where the blocked net-requests are never the
// exact same URL.
if ( this.net.size < netSelectorCacheHighWaterMark ) { return; }
// Net request-derived selectors: I limit the number of cached
// selectors, as I expect cases where the blocked net-requests
// are never the exact same URL.
if ( this.net.size < SelectorCacheEntry.netHighWaterMark ) {
return;
}
let dict = this.net;
let keys = Array.from(dict.keys()).sort(function(a, b) {
return dict.get(b) - dict.get(a);
}).slice(netSelectorCacheLowWaterMark);
}).slice(SelectorCacheEntry.netLowWaterMark);
let i = keys.length;
while ( i-- ) {
dict.delete(keys[i]);
}
},
}
addNetOne: function(selector, now) {
addNetOne(selector, now) {
this.net.set(selector, now);
},
}
addNetMany: function(selectors, now) {
addNetMany(selectors, now) {
let i = selectors.length || 0;
while ( i-- ) {
this.net.set(selectors[i], now);
}
},
}
add: function(details) {
add(details) {
this.lastAccessTime = Date.now();
if ( details.type === 'cosmetic' ) {
this.addCosmetic(details);
} else {
this.addNet(details.selectors);
}
},
}
// https://github.com/chrisaljoudi/uBlock/issues/420
remove: function(type) {
remove(type) {
this.lastAccessTime = Date.now();
if ( type === undefined || type === 'cosmetic' ) {
this.cosmetic.clear();
@ -310,21 +130,21 @@ SelectorCacheEntry.prototype = {
if ( type === undefined || type === 'net' ) {
this.net.clear();
}
},
}
retrieveToArray: function(iterator, out) {
retrieveToArray(iterator, out) {
for ( let selector of iterator ) {
out.push(selector);
}
},
}
retrieveToSet: function(iterator, out) {
retrieveToSet(iterator, out) {
for ( let selector of iterator ) {
out.add(selector);
}
},
}
retrieve: function(type, out) {
retrieve(type, out) {
this.lastAccessTime = Date.now();
let iterator = type === 'cosmetic' ? this.cosmetic : this.net.keys();
if ( Array.isArray(out) ) {
@ -333,8 +153,20 @@ SelectorCacheEntry.prototype = {
this.retrieveToSet(iterator, out);
}
}
static factory() {
const entry = SelectorCacheEntry.junkyard.pop();
if ( entry ) {
return entry.reset();
}
return new SelectorCacheEntry();
}
};
SelectorCacheEntry.netLowWaterMark = 20;
SelectorCacheEntry.netHighWaterMark = 30;
SelectorCacheEntry.junkyard = [];
/******************************************************************************/
/******************************************************************************/
@ -353,7 +185,7 @@ SelectorCacheEntry.prototype = {
// Generic filters can only be enforced once the main document is loaded.
// Specific filters can be enforced before the main document is loaded.
let FilterContainer = function() {
const FilterContainer = function() {
this.reHasUnicode = /[^\x00-\x7F]/;
this.rePlainSelector = /^[#.][\w\\-]+/;
this.rePlainSelectorEscaped = /^[#.](?:\\[0-9A-Fa-f]+ |\\.|\w|-)+/;
@ -366,17 +198,14 @@ let FilterContainer = function() {
this.selectorCachePruneDelay = 10 * 60 * 1000; // 10 minutes
this.selectorCacheAgeMax = 120 * 60 * 1000; // 120 minutes
this.selectorCacheCountMin = 25;
this.netSelectorCacheCountMax = netSelectorCacheHighWaterMark;
this.netSelectorCacheCountMax = SelectorCacheEntry.netHighWaterMark;
this.selectorCacheTimer = null;
// generic exception filters
this.genericDonthideSet = new Set();
// TODO: Think about reusing µb.staticExtFilteringEngine.HostnameBasedDB
// for both specific and procedural filters. This would require some
// refactoring.
// hostname, entity-based filters
this.specificFilters = new Map();
// specific filters
this.specificFilters = new µb.staticExtFilteringEngine.HostnameBasedDB(2);
// low generic cosmetic filters, organized by id/class then simple/complex.
this.lowlyGeneric = Object.create(null);
@ -464,7 +293,8 @@ FilterContainer.prototype.reset = function() {
/******************************************************************************/
FilterContainer.prototype.freeze = function() {
this.duplicateBuster = new Set();
this.duplicateBuster.clear();
this.specificFilters.collectGarbage();
this.hasGenericHide =
this.lowlyGeneric.id.simple.size !== 0 ||
@ -694,7 +524,7 @@ FilterContainer.prototype.compileSpecificSelector = function(
unhide ^= 1;
}
let compiled = µb.staticExtFilteringEngine.compileSelector(parsed.suffix);
const compiled = µb.staticExtFilteringEngine.compileSelector(parsed.suffix);
if ( compiled === undefined ) {
const who = writer.properties.get('assetKey') || '?';
µb.logger.writeOne({
@ -705,19 +535,16 @@ FilterContainer.prototype.compileSpecificSelector = function(
return;
}
let hash = µb.staticExtFilteringEngine.compileHostnameToHash(hostname);
// Exception?
let kind = 0;
if ( unhide === 1 ) {
hash |= 0b0001;
kind |= 0b01; // Exception
}
if ( compiled.charCodeAt(0) === 0x7B /* '{' */ ) {
kind |= 0b10; // Procedural
}
// Procedural?
if ( compiled.charCodeAt(0) === 0x7B ) {
hash |= 0b0010;
}
writer.push([ 8, hash, hostname, compiled ]);
writer.push([ 8, hostname, kind, compiled ]);
};
/******************************************************************************/
@ -739,14 +566,14 @@ FilterContainer.prototype.fromCompiledContent = function(reader, options) {
while ( reader.next() ) {
this.acceptedCount += 1;
let fingerprint = reader.fingerprint();
const fingerprint = reader.fingerprint();
if ( this.duplicateBuster.has(fingerprint) ) {
this.discardedCount += 1;
continue;
}
this.duplicateBuster.add(fingerprint);
let args = reader.args();
const args = reader.args();
switch ( args[0] ) {
@ -805,20 +632,7 @@ FilterContainer.prototype.fromCompiledContent = function(reader, options) {
// example.com, kind, .promoted-tweet
// example.*, kind, .promoted-tweet
case 8:
bucket = this.specificFilters.get(args[1]);
if ( bucket === undefined ) {
this.specificFilters.set(
args[1],
new FilterOneOne(args[2], args[3])
);
} else if ( bucket instanceof FilterManyAny ) {
bucket.add(args[2], args[3]);
} else /* can morph, so we need to replace entry in map */ {
this.specificFilters.set(
args[1],
bucket.add(args[2], args[3])
);
}
this.specificFilters.store(args[1], args[2], args[3]);
break;
default:
@ -856,21 +670,7 @@ FilterContainer.prototype.skipGenericCompiledContent = function(reader) {
// example.com, kind, .promoted-tweet
// example.*, kind, .promoted-tweet
case 8:
this.duplicateBuster.add(fingerprint);
const bucket = this.specificFilters.get(args[1]);
if ( bucket === undefined ) {
this.specificFilters.set(
args[1],
new FilterOneOne(args[2], args[3])
);
} else if ( bucket instanceof FilterManyAny ) {
bucket.add(args[2], args[3]);
} else /* can morph, so we need to replace entry in map */ {
this.specificFilters.set(
args[1],
bucket.add(args[2], args[3])
);
}
this.specificFilters.store(args[1], args[2], args[3]);
break;
default:
@ -895,18 +695,10 @@ FilterContainer.prototype.skipCompiledContent = function(reader) {
/******************************************************************************/
FilterContainer.prototype.toSelfie = function() {
let selfieFromMap = function(map) {
let entries = [];
for ( let entry of map ) {
entries.push([ entry[0], entry[1].compile() ]);
}
return entries;
};
return {
acceptedCount: this.acceptedCount,
discardedCount: this.discardedCount,
specificFilters: selfieFromMap(this.specificFilters),
specificFilters: this.specificFilters.toSelfie(),
hasGenericHide: this.hasGenericHide,
lowlyGenericSID: Array.from(this.lowlyGeneric.id.simple),
lowlyGenericCID: Array.from(this.lowlyGeneric.id.complex),
@ -921,17 +713,9 @@ FilterContainer.prototype.toSelfie = function() {
/******************************************************************************/
FilterContainer.prototype.fromSelfie = function(selfie) {
let mapFromSelfie = function(entries) {
let out = new Map();
for ( let entry of entries ) {
out.set(entry[0], filterFromCompiledData(entry[1]));
}
return out;
};
this.acceptedCount = selfie.acceptedCount;
this.discardedCount = selfie.discardedCount;
this.specificFilters = mapFromSelfie(selfie.specificFilters);
this.specificFilters.fromSelfie(selfie.specificFilters);
this.hasGenericHide = selfie.hasGenericHide;
this.lowlyGeneric.id.simple = new Set(selfie.lowlyGenericSID);
this.lowlyGeneric.id.complex = new Map(selfie.lowlyGenericCID);
@ -1163,8 +947,8 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
) {
//console.time('cosmeticFilteringEngine.retrieveSpecificSelectors');
let hostname = request.hostname,
cacheEntry = this.selectorCache.get(hostname);
const hostname = request.hostname;
const cacheEntry = this.selectorCache.get(hostname);
// https://github.com/chrisaljoudi/uBlock/issues/587
// out.ready will tell the content script the cosmetic filtering engine is
@ -1173,7 +957,7 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
// https://github.com/chrisaljoudi/uBlock/issues/497
// Generic exception filters are to be applied on all pages.
let out = {
const out = {
ready: this.frozen,
hostname: hostname,
domain: request.domain,
@ -1190,79 +974,14 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
};
if ( options.noCosmeticFiltering !== true ) {
let entity = request.entity,
domainHash = µb.staticExtFilteringEngine.makeHash(request.domain),
entityHash = µb.staticExtFilteringEngine.makeHash(entity),
bucket;
// Exception cosmetic filters: prime with generic exception filters.
let exceptionSet = this.setRegister0;
const exceptionSet = this.setRegister0;
// Generic exceptions (should be extremely rare).
for ( let exception of this.genericDonthideSet ) {
exceptionSet.add(exception);
}
// Specific exception cosmetic filters.
if ( domainHash !== 0 ) {
bucket = this.specificFilters.get(domainHash | 0b0001);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, exceptionSet);
}
bucket = this.specificFilters.get(domainHash | 0b0011);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, exceptionSet);
}
}
// Specific entity-based exception cosmetic filters.
if ( entityHash !== 0 ) {
bucket = this.specificFilters.get(entityHash | 0b0001);
if ( bucket !== undefined ) {
bucket.retrieve(entity, exceptionSet);
}
bucket = this.specificFilters.get(entityHash | 0b0011);
if ( bucket !== undefined ) {
bucket.retrieve(entity, exceptionSet);
}
}
// Special bucket for those filters without a valid
// domain name as per PSL.
bucket = this.specificFilters.get(0 | 0b0001);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, exceptionSet);
}
bucket = this.specificFilters.get(0 | 0b0011);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, exceptionSet);
}
if ( exceptionSet.size !== 0 ) {
out.exceptionFilters = Array.from(exceptionSet);
}
// Declarative cosmetic filters.
// TODO: Should I go one step further and store specific simple and
// specific complex in different collections? This could simplify
// slightly content script code.
let specificSet = this.setRegister1;
// Specific cosmetic filters.
if ( domainHash !== 0 ) {
bucket = this.specificFilters.get(domainHash | 0b0000);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, specificSet);
}
}
// Specific entity-based cosmetic filters.
if ( entityHash !== 0 ) {
bucket = this.specificFilters.get(entityHash | 0b0000);
if ( bucket !== undefined ) {
bucket.retrieve(entity, specificSet);
}
}
// https://github.com/chrisaljoudi/uBlock/issues/188
// Special bucket for those filters without a valid domain name
// as per PSL
bucket = this.specificFilters.get(0 | 0b0000);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, specificSet);
}
const specificSet = this.setRegister1;
// Cached cosmetic filters: these are always declarative.
if ( cacheEntry !== undefined ) {
cacheEntry.retrieve('cosmetic', specificSet);
@ -1272,32 +991,21 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
}
}
// Procedural cosmetic filters.
let proceduralSet = this.setRegister2;
// Specific cosmetic filters.
if ( domainHash !== 0 ) {
bucket = this.specificFilters.get(domainHash | 0b0010);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, proceduralSet);
}
}
// Specific entity-based cosmetic filters.
if ( entityHash !== 0 ) {
bucket = this.specificFilters.get(entityHash | 0b0010);
if ( bucket !== undefined ) {
bucket.retrieve(entity, proceduralSet);
}
}
// https://github.com/chrisaljoudi/uBlock/issues/188
// Special bucket for those filters without a valid domain name
// as per PSL
bucket = this.specificFilters.get(0 | 0b0010);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, proceduralSet);
const proceduralSet = this.setRegister2;
this.specificFilters.retrieve(
hostname,
[ specificSet, exceptionSet, proceduralSet, exceptionSet ]
);
if ( request.entity !== '' ) {
this.specificFilters.retrieve(
`${hostname.slice(0, -request.domain.length)}${request.entity}`,
[ specificSet, exceptionSet, proceduralSet, exceptionSet ]
);
}
// Apply exceptions.
for ( let exception of exceptionSet ) {
for ( const exception of exceptionSet ) {
specificSet.delete(exception);
proceduralSet.delete(exception);
}
@ -1317,15 +1025,15 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
// string in memory, which I have observed occurs when the string is
// stored directly as a value in a Map.
if ( options.noGenericCosmeticFiltering !== true ) {
let exceptionHash = out.exceptionFilters.join();
for ( let type in this.highlyGeneric ) {
let entry = this.highlyGeneric[type];
const exceptionHash = out.exceptionFilters.join();
for ( const type in this.highlyGeneric ) {
const entry = this.highlyGeneric[type];
let str = entry.mru.lookup(exceptionHash);
if ( str === undefined ) {
str = { s: entry.str };
let genericSet = entry.dict;
let hit = false;
for ( let exception of exceptionSet ) {
for ( const exception of exceptionSet ) {
if ( (hit = genericSet.has(exception)) ) { break; }
}
if ( hit ) {
@ -1349,7 +1057,7 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
// CSS selectors for collapsible blocked elements
if ( cacheEntry ) {
let networkFilters = [];
const networkFilters = [];
cacheEntry.retrieve('net', networkFilters);
out.networkFilters = networkFilters.join(',\n');
}
@ -1362,7 +1070,7 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
request.tabId !== undefined &&
request.frameId !== undefined
) {
let injectedHideFilters = [];
const injectedHideFilters = [];
if ( out.declarativeFilters.length !== 0 ) {
injectedHideFilters.push(out.declarativeFilters.join(',\n'));
out.declarativeFilters = [];
@ -1380,7 +1088,7 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
out.highGenericHideComplex = '';
}
out.injectedHideFilters = injectedHideFilters.join(',\n');
let details = {
const details = {
code: '',
cssOrigin: 'user',
frameId: request.frameId,

View File

@ -28,7 +28,7 @@
const pselectors = new Map();
const duplicates = new Set();
let filterDB = new µb.staticExtFilteringEngine.HostnameBasedDB(),
let filterDB = new µb.staticExtFilteringEngine.HostnameBasedDB(2),
acceptedCount = 0,
discardedCount = 0,
docRegister;
@ -207,7 +207,7 @@
.setDocOriginFromURL(details.url)
.setFilter({
source: 'cosmetic',
raw: (exception === 0 ? '##' : '#@#') + '^' + selector
raw: `${exception === 0 ? '##' : '#@#'}^${selector}`
})
.toLogger();
};
@ -261,6 +261,7 @@
api.freeze = function() {
duplicates.clear();
filterDB.collectGarbage();
};
api.compile = function(parsed, writer) {
@ -283,16 +284,14 @@
for ( const hn of parsed.hostnames ) {
if ( hn.charCodeAt(0) === 0x7E /* '~' */ ) { continue; }
let hash = µb.staticExtFilteringEngine.compileHostnameToHash(hn);
let kind = 0;
if ( parsed.exception ) {
hash |= 0b0001;
kind |= 0b01;
}
writer.push([
compiled.charCodeAt(0) !== 0x7B /* '{' */ ? 64 : 65,
hash,
hn,
compiled
]);
if ( compiled.charCodeAt(0) === 0x7B /* '{' */ ) {
kind |= 0b10;
}
writer.push([ 64, hn, kind, compiled ]);
}
};
@ -312,11 +311,7 @@
}
duplicates.add(fingerprint);
const args = reader.args();
filterDB.add(args[1], {
type: args[0],
hostname: args[2],
selector: args[3]
});
filterDB.store(args[1], args[2], args[3]);
}
};
@ -332,62 +327,58 @@
return;
}
const toRemoveArray = [];
const domainHash = µb.staticExtFilteringEngine.makeHash(details.domain);
if ( domainHash !== 0 ) {
filterDB.retrieve(domainHash, hostname, toRemoveArray);
}
const entity = details.entity;
const entityHash = µb.staticExtFilteringEngine.makeHash(entity);
if ( entityHash !== 0 ) {
filterDB.retrieve(entityHash, entity, toRemoveArray);
}
filterDB.retrieve(0, hostname, toRemoveArray);
if ( toRemoveArray.length === 0 ) { return; }
const plains = new Set();
const procedurals = new Set();
const exceptions = new Set();
let notToRemoveArray = [];
if ( domainHash !== 0 ) {
filterDB.retrieve(domainHash | 0b0001, hostname, notToRemoveArray);
}
if ( entityHash !== 0 ) {
filterDB.retrieve(entityHash | 0b0001, entity, notToRemoveArray);
}
filterDB.retrieve(0 | 0b0001, hostname, notToRemoveArray);
if ( notToRemoveArray.length === 0 ) {
return toRemoveArray;
// Collect every selector stored for `hostname` and its ancestor hostnames.
// The output array is indexed by the 2-bit `kind` stored with each filter:
// bit 0 set => exception, bit 1 set => procedural. Both exception kinds
// are funnelled into the same `exceptions` set.
filterDB.retrieve(
    hostname,
    [ plains, exceptions, procedurals, exceptions ]
);
// Entity-based lookup: swap the trailing domain part of the hostname for
// the entity while keeping any subdomain labels in front of it.
// The *length* of the domain must be passed to `slice()`: negating the
// domain string itself evaluates to NaN, which makes `slice(0, NaN)`
// return an empty string and silently drops the subdomain labels.
if ( details.entity !== '' ) {
    filterDB.retrieve(
        `${hostname.slice(0, -details.domain.length)}${details.entity}`,
        [ plains, exceptions, procedurals, exceptions ]
    );
}
const toRemoveMap = new Map();
for ( const entry of toRemoveArray ) {
toRemoveMap.set(entry.selector, entry);
if ( plains.size === 0 && procedurals.size === 0 ) { return; }
const out = { plains, procedurals };
if ( exceptions.size === 0 ) {
return out;
}
for ( const entry of notToRemoveArray ) {
if ( toRemoveMap.has(entry.selector) === false ) { continue; }
toRemoveMap.delete(entry.selector);
if ( µb.logger.enabled === false ) { continue; }
let selector = entry.selector;
if ( entry.type === 65 ) {
selector = JSON.parse(selector).raw;
for ( const selector of exceptions ) {
if ( plains.has(selector) ) {
plains.delete(selector);
logOne(details, 1, selector);
continue;
}
if ( procedurals.has(selector) ) {
procedurals.delete(selector);
logOne(details, 1, JSON.parse(selector).raw);
continue;
}
logOne(details, 1, selector);
}
if ( toRemoveMap.size === 0 ) { return; }
return Array.from(toRemoveMap.values());
if ( plains.size !== 0 || procedurals.size !== 0 ) {
return out;
}
};
api.apply = function(doc, details) {
docRegister = doc;
let modified = false;
for ( const entry of details.selectors ) {
if ( entry.type === 64 ) {
if ( applyCSSSelector(details, entry.selector) ) {
modified = true;
}
} else /* if ( entry.type === 65 ) */ {
if ( applyProceduralSelector(details, entry.selector) ) {
modified = true;
}
for ( const selector of details.selectors.plains ) {
if ( applyCSSSelector(details, selector) ) {
modified = true;
}
}
for ( const selector of details.selectors.procedurals ) {
if ( applyProceduralSelector(details, selector) ) {
modified = true;
}
}
@ -400,7 +391,7 @@
};
api.fromSelfie = function(selfie) {
filterDB = new µb.staticExtFilteringEngine.HostnameBasedDB(selfie);
filterDB = new µb.staticExtFilteringEngine.HostnameBasedDB(2, selfie);
pselectors.clear();
};

View File

@ -216,38 +216,26 @@ const fromCosmeticFilter = function(details) {
break;
// Specific cosmetic filtering
case 8:
if ( exception !== ((fargs[1] & 0b0001) !== 0) ) { break; }
isProcedural = (fargs[1] & 0b0010) !== 0;
// HTML filtering
case 64:
if ( exception !== ((fargs[2] & 0b01) !== 0) ) { break; }
isProcedural = (fargs[2] & 0b10) !== 0;
if (
isProcedural === false && fargs[3] !== selector ||
isProcedural && JSON.parse(fargs[3]).raw !== selector
) {
break;
}
if ( hostnameMatches(fargs[2]) ) {
found = fargs[2] + prefix + selector;
if ( hostnameMatches(fargs[1]) ) {
found = fargs[1] + prefix + selector;
}
break;
// Scriptlet injection
case 32:
if ( exception !== ((fargs[1] & 0b0001) !== 0) ) { break; }
if ( exception !== ((fargs[2] & 1) !== 0) ) { break; }
if ( fargs[3] !== selector ) { break; }
if ( hostnameMatches(fargs[2]) ) {
found = fargs[2] + prefix + selector;
}
break;
// HTML filtering
case 64: // CSS selector
case 65: // procedural
if ( exception !== ((fargs[1] & 0b0001) !== 0) ) { break; }
if (
fargs[0] === 64 && fargs[3] !== selector ||
fargs[0] === 65 && JSON.parse(fargs[3]).raw !== selector
) {
break;
}
if ( hostnameMatches(fargs[2]) ) {
found = fargs[2] + prefix + selector;
if ( hostnameMatches(fargs[1]) ) {
found = fargs[1] + prefix + selector;
}
break;
}

View File

@ -33,7 +33,7 @@
let acceptedCount = 0,
discardedCount = 0,
scriptletDB = new µb.staticExtFilteringEngine.HostnameBasedDB();
scriptletDB = new µb.staticExtFilteringEngine.HostnameBasedDB(1);
const api = {
get acceptedCount() {
@ -241,6 +241,7 @@
api.freeze = function() {
duplicates.clear();
scriptletDB.collectGarbage();
};
api.compile = function(parsed, writer) {
@ -251,7 +252,7 @@
if ( parsed.hostnames.length === 0 ) {
if ( parsed.exception ) {
writer.push([ 32, 0 | 0b0001, '', parsed.suffix ]);
writer.push([ 32, '', 1, parsed.suffix ]);
}
return;
}
@ -265,14 +266,14 @@
if ( negated ) {
hn = hn.slice(1);
}
let hash = µb.staticExtFilteringEngine.compileHostnameToHash(hn);
let kind = 0;
if ( parsed.exception ) {
if ( negated ) { continue; }
hash |= 0b0001;
kind |= 1;
} else if ( negated ) {
hash |= 0b0001;
kind |= 1;
}
writer.push([ 32, hash, hn, parsed.suffix ]);
writer.push([ 32, hn, kind, parsed.suffix ]);
}
};
@ -295,10 +296,7 @@
duplicates.add(fingerprint);
const args = reader.args();
if ( args.length < 4 ) { continue; }
scriptletDB.add(
args[1],
{ hostname: args[2], token: args[3].slice(4, -1) }
);
scriptletDB.store(args[1], args[2], args[3].slice(4, -1));
}
};
@ -320,48 +318,36 @@
return;
}
const domain = request.domain;
const entity = request.entity;
const scriptlets = new Set();
const exceptions = exceptionsRegister;
const entries = [];
const domainHash = µb.staticExtFilteringEngine.makeHash(domain);
if ( domainHash !== 0 ) {
scriptletDB.retrieve(domainHash, hostname, entries);
// Collect every scriptlet token stored for `hostname` and its ancestor
// hostnames. The output array is indexed by the 1-bit `kind` stored with
// each filter: 0 => scriptlet to inject, 1 => exception.
scriptletDB.retrieve(
    hostname,
    [ scriptlets, exceptions ]
);
// Entity-based lookup: swap the trailing domain part of the hostname for
// the entity while keeping any subdomain labels in front of it.
// The *length* of the domain must be passed to `slice()`: negating the
// domain string itself evaluates to NaN, which makes `slice(0, NaN)`
// return an empty string and silently drops the subdomain labels.
if ( request.entity !== '' ) {
    scriptletDB.retrieve(
        `${hostname.slice(0, -request.domain.length)}${request.entity}`,
        [ scriptlets, exceptions ]
    );
}
const entityHash = µb.staticExtFilteringEngine.makeHash(entity);
if ( entityHash !== 0 ) {
scriptletDB.retrieve(entityHash, entity, entries);
}
scriptletDB.retrieve(0, hostname, entries);
for ( const entry of entries ) {
lookupScriptlet(entry.token, reng, scriptletsRegister);
for ( const token of scriptlets ) {
lookupScriptlet(token, reng, scriptletsRegister);
}
if ( scriptletsRegister.size === 0 ) { return; }
// Collect exception filters.
entries.length = 0;
if ( domainHash !== 0 ) {
scriptletDB.retrieve(domainHash | 0b0001, hostname, entries);
}
if ( entityHash !== 0 ) {
scriptletDB.retrieve(entityHash | 0b0001, entity, entries);
}
scriptletDB.retrieve(0 | 0b0001, hostname, entries);
for ( const entry of entries ) {
exceptionsRegister.add(entry.token);
}
// Return an array of scriptlets, and log results if needed.
const out = [];
const loggerEnabled = µb.logger.enabled;
for ( const entry of scriptletsRegister ) {
const isException = exceptionsRegister.has(entry[0]);
for ( const [ token, code ] of scriptletsRegister ) {
const isException = exceptionsRegister.has(token);
if ( isException === false ) {
out.push(entry[1]);
out.push(code);
}
if ( loggerEnabled ) {
logOne(isException, entry[0], request);
logOne(isException, token, request);
}
}
@ -408,7 +394,7 @@
};
api.fromSelfie = function(selfie) {
scriptletDB = new µb.staticExtFilteringEngine.HostnameBasedDB(selfie);
scriptletDB = new µb.staticExtFilteringEngine.HostnameBasedDB(1, selfie);
};
return api;

View File

@ -486,83 +486,111 @@
// Public classes
//--------------------------------------------------------------------------
api.HostnameBasedDB = function(selfie) {
if ( selfie !== undefined ) {
this.db = new Map(selfie.map);
this.size = selfie.size;
} else {
this.db = new Map();
api.HostnameBasedDB = class {
constructor(nBits, selfie = undefined) {
this.nBits = nBits;
this.timer = undefined;
this.strToIdMap = new Map();
if ( selfie !== undefined ) {
this.fromSelfie(selfie);
return;
}
this.hostnameToSlotIdMap = new Map();
this.hostnameSlots = [];
this.strSlots = [];
this.size = 0;
}
};
api.HostnameBasedDB.prototype = {
add: function(hash, entry) {
let bucket = this.db.get(hash);
if ( bucket === undefined ) {
this.db.set(hash, entry);
} else if ( Array.isArray(bucket) ) {
bucket.push(entry);
} else {
this.db.set(hash, [ bucket, entry ]);
}
// Associate string `s`, tagged with `bits`, to hostname `hn`.
//   hn:   key used in hostnameToSlotIdMap.
//   bits: integer OR-ed into the low-order bits of the stored id.
//         Assumes it fits in `nBits` bits — TODO confirm with callers.
//   s:    string kept in strSlots and referenced through its index.
// `size` is incremented on every call, whether or not `s` or `hn` were
// already known.
store(hn, bits, s) {
this.size += 1;
// NOTE(review): the next three lines do not fit inside store(); they look
// like leftovers of a prototype-style `clear` method — confirm whether
// they should be here.
},
clear: function() {
this.db.clear();
// Reuse the slot of an identical string when the lookup index still knows
// about it, otherwise append a new slot. Once strToIdMap has been emptied
// by collectGarbage(), a string seen before gets a new slot.
let iStr = this.strToIdMap.get(s);
if ( iStr === undefined ) {
iStr = this.strSlots.length;
this.strSlots.push(s);
this.strToIdMap.set(s, iStr);
// The lookup index is in use: request its deferred disposal unless a
// request is already pending.
if ( this.timer === undefined ) {
this.collectGarbage(true);
}
}
// Pack the string slot index (high bits) and `bits` (low bits) into one
// integer.
const strId = iStr << this.nBits | bits;
const iHn = this.hostnameToSlotIdMap.get(hn);
// First id for this hostname: the slot holds the bare integer.
if ( iHn === undefined ) {
this.hostnameToSlotIdMap.set(hn, this.hostnameSlots.length);
this.hostnameSlots.push(strId);
return;
}
// Hostname already known: its slot is either a single integer or an array
// of integers; a single integer is promoted to an array on the second id.
const bucket = this.hostnameSlots[iHn];
if ( Array.isArray(bucket) ) {
bucket.push(strId);
} else {
this.hostnameSlots[iHn] = [ bucket, strId ];
}
}
clear() {
this.hostnameToSlotIdMap.clear();
this.hostnameSlots.length = 0;
this.strSlots.length = 0;
this.strToIdMap.clear();
this.size = 0;
},
retrieve: function(hash, hostname, out) {
let bucket = this.db.get(hash);
if ( bucket === undefined ) { return; }
if ( Array.isArray(bucket) === false ) {
bucket = [ bucket ];
}
for ( let entry of bucket ) {
if ( hostname.endsWith(entry.hostname) === false ) {
continue;
}
let i = hostname.length - entry.hostname.length;
if (
i === 0 ||
i === hostname.length ||
hostname.charCodeAt(i-1) === 0x2E /* '.' */
) {
out.push(entry);
}
// Empty strToIdMap, the string => slot index lookup used by store().
//   async === false (default): cancel any pending idle callback, then
//       empty the map right away.
//   async === true: schedule the emptying through requestIdleCallback
//       (with `timeout: 10000`), unless a callback is already pending.
// Only the lookup map is emptied; strSlots and hostnameSlots are not
// touched here.
collectGarbage(async = false) {
if ( async === false ) {
if ( this.timer !== undefined ) {
self.cancelIdleCallback(this.timer);
this.timer = undefined;
}
this.strToIdMap.clear();
return;
}
// NOTE(review): the next two lines do not fit inside collectGarbage();
// they look like leftovers of a prototype-style `toSelfie` method —
// confirm whether they should be here.
},
toSelfie: function() {
// A callback is already scheduled: nothing more to do.
if ( this.timer !== undefined ) { return; }
this.timer = self.requestIdleCallback(
( ) => {
// Reset the handle first so that a later store() can schedule again.
this.timer = undefined;
this.strToIdMap.clear();
},
{ timeout: 10000 }
);
}
retrieve(hostname, out) {
const mask = out.length - 1; // out.length must be power of two
for (;;) {
const filterId = this.hostnameToSlotIdMap.get(hostname);
if ( filterId !== undefined ) {
const bucket = this.hostnameSlots[filterId];
if ( Array.isArray(bucket) ) {
for ( const id of bucket ) {
out[id & mask].add(this.strSlots[id >>> this.nBits]);
}
} else {
out[bucket & mask].add(this.strSlots[bucket >>> this.nBits]);
}
}
if ( hostname === '' ) { break; }
const pos = hostname.indexOf('.');
hostname = pos !== -1 ? hostname.slice(pos + 1) : '';
}
}
// Return a plain-object snapshot of the persistent state, in the shape read
// back by fromSelfie(). The Map is converted to an array of entries;
// hostnameSlots and strSlots are the live arrays, not copies. The transient
// strToIdMap and timer are not part of the snapshot.
toSelfie() {
return {
// NOTE(review): `this.db` is not assigned by the class constructor or by
// fromSelfie(), and fromSelfie() does not read a `map` property; this
// line looks like a leftover — confirm whether it should be here.
map: Array.from(this.db),
hostnameToSlotIdMap: Array.from(this.hostnameToSlotIdMap),
hostnameSlots: this.hostnameSlots,
strSlots: this.strSlots,
size: this.size
};
}
};
// Iteration support: walks the values of `this.db` and, whenever a value is
// an array, yields the elements of that array one by one instead of the
// array itself.
api.HostnameBasedDB.prototype[Symbol.iterator] = (function() {
    class FlatteningIterator {
        constructor(db) {
            // Outer iterator over the map values.
            this.mapIter = db.values();
            // Inner iterator, set only while an array value is being walked.
            this.arrayIter = undefined;
        }

        next() {
            // Keep draining the current array, if any.
            if ( this.arrayIter !== undefined ) {
                const inner = this.arrayIter.next();
                if ( inner.done === false ) { return inner; }
                this.arrayIter = undefined;
            }
            const outer = this.mapIter.next();
            if ( outer.done || Array.isArray(outer.value) === false ) {
                return outer;
            }
            // Array value: start walking it.
            this.arrayIter = outer.value[Symbol.iterator]();
            return this.arrayIter.next(); // array should never be empty
        }
    }

    return function() {
        return new FlatteningIterator(this.db);
    };
})();
fromSelfie(selfie) {
this.hostnameToSlotIdMap = new Map(selfie.hostnameToSlotIdMap);
this.hostnameSlots = selfie.hostnameSlots;
this.strSlots = selfie.strSlots;
this.size = selfie.size;
}
};
//--------------------------------------------------------------------------
// Public methods
@ -584,60 +612,6 @@
resetParsed(parsed);
};
// Layout of the returned 16-bit value:
//
//   HHHHHHHHHHHH0000
//   |           |
//   |           +-- bit  3-0: reserved
//   +-------------- bit 15-4: FNV
//
// Returns 0 only for the empty string. Any other token yields a non-zero
// multiple of 16 no greater than 0xfff0.
api.makeHash = function(token) {
    // Based on: FNV32a
    // http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-reference-source
    // The rest is custom, suited for uBlock: rather than hashing every
    // character, only eight positions spread across the token are sampled.
    const len = token.length;
    if ( len === 0 ) { return 0; }
    const half = len >> 1;
    const quarter = len >> 2;
    const eighth = len >> 3;
    // Multiplication by 16777619 (1 + 2 + 16 + 128 + 256 + 2^24) written as
    // shifts and adds, then brought back to an unsigned 32-bit integer.
    const scramble = h =>
        (h + ((h<<1) + (h<<4) + (h<<7) + (h<<8) + (h<<24))) >>> 0;
    let hash = scramble((0x811c9dc5 ^ token.charCodeAt(0)) >>> 0);
    const sampledAt = [
        eighth,
        quarter,
        quarter + eighth,
        half,
        half + eighth,
        half + quarter,
        len - 1,
    ];
    for ( const pos of sampledAt ) {
        hash = scramble(hash ^ token.charCodeAt(pos));
    }
    const masked = hash & 0xFFF0;
    // Can't return 0, it's reserved for empty string.
    return masked === 0 ? 0xfff0 : masked;
};
// Hash the domain part of `hostname` with api.makeHash().
// - Hostname ending with '.*': the hashed part is whatever follows the last
//   dot found before the trailing '.*' (e.g. 'www.example.*' gives
//   'example.*'), or the whole hostname when there is no such dot.
// - Any other hostname: the hashed part is what
//   µb.URI.domainFromHostname() returns for it.
api.compileHostnameToHash = function(hostname) {
    if ( hostname.endsWith('.*') === false ) {
        return api.makeHash(µb.URI.domainFromHostname(hostname));
    }
    // Search backward from the character just before the trailing '.*'.
    const dot = hostname.lastIndexOf('.', hostname.length - 3);
    return api.makeHash(dot === -1 ? hostname : hostname.slice(dot + 1));
};
// https://github.com/chrisaljoudi/uBlock/issues/1004
// Detect and report invalid CSS selectors.