Refactor runtime storage of specific cosmetic filters

This was a TODO item:
- 07cbae66a4/src/js/cosmetic-filtering.js (L375)

µBlock.staticExtFilteringEngine.HostnameBasedDB has been
refactored to accommodate the storage of specific cosmetic
filters.

As a result of this refactoring:

- Memory usage has been further decreased
- Performance of selector retrieval has marginally
  improved
- The new internal representation opens the door
  to using a specialized version of HNTrie, which
  should further improve performance/memory
  usage
This commit is contained in:
Raymond Hill 2019-05-14 08:52:34 -04:00
parent 8a312b9bbb
commit 93f80eedfa
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
6 changed files with 271 additions and 624 deletions

View File

@ -137,8 +137,8 @@ const µBlock = (function() { // jshint ignore:line
// Read-only // Read-only
systemSettings: { systemSettings: {
compiledMagic: 14, // Increase when compiled format changes compiledMagic: 15, // Increase when compiled format changes
selfieMagic: 14 // Increase when selfie format changes selfieMagic: 15 // Increase when selfie format changes
}, },
restoreBackupSettings: { restoreBackupSettings: {

View File

@ -27,8 +27,8 @@
/******************************************************************************/ /******************************************************************************/
let µb = µBlock; const µb = µBlock;
let cosmeticSurveyingMissCountMax = const cosmeticSurveyingMissCountMax =
parseInt(vAPI.localStorage.getItem('cosmeticSurveyingMissCountMax'), 10) || parseInt(vAPI.localStorage.getItem('cosmeticSurveyingMissCountMax'), 10) ||
15; 15;
@ -38,212 +38,30 @@ window.addEventListener('webextFlavor', function() {
supportsUserStylesheets = vAPI.webextFlavor.soup.has('user_stylesheet'); supportsUserStylesheets = vAPI.webextFlavor.soup.has('user_stylesheet');
}, { once: true }); }, { once: true });
/*******************************************************************************
Each filter class will register itself in the map.
IMPORTANT: any change which modifies the mapping will have to be
reflected with µBlock.systemSettings.compiledMagic.
**/
let filterClasses = [];
let registerFilterClass = function(ctor) {
filterClasses[ctor.prototype.fid] = ctor;
};
let filterFromCompiledData = function(args) {
return filterClasses[args[0]].load(args);
};
/******************************************************************************/
// One hostname => one selector
const FilterOneOne = function(hostname, selector) {
this.hostname = hostname;
this.selector = selector;
};
FilterOneOne.prototype = {
fid: 8,
// Since this class can hold only one single selector, adding a new
// hostname-selector requires to morph the filter instance into a
// better-suited class.
add: function(hostname, selector) {
if ( hostname === this.hostname ) {
return new FilterOneMany(
this.hostname,
[ this.selector, selector ]
);
}
return new FilterManyAny([
[ this.hostname, this.selector ],
[ hostname, selector ]
]);
},
retrieve: function(target, out) {
if ( target.endsWith(this.hostname) === false ) { return; }
const i = target.length - this.hostname.length;
if ( i !== 0 && target.charCodeAt(i-1) !== 0x2E /* '.' */ ) { return; }
out.add(this.selector);
},
compile: function() {
return [ this.fid, this.hostname, this.selector ];
}
};
FilterOneOne.load = function(data) {
return new FilterOneOne(data[1], data[2]);
};
registerFilterClass(FilterOneOne);
/******************************************************************************/
// One hostname => many selectors
const FilterOneMany = function(hostname, selectors) {
this.hostname = hostname;
this.selectors = selectors;
};
FilterOneMany.prototype = {
fid: 9,
// Since this class can hold selectors for only one specific hostname,
// adding a new hostname will require to morph the filter instance into a
// better-suited class.
add: function(hostname, selector) {
if ( hostname === this.hostname ) {
this.selectors.push(selector);
return this;
}
return new FilterManyAny([
[ this.hostname, this.selectors ],
[ hostname, selector ]
]);
},
retrieve: function(target, out) {
if ( target.endsWith(this.hostname) === false ) { return; }
const i = target.length - this.hostname.length;
if ( i !== 0 && target.charCodeAt(i-1) !== 0x2E /* '.' */ ) { return; }
for ( let selector of this.selectors ) {
out.add(selector);
}
},
compile: function() {
return [ this.fid, this.hostname, this.selectors ];
}
};
FilterOneMany.load = function(data) {
return new FilterOneMany(data[1], data[2]);
};
registerFilterClass(FilterOneMany);
/******************************************************************************/
// Many hostnames => one or many selectors
let FilterManyAny = function(entries) {
this.entries = new Map(entries);
};
FilterManyAny.prototype = {
fid: 10,
add: function(hostname, selector) {
const selectors = this.entries.get(hostname);
if ( selectors === undefined ) {
this.entries.set(hostname, selector);
} else if ( typeof selectors === 'string' ) {
this.entries.set(hostname, [ selectors, selector ]);
} else {
selectors.push(selector);
}
},
retrieve: function(target, out) {
for ( const entry of this.entries ) {
const hostname = entry[0];
if ( target.endsWith(hostname) === false ) { continue; }
const i = target.length - hostname.length;
if ( i !== 0 && target.charCodeAt(i-1) !== 0x2E /* '.' */ ) {
continue;
}
const selectors = entry[1];
if ( typeof selectors === 'string' ) {
out.add(selectors);
continue;
}
for ( const selector of selectors ) {
out.add(selector);
}
}
},
compile: function() {
return [ this.fid, Array.from(this.entries) ];
}
};
FilterManyAny.load = function(data) {
return new FilterManyAny(data[1]);
};
registerFilterClass(FilterManyAny);
/******************************************************************************/ /******************************************************************************/
/******************************************************************************/ /******************************************************************************/
let SelectorCacheEntry = function() { const SelectorCacheEntry = class {
constructor() {
this.reset(); this.reset();
};
/******************************************************************************/
SelectorCacheEntry.junkyard = [];
SelectorCacheEntry.factory = function() {
let entry = SelectorCacheEntry.junkyard.pop();
if ( entry ) {
return entry.reset();
} }
return new SelectorCacheEntry();
};
/******************************************************************************/ reset() {
let netSelectorCacheLowWaterMark = 20;
let netSelectorCacheHighWaterMark = 30;
/******************************************************************************/
SelectorCacheEntry.prototype = {
reset: function() {
this.cosmetic = new Set(); this.cosmetic = new Set();
this.cosmeticSurveyingMissCount = 0; this.cosmeticSurveyingMissCount = 0;
this.net = new Map(); this.net = new Map();
this.lastAccessTime = Date.now(); this.lastAccessTime = Date.now();
return this; return this;
}, }
dispose: function() { dispose() {
this.cosmetic = this.net = null; this.cosmetic = this.net = null;
if ( SelectorCacheEntry.junkyard.length < 25 ) { if ( SelectorCacheEntry.junkyard.length < 25 ) {
SelectorCacheEntry.junkyard.push(this); SelectorCacheEntry.junkyard.push(this);
} }
}, }
addCosmetic: function(details) { addCosmetic(details) {
let selectors = details.selectors, let selectors = details.selectors,
i = selectors.length || 0; i = selectors.length || 0;
// https://github.com/gorhill/uBlock/issues/2011 // https://github.com/gorhill/uBlock/issues/2011
@ -258,50 +76,52 @@ SelectorCacheEntry.prototype = {
while ( i-- ) { while ( i-- ) {
this.cosmetic.add(selectors[i]); this.cosmetic.add(selectors[i]);
} }
}, }
addNet: function(selectors) { addNet(selectors) {
if ( typeof selectors === 'string' ) { if ( typeof selectors === 'string' ) {
this.addNetOne(selectors, Date.now()); this.addNetOne(selectors, Date.now());
} else { } else {
this.addNetMany(selectors, Date.now()); this.addNetMany(selectors, Date.now());
} }
// Net request-derived selectors: I limit the number of cached selectors, // Net request-derived selectors: I limit the number of cached
// as I expect cases where the blocked net-requests are never the // selectors, as I expect cases where the blocked net-requests
// exact same URL. // are never the exact same URL.
if ( this.net.size < netSelectorCacheHighWaterMark ) { return; } if ( this.net.size < SelectorCacheEntry.netHighWaterMark ) {
return;
}
let dict = this.net; let dict = this.net;
let keys = Array.from(dict.keys()).sort(function(a, b) { let keys = Array.from(dict.keys()).sort(function(a, b) {
return dict.get(b) - dict.get(a); return dict.get(b) - dict.get(a);
}).slice(netSelectorCacheLowWaterMark); }).slice(SelectorCacheEntry.netLowWaterMark);
let i = keys.length; let i = keys.length;
while ( i-- ) { while ( i-- ) {
dict.delete(keys[i]); dict.delete(keys[i]);
} }
}, }
addNetOne: function(selector, now) { addNetOne(selector, now) {
this.net.set(selector, now); this.net.set(selector, now);
}, }
addNetMany: function(selectors, now) { addNetMany(selectors, now) {
let i = selectors.length || 0; let i = selectors.length || 0;
while ( i-- ) { while ( i-- ) {
this.net.set(selectors[i], now); this.net.set(selectors[i], now);
} }
}, }
add: function(details) { add(details) {
this.lastAccessTime = Date.now(); this.lastAccessTime = Date.now();
if ( details.type === 'cosmetic' ) { if ( details.type === 'cosmetic' ) {
this.addCosmetic(details); this.addCosmetic(details);
} else { } else {
this.addNet(details.selectors); this.addNet(details.selectors);
} }
}, }
// https://github.com/chrisaljoudi/uBlock/issues/420 // https://github.com/chrisaljoudi/uBlock/issues/420
remove: function(type) { remove(type) {
this.lastAccessTime = Date.now(); this.lastAccessTime = Date.now();
if ( type === undefined || type === 'cosmetic' ) { if ( type === undefined || type === 'cosmetic' ) {
this.cosmetic.clear(); this.cosmetic.clear();
@ -310,21 +130,21 @@ SelectorCacheEntry.prototype = {
if ( type === undefined || type === 'net' ) { if ( type === undefined || type === 'net' ) {
this.net.clear(); this.net.clear();
} }
}, }
retrieveToArray: function(iterator, out) { retrieveToArray(iterator, out) {
for ( let selector of iterator ) { for ( let selector of iterator ) {
out.push(selector); out.push(selector);
} }
}, }
retrieveToSet: function(iterator, out) { retrieveToSet(iterator, out) {
for ( let selector of iterator ) { for ( let selector of iterator ) {
out.add(selector); out.add(selector);
} }
}, }
retrieve: function(type, out) { retrieve(type, out) {
this.lastAccessTime = Date.now(); this.lastAccessTime = Date.now();
let iterator = type === 'cosmetic' ? this.cosmetic : this.net.keys(); let iterator = type === 'cosmetic' ? this.cosmetic : this.net.keys();
if ( Array.isArray(out) ) { if ( Array.isArray(out) ) {
@ -333,8 +153,20 @@ SelectorCacheEntry.prototype = {
this.retrieveToSet(iterator, out); this.retrieveToSet(iterator, out);
} }
} }
static factory() {
const entry = SelectorCacheEntry.junkyard.pop();
if ( entry ) {
return entry.reset();
}
return new SelectorCacheEntry();
}
}; };
SelectorCacheEntry.netLowWaterMark = 20;
SelectorCacheEntry.netHighWaterMark = 30;
SelectorCacheEntry.junkyard = [];
/******************************************************************************/ /******************************************************************************/
/******************************************************************************/ /******************************************************************************/
@ -353,7 +185,7 @@ SelectorCacheEntry.prototype = {
// Generic filters can only be enforced once the main document is loaded. // Generic filters can only be enforced once the main document is loaded.
// Specific filers can be enforced before the main document is loaded. // Specific filers can be enforced before the main document is loaded.
let FilterContainer = function() { const FilterContainer = function() {
this.reHasUnicode = /[^\x00-\x7F]/; this.reHasUnicode = /[^\x00-\x7F]/;
this.rePlainSelector = /^[#.][\w\\-]+/; this.rePlainSelector = /^[#.][\w\\-]+/;
this.rePlainSelectorEscaped = /^[#.](?:\\[0-9A-Fa-f]+ |\\.|\w|-)+/; this.rePlainSelectorEscaped = /^[#.](?:\\[0-9A-Fa-f]+ |\\.|\w|-)+/;
@ -366,17 +198,14 @@ let FilterContainer = function() {
this.selectorCachePruneDelay = 10 * 60 * 1000; // 10 minutes this.selectorCachePruneDelay = 10 * 60 * 1000; // 10 minutes
this.selectorCacheAgeMax = 120 * 60 * 1000; // 120 minutes this.selectorCacheAgeMax = 120 * 60 * 1000; // 120 minutes
this.selectorCacheCountMin = 25; this.selectorCacheCountMin = 25;
this.netSelectorCacheCountMax = netSelectorCacheHighWaterMark; this.netSelectorCacheCountMax = SelectorCacheEntry.netHighWaterMark;
this.selectorCacheTimer = null; this.selectorCacheTimer = null;
// generic exception filters // generic exception filters
this.genericDonthideSet = new Set(); this.genericDonthideSet = new Set();
// TODO: Think about reusing µb.staticExtFilteringEngine.HostnameBasedDB // specific filters
// for both specific and procedural filters. This would require some this.specificFilters = new µb.staticExtFilteringEngine.HostnameBasedDB(2);
// refactoring.
// hostname, entity-based filters
this.specificFilters = new Map();
// low generic cosmetic filters, organized by id/class then simple/complex. // low generic cosmetic filters, organized by id/class then simple/complex.
this.lowlyGeneric = Object.create(null); this.lowlyGeneric = Object.create(null);
@ -464,7 +293,8 @@ FilterContainer.prototype.reset = function() {
/******************************************************************************/ /******************************************************************************/
FilterContainer.prototype.freeze = function() { FilterContainer.prototype.freeze = function() {
this.duplicateBuster = new Set(); this.duplicateBuster.clear();
this.specificFilters.collectGarbage();
this.hasGenericHide = this.hasGenericHide =
this.lowlyGeneric.id.simple.size !== 0 || this.lowlyGeneric.id.simple.size !== 0 ||
@ -694,7 +524,7 @@ FilterContainer.prototype.compileSpecificSelector = function(
unhide ^= 1; unhide ^= 1;
} }
let compiled = µb.staticExtFilteringEngine.compileSelector(parsed.suffix); const compiled = µb.staticExtFilteringEngine.compileSelector(parsed.suffix);
if ( compiled === undefined ) { if ( compiled === undefined ) {
const who = writer.properties.get('assetKey') || '?'; const who = writer.properties.get('assetKey') || '?';
µb.logger.writeOne({ µb.logger.writeOne({
@ -705,19 +535,16 @@ FilterContainer.prototype.compileSpecificSelector = function(
return; return;
} }
let hash = µb.staticExtFilteringEngine.compileHostnameToHash(hostname);
// Exception? let kind = 0;
if ( unhide === 1 ) { if ( unhide === 1 ) {
hash |= 0b0001; kind |= 0b01; // Exception
}
if ( compiled.charCodeAt(0) === 0x7B /* '{' */ ) {
kind |= 0b10; // Procedural
} }
// Procedural? writer.push([ 8, hostname, kind, compiled ]);
if ( compiled.charCodeAt(0) === 0x7B ) {
hash |= 0b0010;
}
writer.push([ 8, hash, hostname, compiled ]);
}; };
/******************************************************************************/ /******************************************************************************/
@ -739,14 +566,14 @@ FilterContainer.prototype.fromCompiledContent = function(reader, options) {
while ( reader.next() ) { while ( reader.next() ) {
this.acceptedCount += 1; this.acceptedCount += 1;
let fingerprint = reader.fingerprint(); const fingerprint = reader.fingerprint();
if ( this.duplicateBuster.has(fingerprint) ) { if ( this.duplicateBuster.has(fingerprint) ) {
this.discardedCount += 1; this.discardedCount += 1;
continue; continue;
} }
this.duplicateBuster.add(fingerprint); this.duplicateBuster.add(fingerprint);
let args = reader.args(); const args = reader.args();
switch ( args[0] ) { switch ( args[0] ) {
@ -805,20 +632,7 @@ FilterContainer.prototype.fromCompiledContent = function(reader, options) {
// hash, example.com, .promoted-tweet // hash, example.com, .promoted-tweet
// hash, example.*, .promoted-tweet // hash, example.*, .promoted-tweet
case 8: case 8:
bucket = this.specificFilters.get(args[1]); this.specificFilters.store(args[1], args[2], args[3]);
if ( bucket === undefined ) {
this.specificFilters.set(
args[1],
new FilterOneOne(args[2], args[3])
);
} else if ( bucket instanceof FilterManyAny ) {
bucket.add(args[2], args[3]);
} else /* can morph, so we need to replace entry in map */ {
this.specificFilters.set(
args[1],
bucket.add(args[2], args[3])
);
}
break; break;
default: default:
@ -856,21 +670,7 @@ FilterContainer.prototype.skipGenericCompiledContent = function(reader) {
// hash, example.com, .promoted-tweet // hash, example.com, .promoted-tweet
// hash, example.*, .promoted-tweet // hash, example.*, .promoted-tweet
case 8: case 8:
this.duplicateBuster.add(fingerprint); this.specificFilters.store(args[1], args[2], args[3]);
const bucket = this.specificFilters.get(args[1]);
if ( bucket === undefined ) {
this.specificFilters.set(
args[1],
new FilterOneOne(args[2], args[3])
);
} else if ( bucket instanceof FilterManyAny ) {
bucket.add(args[2], args[3]);
} else /* can morph, so we need to replace entry in map */ {
this.specificFilters.set(
args[1],
bucket.add(args[2], args[3])
);
}
break; break;
default: default:
@ -895,18 +695,10 @@ FilterContainer.prototype.skipCompiledContent = function(reader) {
/******************************************************************************/ /******************************************************************************/
FilterContainer.prototype.toSelfie = function() { FilterContainer.prototype.toSelfie = function() {
let selfieFromMap = function(map) {
let entries = [];
for ( let entry of map ) {
entries.push([ entry[0], entry[1].compile() ]);
}
return entries;
};
return { return {
acceptedCount: this.acceptedCount, acceptedCount: this.acceptedCount,
discardedCount: this.discardedCount, discardedCount: this.discardedCount,
specificFilters: selfieFromMap(this.specificFilters), specificFilters: this.specificFilters.toSelfie(),
hasGenericHide: this.hasGenericHide, hasGenericHide: this.hasGenericHide,
lowlyGenericSID: Array.from(this.lowlyGeneric.id.simple), lowlyGenericSID: Array.from(this.lowlyGeneric.id.simple),
lowlyGenericCID: Array.from(this.lowlyGeneric.id.complex), lowlyGenericCID: Array.from(this.lowlyGeneric.id.complex),
@ -921,17 +713,9 @@ FilterContainer.prototype.toSelfie = function() {
/******************************************************************************/ /******************************************************************************/
FilterContainer.prototype.fromSelfie = function(selfie) { FilterContainer.prototype.fromSelfie = function(selfie) {
let mapFromSelfie = function(entries) {
let out = new Map();
for ( let entry of entries ) {
out.set(entry[0], filterFromCompiledData(entry[1]));
}
return out;
};
this.acceptedCount = selfie.acceptedCount; this.acceptedCount = selfie.acceptedCount;
this.discardedCount = selfie.discardedCount; this.discardedCount = selfie.discardedCount;
this.specificFilters = mapFromSelfie(selfie.specificFilters); this.specificFilters.fromSelfie(selfie.specificFilters);
this.hasGenericHide = selfie.hasGenericHide; this.hasGenericHide = selfie.hasGenericHide;
this.lowlyGeneric.id.simple = new Set(selfie.lowlyGenericSID); this.lowlyGeneric.id.simple = new Set(selfie.lowlyGenericSID);
this.lowlyGeneric.id.complex = new Map(selfie.lowlyGenericCID); this.lowlyGeneric.id.complex = new Map(selfie.lowlyGenericCID);
@ -1163,8 +947,8 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
) { ) {
//console.time('cosmeticFilteringEngine.retrieveSpecificSelectors'); //console.time('cosmeticFilteringEngine.retrieveSpecificSelectors');
let hostname = request.hostname, const hostname = request.hostname;
cacheEntry = this.selectorCache.get(hostname); const cacheEntry = this.selectorCache.get(hostname);
// https://github.com/chrisaljoudi/uBlock/issues/587 // https://github.com/chrisaljoudi/uBlock/issues/587
// out.ready will tell the content script the cosmetic filtering engine is // out.ready will tell the content script the cosmetic filtering engine is
@ -1173,7 +957,7 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
// https://github.com/chrisaljoudi/uBlock/issues/497 // https://github.com/chrisaljoudi/uBlock/issues/497
// Generic exception filters are to be applied on all pages. // Generic exception filters are to be applied on all pages.
let out = { const out = {
ready: this.frozen, ready: this.frozen,
hostname: hostname, hostname: hostname,
domain: request.domain, domain: request.domain,
@ -1190,79 +974,14 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
}; };
if ( options.noCosmeticFiltering !== true ) { if ( options.noCosmeticFiltering !== true ) {
let entity = request.entity,
domainHash = µb.staticExtFilteringEngine.makeHash(request.domain),
entityHash = µb.staticExtFilteringEngine.makeHash(entity),
bucket;
// Exception cosmetic filters: prime with generic exception filters. // Exception cosmetic filters: prime with generic exception filters.
let exceptionSet = this.setRegister0; const exceptionSet = this.setRegister0;
// Genetic exceptions (should be extremely rare). // Genetic exceptions (should be extremely rare).
for ( let exception of this.genericDonthideSet ) { for ( let exception of this.genericDonthideSet ) {
exceptionSet.add(exception); exceptionSet.add(exception);
} }
// Specific exception cosmetic filters.
if ( domainHash !== 0 ) {
bucket = this.specificFilters.get(domainHash | 0b0001);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, exceptionSet);
}
bucket = this.specificFilters.get(domainHash | 0b0011);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, exceptionSet);
}
}
// Specific entity-based exception cosmetic filters.
if ( entityHash !== 0 ) {
bucket = this.specificFilters.get(entityHash | 0b0001);
if ( bucket !== undefined ) {
bucket.retrieve(entity, exceptionSet);
}
bucket = this.specificFilters.get(entityHash | 0b0011);
if ( bucket !== undefined ) {
bucket.retrieve(entity, exceptionSet);
}
}
// Special bucket for those filters without a valid
// domain name as per PSL.
bucket = this.specificFilters.get(0 | 0b0001);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, exceptionSet);
}
bucket = this.specificFilters.get(0 | 0b0011);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, exceptionSet);
}
if ( exceptionSet.size !== 0 ) {
out.exceptionFilters = Array.from(exceptionSet);
}
// Declarative cosmetic filters. const specificSet = this.setRegister1;
// TODO: Should I go one step further and store specific simple and
// specific complex in different collections? This could simplify
// slightly content script code.
let specificSet = this.setRegister1;
// Specific cosmetic filters.
if ( domainHash !== 0 ) {
bucket = this.specificFilters.get(domainHash | 0b0000);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, specificSet);
}
}
// Specific entity-based cosmetic filters.
if ( entityHash !== 0 ) {
bucket = this.specificFilters.get(entityHash | 0b0000);
if ( bucket !== undefined ) {
bucket.retrieve(entity, specificSet);
}
}
// https://github.com/chrisaljoudi/uBlock/issues/188
// Special bucket for those filters without a valid domain name
// as per PSL
bucket = this.specificFilters.get(0 | 0b0000);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, specificSet);
}
// Cached cosmetic filters: these are always declarative. // Cached cosmetic filters: these are always declarative.
if ( cacheEntry !== undefined ) { if ( cacheEntry !== undefined ) {
cacheEntry.retrieve('cosmetic', specificSet); cacheEntry.retrieve('cosmetic', specificSet);
@ -1272,32 +991,21 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
} }
} }
// Procedural cosmetic filters. const proceduralSet = this.setRegister2;
let proceduralSet = this.setRegister2;
// Specific cosmetic filters. this.specificFilters.retrieve(
if ( domainHash !== 0 ) { hostname,
bucket = this.specificFilters.get(domainHash | 0b0010); [ specificSet, exceptionSet, proceduralSet, exceptionSet ]
if ( bucket !== undefined ) { );
bucket.retrieve(hostname, proceduralSet); if ( request.entity !== '' ) {
} this.specificFilters.retrieve(
} `${hostname.slice(0, -request.domain.length)}${request.entity}`,
// Specific entity-based cosmetic filters. [ specificSet, exceptionSet, proceduralSet, exceptionSet ]
if ( entityHash !== 0 ) { );
bucket = this.specificFilters.get(entityHash | 0b0010);
if ( bucket !== undefined ) {
bucket.retrieve(entity, proceduralSet);
}
}
// https://github.com/chrisaljoudi/uBlock/issues/188
// Special bucket for those filters without a valid domain name
// as per PSL
bucket = this.specificFilters.get(0 | 0b0010);
if ( bucket !== undefined ) {
bucket.retrieve(hostname, proceduralSet);
} }
// Apply exceptions. // Apply exceptions.
for ( let exception of exceptionSet ) { for ( const exception of exceptionSet ) {
specificSet.delete(exception); specificSet.delete(exception);
proceduralSet.delete(exception); proceduralSet.delete(exception);
} }
@ -1317,15 +1025,15 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
// string in memory, which I have observed occurs when the string is // string in memory, which I have observed occurs when the string is
// stored directly as a value in a Map. // stored directly as a value in a Map.
if ( options.noGenericCosmeticFiltering !== true ) { if ( options.noGenericCosmeticFiltering !== true ) {
let exceptionHash = out.exceptionFilters.join(); const exceptionHash = out.exceptionFilters.join();
for ( let type in this.highlyGeneric ) { for ( const type in this.highlyGeneric ) {
let entry = this.highlyGeneric[type]; const entry = this.highlyGeneric[type];
let str = entry.mru.lookup(exceptionHash); let str = entry.mru.lookup(exceptionHash);
if ( str === undefined ) { if ( str === undefined ) {
str = { s: entry.str }; str = { s: entry.str };
let genericSet = entry.dict; let genericSet = entry.dict;
let hit = false; let hit = false;
for ( let exception of exceptionSet ) { for ( const exception of exceptionSet ) {
if ( (hit = genericSet.has(exception)) ) { break; } if ( (hit = genericSet.has(exception)) ) { break; }
} }
if ( hit ) { if ( hit ) {
@ -1349,7 +1057,7 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
// CSS selectors for collapsible blocked elements // CSS selectors for collapsible blocked elements
if ( cacheEntry ) { if ( cacheEntry ) {
let networkFilters = []; const networkFilters = [];
cacheEntry.retrieve('net', networkFilters); cacheEntry.retrieve('net', networkFilters);
out.networkFilters = networkFilters.join(',\n'); out.networkFilters = networkFilters.join(',\n');
} }
@ -1362,7 +1070,7 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
request.tabId !== undefined && request.tabId !== undefined &&
request.frameId !== undefined request.frameId !== undefined
) { ) {
let injectedHideFilters = []; const injectedHideFilters = [];
if ( out.declarativeFilters.length !== 0 ) { if ( out.declarativeFilters.length !== 0 ) {
injectedHideFilters.push(out.declarativeFilters.join(',\n')); injectedHideFilters.push(out.declarativeFilters.join(',\n'));
out.declarativeFilters = []; out.declarativeFilters = [];
@ -1380,7 +1088,7 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
out.highGenericHideComplex = ''; out.highGenericHideComplex = '';
} }
out.injectedHideFilters = injectedHideFilters.join(',\n'); out.injectedHideFilters = injectedHideFilters.join(',\n');
let details = { const details = {
code: '', code: '',
cssOrigin: 'user', cssOrigin: 'user',
frameId: request.frameId, frameId: request.frameId,

View File

@ -28,7 +28,7 @@
const pselectors = new Map(); const pselectors = new Map();
const duplicates = new Set(); const duplicates = new Set();
let filterDB = new µb.staticExtFilteringEngine.HostnameBasedDB(), let filterDB = new µb.staticExtFilteringEngine.HostnameBasedDB(2),
acceptedCount = 0, acceptedCount = 0,
discardedCount = 0, discardedCount = 0,
docRegister; docRegister;
@ -207,7 +207,7 @@
.setDocOriginFromURL(details.url) .setDocOriginFromURL(details.url)
.setFilter({ .setFilter({
source: 'cosmetic', source: 'cosmetic',
raw: (exception === 0 ? '##' : '#@#') + '^' + selector raw: `${exception === 0 ? '##' : '#@#'}^${selector}`
}) })
.toLogger(); .toLogger();
}; };
@ -261,6 +261,7 @@
api.freeze = function() { api.freeze = function() {
duplicates.clear(); duplicates.clear();
filterDB.collectGarbage();
}; };
api.compile = function(parsed, writer) { api.compile = function(parsed, writer) {
@ -283,16 +284,14 @@
for ( const hn of parsed.hostnames ) { for ( const hn of parsed.hostnames ) {
if ( hn.charCodeAt(0) === 0x7E /* '~' */ ) { continue; } if ( hn.charCodeAt(0) === 0x7E /* '~' */ ) { continue; }
let hash = µb.staticExtFilteringEngine.compileHostnameToHash(hn); let kind = 0;
if ( parsed.exception ) { if ( parsed.exception ) {
hash |= 0b0001; kind |= 0b01;
} }
writer.push([ if ( compiled.charCodeAt(0) === 0x7B /* '{' */ ) {
compiled.charCodeAt(0) !== 0x7B /* '{' */ ? 64 : 65, kind |= 0b10;
hash, }
hn, writer.push([ 64, hn, kind, compiled ]);
compiled
]);
} }
}; };
@ -312,11 +311,7 @@
} }
duplicates.add(fingerprint); duplicates.add(fingerprint);
const args = reader.args(); const args = reader.args();
filterDB.add(args[1], { filterDB.store(args[1], args[2], args[3]);
type: args[0],
hostname: args[2],
selector: args[3]
});
} }
}; };
@ -332,62 +327,58 @@
return; return;
} }
const toRemoveArray = []; const plains = new Set();
const domainHash = µb.staticExtFilteringEngine.makeHash(details.domain); const procedurals = new Set();
if ( domainHash !== 0 ) { const exceptions = new Set();
filterDB.retrieve(domainHash, hostname, toRemoveArray);
}
const entity = details.entity;
const entityHash = µb.staticExtFilteringEngine.makeHash(entity);
if ( entityHash !== 0 ) {
filterDB.retrieve(entityHash, entity, toRemoveArray);
}
filterDB.retrieve(0, hostname, toRemoveArray);
if ( toRemoveArray.length === 0 ) { return; }
let notToRemoveArray = []; filterDB.retrieve(
if ( domainHash !== 0 ) { hostname,
filterDB.retrieve(domainHash | 0b0001, hostname, notToRemoveArray); [ plains, exceptions, procedurals, exceptions ]
} );
if ( entityHash !== 0 ) { if ( details.entity !== '' ) {
filterDB.retrieve(entityHash | 0b0001, entity, notToRemoveArray); filterDB.retrieve(
} `${hostname.slice(0, -details.domain)}${details.entity}`,
filterDB.retrieve(0 | 0b0001, hostname, notToRemoveArray); [ plains, exceptions, procedurals, exceptions ]
if ( notToRemoveArray.length === 0 ) { );
return toRemoveArray;
} }
const toRemoveMap = new Map(); if ( plains.size === 0 && procedurals.size === 0 ) { return; }
for ( const entry of toRemoveArray ) {
toRemoveMap.set(entry.selector, entry); const out = { plains, procedurals };
}
for ( const entry of notToRemoveArray ) { if ( exceptions.size === 0 ) {
if ( toRemoveMap.has(entry.selector) === false ) { continue; } return out;
toRemoveMap.delete(entry.selector);
if ( µb.logger.enabled === false ) { continue; }
let selector = entry.selector;
if ( entry.type === 65 ) {
selector = JSON.parse(selector).raw;
} }
for ( const selector of exceptions ) {
if ( plains.has(selector) ) {
plains.delete(selector);
logOne(details, 1, selector); logOne(details, 1, selector);
continue;
}
if ( procedurals.has(selector) ) {
procedurals.delete(selector);
logOne(details, 1, JSON.parse(selector).raw);
continue;
}
} }
if ( toRemoveMap.size === 0 ) { return; } if ( plains.size !== 0 || procedurals.size !== 0 ) {
return Array.from(toRemoveMap.values()); return out;
}
}; };
api.apply = function(doc, details) { api.apply = function(doc, details) {
docRegister = doc; docRegister = doc;
let modified = false; let modified = false;
for ( const entry of details.selectors ) { for ( const selector of details.selectors.plains ) {
if ( entry.type === 64 ) { if ( applyCSSSelector(details, selector) ) {
if ( applyCSSSelector(details, entry.selector) ) {
modified = true; modified = true;
} }
} else /* if ( entry.type === 65 ) */ {
if ( applyProceduralSelector(details, entry.selector) ) {
modified = true;
} }
for ( const selector of details.selectors.procedurals ) {
if ( applyProceduralSelector(details, selector) ) {
modified = true;
} }
} }
@ -400,7 +391,7 @@
}; };
api.fromSelfie = function(selfie) { api.fromSelfie = function(selfie) {
filterDB = new µb.staticExtFilteringEngine.HostnameBasedDB(selfie); filterDB = new µb.staticExtFilteringEngine.HostnameBasedDB(2, selfie);
pselectors.clear(); pselectors.clear();
}; };

View File

@ -216,38 +216,26 @@ const fromCosmeticFilter = function(details) {
break; break;
// Specific cosmetic filtering // Specific cosmetic filtering
case 8: case 8:
if ( exception !== ((fargs[1] & 0b0001) !== 0) ) { break; } // HTML filtering
isProcedural = (fargs[1] & 0b0010) !== 0; case 64:
if ( exception !== ((fargs[2] & 0b01) !== 0) ) { break; }
isProcedural = (fargs[2] & 0b10) !== 0;
if ( if (
isProcedural === false && fargs[3] !== selector || isProcedural === false && fargs[3] !== selector ||
isProcedural && JSON.parse(fargs[3]).raw !== selector isProcedural && JSON.parse(fargs[3]).raw !== selector
) { ) {
break; break;
} }
if ( hostnameMatches(fargs[2]) ) { if ( hostnameMatches(fargs[1]) ) {
found = fargs[2] + prefix + selector; found = fargs[1] + prefix + selector;
} }
break; break;
// Scriptlet injection // Scriptlet injection
case 32: case 32:
if ( exception !== ((fargs[1] & 0b0001) !== 0) ) { break; } if ( exception !== ((fargs[2] & 1) !== 0) ) { break; }
if ( fargs[3] !== selector ) { break; } if ( fargs[3] !== selector ) { break; }
if ( hostnameMatches(fargs[2]) ) { if ( hostnameMatches(fargs[1]) ) {
found = fargs[2] + prefix + selector; found = fargs[1] + prefix + selector;
}
break;
// HTML filtering
case 64: // CSS selector
case 65: // procedural
if ( exception !== ((fargs[1] & 0b0001) !== 0) ) { break; }
if (
fargs[0] === 64 && fargs[3] !== selector ||
fargs[0] === 65 && JSON.parse(fargs[3]).raw !== selector
) {
break;
}
if ( hostnameMatches(fargs[2]) ) {
found = fargs[2] + prefix + selector;
} }
break; break;
} }

View File

@ -33,7 +33,7 @@
let acceptedCount = 0, let acceptedCount = 0,
discardedCount = 0, discardedCount = 0,
scriptletDB = new µb.staticExtFilteringEngine.HostnameBasedDB(); scriptletDB = new µb.staticExtFilteringEngine.HostnameBasedDB(1);
const api = { const api = {
get acceptedCount() { get acceptedCount() {
@ -241,6 +241,7 @@
api.freeze = function() { api.freeze = function() {
duplicates.clear(); duplicates.clear();
scriptletDB.collectGarbage();
}; };
api.compile = function(parsed, writer) { api.compile = function(parsed, writer) {
@ -251,7 +252,7 @@
if ( parsed.hostnames.length === 0 ) { if ( parsed.hostnames.length === 0 ) {
if ( parsed.exception ) { if ( parsed.exception ) {
writer.push([ 32, 0 | 0b0001, '', parsed.suffix ]); writer.push([ 32, '', 1, parsed.suffix ]);
} }
return; return;
} }
@ -265,14 +266,14 @@
if ( negated ) { if ( negated ) {
hn = hn.slice(1); hn = hn.slice(1);
} }
let hash = µb.staticExtFilteringEngine.compileHostnameToHash(hn); let kind = 0;
if ( parsed.exception ) { if ( parsed.exception ) {
if ( negated ) { continue; } if ( negated ) { continue; }
hash |= 0b0001; kind |= 1;
} else if ( negated ) { } else if ( negated ) {
hash |= 0b0001; kind |= 1;
} }
writer.push([ 32, hash, hn, parsed.suffix ]); writer.push([ 32, hn, kind, parsed.suffix ]);
} }
}; };
@ -295,10 +296,7 @@
duplicates.add(fingerprint); duplicates.add(fingerprint);
const args = reader.args(); const args = reader.args();
if ( args.length < 4 ) { continue; } if ( args.length < 4 ) { continue; }
scriptletDB.add( scriptletDB.store(args[1], args[2], args[3].slice(4, -1));
args[1],
{ hostname: args[2], token: args[3].slice(4, -1) }
);
} }
}; };
@ -320,48 +318,36 @@
return; return;
} }
const domain = request.domain; const scriptlets = new Set();
const entity = request.entity; const exceptions = exceptionsRegister;
const entries = []; scriptletDB.retrieve(
const domainHash = µb.staticExtFilteringEngine.makeHash(domain); hostname,
if ( domainHash !== 0 ) { [ scriptlets, exceptions ]
scriptletDB.retrieve(domainHash, hostname, entries); );
if ( request.entity !== '' ) {
scriptletDB.retrieve(
`${hostname.slice(0, -request.domain)}${request.entity}`,
[ scriptlets, exceptions ]
);
} }
const entityHash = µb.staticExtFilteringEngine.makeHash(entity);
if ( entityHash !== 0 ) { for ( const token of scriptlets ) {
scriptletDB.retrieve(entityHash, entity, entries); lookupScriptlet(token, reng, scriptletsRegister);
}
scriptletDB.retrieve(0, hostname, entries);
for ( const entry of entries ) {
lookupScriptlet(entry.token, reng, scriptletsRegister);
} }
if ( scriptletsRegister.size === 0 ) { return; } if ( scriptletsRegister.size === 0 ) { return; }
// Collect exception filters.
entries.length = 0;
if ( domainHash !== 0 ) {
scriptletDB.retrieve(domainHash | 0b0001, hostname, entries);
}
if ( entityHash !== 0 ) {
scriptletDB.retrieve(entityHash | 0b0001, entity, entries);
}
scriptletDB.retrieve(0 | 0b0001, hostname, entries);
for ( const entry of entries ) {
exceptionsRegister.add(entry.token);
}
// Return an array of scriptlets, and log results if needed. // Return an array of scriptlets, and log results if needed.
const out = []; const out = [];
const loggerEnabled = µb.logger.enabled; const loggerEnabled = µb.logger.enabled;
for ( const entry of scriptletsRegister ) { for ( const [ token, code ] of scriptletsRegister ) {
const isException = exceptionsRegister.has(entry[0]); const isException = exceptionsRegister.has(token);
if ( isException === false ) { if ( isException === false ) {
out.push(entry[1]); out.push(code);
} }
if ( loggerEnabled ) { if ( loggerEnabled ) {
logOne(isException, entry[0], request); logOne(isException, token, request);
} }
} }
@ -408,7 +394,7 @@
}; };
api.fromSelfie = function(selfie) { api.fromSelfie = function(selfie) {
scriptletDB = new µb.staticExtFilteringEngine.HostnameBasedDB(selfie); scriptletDB = new µb.staticExtFilteringEngine.HostnameBasedDB(1, selfie);
}; };
return api; return api;

View File

@ -486,83 +486,111 @@
// Public classes // Public classes
//-------------------------------------------------------------------------- //--------------------------------------------------------------------------
api.HostnameBasedDB = function(selfie) { api.HostnameBasedDB = class {
if ( selfie !== undefined ) {
this.db = new Map(selfie.map);
this.size = selfie.size;
} else {
this.db = new Map();
this.size = 0;
}
};
api.HostnameBasedDB.prototype = { constructor(nBits, selfie = undefined) {
add: function(hash, entry) { this.nBits = nBits;
let bucket = this.db.get(hash); this.timer = undefined;
if ( bucket === undefined ) { this.strToIdMap = new Map();
this.db.set(hash, entry); if ( selfie !== undefined ) {
} else if ( Array.isArray(bucket) ) { this.fromSelfie(selfie);
bucket.push(entry); return;
} else {
this.db.set(hash, [ bucket, entry ]);
} }
this.size += 1; this.hostnameToSlotIdMap = new Map();
}, this.hostnameSlots = [];
clear: function() { this.strSlots = [];
this.db.clear();
this.size = 0; this.size = 0;
}
store(hn, bits, s) {
this.size += 1;
let iStr = this.strToIdMap.get(s);
if ( iStr === undefined ) {
iStr = this.strSlots.length;
this.strSlots.push(s);
this.strToIdMap.set(s, iStr);
if ( this.timer === undefined ) {
this.collectGarbage(true);
}
}
const strId = iStr << this.nBits | bits;
const iHn = this.hostnameToSlotIdMap.get(hn);
if ( iHn === undefined ) {
this.hostnameToSlotIdMap.set(hn, this.hostnameSlots.length);
this.hostnameSlots.push(strId);
return;
}
const bucket = this.hostnameSlots[iHn];
if ( Array.isArray(bucket) ) {
bucket.push(strId);
} else {
this.hostnameSlots[iHn] = [ bucket, strId ];
}
}
clear() {
this.hostnameToSlotIdMap.clear();
this.hostnameSlots.length = 0;
this.strSlots.length = 0;
this.strToIdMap.clear();
this.size = 0;
}
collectGarbage(async = false) {
if ( async === false ) {
if ( this.timer !== undefined ) {
self.cancelIdleCallback(this.timer);
this.timer = undefined;
}
this.strToIdMap.clear();
return;
}
if ( this.timer !== undefined ) { return; }
this.timer = self.requestIdleCallback(
( ) => {
this.timer = undefined;
this.strToIdMap.clear();
}, },
retrieve: function(hash, hostname, out) { { timeout: 10000 }
let bucket = this.db.get(hash); );
if ( bucket === undefined ) { return; }
if ( Array.isArray(bucket) === false ) {
bucket = [ bucket ];
} }
for ( let entry of bucket ) {
if ( hostname.endsWith(entry.hostname) === false ) { retrieve(hostname, out) {
continue; const mask = out.length - 1; // out.length must be power of two
for (;;) {
const filterId = this.hostnameToSlotIdMap.get(hostname);
if ( filterId !== undefined ) {
const bucket = this.hostnameSlots[filterId];
if ( Array.isArray(bucket) ) {
for ( const id of bucket ) {
out[id & mask].add(this.strSlots[id >>> this.nBits]);
} }
let i = hostname.length - entry.hostname.length; } else {
if ( out[bucket & mask].add(this.strSlots[bucket >>> this.nBits]);
i === 0 ||
i === hostname.length ||
hostname.charCodeAt(i-1) === 0x2E /* '.' */
) {
out.push(entry);
} }
} }
}, if ( hostname === '' ) { break; }
toSelfie: function() { const pos = hostname.indexOf('.');
hostname = pos !== -1 ? hostname.slice(pos + 1) : '';
}
}
toSelfie() {
return { return {
map: Array.from(this.db), hostnameToSlotIdMap: Array.from(this.hostnameToSlotIdMap),
hostnameSlots: this.hostnameSlots,
strSlots: this.strSlots,
size: this.size size: this.size
}; };
} }
};
api.HostnameBasedDB.prototype[Symbol.iterator] = (function() { fromSelfie(selfie) {
const Iter = function(db) { this.hostnameToSlotIdMap = new Map(selfie.hostnameToSlotIdMap);
this.mapIter = db.values(); this.hostnameSlots = selfie.hostnameSlots;
this.arrayIter = undefined; this.strSlots = selfie.strSlots;
}; this.size = selfie.size;
Iter.prototype.next = function() {
let result;
if ( this.arrayIter !== undefined ) {
result = this.arrayIter.next();
if ( result.done === false ) { return result; }
this.arrayIter = undefined;
} }
result = this.mapIter.next();
if ( result.done || Array.isArray(result.value) === false ) {
return result;
}
this.arrayIter = result.value[Symbol.iterator]();
return this.arrayIter.next(); // array should never be empty
}; };
return function() {
return new Iter(this.db);
};
})();
//-------------------------------------------------------------------------- //--------------------------------------------------------------------------
// Public methods // Public methods
@ -584,60 +612,6 @@
resetParsed(parsed); resetParsed(parsed);
}; };
// Layout of the 16-bit value returned by makeHash():
//
// HHHHHHHHHHHH0000
// |           |
// |           |
// |           +-- bit 3-0: reserved
// +------ bit 15-4: FNV

// Compute a hash for `token` by sampling at most eight of its characters.
//
// Based on: FNV32a
// http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-reference-source
// The rest is custom, suited for uBlock.
//
// Returns 0 only for the empty string; any other token yields a non-zero
// value whose low four bits are cleared.
api.makeHash = function(token) {
    const len = token.length;
    if ( len === 0 ) { return 0; }
    const half = len >> 1;
    const quarter = len >> 2;
    const eighth = len >> 3;
    // Sampled positions: start, eighths/quarters/half combinations, end.
    const positions = [
        0,
        eighth,
        quarter,
        quarter + eighth,
        half,
        half + eighth,
        half + quarter,
        len - 1
    ];
    let hval = 0x811c9dc5;
    for ( const pos of positions ) {
        hval ^= token.charCodeAt(pos);
        // Shift-and-add mixing step, then wrap back to unsigned 32-bit.
        hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24);
        hval >>>= 0;
    }
    hval &= 0xFFF0;
    // Can't return 0, it's reserved for empty string.
    return hval !== 0 ? hval : 0xfff0;
};
// Hash the domain part of `hostname` with api.makeHash().
//
// For an entity-style hostname (ending in '.*'), the domain is taken to be
// the last label before the '.*' together with the '.*' itself, or the whole
// hostname when there is no earlier dot. For any other hostname, the domain
// is obtained from µb.URI.domainFromHostname().
api.compileHostnameToHash = function(hostname) {
    if ( hostname.endsWith('.*') === false ) {
        return api.makeHash(µb.URI.domainFromHostname(hostname));
    }
    // Start searching before the trailing '.*' to find the preceding dot.
    const dotPos = hostname.lastIndexOf('.', hostname.length - 3);
    return api.makeHash(dotPos === -1 ? hostname : hostname.slice(dotPos + 1));
};
// https://github.com/chrisaljoudi/uBlock/issues/1004 // https://github.com/chrisaljoudi/uBlock/issues/1004
// Detect and report invalid CSS selectors. // Detect and report invalid CSS selectors.