Increase resolution of known-token lookup table

Related commit:
- 69a43e07c4

Using 32 bits of the token hash rather than just the 16
lower bits helps discard more unknown tokens.
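
To illustrate (a sketch, not code from this commit):
XOR-folding the high 16 bits into the low 16 bits preserves
information that plain truncation discards, so fewer
distinct token hashes collide into the same table slot.

    // Sketch only: contrast the old and new slot computations.
    const lowBitsIndex = th => th & 0xFFFF;                 // old: drops the high 16 bits
    const foldedIndex  = th => (th & 0xFFFF) ^ (th >>> 16); // new: folds in all 32 bits

    const a = 0x0001ABCD;
    const b = 0x0002ABCD; // differs from `a` only in the high 16 bits
    console.log(lowBitsIndex(a) === lowBitsIndex(b)); // true: spurious collision
    console.log(foldedIndex(a) === foldedIndex(b));   // false: the hashes stay distinct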

Using the default filter lists, the known-token lookup
table ends up populated with 12,276 entries out of 65,536
slots (a fill rate of about 19%), which means that in
theory a large share of extracted tokens can be discarded
outright.

In practice, running the built-in
staticNetFilteringEngine.benchmark() with the default
filter lists, I find that 1,518,929 of the 4,441,891
extracted tokens were skipped, or 34%.
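
For reference, a hypothetical way to reproduce this from
uBO's background-page console; the
µBlock.staticNetFilteringEngine path is an assumption, and
the skipped/extracted counts come from instrumenting the
tokenizer rather than from benchmark()'s own output:

    // Hypothetical console session; the engine path is an assumption.
    µBlock.staticNetFilteringEngine.benchmark();
    console.log((1518929 / 4441891 * 100).toFixed(1) + '% skipped'); // 34.2% skipped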

Raymond Hill 2019-04-27 08:18:01 -04:00
parent 60938451ab
commit 96dce22218
3 changed files with 13 additions and 10 deletions

@@ -138,7 +138,7 @@ const µBlock = (function() { // jshint ignore:line
     // Read-only
     systemSettings: {
         compiledMagic: 12,  // Increase when compiled format changes
-        selfieMagic: 12     // Increase when selfie format changes
+        selfieMagic: 13     // Increase when selfie format changes
     },
     restoreBackupSettings: {

@@ -2326,7 +2326,6 @@ FilterContainer.prototype.freeze = function() {
     const filterDataHolderId = FilterDataHolder.fid;
     const redirectTypeValue = typeNameToTypeValue.redirect;
     const unserialize = µb.CompiledLineIO.unserialize;
-    const knownTokens = this.urlTokenizer.knownTokens;

     for ( const line of this.goodFilters ) {
         if ( this.badFilters.has(line) ) {
@@ -2358,7 +2357,7 @@ FilterContainer.prototype.freeze = function() {
                 entry.next = bucket;
             }
             this.dataFilters.set(tokenHash, entry);
-            knownTokens[tokenHash & 0xFFFF] = 1;
+            this.urlTokenizer.addKnownToken(tokenHash);
             continue;
         }
@@ -2405,7 +2404,7 @@ FilterContainer.prototype.freeze = function() {
             continue;
         }
-        knownTokens[tokenHash & 0xFFFF] = 1;
+        this.urlTokenizer.addKnownToken(tokenHash);
         if ( entry === undefined ) {
             bucket.set(tokenHash, filterFromCompiledData(fdata));

@@ -81,11 +81,15 @@
     resetKnownTokens() {
         this.knownTokens.fill(0);
-        this.knownTokens[this.dotTokenHash & 0xFFFF] = 1;
-        this.knownTokens[this.anyTokenHash & 0xFFFF] = 1;
-        this.knownTokens[this.anyHTTPSTokenHash & 0xFFFF] = 1;
-        this.knownTokens[this.anyHTTPTokenHash & 0xFFFF] = 1;
-        this.knownTokens[this.noTokenHash & 0xFFFF] = 1;
+        this.addKnownToken(this.dotTokenHash);
+        this.addKnownToken(this.anyTokenHash);
+        this.addKnownToken(this.anyHTTPSTokenHash);
+        this.addKnownToken(this.anyHTTPTokenHash);
+        this.addKnownToken(this.noTokenHash);
+    }
+
+    addKnownToken(th) {
+        this.knownTokens[th & 0xFFFF ^ th >>> 16] = 1;
     }

     // Tokenize on demand.
@@ -172,7 +176,7 @@
             th = th * 64 + v;
             n += 1;
         }
-        if ( knownTokens[th & 0xFFFF] !== 0 ) {
+        if ( knownTokens[th & 0xFFFF ^ th >>> 16] !== 0 ) {
             tokens[j+0] = th;
             tokens[j+1] = ti;
             j += 2;
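
Stepping back, the table acts as a single-hash Bloom
filter. The sketch below (hypothetical class and method
names, not uBO's actual code) makes the semantics explicit:
a zero slot proves a token hash was never registered, so
the tokenizer can discard the token outright; a non-zero
slot only means "possibly known", since two 32-bit hashes
can still fold to the same 16-bit slot.

    // A self-contained sketch of the known-token table's set semantics;
    // the class and method names here are hypothetical.
    class KnownTokenFilter {
        constructor() {
            this.slots = new Uint8Array(65536); // 2^16 slots, one byte each
        }
        reset() {
            this.slots.fill(0);
        }
        add(th) {
            // Same fold as addKnownToken(): 16-bit slot from a 32-bit hash.
            this.slots[(th & 0xFFFF) ^ (th >>> 16)] = 1;
        }
        mightContain(th) {
            // false => certainly unknown; true => possibly known (collisions allowed).
            return this.slots[(th & 0xFFFF) ^ (th >>> 16)] !== 0;
        }
    }

    const known = new KnownTokenFilter();
    known.add(0x2F3A91C4);                                    // hypothetical filter-token hash
    console.log(known.mightContain(0x2F3A91C4));              // true
    console.log(known.mightContain(0x11112222));              // false: safe to skip this token

A collision only costs a wasted lookup for a token that
turns out to match no filter; it can never cause a filter
to be missed, which is why a lossy 16-bit fold is
acceptable here.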