mirror of https://github.com/gorhill/uBlock.git
Increase resolution of known-token lookup table
Related commit:
- 69a43e07c4
Using 32 bits of the token hash rather than just the lower 16
bits helps discard more unknown tokens.

With the default filter lists, the known-token lookup table is
populated with 12,276 entries out of 65,536, which makes the
case that, in theory, a large number of tokens can be discarded
as unknown.

In practice, running the built-in
staticNetFilteringEngine.benchmark() with the default filter
lists, I find that 1,518,929 out of 4,441,891 extracted tokens
were skipped, or 34%.
parent 60938451ab
commit 96dce22218
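
The gist of the change, as a minimal standalone sketch (the
isKnownToken() helper is a name made up here for illustration; in the
diff below the read side is inlined in the tokenizer loop): instead of
indexing the 64 KiB lookup table with the lower 16 bits of the token
hash alone, the upper 16 bits are folded in with an XOR, so all 32 bits
contribute to the index while the table keeps its size.

// Sketch of the folded known-token index, assuming a 32-bit token
// hash `th`. Precedence note: `>>>` binds tighter than `&`, which
// binds tighter than `^`, so this parses as (th & 0xFFFF) ^ (th >>> 16).
const knownTokens = new Uint8Array(65536); // 64 KiB, same size as before

function addKnownToken(th) {
    knownTokens[th & 0xFFFF ^ th >>> 16] = 1;
}

function isKnownToken(th) { // illustrative helper, not in the diff
    return knownTokens[th & 0xFFFF ^ th >>> 16] !== 0;
}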
@@ -138,7 +138,7 @@ const µBlock = (function() { // jshint ignore:line
     // Read-only
     systemSettings: {
         compiledMagic: 12, // Increase when compiled format changes
-        selfieMagic: 12 // Increase when selfie format changes
+        selfieMagic: 13 // Increase when selfie format changes
     },

     restoreBackupSettings: {
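
Note on the hunk above: bumping selfieMagic (but not compiledMagic)
invalidates any previously saved selfie of the engine so that it gets
rebuilt under the new known-token indexing, while the compiled filter
format itself is unchanged.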
@@ -2326,7 +2326,6 @@ FilterContainer.prototype.freeze = function() {
     const filterDataHolderId = FilterDataHolder.fid;
     const redirectTypeValue = typeNameToTypeValue.redirect;
     const unserialize = µb.CompiledLineIO.unserialize;
-    const knownTokens = this.urlTokenizer.knownTokens;

     for ( const line of this.goodFilters ) {
         if ( this.badFilters.has(line) ) {
@@ -2358,7 +2357,7 @@ FilterContainer.prototype.freeze = function() {
                 entry.next = bucket;
             }
             this.dataFilters.set(tokenHash, entry);
-            knownTokens[tokenHash & 0xFFFF] = 1;
+            this.urlTokenizer.addKnownToken(tokenHash);
             continue;
         }

@@ -2405,7 +2404,7 @@ FilterContainer.prototype.freeze = function() {
             continue;
         }

-        knownTokens[tokenHash & 0xFFFF] = 1;
+        this.urlTokenizer.addKnownToken(tokenHash);

         if ( entry === undefined ) {
             bucket.set(tokenHash, filterFromCompiledData(fdata));
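
Both freeze() call sites above now route through
urlTokenizer.addKnownToken(), so the index computation lives in one
place instead of being duplicated as inlined `tokenHash & 0xFFFF`
expressions; this is also why the local knownTokens alias was dropped
in the first freeze() hunk.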
@@ -81,11 +81,15 @@

     resetKnownTokens() {
         this.knownTokens.fill(0);
-        this.knownTokens[this.dotTokenHash & 0xFFFF] = 1;
-        this.knownTokens[this.anyTokenHash & 0xFFFF] = 1;
-        this.knownTokens[this.anyHTTPSTokenHash & 0xFFFF] = 1;
-        this.knownTokens[this.anyHTTPTokenHash & 0xFFFF] = 1;
-        this.knownTokens[this.noTokenHash & 0xFFFF] = 1;
+        this.addKnownToken(this.dotTokenHash);
+        this.addKnownToken(this.anyTokenHash);
+        this.addKnownToken(this.anyHTTPSTokenHash);
+        this.addKnownToken(this.anyHTTPTokenHash);
+        this.addKnownToken(this.noTokenHash);
     }

+    addKnownToken(th) {
+        this.knownTokens[th & 0xFFFF ^ th >>> 16] = 1;
+    }
+
     // Tokenize on demand.
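
To see why folding in the upper bits helps, consider this small
illustrative check (both hash values are made up for the example): two
distinct 32-bit hashes that agree on their lower 16 bits collide under
the old index, but not under the folded one.

// Illustration only: a and b are made-up hashes sharing their low 16 bits.
const a = 0x0001ABCD;
const b = 0x0002ABCD;

console.log((a & 0xFFFF) === (b & 0xFFFF));
// true: the old index, low 16 bits only, cannot tell them apart

console.log((a & 0xFFFF ^ a >>> 16) === (b & 0xFFFF ^ b >>> 16));
// false: 0xABCC vs 0xABCF once the high bits are folded in

A collision of this kind means an unknown token aliases a slot set by a
known token and needlessly survives into filter lookup; folding in the
upper bits makes such aliasing rarer.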
@@ -172,7 +176,7 @@
                 th = th * 64 + v;
                 n += 1;
             }
-            if ( knownTokens[th & 0xFFFF] !== 0 ) {
+            if ( knownTokens[th & 0xFFFF ^ th >>> 16] !== 0 ) {
                 tokens[j+0] = th;
                 tokens[j+1] = ti;
                 j += 2;
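
For context, the hunk above sits in the tokenizer's hot loop. A
simplified reconstruction of that loop follows; the function name,
the charValues table, and the cap of 8 hashed characters are
assumptions made for this sketch, not the exact uBlock code.

// Simplified sketch of the tokenizer loop; not the exact uBlock code.
// `charValues` maps token characters (a-z, 0-9, ...) to 1..63, others to 0.
function extractTokens(url, charValues, knownTokens, tokens) {
    let j = 0;
    let i = 0;
    while ( i < url.length ) {
        let v = charValues[url.charCodeAt(i)] | 0;
        if ( v === 0 ) { i += 1; continue; }    // skip separator characters
        const ti = i;                           // token start position
        let th = v;                             // incremental token hash
        let n = 1;
        while ( ++i < url.length ) {
            v = charValues[url.charCodeAt(i)] | 0;
            if ( v === 0 ) { break; }           // end of token
            if ( n < 8 ) {                      // assumed cap on hashed length
                th = th * 64 + v;
                n += 1;
            }
        }
        // The change in the hunk above: index with all 32 bits folded
        // into 16, so unknown tokens are discarded more reliably.
        if ( knownTokens[th & 0xFFFF ^ th >>> 16] !== 0 ) {
            tokens[j+0] = th;
            tokens[j+1] = ti;
            j += 2;
        }
    }
    return j;                                   // number of slots written
}

With this shape, skipping a token costs only the table lookup; the 34%
of tokens skipped in the benchmark never reach the per-token filter
buckets at all.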