Squashed commit of the following:

commit 7c6cacc59b27660fabacb55d668ef099b222a9e6
Author: Raymond Hill <rhill@raymondhill.net>
Date:   Sat Nov 3 08:52:51 2018 -0300

    code review: finalize support for wasm-based hntrie

commit 8596ed80e3bdac2c36e3c860b51e7189f6bc8487
Merge: cbe1f2e 000eb82
Author: Raymond Hill <rhill@raymondhill.net>
Date:   Sat Nov 3 08:41:40 2018 -0300

    Merge branch 'master' of github.com:gorhill/uBlock into trie-wasm

commit cbe1f2e2f38484d42af3204ec7f1b5decd30f99e
Merge: 270fc7f dbb7e80
Author: Raymond Hill <rhill@raymondhill.net>
Date:   Fri Nov 2 17:43:20 2018 -0300

    Merge branch 'master' of github.com:gorhill/uBlock into trie-wasm

commit 270fc7f9b3b73d79e6355522c1a42ce782fe7e5c
Merge: d2a89cf d693d4f
Author: Raymond Hill <rhill@raymondhill.net>
Date:   Fri Nov 2 16:21:08 2018 -0300

    Merge branch 'master' of github.com:gorhill/uBlock into trie-wasm

commit d2a89cf28f0816ffd4617c2c7b4ccfcdcc30e1b4
Merge: d7afc78 649f82f
Author: Raymond Hill <rhill@raymondhill.net>
Date:   Fri Nov 2 14:54:58 2018 -0300

    Merge branch 'master' of github.com:gorhill/uBlock into trie-wasm

commit d7afc78b5f5675d7d34c5a1d0ec3099a77caef49
Author: Raymond Hill <rhill@raymondhill.net>
Date:   Fri Nov 2 13:56:11 2018 -0300

    finalize wasm-based hntrie implementation

commit e7b9e043cf36ad055791713e34eb0322dec84627
Author: Raymond Hill <rhill@raymondhill.net>
Date:   Fri Nov 2 08:14:02 2018 -0300

    add first-pass implementation of wasm version of hntrie

commit 1015cb34624f3ef73ace58b58fe4e03dfc59897f
Author: Raymond Hill <rhill@raymondhill.net>
Date:   Wed Oct 31 17:16:47 2018 -0300

    back up draft work toward experimenting with wasm hntries
This commit is contained in:
Raymond Hill 2018-11-03 08:58:46 -03:00
parent 000eb82f08
commit d7d544cda0
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
10 changed files with 47177 additions and 668 deletions

View File

@ -33,12 +33,12 @@ if ( vAPI.webextFlavor === undefined ) {
/******************************************************************************/ /******************************************************************************/
var µBlock = (function() { // jshint ignore:line const µBlock = (function() { // jshint ignore:line
var oneSecond = 1000, const oneSecond = 1000,
oneMinute = 60 * oneSecond; oneMinute = 60 * oneSecond;
var hiddenSettingsDefault = { const hiddenSettingsDefault = {
assetFetchTimeout: 30, assetFetchTimeout: 30,
autoUpdateAssetFetchPeriod: 120, autoUpdateAssetFetchPeriod: 120,
autoUpdatePeriod: 7, autoUpdatePeriod: 7,
@ -56,7 +56,7 @@ var µBlock = (function() { // jshint ignore:line
userResourcesLocation: 'unset' userResourcesLocation: 'unset'
}; };
var whitelistDefault = [ const whitelistDefault = [
'about-scheme', 'about-scheme',
'chrome-extension-scheme', 'chrome-extension-scheme',
'chrome-scheme', 'chrome-scheme',

View File

@ -1,7 +1,7 @@
/******************************************************************************* /*******************************************************************************
uBlock Origin - a browser extension to block requests. uBlock Origin - a browser extension to block requests.
Copyright (C) 2017 Raymond Hill Copyright (C) 2017-present Raymond Hill
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -19,6 +19,9 @@
Home: https://github.com/gorhill/uBlock Home: https://github.com/gorhill/uBlock
*/ */
/* globals WebAssembly */
/* exported hnTrieManager */
'use strict'; 'use strict';
/******************************************************************************* /*******************************************************************************
@ -37,89 +40,115 @@
For example, `www.abc.com` is deemed matching `abc.com`, because the former For example, `www.abc.com` is deemed matching `abc.com`, because the former
is a subdomain of the latter. The opposite is of course not true. is a subdomain of the latter. The opposite is of course not true.
The resulting read-only trie created as a result of using HNTrieBuilder are The resulting read-only tries created as a result of using hnTrieManager are
simply just typed arrays filled with integers. The matching algorithm is simply just typed arrays filled with integers. The matching algorithm is
just a matter of reading/comparing these integers, and further using them as just a matter of reading/comparing these integers, and further using them as
indices in the array as a way to move around in the trie. indices in the array as a way to move around in the trie.
There is still room for optimizations. Specifically, I could force the
strings to be properly sorted so that `HNTrie.matches` could bail earlier
when trying to find a matching descendant -- but suspect the gain would be
marginal, if measurable.
[1] To solve <https://github.com/gorhill/uBlock/issues/3193> [1] To solve <https://github.com/gorhill/uBlock/issues/3193>
*/ */
var HNTrieBuilder = function() { const hnTrieManager = {
this.reset(); tree: null,
}; treesz: 0,
trie: new Uint8Array(65536),
trie32: null,
triesz: 256, // bytes 0-254: decoded needle, byte 255: needle length
id: 0,
needle: '',
wasmLoading: null,
wasmMemory: null,
cleanupToken: 0,
cleanupTimer: undefined,
/******************************************************************************* reset: function() {
if ( this.wasmMemory === null && this.trie.byteLength > 65536 ) {
this.trie = new Uint8Array(65536);
this.trie32 = new Uint32Array(this.trie.buffer);
} else {
this.trie.fill(0);
}
this.triesz = 256;
this.needle = '';
this.id += 1;
},
A plain javascript array is used to build the trie. It will be cast into readyToUse: function() {
the appropriate read-only TypedArray[1] at vacuum time. return this.wasmLoading instanceof Promise
? this.wasmLoading
: Promise.resolve();
},
[1] Depending on the size: Uint8Array, Uint16Array, or Uint32Array. isValidRef: function(ref) {
return ref !== null && ref.id === this.id;
},
*/ setNeedle: function(needle) {
if ( needle !== this.needle ) {
HNTrieBuilder.prototype.reset = function() { const buf = this.trie;
this.buf = []; let i = needle.length;
this.bufsz = 0; buf[255] = i;
this.buf[0] = 0; while ( i-- ) {
this.buf[1] = 0; buf[i] = needle.charCodeAt(i);
this.buf[2] = 0; }
this.needle = needle;
}
return this; return this;
}; },
/******************************************************************************* matchesJS: function(itrie) {
const buf = this.trie;
Helpers for convenience. const buf32 = this.trie32;
let ineedle = buf[255];
*/
HNTrieBuilder.fromDomainOpt = function(domainOpt) {
var builder = new HNTrieBuilder();
builder.fromDomainOpt(domainOpt);
return builder.vacuum();
};
HNTrieBuilder.fromIterable = function(hostnames) {
var builder = new HNTrieBuilder();
builder.fromIterable(hostnames);
return builder.vacuum();
};
HNTrieBuilder.print = function(trie) {
var buf = trie.buf,
i = 0, cc = [], ic, indent = 0,
forks = [];
for (;;) { for (;;) {
if ( buf[i] !== 0 ) { ineedle -= 1;
forks.push(i, indent); const nchar = ineedle === -1 ? 0 : buf[ineedle];
for (;;) {
const tchar = buf[itrie+8]; // quick test: first character
if ( tchar === nchar ) { break; }
if ( tchar === 0 && nchar === 0x2E ) { return 1; }
itrie = buf32[itrie >>> 2];
if ( itrie === 0 ) { return 0; } // no more descendants
} }
cc.unshift(buf[i+2]); if ( nchar === 0 ) { return 1; }
for ( ic = 0; ic < buf[i+3]; ic++ ) { let lxtra = buf[itrie+9]; // length of extra characters
cc.unshift(buf[i+4+ic]); if ( lxtra !== 0 ) { // cell has extra characters
if ( lxtra > ineedle ) { return 0; }
let ixtra = itrie + 10;
lxtra += ixtra;
do {
ineedle -= 1;
if ( buf[ineedle] !== buf[ixtra] ) { return 0; }
ixtra += 1;
} while ( ixtra !== lxtra );
} }
console.log('\xB7'.repeat(indent) + String.fromCharCode.apply(null, cc)); itrie = buf32[itrie + 4 >>> 2];
indent += cc.length; if ( itrie === 0 ) {
cc = []; return ineedle === 0 || buf[ineedle-1] === 0x2E ? 1 : 0;
i = buf[i+1];
if ( i === 0 ) {
if ( forks.length === 0 ) { break; }
indent = forks.pop();
i = forks.pop();
i = buf[i];
} }
} }
}; },
matchesWASM: null,
matches: null,
/******************************************************************************* start: function() {
if ( this.trie32 === null ) {
this.trie32 = new Uint32Array(this.trie.buffer);
}
this.treesz = 0;
if ( this.tree === null ) {
this.tree = new Uint32Array(16384);
}
this.tree[0] = 0;
this.tree[1] = 0;
this.tree[2] = 0;
},
Since this trie is specialized for matching hostnames, the stored strings are /***************************************************************************
reversed internally, because of hostname comparison logic:
Since this trie is specialized for matching hostnames, the stored
strings are reversed internally, because of hostname comparison logic:
Correct matching: Correct matching:
index 0123456 index 0123456
@ -137,87 +166,57 @@ HNTrieBuilder.print = function(trie) {
*/ */
HNTrieBuilder.prototype.add = function(hn) { add: function(hn) {
var ichar = hn.length - 1; // 256 * 3 + 3 = 771
if ( this.treesz + 771 >= this.tree.length ) {
this.growTree();
}
let ichar = hn.length - 1;
if ( ichar === -1 ) { return; } if ( ichar === -1 ) { return; }
var c = hn.charCodeAt(ichar), let c = hn.charCodeAt(ichar),
i = 0, inext; i = 0, inext;
for (;;) { for (;;) {
if ( this.buf[i+2] !== c ) { // match not found if ( this.tree[i+2] !== c ) { // match not found
inext = this.buf[i]; // move to descendant inext = this.tree[i]; // move to descendant
if ( inext === 0 ) { break; } // no descendant if ( inext === 0 ) { break; } // no descendant
} else { // match found } else { // match found
if ( c === 0 ) { return; } if ( c === 0 ) { return; }
inext = this.buf[i+1]; // move to sibling inext = this.tree[i+1]; // move to sibling
ichar -= 1; ichar -= 1;
c = ichar === -1 ? 0 : hn.charCodeAt(ichar); c = ichar === -1 ? 0 : hn.charCodeAt(ichar);
} }
i = inext; i = inext;
} }
// Any new string added will always cause a new descendant to be created. // Any new string added will always cause a new descendant to be
// The only time this is not the case is when trying to store a string // created. The only time this is not the case is when trying to
// which is already in the trie. // store a string which is already in the trie.
inext = this.bufsz; // new descendant cell inext = this.treesz; // new descendant cell
this.buf[i] = inext; this.tree[i] = inext;
this.buf[inext+0] = 0; // jump index to descendant this.tree[inext+0] = 0; // jump index to descendant
this.buf[inext+1] = 0; // jump index to sibling this.tree[inext+1] = 0; // jump index to sibling
this.buf[inext+2] = c; // character code this.tree[inext+2] = c; // character code
this.bufsz += 3; this.treesz += 3;
if ( c === 0 ) { return; } // character zero is always last cell if ( c === 0 ) { return; } // character zero is always last cell
do { do {
i = inext; // new branch sprouting made from i = inext; // new branch sprouting made from
ichar -= 1; // all characters left to store ichar -= 1; // all characters left to store
c = ichar === -1 ? 0 : hn.charCodeAt(ichar); c = ichar === -1 ? 0 : hn.charCodeAt(ichar);
inext = this.bufsz; inext = this.treesz;
this.buf[i+1] = inext; this.tree[i+1] = inext;
this.buf[inext+0] = 0; this.tree[inext+0] = 0;
this.buf[inext+1] = 0; this.tree[inext+1] = 0;
this.buf[inext+2] = c; this.tree[inext+2] = c;
this.bufsz += 3; this.treesz += 3;
} while ( c!== 0 ); } while ( c!== 0 );
}; },
/******************************************************************************* growTree: function() {
let tree = new Uint32Array(this.tree.length + 16384);
tree.set(this.tree);
this.tree = tree;
},
Not using String.split('|') to avoid memory churning. /***************************************************************************
*/
HNTrieBuilder.prototype.fromDomainOpt = function(hostnames) {
return this.fromIterable(hostnames.split('|'));
};
HNTrieBuilder.prototype.fromIterable = function(hostnames) {
var hns = Array.from(hostnames).sort(function(a, b) {
return a.length - b.length;
});
// https://github.com/gorhill/uBlock/issues/3328
// Must sort from shortest to longest.
for ( var hn of hns ) {
this.add(hn);
}
return this;
};
/******************************************************************************/
HNTrieBuilder.prototype.matches = function(needle) {
var ichar = needle.length - 1,
buf = this.buf, i = 0, c;
for (;;) {
c = ichar === -1 ? 0 : needle.charCodeAt(ichar);
while ( buf[i+2] !== c ) {
i = buf[i];
if ( i === 0 ) { return false; }
}
if ( c === 0 ) { return true; }
i = buf[i+1];
if ( i === 0 ) { return c === 0x2E; }
ichar -= 1;
}
};
/*******************************************************************************
Before vacuuming, each cell is 3 entry-long: Before vacuuming, each cell is 3 entry-long:
- Jump index to descendant (if any) - Jump index to descendant (if any)
@ -225,9 +224,9 @@ HNTrieBuilder.prototype.matches = function(needle) {
- character code - character code
All strings stored in the un-vacuumed trie are zero-terminated, and the All strings stored in the un-vacuumed trie are zero-terminated, and the
character zero does occupy a cell like any other character. Let's use _ to character zero does occupy a cell like any other character. Let's
represent character zero for sake of comments. The asterisk will be used to use _ to represent character zero for sake of comments. The asterisk
highlight a node with a descendant. will be used to highlight a node with a descendant.
Cases, before vacuuming: Cases, before vacuuming:
@ -251,14 +250,14 @@ HNTrieBuilder.prototype.matches = function(needle) {
_ -- b -- . -- c -- o -- m _ -- b -- . -- c -- o -- m
_ -- a _ -- a
Vacuuming is the process of merging sibling cells with no descendants. Cells Vacuuming is the process of merging sibling cells with no descendants.
with descendants can't be merged. Cells with descendants can't be merged.
Each time we arrive at the end of a horizontal branch (sibling jump index is Each time we arrive at the end of a horizontal branch (sibling jump
0), we walk back to the nearest previous node with descendants, and repeat index is 0), we walk back to the nearest previous node with descendants,
the process. Since there is no index information on where to come back, a and repeat the process. Since there is no index information on where to
stack is used to remember cells with descendants (descendant jump index is come back, a stack is used to remember cells with descendants (descendant
non zero) encountered on the way jump index is non zero) encountered on the way
After vacuuming, each cell is 4+n entry-long: After vacuuming, each cell is 4+n entry-long:
- Jump index to descendant (if any) - Jump index to descendant (if any)
@ -292,40 +291,55 @@ HNTrieBuilder.prototype.matches = function(needle) {
It's not possible for a character zero cell to have next siblings. It's not possible for a character zero cell to have next siblings.
This will have to be taken into account during both vacuuming and matching. This will have to be taken into account during both vacuuming and
matching.
Character zero cells with no descendant are discarded during vacuuming. Character zero cells with no descendant are discarded during vacuuming.
Character zero cells with a descendant, or character zero cells which are a Character zero cells with a descendant, or character zero cells which
descendant are kept in the vacuumed trie. are a descendant are kept in the vacuumed trie.
A vacuumed trie is very efficient memory- and lookup-wise, but is also A vacuumed trie is very efficient memory- and lookup-wise, but is also
read-only: no string can be added or removed. The read-only trie is really read-only: no string can be added or removed. The read-only trie is
just a self-sufficient array of integers, and can easily be exported/imported really just a self-sufficient array of integers, and can easily be
as a JSON array. It is theoretically possible to "decompile" a trie (vacuumed exported/imported as a JSON array. It is theoretically possible to
or not) into the set of strings originally added to it (in the order they "decompile" a trie (vacuumed or not) into the set of strings originally
were added with the current implementation), but so far I do not need this added to it (in the order they were added with the current
feature. implementation), but so far I do not need this feature.
TODO: It's possible to build the vacuumed trie on the fly as items are New vacuum output array format:
added to it. I need to carefully list all possible cases which can arise byte 0..3: offset to descendant
at insertion time. The benefits will be: faster creation time (expected), no byte 4..7: offset to sibling
longer read-only trie (items can be added at any time). byte 8: first character
byte 9: number of extra characters
Offset & count values are little-endian.
4 + 4 + 1 + 1 = 10 bytes for one character, otherwise
4 + 4 + 1 + 1 + n = 10 + n bytes for one + n character(s)
Each cell is padded so that the next cell starts on a 4-byte boundary.
*/ */
HNTrieBuilder.prototype.vacuum = function() { finish: function() {
if ( this.bufsz === 0 ) { return null; } if ( this.treesz === 0 ) { return null; }
var input = this.buf, const input = this.tree,
output = [], outsz = 0, iout0 = this.triesz,
forks = [], forks = [];
iin = 0, iout; let output = this.trie,
output32 = this.trie32,
iout1 = iout0,
iout2 = output.byteLength,
iin = 0;
for (;;) { for (;;) {
iout = outsz; if ( (iout1 + 266) >= iout2 ) {
output[iout+0] = 0; this.growTrie();
output[iout+1] = 0; output = this.trie;
output[iout+2] = input[iin+2]; // first character output32 = this.trie32;
output[iout+3] = 0; iout2 = output.byteLength;
outsz += 4; }
let iout = iout1;
output32[iout >>> 2] = 0;
output32[iout + 4 >>> 2] = 0;
output[iout+8] = input[iin+2]; // first character
output[iout+9] = 0; // extra character count
iout1 += 10;
if ( input[iin] !== 0 ) { // cell with descendant if ( input[iin] !== 0 ) { // cell with descendant
forks.push(iout, iin); // defer processing forks.push(iout, iin); // defer processing
} }
@ -334,264 +348,184 @@ HNTrieBuilder.prototype.vacuum = function() {
if ( iin === 0 ) { break; } // no more sibling cell if ( iin === 0 ) { break; } // no more sibling cell
if ( input[iin] !== 0 ) { break; } // cell with a descendant if ( input[iin] !== 0 ) { break; } // cell with a descendant
if ( input[iin+2] === 0 ) { break; } // don't merge \x00 if ( input[iin+2] === 0 ) { break; } // don't merge \x00
output[outsz] = input[iin+2]; // add character data output[iout1] = input[iin+2]; // add character data
outsz += 1; iout1 += 1;
} }
if ( outsz !== iout + 4 ) { // cells were merged if ( iout1 !== iout + 10 ) { // cells were merged
output[iout+3] = outsz - iout - 4; // so adjust count output[iout+9] = iout1 - iout - 10; // so adjust count
} }
iout1 = (iout1 + 3) & ~3; // align to i32
if ( iin !== 0 && input[iin] !== 0 ) { // can't merge this cell if ( iin !== 0 && input[iin] !== 0 ) { // can't merge this cell
output[iout+1] = outsz; output32[iout + 4 >>> 2] = iout1;
continue; continue;
} }
if ( forks.length === 0 ) { break; } // no more descendants: bye if ( forks.length === 0 ) { break; } // no more descendants: bye
iin = forks.pop(); // process next descendant iin = forks.pop(); // process next descendant
iout = forks.pop(); iout = forks.pop();
iin = input[iin]; iin = input[iin];
output[iout] = outsz; output32[iout >>> 2] = iout1;
} }
var trie; // pick optimal read-only this.triesz = iout1;
if ( outsz < 256 ) { // container array. this.cleanupAsync();
trie = new this.HNTrie8(output, outsz); return new HNTrieRef(iout0);
} else if ( outsz < 65536 ) { },
trie = new this.HNTrie16(output, outsz);
fromIterable: function(hostnames) {
this.start();
const hns = Array.from(hostnames).sort(function(a, b) {
return a.length - b.length;
});
// https://github.com/gorhill/uBlock/issues/3328
// Must sort from shortest to longest.
for ( let hn of hns ) {
this.add(hn);
}
return this.finish();
},
fromDomainOpt: function(hostnames) {
return this.fromIterable(hostnames.split('|'));
},
growTrie: function() {
let trie;
if ( this.wasmMemory === null ) {
trie = new Uint8Array(this.trie.byteLength + 65536);
trie.set(this.trie);
} else { } else {
trie = new this.HNTrie32(output, outsz); this.wasmMemory.grow(1);
trie = new Uint8Array(this.wasmMemory.buffer);
} }
this.reset(); // free working array this.trie = trie;
return trie; this.trie32 = new Uint32Array(this.trie.buffer);
}; },
/******************************************************************************* cleanupAsync: function() {
if ( this.cleanupTimer === undefined ) {
The following internal classes are the actual output of the vacuum() method. this.cleanupToken = this.triesz;
this.cleanupTimer = setTimeout(( ) => {
They use the minimal amount of data to be able to efficiently lookup strings this.cleanupTimer = undefined;
in a read-only trie. if ( this.cleanupToken !== this.triesz ) {
this.cleanupAsync();
Given that javascript optimizers mind that the type of an argument passed to } else {
a function always stays the same each time the function is called, there need this.tree = null;
to be three separate implementation of matches() to allow the javascript }
optimizer to do its job. }, 30000);
}
The matching code deals only with looking up values in a TypedArray (beside },
calls to String.charCodeAt), so I expect this to be fast and good candidate
for optimization by javascript engines.
// For debugging purpose
// TODO: currently broken, needs to be fixed as per new buffer format.
/*
print: function(offset) {
let i = offset, cc = [], indent = 0,
forks = [];
for (;;) {
if ( buf[i] !== 0 ) {
forks.push(i, indent);
}
cc.unshift(buf[i+2]);
for ( let ic = 0; ic < buf[i+3]; ic++ ) {
cc.unshift(buf[i+4+ic]);
}
console.log('\xB7'.repeat(indent) + String.fromCharCode.apply(null, cc));
indent += cc.length;
cc = [];
i = buf[i+1];
if ( i === 0 ) {
if ( forks.length === 0 ) { break; }
indent = forks.pop();
i = forks.pop();
i = buf[i];
}
}
},
*/ */
HNTrieBuilder.prototype.HNTrie8 = function(buf, bufsz) {
this.buf = new Uint8Array(buf.slice(0, bufsz));
}; };
HNTrieBuilder.prototype.HNTrie8.prototype.matches = function(needle) { /******************************************************************************/
var ichar = needle.length,
i = 0, c1, c2, ccnt, ic, i1, i2;
for (;;) {
ichar -= 1;
c1 = ichar === -1 ? 0 : needle.charCodeAt(ichar);
while ( (c2 = this.buf[i+2]) !== c1 ) { // quick test: first character
if ( c2 === 0 && c1 === 0x2E ) { return true; }
i = this.buf[i]; // next descendant
if ( i === 0 ) { return false; } // no more descendants
}
if ( c1 === 0 ) { return true; }
ccnt = this.buf[i+3];
if ( ccnt !== 0 ) { // cell is only one character
if ( ccnt > ichar ) { return false; }
ic = ccnt; i1 = ichar-1; i2 = i+4;
while ( ic-- && needle.charCodeAt(i1-ic) === this.buf[i2+ic] );
if ( ic !== -1 ) { return false; }
ichar -= ccnt;
}
i = this.buf[i+1]; // next sibling
if ( i === 0 ) {
return ichar === 0 || needle.charCodeAt(ichar-1) === 0x2E;
}
}
};
HNTrieBuilder.prototype.HNTrie16 = function(buf, bufsz) { (function() {
this.buf = new Uint16Array(buf.slice(0, bufsz)); // Default to javascript version.
}; hnTrieManager.matches = hnTrieManager.matchesJS;
HNTrieBuilder.prototype.HNTrie16.prototype.matches = function(needle) { if (
var ichar = needle.length, typeof WebAssembly !== 'object' ||
i = 0, c1, c2, ccnt, ic, i1, i2; typeof WebAssembly.instantiateStreaming !== 'function'
for (;;) { ) {
ichar -= 1; return;
c1 = ichar === -1 ? 0 : needle.charCodeAt(ichar);
while ( (c2 = this.buf[i+2]) !== c1 ) { // quick test: first character
if ( c2 === 0 && c1 === 0x2E ) { return true; }
i = this.buf[i]; // next descendant
if ( i === 0 ) { return false; } // no more descendants
} }
if ( c1 === 0 ) { return true; }
ccnt = this.buf[i+3];
if ( ccnt !== 0 ) { // cell is only one character
if ( ccnt > ichar ) { return false; }
ic = ccnt; i1 = ichar-1; i2 = i+4;
while ( ic-- && needle.charCodeAt(i1-ic) === this.buf[i2+ic] );
if ( ic !== -1 ) { return false; }
ichar -= ccnt;
}
i = this.buf[i+1]; // next sibling
if ( i === 0 ) {
return ichar === 0 || needle.charCodeAt(ichar-1) === 0x2E;
}
}
};
HNTrieBuilder.prototype.HNTrie32 = function(buf, bufsz) { // Soft-dependency on vAPI so that the code here can be used outside of
this.buf = new Uint32Array(buf.slice(0, bufsz)); // uBO (i.e. tests, benchmarks)
}; if (
typeof vAPI === 'object' &&
HNTrieBuilder.prototype.HNTrie32.prototype.matches = function(needle) { vAPI.webextFlavor.soup.has('firefox') === false
var ichar = needle.length, ) {
i = 0, c1, c2, ccnt, ic, i1, i2; return;
for (;;) {
ichar -= 1;
c1 = ichar === -1 ? 0 : needle.charCodeAt(ichar);
while ( (c2 = this.buf[i+2]) !== c1 ) { // quick test: first character
if ( c2 === 0 && c1 === 0x2E ) { return true; }
i = this.buf[i]; // next descendant
if ( i === 0 ) { return false; } // no more descendants
} }
if ( c1 === 0 ) { return true; }
ccnt = this.buf[i+3];
if ( ccnt !== 0 ) { // cell is only one character
if ( ccnt > ichar ) { return false; }
ic = ccnt; i1 = ichar-1; i2 = i+4;
while ( ic-- && needle.charCodeAt(i1-ic) === this.buf[i2+ic] );
if ( ic !== -1 ) { return false; }
ichar -= ccnt;
}
i = this.buf[i+1]; // next sibling
if ( i === 0 ) {
return ichar === 0 || needle.charCodeAt(ichar-1) === 0x2E;
}
}
};
/******************************************************************************* // The wasm module will work only if CPU is natively little-endian,
// as we use native uint32 array in our trie-creation js code.
const uint32s = new Uint32Array(1);
const uint8s = new Uint8Array(uint32s.buffer);
uint32s[0] = 1;
if ( uint8s[0] !== 1 ) { return; }
Experimenting: WebAssembly version. let workingDir;
Developed using this simple online tool: https://wasdk.github.io/WasmFiddle/
>>> start of C code
unsigned short buffer[0];
int matches(int id, int cclen)
{ {
unsigned short* cc0 = &buffer[0]; const url = document.currentScript.src;
unsigned short* cc = cc0 + cclen; const match = /[^\/]+$/.exec(url);
unsigned short* cell0 = &buffer[512+id]; workingDir = match !== null
unsigned short* cell = cell0; ? url.slice(0, match.index)
unsigned short* ww; : '';
int c1, c2, ccnt;
for (;;) {
c1 = cc <= cc0 ? 0 : *--cc;
for (;;) {
c2 = cell[2];
if ( c2 == c1 ) { break; }
if ( c2 == 0 && c1 == 0x2E ) { return 1; }
if ( cell[0] == 0 ) { return 0; }
cell = cell0 + cell[0];
} }
if ( c1 == 0 ) { return 1; }
ccnt = cell[3];
if ( ccnt != 0 ) {
if ( cc - ccnt < cc0 ) { return 0; }
ww = cell + 4;
while ( ccnt-- ) {
if ( *--cc != *ww++ ) { return 0; }
}
}
if ( cell[1] == 0 ) {
if ( cc == cc0 ) { return 1; }
if ( *--cc == 0x2E ) { return 1; }
return 0;
}
cell = cell0 + cell[1];
}
}
int getLinearMemoryOffset() {
return (int)&buffer[0];
}
<<< end of C code
Observations: const memory = new WebAssembly.Memory({ initial: 1 });
- When growing memory, we must re-create the typed array js-side. The content
of the array is preserved by grow().
- It's slower than the javascript version... Possible explanations:
- Call overhead: https://github.com/WebAssembly/design/issues/1120
- Having to copy whole input string in buffer before call.
var HNTrie16wasm = (function() { hnTrieManager.wasmLoading = WebAssembly.instantiateStreaming(
var module; fetch(workingDir + 'wasm/hntrie.wasm', { mode: 'same-origin' }),
var instance; { imports: { memory } }
var memory; ).then(result => {
var memoryOrigin = 0; hnTrieManager.wasmLoading = null;
var memoryUsed = 1024; if ( !result || !result.instance ) { return; }
var cbuffer; const pageCount = hnTrieManager.trie.byteLength >>> 16;
var tbuffer; if ( pageCount > 1 ) {
var tbufferSize = 0; memory.grow(pageCount - 1);
var matchesFn;
var init = function() {
module = new WebAssembly.Module(new Uint8Array([0,97,115,109,1,0,0,0,1,139,128,128,128,0,2,96,2,127,127,1,127,96,0,1,127,3,131,128,128,128,0,2,0,1,4,132,128,128,128,0,1,112,0,0,5,131,128,128,128,0,1,0,1,6,129,128,128,128,0,0,7,172,128,128,128,0,3,6,109,101,109,111,114,121,2,0,7,109,97,116,99,104,101,115,0,0,21,103,101,116,76,105,110,101,97,114,77,101,109,111,114,121,79,102,102,115,101,116,0,1,10,217,130,128,128,0,2,202,130,128,128,0,1,5,127,32,1,65,1,116,65,12,106,33,3,32,0,65,1,116,65,140,8,106,34,2,33,0,2,64,2,64,2,64,2,64,2,64,2,64,3,64,65,0,33,5,2,64,32,3,65,12,77,13,0,32,3,65,126,106,34,3,47,1,0,33,5,11,2,64,32,5,32,0,47,1,4,34,1,70,13,0,2,64,32,5,65,46,71,13,0,3,64,32,1,65,255,255,3,113,69,13,5,32,0,47,1,0,34,1,69,13,6,32,2,32,1,65,1,116,106,34,0,47,1,4,34,1,65,46,71,13,0,12,2,11,11,3,64,32,0,47,1,0,34,1,69,13,3,32,5,32,2,32,1,65,1,116,106,34,0,47,1,4,71,13,0,11,11,65,1,33,6,32,5,69,13,5,2,64,2,64,32,0,47,1,6,34,1,69,13,0,32,3,32,1,65,1,116,107,65,12,73,13,8,32,1,65,127,115,33,5,32,0,65,8,106,33,1,3,64,32,5,65,1,106,34,5,69,13,1,32,1,47,1,0,33,4,32,1,65,2,106,33,1,32,4,32,3,65,126,106,34,3,47,1,0,70,13,0,12,2,11,11,32,0,47,1,2,34,1,69,13,5,32,2,32,1,65,1,116,106,33,0,12,1,11,11,65,0,15,11,65,0,15,11,65,1,15,11,65,0,15,11,32,3,65,12,70,13,0,32,3,65,126,106,47,1,0,65,46,70,33,6,11,32,6,15,11,65,0,11,132,128,128,128,0,0,65,12,11]));
instance = new WebAssembly.Instance(module);
memory = instance.exports.memory;
memoryOrigin = instance.exports.getLinearMemoryOffset();
cbuffer = new Uint16Array(memory.buffer, memoryOrigin, 512);
tbuffer = new Uint16Array(memory.buffer, memoryOrigin + 1024);
memoryUsed = memoryOrigin + 1024;
matchesFn = instance.exports.matches;
};
return {
create: function(data) {
if ( module === undefined ) { init(); }
var bytesNeeded = memoryUsed + ((data.length * 2 + 3) & ~3);
if ( bytesNeeded > memory.buffer.byteLength ) {
memory.grow((bytesNeeded - memory.buffer.byteLength + 65535) >>> 16);
cbuffer = new Uint16Array(memory.buffer, memoryOrigin, 512);
tbuffer = new Uint16Array(memory.buffer, memoryOrigin + 1024);
} }
for ( var i = 0, j = tbufferSize; i < data.length; i++, j++ ) { const trie = new Uint8Array(memory.buffer);
tbuffer[j] = data[i]; trie.set(hnTrieManager.trie);
hnTrieManager.trie = trie;
if ( hnTrieManager.trie32 !== null ) {
hnTrieManager.trie32 = new Uint32Array(memory.buffer);
} }
var id = tbufferSize; hnTrieManager.wasmMemory = memory;
tbufferSize += data.length; hnTrieManager.matchesWASM = result.instance.exports.matches;
if ( tbufferSize & 1 ) { tbufferSize += 1; } hnTrieManager.matches = hnTrieManager.matchesWASM;
memoryUsed += tbufferSize * 2; }).catch(reason => {
return id; hnTrieManager.wasmLoading = null;
}, console.error(reason);
reset: function() { });
module = undefined;
instance = undefined;
memory = undefined;
memory.grow(1);
memoryUsed = 1024;
cbuffer = undefined;
tbuffer = undefined;
tbufferSize = 0;
},
matches: function(id, hn) {
var len = hn.length;
if ( len > 512 ) {
hn = hn.slice(-512);
var pos = hn.indexOf('.');
if ( pos !== 0 ) {
hn = hn.slice(pos + 1);
}
len = hn.length;
}
var needle = cbuffer, i = len;
while ( i-- ) {
needle[i] = hn.charCodeAt(i);
}
return matchesFn(id, len) === 1;
}
};
})(); })();
*/
/******************************************************************************/
const HNTrieRef = function(offset) {
this.id = hnTrieManager.id;
this.offset = offset;
};
HNTrieRef.prototype = {
isValid: function() {
return this.id === hnTrieManager.id;
},
matches: function(needle) {
return hnTrieManager.setNeedle(needle).matches(this.offset);
},
matchesJS: function(needle) {
return hnTrieManager.setNeedle(needle).matchesJS(this.offset);
},
matchesWASM: function(needle) {
return hnTrieManager.setNeedle(needle).matchesWASM(this.offset);
},
};

View File

@ -29,7 +29,7 @@
/******************************************************************************/ /******************************************************************************/
var µb = µBlock; const µb = µBlock;
/******************************************************************************/ /******************************************************************************/
@ -287,7 +287,12 @@ var onFirstFetchReady = function(fetched) {
onVersionReady(fetched.version); onVersionReady(fetched.version);
onCommandShortcutsReady(fetched.commandShortcuts); onCommandShortcutsReady(fetched.commandShortcuts);
µb.loadPublicSuffixList(onPSLReady); Promise.all([
µb.loadPublicSuffixList(),
µb.staticNetFilteringEngine.readyToUse()
]).then(( ) => {
onPSLReady();
});
µb.loadRedirectResources(); µb.loadRedirectResources();
}; };

View File

@ -20,7 +20,7 @@
*/ */
/* jshint bitwise: false */ /* jshint bitwise: false */
/* global punycode, HNTrieBuilder */ /* global punycode, hnTrieManager */
'use strict'; 'use strict';
@ -30,7 +30,7 @@
/******************************************************************************/ /******************************************************************************/
var µb = µBlock; const µb = µBlock;
// fedcba9876543210 // fedcba9876543210
// | | ||| // | | |||
@ -43,15 +43,15 @@ var µb = µBlock;
// | +-------- bit 4- 8: type [0 - 31] // | +-------- bit 4- 8: type [0 - 31]
// +------------- bit 9-15: unused // +------------- bit 9-15: unused
var BlockAction = 0 << 0; const BlockAction = 0 << 0;
var AllowAction = 1 << 0; const AllowAction = 1 << 0;
var Important = 1 << 1; const Important = 1 << 1;
var AnyParty = 0 << 2; const AnyParty = 0 << 2;
var FirstParty = 1 << 2; const FirstParty = 1 << 2;
var ThirdParty = 2 << 2; const ThirdParty = 2 << 2;
var AnyType = 0 << 4; const AnyType = 0 << 4;
var typeNameToTypeValue = { const typeNameToTypeValue = {
'no_type': 0 << 4, 'no_type': 0 << 4,
'stylesheet': 1 << 4, 'stylesheet': 1 << 4,
'image': 2 << 4, 'image': 2 << 4,
@ -75,9 +75,9 @@ var typeNameToTypeValue = {
'webrtc': 19 << 4, 'webrtc': 19 << 4,
'unsupported': 20 << 4 'unsupported': 20 << 4
}; };
var otherTypeBitValue = typeNameToTypeValue.other; const otherTypeBitValue = typeNameToTypeValue.other;
var typeValueToTypeName = { const typeValueToTypeName = {
1: 'stylesheet', 1: 'stylesheet',
2: 'image', 2: 'image',
3: 'object', 3: 'object',
@ -100,15 +100,15 @@ var typeValueToTypeName = {
20: 'unsupported' 20: 'unsupported'
}; };
var BlockAnyTypeAnyParty = BlockAction | AnyType | AnyParty; const BlockAnyTypeAnyParty = BlockAction | AnyType | AnyParty;
var BlockAnyType = BlockAction | AnyType; const BlockAnyType = BlockAction | AnyType;
var BlockAnyParty = BlockAction | AnyParty; const BlockAnyParty = BlockAction | AnyParty;
var AllowAnyTypeAnyParty = AllowAction | AnyType | AnyParty; const AllowAnyTypeAnyParty = AllowAction | AnyType | AnyParty;
var AllowAnyType = AllowAction | AnyType; const AllowAnyType = AllowAction | AnyType;
var AllowAnyParty = AllowAction | AnyParty; const AllowAnyParty = AllowAction | AnyParty;
var genericHideException = AllowAction | AnyParty | typeNameToTypeValue.generichide, const genericHideException = AllowAction | AnyParty | typeNameToTypeValue.generichide,
genericHideImportant = BlockAction | AnyParty | typeNameToTypeValue.generichide | Important; genericHideImportant = BlockAction | AnyParty | typeNameToTypeValue.generichide | Important;
// ABP filters: https://adblockplus.org/en/filters // ABP filters: https://adblockplus.org/en/filters
@ -119,7 +119,7 @@ var genericHideException = AllowAction | AnyParty | typeNameToTypeValue.generich
// See the following as short-lived registers, used during evaluation. They are // See the following as short-lived registers, used during evaluation. They are
// valid until the next evaluation. // valid until the next evaluation.
var pageHostnameRegister = '', let pageHostnameRegister = '',
requestHostnameRegister = ''; requestHostnameRegister = '';
//var filterRegister = null; //var filterRegister = null;
//var categoryRegister = ''; //var categoryRegister = '';
@ -127,13 +127,13 @@ var pageHostnameRegister = '',
// Local helpers // Local helpers
// Be sure to not confuse 'example.com' with 'anotherexample.com' // Be sure to not confuse 'example.com' with 'anotherexample.com'
var isFirstParty = function(domain, hostname) { const isFirstParty = function(domain, hostname) {
return hostname.endsWith(domain) && return hostname.endsWith(domain) &&
(hostname.length === domain.length || (hostname.length === domain.length ||
hostname.charCodeAt(hostname.length - domain.length - 1) === 0x2E /* '.' */); hostname.charCodeAt(hostname.length - domain.length - 1) === 0x2E /* '.' */);
}; };
var normalizeRegexSource = function(s) { const normalizeRegexSource = function(s) {
try { try {
var re = new RegExp(s); var re = new RegExp(s);
return re.source; return re.source;
@ -143,12 +143,12 @@ var normalizeRegexSource = function(s) {
return ''; return '';
}; };
var rawToRegexStr = function(s, anchor) { const rawToRegexStr = function(s, anchor) {
var me = rawToRegexStr; let me = rawToRegexStr;
// https://www.loggly.com/blog/five-invaluable-techniques-to-improve-regex-performance/ // https://www.loggly.com/blog/five-invaluable-techniques-to-improve-regex-performance/
// https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions // https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions
// Also: remove leading/trailing wildcards -- there is no point. // Also: remove leading/trailing wildcards -- there is no point.
var reStr = s.replace(me.escape1, '\\$&') let reStr = s.replace(me.escape1, '\\$&')
.replace(me.escape2, '(?:[^%.0-9a-z_-]|$)') .replace(me.escape2, '(?:[^%.0-9a-z_-]|$)')
.replace(me.escape3, '') .replace(me.escape3, '')
.replace(me.escape4, '[^ ]*?'); .replace(me.escape4, '[^ ]*?');
@ -175,7 +175,7 @@ rawToRegexStr.reTextHostnameAnchor2 = '^[a-z-]+://(?:[^/?#]+)?';
const filterDataSerialize = µb.CompiledLineIO.serialize; const filterDataSerialize = µb.CompiledLineIO.serialize;
var toLogDataInternal = function(categoryBits, tokenHash, filter) { const toLogDataInternal = function(categoryBits, tokenHash, filter) {
if ( filter === null ) { return undefined; } if ( filter === null ) { return undefined; }
let logData = filter.logData(); let logData = filter.logData();
logData.compiled = filterDataSerialize([ logData.compiled = filterDataSerialize([
@ -209,7 +209,7 @@ var toLogDataInternal = function(categoryBits, tokenHash, filter) {
}; };
// First character of match must be within the hostname part of the url. // First character of match must be within the hostname part of the url.
var isHnAnchored = function(url, matchStart) { const isHnAnchored = function(url, matchStart) {
var hnStart = url.indexOf('://'); var hnStart = url.indexOf('://');
if ( hnStart === -1 ) { return false; } if ( hnStart === -1 ) { return false; }
hnStart += 3; hnStart += 3;
@ -222,9 +222,9 @@ var isHnAnchored = function(url, matchStart) {
return url.charCodeAt(matchStart - 1) === 0x2E; return url.charCodeAt(matchStart - 1) === 0x2E;
}; };
var reURLPostHostnameAnchors = /[\/?#]/; const reURLPostHostnameAnchors = /[\/?#]/;
var arrayStrictEquals = function(a, b) { const arrayStrictEquals = function(a, b) {
var n = a.length; var n = a.length;
if ( n !== b.length ) { return false; } if ( n !== b.length ) { return false; }
var isArray, x, y; var isArray, x, y;
@ -251,22 +251,22 @@ var arrayStrictEquals = function(a, b) {
**/ **/
var filterClasses = [], const filterClasses = [];
filterClassIdGenerator = 0; let filterClassIdGenerator = 0;
var registerFilterClass = function(ctor) { const registerFilterClass = function(ctor) {
var fid = filterClassIdGenerator++; let fid = filterClassIdGenerator++;
ctor.fid = ctor.prototype.fid = fid; ctor.fid = ctor.prototype.fid = fid;
filterClasses[fid] = ctor; filterClasses[fid] = ctor;
}; };
var filterFromCompiledData = function(args) { const filterFromCompiledData = function(args) {
return filterClasses[args[0]].load(args); return filterClasses[args[0]].load(args);
}; };
/******************************************************************************/ /******************************************************************************/
var FilterTrue = function() { const FilterTrue = function() {
}; };
FilterTrue.prototype.match = function() { FilterTrue.prototype.match = function() {
@ -297,7 +297,7 @@ registerFilterClass(FilterTrue);
/******************************************************************************/ /******************************************************************************/
var FilterPlain = function(s, tokenBeg) { const FilterPlain = function(s, tokenBeg) {
this.s = s; this.s = s;
this.tokenBeg = tokenBeg; this.tokenBeg = tokenBeg;
}; };
@ -330,7 +330,7 @@ registerFilterClass(FilterPlain);
/******************************************************************************/ /******************************************************************************/
var FilterPlainPrefix0 = function(s) { const FilterPlainPrefix0 = function(s) {
this.s = s; this.s = s;
}; };
@ -362,7 +362,7 @@ registerFilterClass(FilterPlainPrefix0);
/******************************************************************************/ /******************************************************************************/
var FilterPlainPrefix1 = function(s) { const FilterPlainPrefix1 = function(s) {
this.s = s; this.s = s;
}; };
@ -394,7 +394,7 @@ registerFilterClass(FilterPlainPrefix1);
/******************************************************************************/ /******************************************************************************/
var FilterPlainHostname = function(s) { const FilterPlainHostname = function(s) {
this.s = s; this.s = s;
}; };
@ -429,7 +429,7 @@ registerFilterClass(FilterPlainHostname);
/******************************************************************************/ /******************************************************************************/
var FilterPlainLeftAnchored = function(s) { const FilterPlainLeftAnchored = function(s) {
this.s = s; this.s = s;
}; };
@ -461,7 +461,7 @@ registerFilterClass(FilterPlainLeftAnchored);
/******************************************************************************/ /******************************************************************************/
var FilterPlainRightAnchored = function(s) { const FilterPlainRightAnchored = function(s) {
this.s = s; this.s = s;
}; };
@ -493,7 +493,7 @@ registerFilterClass(FilterPlainRightAnchored);
/******************************************************************************/ /******************************************************************************/
var FilterExactMatch = function(s) { const FilterExactMatch = function(s) {
this.s = s; this.s = s;
}; };
@ -525,7 +525,7 @@ registerFilterClass(FilterExactMatch);
/******************************************************************************/ /******************************************************************************/
var FilterPlainHnAnchored = function(s) { const FilterPlainHnAnchored = function(s) {
this.s = s; this.s = s;
}; };
@ -558,7 +558,7 @@ registerFilterClass(FilterPlainHnAnchored);
/******************************************************************************/ /******************************************************************************/
var FilterGeneric = function(s, anchor) { const FilterGeneric = function(s, anchor) {
this.s = s; this.s = s;
this.anchor = anchor; this.anchor = anchor;
}; };
@ -603,7 +603,7 @@ registerFilterClass(FilterGeneric);
/******************************************************************************/ /******************************************************************************/
var FilterGenericHnAnchored = function(s) { const FilterGenericHnAnchored = function(s) {
this.s = s; this.s = s;
}; };
@ -642,7 +642,7 @@ registerFilterClass(FilterGenericHnAnchored);
/******************************************************************************/ /******************************************************************************/
var FilterGenericHnAndRightAnchored = function(s) { const FilterGenericHnAndRightAnchored = function(s) {
FilterGenericHnAnchored.call(this, s); FilterGenericHnAnchored.call(this, s);
}; };
@ -682,7 +682,7 @@ registerFilterClass(FilterGenericHnAndRightAnchored);
/******************************************************************************/ /******************************************************************************/
var FilterRegex = function(s) { const FilterRegex = function(s) {
this.re = s; this.re = s;
}; };
@ -723,7 +723,7 @@ registerFilterClass(FilterRegex);
// Filtering according to the origin. // Filtering according to the origin.
var FilterOrigin = function() { const FilterOrigin = function() {
}; };
FilterOrigin.prototype.wrapped = { FilterOrigin.prototype.wrapped = {
@ -766,7 +766,7 @@ FilterOrigin.prototype.compile = function() {
// *** start of specialized origin matchers // *** start of specialized origin matchers
var FilterOriginHit = function(domainOpt) { const FilterOriginHit = function(domainOpt) {
FilterOrigin.call(this); FilterOrigin.call(this);
this.hostname = domainOpt; this.hostname = domainOpt;
}; };
@ -792,7 +792,7 @@ FilterOriginHit.prototype = Object.create(FilterOrigin.prototype, {
// //
var FilterOriginMiss = function(domainOpt) { const FilterOriginMiss = function(domainOpt) {
FilterOrigin.call(this); FilterOrigin.call(this);
this.hostname = domainOpt.slice(1); this.hostname = domainOpt.slice(1);
}; };
@ -811,14 +811,15 @@ FilterOriginMiss.prototype = Object.create(FilterOrigin.prototype, {
var needle = this.hostname, haystack = pageHostnameRegister; var needle = this.hostname, haystack = pageHostnameRegister;
if ( haystack.endsWith(needle) === false ) { return true; } if ( haystack.endsWith(needle) === false ) { return true; }
var offset = haystack.length - needle.length; var offset = haystack.length - needle.length;
return offset !== 0 && haystack.charCodeAt(offset - 1) !== 0x2E /* '.' */; return offset !== 0 &&
haystack.charCodeAt(offset - 1) !== 0x2E /* '.' */;
} }
}, },
}); });
// //
var FilterOriginHitSet = function(domainOpt) { const FilterOriginHitSet = function(domainOpt) {
FilterOrigin.call(this); FilterOrigin.call(this);
this.domainOpt = domainOpt.length < 128 this.domainOpt = domainOpt.length < 128
? domainOpt ? domainOpt
@ -840,17 +841,17 @@ FilterOriginHitSet.prototype = Object.create(FilterOrigin.prototype, {
}, },
matchOrigin: { matchOrigin: {
value: function() { value: function() {
if ( this.oneOf === null ) { if ( hnTrieManager.isValidRef(this.oneOf) === false ) {
this.oneOf = HNTrieBuilder.fromDomainOpt(this.domainOpt); this.oneOf = hnTrieManager.fromDomainOpt(this.domainOpt);
} }
return this.oneOf.matches(pageHostnameRegister); return this.oneOf.matches(pageHostnameRegister) === 1;
} }
}, },
}); });
// //
var FilterOriginMissSet = function(domainOpt) { const FilterOriginMissSet = function(domainOpt) {
FilterOrigin.call(this); FilterOrigin.call(this);
this.domainOpt = domainOpt.length < 128 this.domainOpt = domainOpt.length < 128
? domainOpt ? domainOpt
@ -872,17 +873,19 @@ FilterOriginMissSet.prototype = Object.create(FilterOrigin.prototype, {
}, },
matchOrigin: { matchOrigin: {
value: function() { value: function() {
if ( this.noneOf === null ) { if ( hnTrieManager.isValidRef(this.noneOf) === false ) {
this.noneOf = HNTrieBuilder.fromDomainOpt(this.domainOpt.replace(/~/g, '')); this.noneOf = hnTrieManager.fromDomainOpt(
this.domainOpt.replace(/~/g, '')
);
} }
return this.noneOf.matches(pageHostnameRegister) === false; return this.noneOf.matches(pageHostnameRegister) === 0;
} }
}, },
}); });
// //
var FilterOriginMixedSet = function(domainOpt) { const FilterOriginMixedSet = function(domainOpt) {
FilterOrigin.call(this); FilterOrigin.call(this);
this.domainOpt = domainOpt.length < 128 this.domainOpt = domainOpt.length < 128
? domainOpt ? domainOpt
@ -903,20 +906,16 @@ FilterOriginMixedSet.prototype = Object.create(FilterOrigin.prototype, {
}, },
init: { init: {
value: function() { value: function() {
var oneOf = [], noneOf = [], let oneOf = [], noneOf = [];
hostnames = this.domainOpt.split('|'), for ( let hostname of this.domainOpt.split('|') ) {
i = hostnames.length,
hostname;
while ( i-- ) {
hostname = hostnames[i];
if ( hostname.charCodeAt(0) === 0x7E /* '~' */ ) { if ( hostname.charCodeAt(0) === 0x7E /* '~' */ ) {
noneOf.push(hostname.slice(1)); noneOf.push(hostname.slice(1));
} else { } else {
oneOf.push(hostname); oneOf.push(hostname);
} }
} }
this.oneOf = HNTrieBuilder.fromIterable(oneOf); this.oneOf = hnTrieManager.fromIterable(oneOf);
this.noneOf = HNTrieBuilder.fromIterable(noneOf); this.noneOf = hnTrieManager.fromIterable(noneOf);
} }
}, },
toDomainOpt: { toDomainOpt: {
@ -926,10 +925,12 @@ FilterOriginMixedSet.prototype = Object.create(FilterOrigin.prototype, {
}, },
matchOrigin: { matchOrigin: {
value: function() { value: function() {
if ( this.oneOf === null ) { this.init(); } if ( hnTrieManager.isValidRef(this.oneOf) === false ) {
var needle = pageHostnameRegister; this.init();
return this.oneOf.matches(needle) && }
this.noneOf.matches(needle) === false; let needle = pageHostnameRegister;
return this.oneOf.matches(needle) === 1 &&
this.noneOf.matches(needle) === 0;
} }
}, },
}); });
@ -981,7 +982,7 @@ registerFilterClass(FilterOrigin);
/******************************************************************************/ /******************************************************************************/
var FilterDataHolder = function(dataType, dataStr) { const FilterDataHolder = function(dataType, dataStr) {
this.dataType = dataType; this.dataType = dataType;
this.dataStr = dataStr; this.dataStr = dataStr;
this.wrapped = undefined; this.wrapped = undefined;
@ -1024,7 +1025,7 @@ registerFilterClass(FilterDataHolder);
// Helper class for storing instances of FilterDataHolder. // Helper class for storing instances of FilterDataHolder.
var FilterDataHolderEntry = function(categoryBits, tokenHash, fdata) { const FilterDataHolderEntry = function(categoryBits, tokenHash, fdata) {
this.categoryBits = categoryBits; this.categoryBits = categoryBits;
this.tokenHash = tokenHash; this.tokenHash = tokenHash;
this.filter = filterFromCompiledData(fdata); this.filter = filterFromCompiledData(fdata);
@ -1047,7 +1048,7 @@ FilterDataHolderEntry.load = function(data) {
// Dictionary of hostnames // Dictionary of hostnames
// //
var FilterHostnameDict = function() { const FilterHostnameDict = function() {
this.h = ''; // short-lived register this.h = ''; // short-lived register
this.dict = new Set(); this.dict = new Set();
}; };
@ -1138,7 +1139,7 @@ registerFilterClass(FilterHostnameDict);
/******************************************************************************/ /******************************************************************************/
var FilterPair = function(a, b) { const FilterPair = function(a, b) {
this.f1 = a; this.f1 = a;
this.f2 = b; this.f2 = b;
this.f = null; this.f = null;
@ -1217,7 +1218,7 @@ registerFilterClass(FilterPair);
/******************************************************************************/ /******************************************************************************/
var FilterBucket = function(a, b, c) { const FilterBucket = function(a, b, c) {
this.filters = []; this.filters = [];
this.f = null; this.f = null;
if ( a !== undefined ) { if ( a !== undefined ) {
@ -1315,7 +1316,7 @@ registerFilterClass(FilterBucket);
/******************************************************************************/ /******************************************************************************/
/******************************************************************************/ /******************************************************************************/
var FilterParser = function() { const FilterParser = function() {
this.cantWebsocket = vAPI.cantWebsocket; this.cantWebsocket = vAPI.cantWebsocket;
this.reBadDomainOptChars = /[*+?^${}()[\]\\]/; this.reBadDomainOptChars = /[*+?^${}()[\]\\]/;
this.reHostnameRule1 = /^[0-9a-z][0-9a-z.-]*[0-9a-z]$/i; this.reHostnameRule1 = /^[0-9a-z][0-9a-z.-]*[0-9a-z]$/i;
@ -1933,7 +1934,7 @@ FilterParser.prototype.makeToken = function() {
/******************************************************************************/ /******************************************************************************/
/******************************************************************************/ /******************************************************************************/
var FilterContainer = function() { const FilterContainer = function() {
this.reIsGeneric = /[\^\*]/; this.reIsGeneric = /[\^\*]/;
this.filterParser = new FilterParser(); this.filterParser = new FilterParser();
this.urlTokenizer = µb.urlTokenizer; this.urlTokenizer = µb.urlTokenizer;
@ -1960,6 +1961,9 @@ FilterContainer.prototype.reset = function() {
this.dataFilters = new Map(); this.dataFilters = new Map();
this.filterParser.reset(); this.filterParser.reset();
// This will invalidate all hn tries throughout uBO:
hnTrieManager.reset();
// Runtime registers // Runtime registers
this.cbRegister = undefined; this.cbRegister = undefined;
this.thRegister = undefined; this.thRegister = undefined;
@ -2052,6 +2056,15 @@ FilterContainer.prototype.freeze = function() {
/******************************************************************************/ /******************************************************************************/
// The filtering engine may only become usable once asynchronous operations
// have completed (ex.: loading a wasm module). Readiness is delegated to
// hnTrieManager.readyToUse(), whose result is returned as is (presumably a
// promise, given that callers wait on it -- confirm against hnTrieManager).
FilterContainer.prototype.readyToUse = function() {
    const readiness = hnTrieManager.readyToUse();
    return readiness;
};
/******************************************************************************/
FilterContainer.prototype.toSelfie = function() { FilterContainer.prototype.toSelfie = function() {
let categoriesToSelfie = function(categoryMap) { let categoriesToSelfie = function(categoryMap) {
let selfie = []; let selfie = [];
@ -2250,7 +2263,7 @@ FilterContainer.prototype.compileToAtomicFilter = function(
// Only static filter with an explicit type can be redirected. If we reach // Only static filter with an explicit type can be redirected. If we reach
// this point, it's because there is one or more explicit type. // this point, it's because there is one or more explicit type.
if ( parsed.badFilter === false && parsed.redirect ) { if ( parsed.redirect ) {
let redirects = µb.redirectEngine.compileRuleFromStaticFilter(parsed.raw); let redirects = µb.redirectEngine.compileRuleFromStaticFilter(parsed.raw);
if ( Array.isArray(redirects) ) { if ( Array.isArray(redirects) ) {
for ( let redirect of redirects ) { for ( let redirect of redirects ) {
@ -2292,26 +2305,24 @@ FilterContainer.prototype.fromCompiledContent = function(reader) {
FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out, outlog) { FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out, outlog) {
if ( this.dataFilters.length === 0 ) { return; } if ( this.dataFilters.length === 0 ) { return; }
var url = this.urlTokenizer.setURL(requestURL); let url = this.urlTokenizer.setURL(requestURL);
requestHostnameRegister = µb.URI.hostnameFromURI(url); pageHostnameRegister = requestHostnameRegister = µb.URI.hostnameFromURI(url);
// We need to visit ALL the matching filters. // We need to visit ALL the matching filters.
var toAddImportant = new Map(), let toAddImportant = new Map(),
toAdd = new Map(), toAdd = new Map(),
toRemove = new Map(); toRemove = new Map();
var entry, f, let tokenHashes = this.urlTokenizer.getTokens(),
tokenHashes = this.urlTokenizer.getTokens(),
tokenHash, tokenOffset,
i = 0; i = 0;
while ( i < 32 ) { while ( i < 32 ) {
tokenHash = tokenHashes[i++]; let tokenHash = tokenHashes[i++];
if ( tokenHash === 0 ) { break; } if ( tokenHash === 0 ) { break; }
tokenOffset = tokenHashes[i++]; let tokenOffset = tokenHashes[i++];
entry = this.dataFilters.get(tokenHash); let entry = this.dataFilters.get(tokenHash);
while ( entry !== undefined ) { while ( entry !== undefined ) {
f = entry.filter; let f = entry.filter;
if ( f.match(url, tokenOffset) === true ) { if ( f.match(url, tokenOffset) === true ) {
if ( entry.categoryBits & 0x001 ) { if ( entry.categoryBits & 0x001 ) {
toRemove.set(f.dataStr, entry); toRemove.set(f.dataStr, entry);
@ -2324,9 +2335,9 @@ FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out
entry = entry.next; entry = entry.next;
} }
} }
entry = this.dataFilters.get(this.noTokenHash); let entry = this.dataFilters.get(this.noTokenHash);
while ( entry !== undefined ) { while ( entry !== undefined ) {
f = entry.filter; let f = entry.filter;
if ( f.match(url) === true ) { if ( f.match(url) === true ) {
if ( entry.categoryBits & 0x001 ) { if ( entry.categoryBits & 0x001 ) {
toRemove.set(f.dataStr, entry); toRemove.set(f.dataStr, entry);
@ -2342,12 +2353,11 @@ FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out
if ( toAddImportant.size === 0 && toAdd.size === 0 ) { return; } if ( toAddImportant.size === 0 && toAdd.size === 0 ) { return; }
// Remove entries overriden by other filters. // Remove entries overriden by other filters.
var key; for ( let key of toAddImportant.keys() ) {
for ( key of toAddImportant.keys() ) {
toAdd.delete(key); toAdd.delete(key);
toRemove.delete(key); toRemove.delete(key);
} }
for ( key of toRemove.keys() ) { for ( let key of toRemove.keys() ) {
if ( key === '' ) { if ( key === '' ) {
toAdd.clear(); toAdd.clear();
break; break;
@ -2355,26 +2365,25 @@ FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out
toAdd.delete(key); toAdd.delete(key);
} }
var logData; for ( let entry of toAddImportant ) {
for ( entry of toAddImportant ) {
out.push(entry[0]); out.push(entry[0]);
if ( outlog === undefined ) { continue; } if ( outlog === undefined ) { continue; }
logData = entry[1].logData(); let logData = entry[1].logData();
logData.source = 'static'; logData.source = 'static';
logData.result = 1; logData.result = 1;
outlog.push(logData); outlog.push(logData);
} }
for ( entry of toAdd ) { for ( let entry of toAdd ) {
out.push(entry[0]); out.push(entry[0]);
if ( outlog === undefined ) { continue; } if ( outlog === undefined ) { continue; }
logData = entry[1].logData(); let logData = entry[1].logData();
logData.source = 'static'; logData.source = 'static';
logData.result = 1; logData.result = 1;
outlog.push(logData); outlog.push(logData);
} }
if ( outlog !== undefined ) { if ( outlog !== undefined ) {
for ( entry of toRemove.values()) { for ( let entry of toRemove.values()) {
logData = entry.logData(); let logData = entry.logData();
logData.source = 'static'; logData.source = 'static';
logData.result = 2; logData.result = 2;
outlog.push(logData); outlog.push(logData);
@ -2389,20 +2398,19 @@ FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out
FilterContainer.prototype.matchTokens = function(bucket, url) { FilterContainer.prototype.matchTokens = function(bucket, url) {
// Hostname-only filters // Hostname-only filters
var f = bucket.get(this.dotTokenHash); let f = bucket.get(this.dotTokenHash);
if ( f !== undefined && f.match() === true ) { if ( f !== undefined && f.match() === true ) {
this.thRegister = this.dotTokenHash; this.thRegister = this.dotTokenHash;
this.fRegister = f; this.fRegister = f;
return true; return true;
} }
var tokenHashes = this.urlTokenizer.getTokens(), let tokenHashes = this.urlTokenizer.getTokens(),
tokenHash, tokenOffset,
i = 0; i = 0;
for (;;) { for (;;) {
tokenHash = tokenHashes[i++]; let tokenHash = tokenHashes[i++];
if ( tokenHash === 0 ) { break; } if ( tokenHash === 0 ) { break; }
tokenOffset = tokenHashes[i++]; let tokenOffset = tokenHashes[i++];
f = bucket.get(tokenHash); f = bucket.get(tokenHash);
if ( f !== undefined && f.match(url, tokenOffset) === true ) { if ( f !== undefined && f.match(url, tokenOffset) === true ) {
this.thRegister = tokenHash; this.thRegister = tokenHash;
@ -2437,8 +2445,10 @@ FilterContainer.prototype.matchStringGenericHide = function(requestURL) {
let url = this.urlTokenizer.setURL(requestURL); let url = this.urlTokenizer.setURL(requestURL);
// https://github.com/gorhill/uBlock/issues/2225 // https://github.com/gorhill/uBlock/issues/2225
// Important: this is used by FilterHostnameDict.match(). // Important:
requestHostnameRegister = µb.URI.hostnameFromURI(url); // - `pageHostnameRegister` is used by FilterOrigin.matchOrigin().
// - `requestHostnameRegister` is used by FilterHostnameDict.match().
pageHostnameRegister = requestHostnameRegister = µb.URI.hostnameFromURI(url);
let bucket = this.categories.get(genericHideException); let bucket = this.categories.get(genericHideException);
if ( !bucket || this.matchTokens(bucket, url) === false ) { if ( !bucket || this.matchTokens(bucket, url) === false ) {
@ -2548,7 +2558,7 @@ FilterContainer.prototype.matchString = function(context) {
// https://github.com/chrisaljoudi/uBlock/issues/519 // https://github.com/chrisaljoudi/uBlock/issues/519
// Use exact type match for anything beyond `other` // Use exact type match for anything beyond `other`
// Also, be prepared to support unknown types // Also, be prepared to support unknown types
var type = typeNameToTypeValue[context.requestType]; let type = typeNameToTypeValue[context.requestType];
if ( type === undefined ) { if ( type === undefined ) {
type = otherTypeBitValue; type = otherTypeBitValue;
} else if ( type === 0 || type > otherTypeBitValue ) { } else if ( type === 0 || type > otherTypeBitValue ) {
@ -2577,7 +2587,7 @@ FilterContainer.prototype.matchString = function(context) {
// filter. // filter.
// Prime tokenizer: we get a normalized URL in return. // Prime tokenizer: we get a normalized URL in return.
var url = this.urlTokenizer.setURL(context.requestURL); let url = this.urlTokenizer.setURL(context.requestURL);
// These registers will be used by various filters // These registers will be used by various filters
pageHostnameRegister = context.pageHostname || ''; pageHostnameRegister = context.pageHostname || '';
@ -2585,10 +2595,10 @@ FilterContainer.prototype.matchString = function(context) {
this.fRegister = null; this.fRegister = null;
var party = isFirstParty(context.pageDomain, context.requestHostname) let party = isFirstParty(context.pageDomain, context.requestHostname)
? FirstParty ? FirstParty
: ThirdParty; : ThirdParty;
var categories = this.categories, let categories = this.categories,
catBits, bucket; catBits, bucket;
// https://github.com/chrisaljoudi/uBlock/issues/139 // https://github.com/chrisaljoudi/uBlock/issues/139

View File

@ -604,9 +604,7 @@
µBlock.loadFilterLists = function(callback) { µBlock.loadFilterLists = function(callback) {
// Callers are expected to check this first. // Callers are expected to check this first.
if ( this.loadingFilterLists ) { if ( this.loadingFilterLists ) { return; }
return;
}
this.loadingFilterLists = true; this.loadingFilterLists = true;
var µb = this, var µb = this,
@ -961,38 +959,31 @@
/******************************************************************************/ /******************************************************************************/
µBlock.loadPublicSuffixList = function(callback) { µBlock.loadPublicSuffixList = function() {
var µb = this, return new Promise(resolve => {
assetKey = µb.pslAssetKey, // start of executor
compiledAssetKey = 'compiled/' + assetKey; this.assets.get('compiled/' + this.pslAssetKey, details => {
let selfie;
if ( typeof callback !== 'function' ) {
callback = this.noopFunc;
}
var onRawListLoaded = function(details) {
if ( details.content !== '' ) {
µb.compilePublicSuffixList(details.content);
}
callback();
};
var onCompiledListLoaded = function(details) {
var selfie;
try { try {
selfie = JSON.parse(details.content); selfie = JSON.parse(details.content);
} catch (ex) { } catch (ex) {
} }
if ( if (
selfie === undefined || selfie instanceof Object &&
publicSuffixList.fromSelfie(selfie) === false publicSuffixList.fromSelfie(selfie)
) { ) {
µb.assets.get(assetKey, onRawListLoaded); resolve();
return; return;
} }
callback(); this.assets.get(this.pslAssetKey, details => {
}; if ( details.content !== '' ) {
this.compilePublicSuffixList(details.content);
this.assets.get(compiledAssetKey, onCompiledListLoaded); }
resolve();
});
});
// end of executor
});
}; };
/******************************************************************************/ /******************************************************************************/

24
src/js/wasm/README.md Normal file
View File

@ -0,0 +1,24 @@
### For code reviewers
All `wasm` files in this directory were created by compiling the
corresponding `wat` file using the following command (using
`hntrie.wat`/`hntrie.wasm` as an example):
wat2wasm hntrie.wat -o hntrie.wasm
Assuming:
- The command is executed from within the present directory.
### `wat2wasm` tool
The `wat2wasm` tool can be downloaded from an official WebAssembly project:
<https://github.com/WebAssembly/wabt/releases>.
### `wat2wasm` tool online
You can also use the following online `wat2wasm` tool:
<https://webassembly.github.io/wabt/demo/wat2wasm/>.
Just paste the whole content of the `wat` file to compile into the WAT pane.
Click the "Download" button to retrieve the resulting `wasm` file.

BIN
src/js/wasm/hntrie.wasm Normal file

Binary file not shown.

200
src/js/wasm/hntrie.wat Normal file
View File

@ -0,0 +1,200 @@
;;
;; uBlock Origin - a browser extension to block requests.
;; Copyright (C) 2018-present Raymond Hill
;;
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see {http://www.gnu.org/licenses/}.
;;
;; Home: https://github.com/gorhill/uBlock
;; File: hntrie.wat
;; Description: WebAssembly code used by src/js/hntrie.js
;; How to compile: See README.md in this directory.
(module
;;
;; module start
;;
;; Debugging aid, left disabled:
;; (func $log (import "imports" "log") (param i32 i32 i32))
;; Linear memory is supplied by the host as `imports.memory`; it must be at
;; least 1 page in size. Needle and tries both live in this memory.
(memory (import "imports" "memory") 1)
;;
;; Public functions
;;
;;
;; unsigned int matches(offset)
;;
;; Test whether the currently set needle matches the trie at specified offset.
;;
;; Memory layout, byte offset:
;; 0-254: encoded needle (ASCII)
;; 255 : needle length
;; 256- : tries
;;
;; Trie cell layout as read by this function, byte offset from cell start:
;; +0 : i32, offset of the next cell to try when the cell char differs
;; from the needle char (0 = no more cells to try)
;; +4 : i32, offset of the cell to continue with once this cell has
;; matched (0 = nothing follows)
;; +8 : u8, cell char
;; +9 : u8, count of extra chars stored in this cell
;; +10- : the extra chars
;;
;; The needle is consumed from its last byte toward its first byte. The
;; JavaScript-like comments in the body give the equivalent logic, with `buf`
;; being a byte view and `buf32` a 32-bit view of the memory.
;;
(func (export "matches")
(param $itrie i32) ;; byte offset of the trie cell being examined
(result i32) ;; result: 0 = miss, 1 = hit
(local $ineedle i32) ;; current needle offset
(local $nchar i32) ;; needle char being processed
(local $tchar i32) ;; trie char being processed
(local $lxtra i32) ;; count of extra chars, then end offset of extra chars
(local $ixtra i32) ;; offset of the extra char being compared
;; let ineedle = buf[255];
;; i.e. start one past the last needle byte.
i32.const 255
i32.load8_u
set_local $ineedle
;; Outer loop: one iteration per trie cell matched along the needle.
loop $nextNeedleChar
;; ineedle -= 1;
get_local $ineedle
i32.const -1
i32.add
tee_local $ineedle
;; let nchar = ineedle === -1 ? 0 : buf[ineedle];
;; A needle char of 0 stands for "needle exhausted".
i32.const 0
i32.lt_s
if
i32.const 0
set_local $nchar
else
get_local $ineedle
i32.load8_u
set_local $nchar
end
;; Inner loop: follow the +0 links until a cell whose char equals the needle
;; char is found. Leaving the block means such a cell was found.
block $trieCharEqNeedleChar loop $nextTrieChar
;; let tchar = buf[itrie+8];
get_local $itrie
i32.load8_u offset=8
tee_local $tchar
;; if ( tchar === nchar ) { break; }
get_local $nchar
i32.eq
br_if $trieCharEqNeedleChar
;; if ( tchar === 0 && nchar === 0x2E ) { return 1; }
;; 0x2E is ASCII '.'.
;; NOTE(review): presumably a cell char of 0 marks the end of a stored
;; hostname, making the needle a subdomain of it -- confirm against hntrie.js.
get_local $tchar
i32.eqz
if
get_local $nchar
i32.const 0x2E
i32.eq
if
i32.const 1
return
end
end
;; itrie = buf32[itrie >>> 2];
get_local $itrie
i32.load
tee_local $itrie
;; if ( itrie === 0 ) { return 0; }
i32.eqz
if
i32.const 0
return
end
br $nextTrieChar
end end
;; if ( nchar === 0 ) { return 1; }
;; The needle is exhausted and the cell char is 0 as well: hit.
get_local $nchar
i32.eqz
if
i32.const 1
return
end
;; let lxtra = buf[itrie+9];
get_local $itrie
i32.load8_u offset=9
tee_local $lxtra
i32.eqz
;; The then-branch is intentionally empty: nothing to compare when
;; lxtra === 0. The else-branch runs when the cell holds extra chars.
if else
;; if ( lxtra > ineedle ) { return 0; }
;; Fewer needle bytes remain than there are extra chars to match.
get_local $lxtra
get_local $ineedle
i32.gt_u
if
i32.const 0
return
end
;; let ixtra = itrie + 10;
get_local $itrie
i32.const 10
i32.add
tee_local $ixtra
;; lxtra += ixtra;
;; From here on lxtra is the offset one past the last extra char.
get_local $lxtra
i32.add
set_local $lxtra
;; do {
block $noMoreExtraChars loop
;; ineedle -= 1;
get_local $ineedle
i32.const -1
i32.add
tee_local $ineedle
;; if ( buf[ineedle] !== buf[ixtra] ) { return 0; }
i32.load8_u
get_local $ixtra
i32.load8_u
i32.ne
if
i32.const 0
return
end
;; ixtra += 1;
get_local $ixtra
i32.const 1
i32.add
tee_local $ixtra
;; } while ( ixtra !== lxtra );
get_local $lxtra
i32.eq
br_if $noMoreExtraChars
;; Label 0 is the enclosing unnamed loop: compare the next extra char.
br 0
end end
end
;; itrie = buf32[itrie + 4 >>> 2];
get_local $itrie
i32.load offset=4
tee_local $itrie
;; if ( itrie === 0 ) {
i32.eqz
if
;; return ineedle === 0 || buf[ineedle-1] === 0x2E ? 1 : 0;
;; Nothing follows in the trie: hit only if the needle is fully consumed or
;; the next unconsumed needle char is '.'.
get_local $ineedle
i32.eqz
if
i32.const 1
return
end
get_local $ineedle
i32.const -1
i32.add
i32.load8_u
i32.const 0x2E
i32.eq
if
i32.const 1
return
end
i32.const 0
return
end
;; Label 0 is $nextNeedleChar here: process the next needle char.
br 0
end
;; Not reached at run time: the loop above only exits through `return`.
;; Supplies the i32 result the function signature declares.
i32.const 0
)
;;
;; module end
;;
)

479
test/hnset-benchmark.html Normal file

File diff suppressed because one or more lines are too long

45866
test/hntrie-test.html Normal file

File diff suppressed because it is too large Load Diff