mirror of https://github.com/gorhill/uBlock.git
3rd-gen hntrie, suitable for large set of hostnames
This commit is contained in:
parent
bf28a83e2d
commit
1b6fea16da
|
@ -0,0 +1,271 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||||
|
</head>
|
||||||
|
<body style="font: 14px sans-serif">
|
||||||
|
<h1>Benchmark of large hostname-lookup from small to large set: Set, HNTrie</h1>
|
||||||
|
<p><button id="createBenchmark">Creation</button> <button id="lookupBenchmark">Lookup</button></p>
|
||||||
|
<div id="results-0" style="white-space:pre;font-family:mono"></div>
|
||||||
|
<div id="results-1" style="white-space:pre;font-family:mono"></div>
|
||||||
|
<div id="results-2" style="white-space:pre;font-family:mono"></div>
|
||||||
|
<div id="results-3" style="white-space:pre;font-family:mono"></div>
|
||||||
|
<div id="results-4" style="white-space:pre;font-family:mono"></div>
|
||||||
|
<div id="results-5" style="white-space:pre;font-family:mono"></div>
|
||||||
|
<div id="results-6" style="white-space:pre;font-family:mono"></div>
|
||||||
|
|
||||||
|
<script src="https://raw.githack.com/gorhill/uBlock/master/src/js/hntrie.js"></script>
|
||||||
|
<!-- <script src="../../src/js/hntrie.js"></script> -->
|
||||||
|
<script src="hostname-pool.js"></script>
|
||||||
|
|
||||||
|
<script src="https://cdn.jsdelivr.net/lodash/4.17.2/lodash.min.js"></script>
|
||||||
|
<script src="https://cdn.jsdelivr.net/platform.js/1.3.3/platform.js"></script>
|
||||||
|
<script src="https://cdn.jsdelivr.net/benchmarkjs/2.1.2/benchmark.js"></script>
|
||||||
|
<script>
|
||||||
|
// Pick one entry of the global `hostnamePool` array at a random index.
const randomHostname = function() {
    const index = Math.floor(Math.random() * hostnamePool.length);
    return hostnamePool[index];
};
|
||||||
|
|
||||||
|
// Build a random needle for the lookup benchmark.
//
// Starts from a random hostname of the pool. When that hostname contains a
// dot, everything before its last dot is replaced by a random base-36 label.
// Half of the time, an extra random label of at most four characters is
// prepended as a subdomain.
const randomNeedle = function() {
    const hostname = randomHostname();
    const lastDot = hostname.lastIndexOf('.');
    const needle = lastDot === -1
        ? hostname
        : Math.random().toString(36).slice(2) + hostname.slice(lastDot);
    if ( Math.random() >= 0.5 ) { return needle; }
    return Math.random().toString(36).slice(2, 6) + '.' + needle;
};
|
||||||
|
|
||||||
|
// Create hostname lists of decreasing sizes, each filled with random picks
// from the hostname pool (the same hostname may be picked more than once).
// The largest list has as many entries as the pool itself, and each smaller
// list has a quarter (`>>> 2`) of the entries of the next one; lists are
// generated for as long as the size is greater than 1. The resulting array is
// ordered from the smallest list to the largest.
const hostnameLists = (function() {
    const lists = [];
    for ( let size = hostnamePool.length; size > 1; size >>>= 2 ) {
        lists.unshift(Array.from({ length: size }, ( ) => randomHostname()));
    }
    return lists;
})();
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
// Create a Set-based dictionary from an iterable of hostnames. Duplicate
// hostnames collapse into a single entry.
const setBasedDictCreate = function(hostnames) {
    return new Set(hostnames);
};
|
||||||
|
|
||||||
|
// Test whether `needle`, or any of its parent domains, is present in the
// Set-based dictionary `haystack`.
//
// The needle is tested as-is first; on a miss, its leftmost label is dropped
// and the remainder is tested, until either a hit occurs (returns true) or
// no dot is left (returns false).
const setBasedDictTest = function(haystack, needle) {
    let candidate = needle;
    for (;;) {
        if ( haystack.has(candidate) ) { return true; }
        const pos = candidate.indexOf('.');
        if ( pos === -1 ) { return false; }
        candidate = candidate.slice(pos + 1);
    }
};
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
// One trie container per insertion code path: tries in hnBigTrieJS are built
// through 'addJS', tries in hnBigTrieWASM are built through 'addWASM'.
const hnBigTrieJS = new HNTrieContainer();
|
||||||
|
const hnBigTrieWASM = new HNTrieContainer();
|
||||||
|
|
||||||
|
// Build a trie from an iterable of hostnames inside the hnBigTrieJS
// container, using the container's 'addJS' insertion method.
const trieBasedDictCreateJS = hostnames =>
    hnBigTrieJS.fromIterable(hostnames, 'addJS');
|
||||||
|
|
||||||
|
// Look up `needle` in the trie `haystack` through its `matchesJS` method and
// return that method's result unchanged.
const trieBasedDictTest = (haystack, needle) => haystack.matchesJS(needle);
|
||||||
|
|
||||||
|
// Build a trie from an iterable of hostnames inside the hnBigTrieWASM
// container, using the container's 'addWASM' insertion method.
const trieBasedDictCreateWASM = hostnames =>
    hnBigTrieWASM.fromIterable(hostnames, 'addWASM');
|
||||||
|
|
||||||
|
// Look up `needle` in the trie `haystack` through its `matchesWASM` method
// and return that method's result unchanged.
const trieBasedDictTestWASM = (haystack, needle) => haystack.matchesWASM(needle);
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
// Registered benchmark suites, indexed by benchmark id. Slot 0 is a null
// placeholder so that the ids passed to doBenchmark() start at 1.
const gBenchmarks = [ null ];
|
||||||
|
// Id of the benchmark suite currently selected; also used to pick the output
// area written to by the suite's event handlers.
let gWhich;
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
// Write plain text into one of the `#results-N` output elements.
//
//   which: index of the output element. 0 is used as-is; any positive value
//          is wrapped onto the range 1..3.
//   text:  an empty string clears the element, anything else is appended.
//
// The text is written through `textContent` so that it is never parsed as
// markup: characters such as `<` or `&` are shown verbatim, and appending
// does not re-parse what is already displayed.
function stdout(which, text) {
    const slot = which > 0 ? ((which - 1) % 3) + 1 : which;
    const target = document.querySelector('#results-' + slot);
    if ( text === '' ) {
        target.textContent = '';
    } else {
        target.textContent += text;
    }
}
|
||||||
|
|
||||||
|
// Start the benchmark suite registered under id `which`.
//
// Clears all output areas, prints a header with the platform description,
// records `which` in `gWhich` and runs the suite asynchronously.
//
// The suites are registered asynchronously, so this function may be called
// (through a button click) before they exist: in that case a notice is
// printed and nothing is run, rather than failing with a TypeError.
function doBenchmark(which) {
    const suite = gBenchmarks[which];
    stdout(0, '');
    if ( !suite ) {
        stdout(0, 'Benchmarks are not ready yet, please try again in a moment.\n');
        return;
    }
    stdout(0, 'Benchmarking, the higher ops/sec the better.\n');
    stdout(0, Benchmark.platform.toString() + '.');
    stdout(0, '\n\n');
    stdout(1, '');
    stdout(2, '');
    stdout(3, '');
    gWhich = which;
    suite.run({ 'async': true });
}
|
||||||
|
|
||||||
|
// Report completion of the current benchmark suite, then move on to the
// suite with the next id and run it asynchronously, if there is one.
function nextBenchmark() {
    stdout(gWhich, 'Done.\n\n');
    gWhich += 1;
    const suite = gBenchmarks[gWhich];
    if ( !suite ) { return; }
    suite.run({ 'async': true });
}
|
||||||
|
|
||||||
|
// Report completion of the current benchmark suite in the output area
// selected by `gWhich`.
function exitBenchmark() {
|
||||||
|
stdout(gWhich, 'Done.\n\n');
|
||||||
|
}
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
// Build the benchmark suites and register them in `gBenchmarks`, in this
// order:
//   1st push: creation of dictionaries (Set-based and trie-based flavors)
//   2nd push: lookup of random needles against all dictionaries
//
// WASM flavors are only registered when the corresponding WASM entry point
// is not null.
// NOTE(review): this assumes the trie container exposes `addWASM` /
// `matchesWASM` as null when WebAssembly is unavailable — confirm against
// hntrie.js.
function initBenchmarks() {
    // Creation benchmark.
    gBenchmarks.push((function() {
        // Receives the dictionaries created by the last benchmarked run.
        let dicts = [];
        let bigTrieDictsSerialized;

        // Create one dictionary per hostname list, using factory `fn`.
        const createDict = function(fn) {
            return hostnameLists.map(hostnames => fn(hostnames));
        };

        const bms = new Benchmark.Suite();

        bms
            .add(' - Set-based', function() {
                dicts = createDict(setBasedDictCreate);
            })
            .add(' - Trie-based (JS)', function() {
                hnBigTrieJS.reset();
                dicts = createDict(trieBasedDictCreateJS);
            });

        if ( hnBigTrieWASM.addWASM !== null ) {
            bms.add(' - Trie-based (WASM)', function() {
                hnBigTrieWASM.reset();
                dicts = createDict(trieBasedDictCreateWASM);
            });
        }

        bms
            .add(' - Trie-based (unserialized)', function() {
                hnBigTrieJS.reset();
                hnBigTrieJS.unserialize(bigTrieDictsSerialized);
            })
            .on('start', function() {
                // Prepare the serialized container used by the
                // "unserialized" benchmark.
                hnBigTrieJS.reset();
                createDict(trieBasedDictCreateJS);
                bigTrieDictsSerialized = hnBigTrieJS.serialize();
                stdout(gWhich, '');
                stdout(gWhich, 'Create dictionaries\n');
            })
            .on('cycle', function(event) {
                stdout(gWhich, String(event.target) + '\n');
            })
            .on('complete', function() {
                // Release the benchmark data.
                dicts = [];
                bigTrieDictsSerialized = undefined;
                exitBenchmark();
            });

        return bms;
    })());

    const lookupCount = 100;

    // Lookup benchmark.
    gBenchmarks.push((function() {
        const bms = new Benchmark.Suite();
        const needles = [];

        let setDicts = [];
        let bigTrieDicts = [];
        let results;

        // Test every needle against every dictionary using matcher `fn`;
        // for each needle, the result of the last dictionary is kept.
        const lookupDict = function(dicts, fn) {
            for ( let i = 0; i < needles.length; i++ ) {
                const needle = needles[i];
                for ( const dict of dicts ) {
                    results[i] = fn(dict, needle);
                }
            }
        };

        bms
            .add(' - Set-based', function() {
                lookupDict(setDicts, setBasedDictTest);
            })
            .add(' - Trie-based JS', function() {
                lookupDict(bigTrieDicts, trieBasedDictTest);
            });

        // The tries being looked up are created in the hnBigTrieJS container.
        if ( hnBigTrieJS.matchesWASM !== null ) {
            bms.add(' - Trie-based WASM', function() {
                lookupDict(bigTrieDicts, trieBasedDictTestWASM);
            });
        }

        bms
            .on('start', function() {
                for ( let i = 0; i < lookupCount; i++ ) {
                    needles[i] = randomNeedle();
                }
                setDicts = [];
                bigTrieDicts = [];
                results = [];
                hnBigTrieJS.reset();
                for ( const hostnameList of hostnameLists ) {
                    setDicts.push(setBasedDictCreate(hostnameList));
                    bigTrieDicts.push(trieBasedDictCreateJS(hostnameList));
                }
                hnBigTrieJS.optimize();
                stdout(gWhich, '');
                stdout(
                    gWhich,
                    'Test ' + lookupCount +
                    ' needles against ' + setDicts.length +
                    ' dictionaries with size between ' + hostnameLists[0].length +
                    ' and ' + hostnameLists[hostnameLists.length-1].length +
                    ' hostnames\n'
                );
            })
            .on('cycle', function(event) {
                stdout(gWhich, String(event.target) + '\n');
            })
            .on('complete', ( ) => {
                // Release the benchmark data; each variable gets its own
                // array so that they never alias each other.
                setDicts = [];
                bigTrieDicts = [];
                results = [];
                hnBigTrieJS.reset();
                exitBenchmark();
            });

        return bms;
    })());
}
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
// Register the benchmark suites only once both trie containers have
// signalled, through readyToUse(), that they are ready.
Promise.all([
|
||||||
|
hnBigTrieJS.readyToUse(),
|
||||||
|
hnBigTrieWASM.readyToUse()
|
||||||
|
]).then(( ) => {
|
||||||
|
initBenchmarks();
|
||||||
|
});
|
||||||
|
|
||||||
|
// Wire the buttons: "Creation" starts benchmark 1, "Lookup" starts
// benchmark 2.
document.getElementById('createBenchmark').onclick = ( ) => { doBenchmark(1); };
document.getElementById('lookupBenchmark').onclick = ( ) => { doBenchmark(2); };
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
|
@ -5,7 +5,7 @@
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||||
</head>
|
</head>
|
||||||
<body style="font: 14px sans-serif">
|
<body style="font: 14px sans-serif">
|
||||||
<h1>Benchmark of hostname-lookup data structures: Set, RegExp, HNTrie</h1>
|
<h1>Benchmark of hostname-lookup from small to medium set: Set, RegExp, HNTrie</h1>
|
||||||
<p><button id="createBenchmark">Creation</button> <button id="lookupBenchmark">Lookup</button></p>
|
<p><button id="createBenchmark">Creation</button> <button id="lookupBenchmark">Lookup</button></p>
|
||||||
<div id="results-0" style="white-space:pre;font-family:mono"></div>
|
<div id="results-0" style="white-space:pre;font-family:mono"></div>
|
||||||
<div id="results-1" style="white-space:pre;font-family:mono"></div>
|
<div id="results-1" style="white-space:pre;font-family:mono"></div>
|
||||||
|
@ -17,6 +17,7 @@
|
||||||
|
|
||||||
<script src="https://rawcdn.githack.com/gorhill/uBlock/e83ffde5af29bd44ae529c5a60e2506970e7af34/src/js/hntrie.js"></script>
|
<script src="https://rawcdn.githack.com/gorhill/uBlock/e83ffde5af29bd44ae529c5a60e2506970e7af34/src/js/hntrie.js"></script>
|
||||||
<script src="https://rawcdn.githack.com/gorhill/uBlock/c3b0fd31f64bd7ffecdd282fb1208fe07aac3eb0/src/js/hntrie.js"></script>
|
<script src="https://rawcdn.githack.com/gorhill/uBlock/c3b0fd31f64bd7ffecdd282fb1208fe07aac3eb0/src/js/hntrie.js"></script>
|
||||||
|
<script src="https://raw.githack.com/gorhill/uBlock/master/src/js/hntrie.js"></script>
|
||||||
<!-- <script src="../../src/js/hntrie.js"></script> -->
|
<!-- <script src="../../src/js/hntrie.js"></script> -->
|
||||||
<script src="hostname-pool.js"></script>
|
<script src="hostname-pool.js"></script>
|
||||||
|
|
||||||
|
@ -202,6 +203,27 @@ var trieBasedDictTestWASM = function(haystack, needle) {
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
|
// 3rd-gen trie containers, one per insertion code path: tries in hnBigTrieJS
// are built through 'addJS', tries in hnBigTrieWASM through 'addWASM'.
const hnBigTrieJS = new HNTrieContainer();
|
||||||
|
const hnBigTrieWASM = new HNTrieContainer();
|
||||||
|
|
||||||
|
// Build a trie inside the hnBigTrieJS container from a `|`-separated list of
// hostnames, using the container's 'addJS' insertion method.
const bigtrieBasedDictCreateJS = domainOpt =>
    hnBigTrieJS.fromIterable(domainOpt.split('|'), 'addJS');
|
||||||
|
|
||||||
|
// Look up `needle` in the trie `haystack` through its `matchesJS` method and
// return that method's result unchanged.
const bigtrieBasedDictTestJS = (haystack, needle) => haystack.matchesJS(needle);
|
||||||
|
|
||||||
|
// Build a trie inside the hnBigTrieWASM container from a `|`-separated list
// of hostnames, using the container's 'addWASM' insertion method.
const bigtrieBasedDictCreateWASM = domainOpt =>
    hnBigTrieWASM.fromIterable(domainOpt.split('|'), 'addWASM');
|
||||||
|
|
||||||
|
// Look up `needle` in the trie `haystack` through its `matchesWASM` method
// and return that method's result unchanged.
const bigtrieBasedDictTestWASM = (haystack, needle) => haystack.matchesWASM(needle);
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
const gBenchmarks = [ null ];
|
const gBenchmarks = [ null ];
|
||||||
let gWhich;
|
let gWhich;
|
||||||
|
|
||||||
|
@ -258,19 +280,23 @@ function initBenchmarks() {
|
||||||
|
|
||||||
var bms = new Benchmark.Suite();
|
var bms = new Benchmark.Suite();
|
||||||
bms
|
bms
|
||||||
.add(' - Set-based', function() {
|
.add(' - Set-based', function() {
|
||||||
createDict(setBasedDictCreate);
|
createDict(setBasedDictCreate);
|
||||||
})
|
})
|
||||||
.add(' - Regex-based', function() {
|
.add(' - Regex-based', function() {
|
||||||
createDict(regexBasedDictCreate);
|
createDict(regexBasedDictCreate);
|
||||||
})
|
})
|
||||||
.add(' - Trie-based (1st-gen)', function() {
|
.add(' - Trie-based (1st-gen)', function() {
|
||||||
createDict(oldTrieBasedDictCreate);
|
createDict(oldTrieBasedDictCreate);
|
||||||
})
|
})
|
||||||
.add(' - Trie-based (2nd-gen)', function() {
|
.add(' - Trie-based (2nd-gen)', function() {
|
||||||
hnTrieManager.reset();
|
hnTrieManager.reset();
|
||||||
createDict(trieBasedDictCreate);
|
createDict(trieBasedDictCreate);
|
||||||
})
|
})
|
||||||
|
.add(' - Trie-based JS (3rd-gen)', function() {
|
||||||
|
hnBigTrieJS.reset();
|
||||||
|
createDict(bigtrieBasedDictCreateJS);
|
||||||
|
})
|
||||||
.on('start', function() {
|
.on('start', function() {
|
||||||
dicts = [];
|
dicts = [];
|
||||||
stdout(gWhich, '');
|
stdout(gWhich, '');
|
||||||
|
@ -281,6 +307,13 @@ function initBenchmarks() {
|
||||||
})
|
})
|
||||||
.on('complete', exitBenchmark);
|
.on('complete', exitBenchmark);
|
||||||
|
|
||||||
|
if ( hnBigTrieWASM.addWASM !== null ) {
|
||||||
|
bms.add(' - Trie-based WASM (3rd-gen)', function() {
|
||||||
|
hnBigTrieWASM.reset();
|
||||||
|
createDict(bigtrieBasedDictCreateWASM);
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
return bms;
|
return bms;
|
||||||
})());
|
})());
|
||||||
|
|
||||||
|
@ -294,6 +327,7 @@ function initBenchmarks() {
|
||||||
let regexDicts;
|
let regexDicts;
|
||||||
let oldTrieDicts;
|
let oldTrieDicts;
|
||||||
let newTrieDicts;
|
let newTrieDicts;
|
||||||
|
let bigTrieDicts;
|
||||||
let results;
|
let results;
|
||||||
|
|
||||||
const lookupDict = function(dicts, fn) {
|
const lookupDict = function(dicts, fn) {
|
||||||
|
@ -315,9 +349,6 @@ function initBenchmarks() {
|
||||||
.add(' - Trie-based (1st-gen)', function() {
|
.add(' - Trie-based (1st-gen)', function() {
|
||||||
lookupDict(oldTrieDicts, oldTrieBasedDictTest);
|
lookupDict(oldTrieDicts, oldTrieBasedDictTest);
|
||||||
})
|
})
|
||||||
.add(' - Trie-based JS (2nd-gen)', function() {
|
|
||||||
lookupDict(newTrieDicts, trieBasedDictTest);
|
|
||||||
})
|
|
||||||
.on('start', function() {
|
.on('start', function() {
|
||||||
for ( let i = 0; i < lookupCount; i++ ) {
|
for ( let i = 0; i < lookupCount; i++ ) {
|
||||||
needles[i] = randomNeedle();
|
needles[i] = randomNeedle();
|
||||||
|
@ -326,6 +357,7 @@ function initBenchmarks() {
|
||||||
regexDicts = [];
|
regexDicts = [];
|
||||||
oldTrieDicts = []
|
oldTrieDicts = []
|
||||||
newTrieDicts = []
|
newTrieDicts = []
|
||||||
|
bigTrieDicts = []
|
||||||
results = [];
|
results = [];
|
||||||
hnTrieManager.reset();
|
hnTrieManager.reset();
|
||||||
for ( const domainOpt of domainOpts ) {
|
for ( const domainOpt of domainOpts ) {
|
||||||
|
@ -333,6 +365,7 @@ function initBenchmarks() {
|
||||||
regexDicts.push(regexBasedDictCreate(domainOpt));
|
regexDicts.push(regexBasedDictCreate(domainOpt));
|
||||||
oldTrieDicts.push(oldTrieBasedDictCreate(domainOpt));
|
oldTrieDicts.push(oldTrieBasedDictCreate(domainOpt));
|
||||||
newTrieDicts.push(trieBasedDictCreate(domainOpt));
|
newTrieDicts.push(trieBasedDictCreate(domainOpt));
|
||||||
|
bigTrieDicts.push(bigtrieBasedDictCreateJS(domainOpt));
|
||||||
}
|
}
|
||||||
|
|
||||||
stdout(gWhich, '');
|
stdout(gWhich, '');
|
||||||
|
@ -347,11 +380,22 @@ function initBenchmarks() {
|
||||||
exitBenchmark();
|
exitBenchmark();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
bms.add(' - Trie-based JS (2nd-gen)', function() {
|
||||||
|
lookupDict(newTrieDicts, trieBasedDictTest);
|
||||||
|
})
|
||||||
if ( hnTrieManager.matchesWASM !== null ) {
|
if ( hnTrieManager.matchesWASM !== null ) {
|
||||||
bms.add(' - Trie-based WASM (2nd-gen)', function() {
|
bms.add(' - Trie-based WASM (2nd-gen)', function() {
|
||||||
lookupDict(newTrieDicts, trieBasedDictTestWASM);
|
lookupDict(newTrieDicts, trieBasedDictTestWASM);
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
bms.add(' - Trie-based JS (3rd-gen)', function() {
    // The 3rd-gen tries are stored in bigTrieDicts (newTrieDicts holds the
    // 2nd-gen tries), so that is what this benchmark must look up.
    lookupDict(bigTrieDicts, bigtrieBasedDictTestJS);
});
|
||||||
|
if ( hnBigTrieWASM.matchesWASM !== null ) {
|
||||||
|
bms.add(' - Trie-based WASM (3rd-gen)', function() {
|
||||||
|
lookupDict(bigTrieDicts, bigtrieBasedDictTestWASM);
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
return bms;
|
return bms;
|
||||||
})());
|
})());
|
||||||
|
@ -361,6 +405,8 @@ function initBenchmarks() {
|
||||||
|
|
||||||
Promise.all([
|
Promise.all([
|
||||||
hnTrieManager.readyToUse(),
|
hnTrieManager.readyToUse(),
|
||||||
|
hnBigTrieJS.readyToUse(),
|
||||||
|
hnBigTrieWASM.readyToUse(),
|
||||||
]).then(( ) => {
|
]).then(( ) => {
|
||||||
initBenchmarks();
|
initBenchmarks();
|
||||||
});
|
});
|
||||||
|
|
|
@ -5,10 +5,9 @@
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||||
</head>
|
</head>
|
||||||
<body style="font: 14px sans-serif">
|
<body style="font: 14px sans-serif">
|
||||||
<h1>HNTrie test</h1>
|
<h1>HNTrieContainer test</h1>
|
||||||
<div><button id="test" type="button">Test!</button></div>
|
<div><button id="test" type="button">Test!</button></div>
|
||||||
<div id="stdout"></div>
|
<div id="stdout"></div>
|
||||||
<script src="https://rawcdn.githack.com/gorhill/uBlock/e83ffde5af29bd44ae529c5a60e2506970e7af34/src/js/hntrie.js"></script>
|
|
||||||
<script src="https://raw.githack.com/gorhill/uBlock/master/src/js/hntrie.js"></script>
|
<script src="https://raw.githack.com/gorhill/uBlock/master/src/js/hntrie.js"></script>
|
||||||
<!-- <script src="../../src/js/hntrie.js"></script> -->
|
<!-- <script src="../../src/js/hntrie.js"></script> -->
|
||||||
<script src="hostname-pool.js"></script>
|
<script src="hostname-pool.js"></script>
|
||||||
|
@ -35,7 +34,45 @@ const stdout = function(s) {
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
const testFlavor = function(hostnames, name, matchesFn, hit, miss) {
|
// Dictionary of hostnames
|
||||||
|
//
|
||||||
|
// Set-backed dictionary of hostnames.
//
// A needle matches when it, or one of its parent domains (obtained by
// repeatedly dropping the leftmost label), is present in the dictionary.
//
//   hostnames: optional iterable of hostnames used to fill the dictionary.
const FilterHostnameDict = function(hostnames) {
    this.h = ''; // short-lived register
    this.dict = new Set();
    if ( hostnames === undefined ) { return; }
    this.fromIterable(hostnames);
};

FilterHostnameDict.prototype = {
    // Add one hostname. Returns true if it was added, false if it was
    // already present.
    add: function(hn) {
        const sizeBefore = this.dict.size;
        this.dict.add(hn);
        return this.dict.size !== sizeBefore;
    },
    // Add every hostname of an iterable. Returns the dictionary itself.
    fromIterable: function(hostnames) {
        for ( const hostname of hostnames ) {
            this.add(hostname);
        }
        return this;
    },
    // Test `needle` and its parent domains against the dictionary. On a hit,
    // `this.h` is set to the entry which matched and true is returned; on a
    // miss, `this.h` is reset to '' and false is returned.
    matches: function(needle) {
        let candidate = needle;
        for (;;) {
            if ( this.dict.has(candidate) ) {
                this.h = candidate;
                return true;
            }
            const dot = candidate.indexOf('.');
            if ( dot === -1 ) { break; }
            candidate = candidate.slice(dot + 1);
        }
        this.h = '';
        return false;
    },
};
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
const testFlavor = function(hostnames, name, matchesFn, hitFn) {
|
||||||
stdout('\xA0');
|
stdout('\xA0');
|
||||||
stdout('Testing ' + name + '...');
|
stdout('Testing ' + name + '...');
|
||||||
|
|
||||||
|
@ -44,25 +81,25 @@ const testFlavor = function(hostnames, name, matchesFn, hit, miss) {
|
||||||
for ( let i = 0; i < hostnames.length; i++ ) {
|
for ( let i = 0; i < hostnames.length; i++ ) {
|
||||||
// Exact hits
|
// Exact hits
|
||||||
let needle = hostnames[i];
|
let needle = hostnames[i];
|
||||||
if ( matchesFn(needle) !== hit ) {
|
if ( hitFn(matchesFn(needle)) === false ) {
|
||||||
stdout('Exact hits failed: ' + needle);
|
stdout('Exact hits failed: ' + needle);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Subdomain hits
|
// Subdomain hits
|
||||||
needle = createRandomLabel() + '.' + hostnames[i];
|
needle = createRandomLabel() + '.' + hostnames[i];
|
||||||
if ( matchesFn(needle) !== hit ) {
|
if ( hitFn(matchesFn(needle)) === false ) {
|
||||||
stdout('Subdomain hits failed: ' + needle);
|
stdout('Subdomain hits failed: ' + needle);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Misses batch 1
|
// Misses batch 1
|
||||||
needle = createRandomLabel() + '.com';
|
needle = createRandomLabel() + '.com';
|
||||||
if ( matchesFn(needle) !== miss ) {
|
if ( hitFn(matchesFn(needle)) !== false ) {
|
||||||
stdout('Misses batch 1: ' + needle);
|
stdout('Misses batch 1: ' + needle);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Misses batch 2
|
// Misses batch 2
|
||||||
needle = hostnames[i] + '.' + createRandomLabel();
|
needle = hostnames[i] + '.' + createRandomLabel();
|
||||||
if ( matchesFn(needle) !== miss ) {
|
if ( hitFn(matchesFn(needle)) !== false ) {
|
||||||
stdout('Misses batch 2: ' + needle);
|
stdout('Misses batch 2: ' + needle);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -71,7 +108,7 @@ const testFlavor = function(hostnames, name, matchesFn, hit, miss) {
|
||||||
let pos = needle.lastIndexOf('.');
|
let pos = needle.lastIndexOf('.');
|
||||||
if ( pos !== -1 ) {
|
if ( pos !== -1 ) {
|
||||||
needle = needle.slice(0, pos) + needle.slice(pos + 1);
|
needle = needle.slice(0, pos) + needle.slice(pos + 1);
|
||||||
if ( matchesFn(needle) !== miss ) {
|
if ( hitFn(matchesFn(needle)) !== false ) {
|
||||||
stdout('Misses batch 3: ' + needle);
|
stdout('Misses batch 3: ' + needle);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -87,19 +124,98 @@ const testFlavor = function(hostnames, name, matchesFn, hit, miss) {
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
hnTrieManager.readyToUse().then(( ) => {
|
const hnBigTrieJS = new HNTrieContainer();
|
||||||
const oldTrie = HNTrieBuilder.fromIterable(hostnamePool);
|
const hnBigTrieWASM = new HNTrieContainer();
|
||||||
const theTrie = hnTrieManager.fromIterable(hostnamePool);
|
const hnBigTrieUnserialized = new HNTrieContainer();
|
||||||
|
|
||||||
|
Promise.all([
|
||||||
|
hnBigTrieJS.readyToUse(),
|
||||||
|
hnBigTrieWASM.readyToUse()
|
||||||
|
]).then(( ) => {
|
||||||
|
let t0 = performance.now();
|
||||||
|
const theSet = new FilterHostnameDict(hostnamePool);
|
||||||
|
let t1 = performance.now();
|
||||||
|
stdout('\xA0');
|
||||||
|
stdout(
|
||||||
|
'Set creation completed in ' +
|
||||||
|
(t1 - t0).toFixed(2) + ' ms'
|
||||||
|
);
|
||||||
|
|
||||||
|
t0 = performance.now();
|
||||||
|
const theTrieJS = hnBigTrieJS.fromIterable(hostnamePool, 'addJS');
|
||||||
|
hnBigTrieJS.optimize();
|
||||||
|
t1 = performance.now();
|
||||||
|
stdout('\xA0');
|
||||||
|
stdout(
|
||||||
|
'HNTrieContainer creation (JS) completed in ' +
|
||||||
|
(t1 - t0).toFixed(2) + ' ms'
|
||||||
|
);
|
||||||
|
|
||||||
|
let theTrieWASM;
|
||||||
|
if ( hnBigTrieWASM.addWASM instanceof Function ) {
|
||||||
|
t0 = performance.now();
|
||||||
|
theTrieWASM = hnBigTrieWASM.fromIterable(hostnamePool, 'addWASM');
|
||||||
|
hnBigTrieWASM.optimize();
|
||||||
|
t1 = performance.now();
|
||||||
|
stdout('\xA0');
|
||||||
|
stdout(
|
||||||
|
'HNTrieContainer creation (WASM) completed in ' +
|
||||||
|
(t1 - t0).toFixed(2) + ' ms'
|
||||||
|
);
|
||||||
|
|
||||||
|
const bufJS = theTrieJS.container.buf;
|
||||||
|
const bufWASM = theTrieWASM.container.buf;
|
||||||
|
for ( let i = 0; i < bufJS.length; i++ ) {
|
||||||
|
if ( bufJS[i] !== bufWASM[i] ) {
|
||||||
|
stdout('theTrieWASM failure at index ' + i);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let selfie = hnBigTrieJS.serialize();
|
||||||
|
t0 = performance.now();
|
||||||
|
hnBigTrieUnserialized.unserialize(selfie);
|
||||||
|
const theTrieUnserialized = hnBigTrieUnserialized.createOne(hnBigTrieJS.compileOne(theTrieJS));
|
||||||
|
t1 = performance.now();
|
||||||
|
stdout('\xA0');
|
||||||
|
stdout(
|
||||||
|
'HNTrieContainer creation (unserialized) completed in ' +
|
||||||
|
(t1 - t0).toFixed(2) + ' ms'
|
||||||
|
);
|
||||||
|
selfie = undefined;
|
||||||
|
|
||||||
document.getElementById('test').addEventListener('click', ( ) => {
|
document.getElementById('test').addEventListener('click', ( ) => {
|
||||||
let parent = document.getElementById('stdout');
|
let parent = document.getElementById('stdout');
|
||||||
while ( parent.childElementCount !== 0 ) {
|
while ( parent.childElementCount !== 0 ) {
|
||||||
parent.removeChild(parent.firstChild);
|
parent.removeChild(parent.firstChild);
|
||||||
}
|
}
|
||||||
testFlavor(hostnamePool, 'Old Trie (JS)', oldTrie.matches.bind(oldTrie), true, false);
|
testFlavor(
|
||||||
testFlavor(hostnamePool, 'New Trie (JS)', theTrie.matchesJS.bind(theTrie), 1, 0);
|
hostnamePool,
|
||||||
if ( hnTrieManager.matchesWASM instanceof Function ) {
|
'Set (JS)',
|
||||||
testFlavor(hostnamePool, 'New Trie (WASM)', theTrie.matchesWASM.bind(theTrie), 1, 0);
|
theSet.matches.bind(theSet),
|
||||||
|
r => r
|
||||||
|
);
|
||||||
|
testFlavor(
|
||||||
|
hostnamePool,
|
||||||
|
'HNTrieContainer (JS)',
|
||||||
|
theTrieJS.matchesJS.bind(theTrieJS),
|
||||||
|
r => r >= 0
|
||||||
|
);
|
||||||
|
if ( theTrieWASM !== undefined ) {
|
||||||
|
testFlavor(
|
||||||
|
hostnamePool,
|
||||||
|
'HNTrieContainer (WASM)',
|
||||||
|
theTrieWASM.matchesWASM.bind(theTrieWASM),
|
||||||
|
r => r >= 0
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
testFlavor(
|
||||||
|
hostnamePool,
|
||||||
|
'HNTrieContainer (unserialized)',
|
||||||
|
theTrieUnserialized.matchesJS.bind(theTrieUnserialized),
|
||||||
|
r => r >= 0
|
||||||
|
);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,8 @@
|
||||||
<p>Some of the pages below are hosted on <a href="https://raw.githack.com">raw.githack.com</a> in order to ensure some of the secondary resources can be properly loaded (specifically, the WebAssembly modules, as they <a href="https://github.com/WebAssembly/design/blob/master/Web.md#webassemblyinstantiatestreaming">require to be loaded using same-origin policy</a>).</p>
|
<p>Some of the pages below are hosted on <a href="https://raw.githack.com">raw.githack.com</a> in order to ensure some of the secondary resources can be properly loaded (specifically, the WebAssembly modules, as they <a href="https://github.com/WebAssembly/design/blob/master/Web.md#webassemblyinstantiatestreaming">require to be loaded using same-origin policy</a>).</p>
|
||||||
<ul>
|
<ul>
|
||||||
<li><a href="https://raw.githack.com/gorhill/uBlock/master/docs/tests/hntrie-test.html">HNTrie: tests</a>
|
<li><a href="https://raw.githack.com/gorhill/uBlock/master/docs/tests/hntrie-test.html">HNTrie: tests</a>
|
||||||
<li><a href="https://raw.githack.com/gorhill/uBlock/master/docs/tests/hnset-benchmark.html">HNTrie: benchmarks</a>
|
<li><a href="https://raw.githack.com/gorhill/uBlock/master/docs/tests/hnset-benchmark.html">HNTrie, small (2) to medium (~1000) set: benchmarks</a>
|
||||||
|
<li><a href="https://raw.githack.com/gorhill/uBlock/master/docs/tests/hnbigset-benchmark.html">HNTrie, small (2) to large (40,000+) set: benchmarks</a>
|
||||||
</ul>
|
</ul>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
|
@ -137,7 +137,7 @@ const µBlock = (function() { // jshint ignore:line
|
||||||
// Read-only
|
// Read-only
|
||||||
systemSettings: {
|
systemSettings: {
|
||||||
compiledMagic: 6, // Increase when compiled format changes
|
compiledMagic: 6, // Increase when compiled format changes
|
||||||
selfieMagic: 6 // Increase when selfie format changes
|
selfieMagic: 7 // Increase when selfie format changes
|
||||||
},
|
},
|
||||||
|
|
||||||
restoreBackupSettings: {
|
restoreBackupSettings: {
|
||||||
|
|
935
src/js/hntrie.js
935
src/js/hntrie.js
File diff suppressed because it is too large
Load Diff
|
@ -20,7 +20,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* jshint bitwise: false */
|
/* jshint bitwise: false */
|
||||||
/* global punycode, hnTrieManager */
|
/* global punycode, HNTrieContainer */
|
||||||
|
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
|
@ -738,42 +738,40 @@ registerFilterClass(FilterRegex);
|
||||||
const FilterOrigin = function() {
|
const FilterOrigin = function() {
|
||||||
};
|
};
|
||||||
|
|
||||||
FilterOrigin.prototype.wrapped = {
|
FilterOrigin.prototype = {
|
||||||
compile: function() {
|
wrapped: {
|
||||||
return '';
|
compile: function() {
|
||||||
|
return '';
|
||||||
|
},
|
||||||
|
logData: function() {
|
||||||
|
return {
|
||||||
|
compiled: ''
|
||||||
|
};
|
||||||
|
},
|
||||||
|
match: function() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
matchOrigin: function() {
|
||||||
|
return true;
|
||||||
|
},
|
||||||
|
match: function(url, tokenBeg) {
|
||||||
|
return this.matchOrigin() && this.wrapped.match(url, tokenBeg);
|
||||||
},
|
},
|
||||||
logData: function() {
|
logData: function() {
|
||||||
return {
|
const out = this.wrapped.logData();
|
||||||
compiled: ''
|
const domainOpt = this.toDomainOpt();
|
||||||
};
|
out.compiled = [ this.fid, domainOpt, out.compiled ];
|
||||||
|
if ( out.opts === undefined ) {
|
||||||
|
out.opts = 'domain=' + domainOpt;
|
||||||
|
} else {
|
||||||
|
out.opts += ',domain=' + domainOpt;
|
||||||
|
}
|
||||||
|
return out;
|
||||||
|
},
|
||||||
|
compile: function() {
|
||||||
|
return [ this.fid, this.toDomainOpt(), this.wrapped.compile() ];
|
||||||
},
|
},
|
||||||
match: function() {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
FilterOrigin.prototype.matchOrigin = function() {
|
|
||||||
return true;
|
|
||||||
};
|
|
||||||
|
|
||||||
FilterOrigin.prototype.match = function(url, tokenBeg) {
|
|
||||||
return this.matchOrigin() && this.wrapped.match(url, tokenBeg);
|
|
||||||
};
|
|
||||||
|
|
||||||
FilterOrigin.prototype.logData = function() {
|
|
||||||
var out = this.wrapped.logData(),
|
|
||||||
domainOpt = this.toDomainOpt();
|
|
||||||
out.compiled = [ this.fid, domainOpt, out.compiled ];
|
|
||||||
if ( out.opts === undefined ) {
|
|
||||||
out.opts = 'domain=' + domainOpt;
|
|
||||||
} else {
|
|
||||||
out.opts += ',domain=' + domainOpt;
|
|
||||||
}
|
|
||||||
return out;
|
|
||||||
};
|
|
||||||
|
|
||||||
FilterOrigin.prototype.compile = function() {
|
|
||||||
return [ this.fid, this.toDomainOpt(), this.wrapped.compile() ];
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// *** start of specialized origin matchers
|
// *** start of specialized origin matchers
|
||||||
|
@ -853,10 +851,12 @@ FilterOriginHitSet.prototype = Object.create(FilterOrigin.prototype, {
|
||||||
},
|
},
|
||||||
matchOrigin: {
|
matchOrigin: {
|
||||||
value: function() {
|
value: function() {
|
||||||
if ( hnTrieManager.isValidRef(this.oneOf) === false ) {
|
if ( this.oneOf === null ) {
|
||||||
this.oneOf = hnTrieManager.fromDomainOpt(this.domainOpt);
|
this.oneOf = FilterOrigin.trieContainer.fromIterable(
|
||||||
|
this.domainOpt.split('|')
|
||||||
|
);
|
||||||
}
|
}
|
||||||
return this.oneOf.matches(pageHostnameRegister) === 1;
|
return this.oneOf.matches(pageHostnameRegister) !== -1;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
@ -885,12 +885,12 @@ FilterOriginMissSet.prototype = Object.create(FilterOrigin.prototype, {
|
||||||
},
|
},
|
||||||
matchOrigin: {
|
matchOrigin: {
|
||||||
value: function() {
|
value: function() {
|
||||||
if ( hnTrieManager.isValidRef(this.noneOf) === false ) {
|
if ( this.noneOf === null ) {
|
||||||
this.noneOf = hnTrieManager.fromDomainOpt(
|
this.noneOf = FilterOrigin.trieContainer.fromIterable(
|
||||||
this.domainOpt.replace(/~/g, '')
|
this.domainOpt.replace(/~/g, '').split('|')
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
return this.noneOf.matches(pageHostnameRegister) === 0;
|
return this.noneOf.matches(pageHostnameRegister) === -1;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
@ -926,8 +926,8 @@ FilterOriginMixedSet.prototype = Object.create(FilterOrigin.prototype, {
|
||||||
oneOf.push(hostname);
|
oneOf.push(hostname);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
this.oneOf = hnTrieManager.fromIterable(oneOf);
|
this.oneOf = FilterOrigin.trieContainer.fromIterable(oneOf);
|
||||||
this.noneOf = hnTrieManager.fromIterable(noneOf);
|
this.noneOf = FilterOrigin.trieContainer.fromIterable(noneOf);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
toDomainOpt: {
|
toDomainOpt: {
|
||||||
|
@ -937,12 +937,10 @@ FilterOriginMixedSet.prototype = Object.create(FilterOrigin.prototype, {
|
||||||
},
|
},
|
||||||
matchOrigin: {
|
matchOrigin: {
|
||||||
value: function() {
|
value: function() {
|
||||||
if ( hnTrieManager.isValidRef(this.oneOf) === false ) {
|
if ( this.oneOf === null ) { this.init(); }
|
||||||
this.init();
|
|
||||||
}
|
|
||||||
let needle = pageHostnameRegister;
|
let needle = pageHostnameRegister;
|
||||||
return this.oneOf.matches(needle) === 1 &&
|
return this.oneOf.matches(needle) !== -1 &&
|
||||||
this.noneOf.matches(needle) === 0;
|
this.noneOf.matches(needle) === -1;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
@ -990,6 +988,33 @@ FilterOrigin.load = function(args) {
|
||||||
return f;
|
return f;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
FilterOrigin.trieContainer = (function() {
|
||||||
|
let trieDetails;
|
||||||
|
try {
|
||||||
|
trieDetails = JSON.parse(
|
||||||
|
vAPI.localStorage.getItem('FilterOrigin.trieDetails')
|
||||||
|
);
|
||||||
|
} catch(ex) {
|
||||||
|
}
|
||||||
|
return new HNTrieContainer(trieDetails);
|
||||||
|
})();
|
||||||
|
|
||||||
|
FilterOrigin.readyToUse = function() {
|
||||||
|
return FilterOrigin.trieContainer.readyToUse();
|
||||||
|
};
|
||||||
|
|
||||||
|
FilterOrigin.reset = function() {
|
||||||
|
return FilterOrigin.trieContainer.reset();
|
||||||
|
};
|
||||||
|
|
||||||
|
FilterOrigin.optimize = function() {
|
||||||
|
const trieDetails = FilterOrigin.trieContainer.optimize();
|
||||||
|
vAPI.localStorage.setItem(
|
||||||
|
'FilterOrigin.trieDetails',
|
||||||
|
JSON.stringify(trieDetails)
|
||||||
|
);
|
||||||
|
};
|
||||||
|
|
||||||
registerFilterClass(FilterOrigin);
|
registerFilterClass(FilterOrigin);
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
@ -1059,60 +1084,66 @@ FilterDataHolderEntry.load = function(data) {
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
// Dictionary of hostnames
|
// Dictionary of hostnames
|
||||||
//
|
|
||||||
const FilterHostnameDict = function() {
|
const FilterHostnameDict = function(args) {
|
||||||
this.h = ''; // short-lived register
|
this.h = ''; // short-lived register
|
||||||
this.dict = new Set();
|
this.dict = FilterHostnameDict.trieContainer.createOne(args);
|
||||||
};
|
};
|
||||||
|
|
||||||
Object.defineProperty(FilterHostnameDict.prototype, 'size', {
|
FilterHostnameDict.prototype = {
|
||||||
get: function() {
|
get size() {
|
||||||
return this.dict.size;
|
return this.dict.size;
|
||||||
|
},
|
||||||
|
add: function(hn) {
|
||||||
|
return this.dict.add(hn);
|
||||||
|
},
|
||||||
|
match: function() {
|
||||||
|
const pos = this.dict.matches(requestHostnameRegister);
|
||||||
|
if ( pos === -1 ) { return false; }
|
||||||
|
this.h = requestHostnameRegister.slice(pos);
|
||||||
|
return true;
|
||||||
|
},
|
||||||
|
logData: function() {
|
||||||
|
return {
|
||||||
|
raw: '||' + this.h + '^',
|
||||||
|
regex: rawToRegexStr(this.h, 0) + '(?:[^%.0-9a-z_-]|$)',
|
||||||
|
compiled: this.h
|
||||||
|
};
|
||||||
|
},
|
||||||
|
compile: function() {
|
||||||
|
return [ this.fid, FilterHostnameDict.trieContainer.compileOne(this.dict) ];
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
FilterHostnameDict.trieContainer = (function() {
|
||||||
|
let trieDetails;
|
||||||
|
try {
|
||||||
|
trieDetails = JSON.parse(
|
||||||
|
vAPI.localStorage.getItem('FilterHostnameDict.trieDetails')
|
||||||
|
);
|
||||||
|
} catch(ex) {
|
||||||
}
|
}
|
||||||
});
|
return new HNTrieContainer(trieDetails);
|
||||||
|
})();
|
||||||
|
|
||||||
FilterHostnameDict.prototype.add = function(hn) {
|
FilterHostnameDict.readyToUse = function() {
|
||||||
if ( this.dict.has(hn) === true ) { return false; }
|
return FilterHostnameDict.trieContainer.readyToUse();
|
||||||
this.dict.add(hn);
|
|
||||||
return true;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
FilterHostnameDict.prototype.remove = function(hn) {
|
FilterHostnameDict.reset = function() {
|
||||||
return this.dict.delete(hn);
|
return FilterHostnameDict.trieContainer.reset();
|
||||||
};
|
};
|
||||||
|
|
||||||
FilterHostnameDict.prototype.match = function() {
|
FilterHostnameDict.optimize = function() {
|
||||||
// TODO: mind IP addresses
|
const trieDetails = FilterHostnameDict.trieContainer.optimize();
|
||||||
var pos,
|
vAPI.localStorage.setItem(
|
||||||
hostname = requestHostnameRegister;
|
'FilterHostnameDict.trieDetails',
|
||||||
while ( this.dict.has(hostname) === false ) {
|
JSON.stringify(trieDetails)
|
||||||
pos = hostname.indexOf('.');
|
);
|
||||||
if ( pos === -1 ) {
|
|
||||||
this.h = '';
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
hostname = hostname.slice(pos + 1);
|
|
||||||
}
|
|
||||||
this.h = hostname;
|
|
||||||
return true;
|
|
||||||
};
|
|
||||||
|
|
||||||
FilterHostnameDict.prototype.logData = function() {
|
|
||||||
return {
|
|
||||||
raw: '||' + this.h + '^',
|
|
||||||
regex: rawToRegexStr(this.h, 0) + '(?:[^%.0-9a-z_-]|$)',
|
|
||||||
compiled: this.h
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
FilterHostnameDict.prototype.compile = function() {
|
|
||||||
return [ this.fid, Array.from(this.dict) ];
|
|
||||||
};
|
};
|
||||||
|
|
||||||
FilterHostnameDict.load = function(args) {
|
FilterHostnameDict.load = function(args) {
|
||||||
var f = new FilterHostnameDict();
|
return new FilterHostnameDict(args[1]);
|
||||||
f.dict = new Set(args[1]);
|
|
||||||
return f;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
registerFilterClass(FilterHostnameDict);
|
registerFilterClass(FilterHostnameDict);
|
||||||
|
@ -1974,7 +2005,8 @@ FilterContainer.prototype.reset = function() {
|
||||||
this.filterParser.reset();
|
this.filterParser.reset();
|
||||||
|
|
||||||
// This will invalidate all hn tries throughout uBO:
|
// This will invalidate all hn tries throughout uBO:
|
||||||
hnTrieManager.reset();
|
FilterOrigin.reset();
|
||||||
|
FilterHostnameDict.reset();
|
||||||
|
|
||||||
// Runtime registers
|
// Runtime registers
|
||||||
this.cbRegister = undefined;
|
this.cbRegister = undefined;
|
||||||
|
@ -1985,20 +2017,20 @@ FilterContainer.prototype.reset = function() {
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
FilterContainer.prototype.freeze = function() {
|
FilterContainer.prototype.freeze = function() {
|
||||||
let filterPairId = FilterPair.fid,
|
const filterPairId = FilterPair.fid,
|
||||||
filterBucketId = FilterBucket.fid,
|
filterBucketId = FilterBucket.fid,
|
||||||
filterDataHolderId = FilterDataHolder.fid,
|
filterDataHolderId = FilterDataHolder.fid,
|
||||||
redirectTypeValue = typeNameToTypeValue.redirect,
|
redirectTypeValue = typeNameToTypeValue.redirect,
|
||||||
unserialize = µb.CompiledLineIO.unserialize;
|
unserialize = µb.CompiledLineIO.unserialize;
|
||||||
|
|
||||||
for ( let line of this.goodFilters ) {
|
for ( const line of this.goodFilters ) {
|
||||||
if ( this.badFilters.has(line) ) {
|
if ( this.badFilters.has(line) ) {
|
||||||
this.discardedCount += 1;
|
this.discardedCount += 1;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
let args = unserialize(line);
|
const args = unserialize(line);
|
||||||
let bits = args[0];
|
const bits = args[0];
|
||||||
|
|
||||||
// Special cases: delegate to more specialized engines.
|
// Special cases: delegate to more specialized engines.
|
||||||
// Redirect engine.
|
// Redirect engine.
|
||||||
|
@ -2008,8 +2040,8 @@ FilterContainer.prototype.freeze = function() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Plain static filters.
|
// Plain static filters.
|
||||||
let tokenHash = args[1];
|
const tokenHash = args[1];
|
||||||
let fdata = args[2];
|
const fdata = args[2];
|
||||||
|
|
||||||
// Special treatment: data-holding filters are stored separately
|
// Special treatment: data-holding filters are stored separately
|
||||||
// because they require special matching algorithm (unlike other
|
// because they require special matching algorithm (unlike other
|
||||||
|
@ -2063,6 +2095,8 @@ FilterContainer.prototype.freeze = function() {
|
||||||
|
|
||||||
this.filterParser.reset();
|
this.filterParser.reset();
|
||||||
this.goodFilters = new Set();
|
this.goodFilters = new Set();
|
||||||
|
FilterOrigin.optimize();
|
||||||
|
FilterHostnameDict.optimize();
|
||||||
this.frozen = true;
|
this.frozen = true;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -2072,7 +2106,7 @@ FilterContainer.prototype.freeze = function() {
|
||||||
// on asynchronous operations (ex.: when loading a wasm module).
|
// on asynchronous operations (ex.: when loading a wasm module).
|
||||||
|
|
||||||
FilterContainer.prototype.readyToUse = function() {
|
FilterContainer.prototype.readyToUse = function() {
|
||||||
return hnTrieManager.readyToUse();
|
return Promise.resolve();
|
||||||
};
|
};
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
@ -2108,6 +2142,7 @@ FilterContainer.prototype.toSelfie = function() {
|
||||||
allowFilterCount: this.allowFilterCount,
|
allowFilterCount: this.allowFilterCount,
|
||||||
blockFilterCount: this.blockFilterCount,
|
blockFilterCount: this.blockFilterCount,
|
||||||
discardedCount: this.discardedCount,
|
discardedCount: this.discardedCount,
|
||||||
|
trieContainer: FilterHostnameDict.trieContainer.serialize(),
|
||||||
categories: categoriesToSelfie(this.categories),
|
categories: categoriesToSelfie(this.categories),
|
||||||
dataFilters: dataFiltersToSelfie(this.dataFilters)
|
dataFilters: dataFiltersToSelfie(this.dataFilters)
|
||||||
};
|
};
|
||||||
|
@ -2123,6 +2158,7 @@ FilterContainer.prototype.fromSelfie = function(selfie) {
|
||||||
this.allowFilterCount = selfie.allowFilterCount;
|
this.allowFilterCount = selfie.allowFilterCount;
|
||||||
this.blockFilterCount = selfie.blockFilterCount;
|
this.blockFilterCount = selfie.blockFilterCount;
|
||||||
this.discardedCount = selfie.discardedCount;
|
this.discardedCount = selfie.discardedCount;
|
||||||
|
FilterHostnameDict.trieContainer.unserialize(selfie.trieContainer);
|
||||||
|
|
||||||
for ( let categoryEntry of selfie.categories ) {
|
for ( let categoryEntry of selfie.categories ) {
|
||||||
let tokenMap = new Map();
|
let tokenMap = new Map();
|
||||||
|
|
Binary file not shown.
|
@ -25,155 +25,178 @@
|
||||||
;; module start
|
;; module start
|
||||||
;;
|
;;
|
||||||
|
|
||||||
;; (func $log (import "imports" "log") (param i32 i32 i32))
|
(func $growBuf (import "imports" "growBuf"))
|
||||||
|
|
||||||
(memory (import "imports" "memory") 1)
|
(memory (import "imports" "memory") 1)
|
||||||
|
|
||||||
|
;; Trie container
|
||||||
|
;;
|
||||||
|
;; Memory layout, byte offset:
|
||||||
|
;; 0-254: needle being processed
|
||||||
|
;; 255: length of needle
|
||||||
|
;; 256-259: offset to start of trie data section (=> trie0)
|
||||||
|
;; 260-263: offset to end of trie data section (=> trie1)
|
||||||
|
;; 264-267: offset to start of character data section (=> char0)
|
||||||
|
;; 268-271: offset to end of character data section (=> char1)
|
||||||
|
;; 272: start of trie data section
|
||||||
|
;;
|
||||||
|
|
||||||
;;
|
;;
|
||||||
;; Public functions
|
;; Public functions
|
||||||
;;
|
;;
|
||||||
|
|
||||||
;;
|
;;
|
||||||
;; unsigned int matches(offset)
|
;; unsigned int matches(icell)
|
||||||
;;
|
;;
|
||||||
;; Test whether the currently set needle matches the trie at specified offset.
|
;; Test whether the currently set needle matches the trie at specified trie
|
||||||
;;
|
;; offset.
|
||||||
;; Memory layout, byte offset:
|
|
||||||
;; 0-254: encoded needle (ASCII)
|
|
||||||
;; 255 : needle length
|
|
||||||
;; 256- : tries
|
|
||||||
;;
|
;;
|
||||||
(func (export "matches")
|
(func (export "matches")
|
||||||
(param $itrie i32)
|
(param $icell i32) ;; offset to root cell of the trie
|
||||||
(result i32) ;; result: 0 = miss, 1 = hit
|
(result i32) ;; result = match index, -1 = miss
|
||||||
(local $ineedle i32) ;; current needle offset
|
(local $char0 i32) ;; offset to first character data
|
||||||
(local $nchar i32) ;; needle char being processed
|
(local $ineedle i32) ;; current needle offset
|
||||||
(local $tchar i32) ;; trie char being processed
|
(local $c i32)
|
||||||
(local $lxtra i32)
|
(local $v i32)
|
||||||
(local $ixtra i32)
|
(local $n i32)
|
||||||
i32.const 255
|
(local $i0 i32)
|
||||||
|
(local $i1 i32)
|
||||||
|
;;
|
||||||
|
i32.const 264 ;; start of char section is stored at addr 264
|
||||||
|
i32.load
|
||||||
|
set_local $char0
|
||||||
|
;; $icell is an index into an array of 32-bit values
|
||||||
|
get_local $icell
|
||||||
|
i32.const 2
|
||||||
|
i32.shl
|
||||||
|
set_local $icell
|
||||||
|
;; let ineedle = this.buf[255];
|
||||||
|
i32.const 255 ;; addr of needle is stored at addr 255
|
||||||
i32.load8_u
|
i32.load8_u
|
||||||
set_local $ineedle
|
set_local $ineedle
|
||||||
loop $nextNeedleChar
|
;; for (;;) {
|
||||||
|
block $noSegment loop $nextSegment
|
||||||
|
;; if ( ineedle === 0 ) { return -1; }
|
||||||
|
get_local $ineedle
|
||||||
|
i32.eqz
|
||||||
|
if
|
||||||
|
i32.const -1
|
||||||
|
return
|
||||||
|
end
|
||||||
;; ineedle -= 1;
|
;; ineedle -= 1;
|
||||||
get_local $ineedle
|
get_local $ineedle
|
||||||
i32.const -1
|
i32.const -1
|
||||||
i32.add
|
i32.add
|
||||||
tee_local $ineedle
|
tee_local $ineedle
|
||||||
;; let nchar = ineedle === -1 ? 0 : buf[ineedle];
|
;; let c = this.buf[ineedle];
|
||||||
i32.const 0
|
i32.load8_u
|
||||||
i32.lt_s
|
set_local $c
|
||||||
if
|
;; for (;;) {
|
||||||
i32.const 0
|
block $foundSegment loop $findSegment
|
||||||
set_local $nchar
|
;; v = this.buf32[icell+2];
|
||||||
else
|
get_local $icell
|
||||||
get_local $ineedle
|
i32.load offset=8
|
||||||
|
tee_local $v
|
||||||
|
;; i0 = this.char0 + (v & 0x00FFFFFF);
|
||||||
|
i32.const 0x00FFFFFF
|
||||||
|
i32.and
|
||||||
|
get_local $char0
|
||||||
|
i32.add
|
||||||
|
tee_local $i0
|
||||||
|
;; if ( this.buf[i0] === c ) { break; }
|
||||||
i32.load8_u
|
i32.load8_u
|
||||||
set_local $nchar
|
get_local $c
|
||||||
end
|
|
||||||
block $trieCharEqNeedleChar loop $nextTrieChar
|
|
||||||
;; let tchar = buf[itrie+8];
|
|
||||||
get_local $itrie
|
|
||||||
i32.load8_u offset=8
|
|
||||||
tee_local $tchar
|
|
||||||
;; if ( tchar === nchar ) { break; }
|
|
||||||
get_local $nchar
|
|
||||||
i32.eq
|
i32.eq
|
||||||
br_if $trieCharEqNeedleChar
|
br_if $foundSegment
|
||||||
;; if ( tchar === 0 && nchar === 0x2E ) { return 1; }
|
;; icell = this.buf32[icell+0];
|
||||||
get_local $tchar
|
get_local $icell
|
||||||
i32.eqz
|
|
||||||
if
|
|
||||||
get_local $nchar
|
|
||||||
i32.const 0x2E
|
|
||||||
i32.eq
|
|
||||||
if
|
|
||||||
i32.const 1
|
|
||||||
return
|
|
||||||
end
|
|
||||||
end
|
|
||||||
;; itrie = buf32[itrie >>> 2];
|
|
||||||
get_local $itrie
|
|
||||||
i32.load
|
i32.load
|
||||||
tee_local $itrie
|
i32.const 2
|
||||||
;; if ( itrie === 0 ) { return 0; }
|
i32.shl
|
||||||
|
tee_local $icell
|
||||||
i32.eqz
|
i32.eqz
|
||||||
if
|
if
|
||||||
i32.const 0
|
i32.const -1
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
br $nextTrieChar
|
br 0
|
||||||
end end
|
end end
|
||||||
;; if ( nchar === 0 ) { return 1; }
|
;; let n = v >>> 24;
|
||||||
get_local $nchar
|
get_local $v
|
||||||
i32.eqz
|
i32.const 24
|
||||||
|
i32.shr_u
|
||||||
|
tee_local $n
|
||||||
|
;; if ( n > 1 ) {
|
||||||
|
i32.const 1
|
||||||
|
i32.gt_u
|
||||||
if
|
if
|
||||||
i32.const 1
|
;; n -= 1;
|
||||||
return
|
get_local $n
|
||||||
end
|
i32.const -1
|
||||||
;; let lxtra = buf[itrie+9];
|
i32.add
|
||||||
get_local $itrie
|
tee_local $n
|
||||||
i32.load8_u offset=9
|
;; if ( n > ineedle ) { return -1; }
|
||||||
tee_local $lxtra
|
|
||||||
i32.eqz
|
|
||||||
if else
|
|
||||||
;; if ( lxtra > ineedle ) { return 0; }
|
|
||||||
get_local $lxtra
|
|
||||||
get_local $ineedle
|
get_local $ineedle
|
||||||
i32.gt_u
|
i32.gt_u
|
||||||
if
|
if
|
||||||
i32.const 0
|
i32.const -1
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
;; let ixtra = itrie + 10;
|
get_local $i0
|
||||||
get_local $itrie
|
i32.const 1
|
||||||
i32.const 10
|
|
||||||
i32.add
|
i32.add
|
||||||
tee_local $ixtra
|
tee_local $i0
|
||||||
;; lxtra += ixtra;
|
;; const i1 = i0 + n;
|
||||||
get_local $lxtra
|
get_local $n
|
||||||
i32.add
|
i32.add
|
||||||
set_local $lxtra
|
set_local $i1
|
||||||
;; do {
|
;; do {
|
||||||
block $noMoreExtraChars loop
|
loop
|
||||||
;; ineedle -= 1;
|
;; ineedle -= 1;
|
||||||
get_local $ineedle
|
get_local $ineedle
|
||||||
i32.const -1
|
i32.const -1
|
||||||
i32.add
|
i32.add
|
||||||
tee_local $ineedle
|
tee_local $ineedle
|
||||||
;; if ( buf[ineedle] !== buf[ixtra] ) { return 0; }
|
;; if ( this.buf[i0] !== this.buf[ineedle] ) { return -1; }
|
||||||
i32.load8_u
|
i32.load8_u
|
||||||
get_local $ixtra
|
get_local $i0
|
||||||
i32.load8_u
|
i32.load8_u
|
||||||
i32.ne
|
i32.ne
|
||||||
if
|
if
|
||||||
i32.const 0
|
i32.const -1
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
;; ixtra += 1;
|
;; i0 += 1;
|
||||||
get_local $ixtra
|
get_local $i0
|
||||||
i32.const 1
|
i32.const 1
|
||||||
i32.add
|
i32.add
|
||||||
tee_local $ixtra
|
tee_local $i0
|
||||||
;; while ( ixtra !== lxtra ) {
|
;; } while ( i0 < i1 );
|
||||||
get_local $lxtra
|
get_local $i1
|
||||||
i32.eq
|
i32.lt_u
|
||||||
br_if $noMoreExtraChars
|
br_if 0
|
||||||
br 0
|
end
|
||||||
end end
|
|
||||||
end
|
end
|
||||||
;; itrie = buf32[itrie + 4 >>> 2];
|
;; icell = this.buf32[icell+1];
|
||||||
get_local $itrie
|
get_local $icell
|
||||||
i32.load offset=4
|
i32.load offset=4
|
||||||
tee_local $itrie
|
i32.const 2
|
||||||
;; if ( itrie === 0 ) {
|
i32.shl
|
||||||
|
tee_local $icell
|
||||||
|
;; if ( icell === 0 ) { break; }
|
||||||
|
i32.eqz
|
||||||
|
br_if $noSegment
|
||||||
|
;; if ( this.buf32[icell+2] === 0 ) {
|
||||||
|
get_local $icell
|
||||||
|
i32.load
|
||||||
i32.eqz
|
i32.eqz
|
||||||
if
|
if
|
||||||
;; return ineedle === 0 || buf[ineedle-1] === 0x2E ? 1 : 0;
|
;; if ( ineedle === 0 || this.buf[ineedle-1] === 0x2E ) {
|
||||||
|
;; return ineedle;
|
||||||
|
;; }
|
||||||
get_local $ineedle
|
get_local $ineedle
|
||||||
i32.eqz
|
i32.eqz
|
||||||
if
|
if
|
||||||
i32.const 1
|
i32.const 0
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
get_local $ineedle
|
get_local $ineedle
|
||||||
|
@ -183,15 +206,462 @@
|
||||||
i32.const 0x2E
|
i32.const 0x2E
|
||||||
i32.eq
|
i32.eq
|
||||||
if
|
if
|
||||||
|
get_local $ineedle
|
||||||
|
return
|
||||||
|
end
|
||||||
|
;; icell = this.buf32[icell+1];
|
||||||
|
get_local $icell
|
||||||
|
i32.load offset=4
|
||||||
|
i32.const 2
|
||||||
|
i32.shl
|
||||||
|
set_local $icell
|
||||||
|
end
|
||||||
|
br 0
|
||||||
|
end end
|
||||||
|
;; return ineedle === 0 || this.buf[ineedle-1] === 0x2E ? ineedle : -1;
|
||||||
|
get_local $ineedle
|
||||||
|
i32.eqz
|
||||||
|
if
|
||||||
|
i32.const 0
|
||||||
|
return
|
||||||
|
end
|
||||||
|
get_local $ineedle
|
||||||
|
i32.const -1
|
||||||
|
i32.add
|
||||||
|
i32.load8_u
|
||||||
|
i32.const 0x2E
|
||||||
|
i32.eq
|
||||||
|
if
|
||||||
|
get_local $ineedle
|
||||||
|
return
|
||||||
|
end
|
||||||
|
i32.const -1
|
||||||
|
)
|
||||||
|
|
||||||
|
;;
|
||||||
|
;; unsigned int add(icell)
|
||||||
|
;;
|
||||||
|
;; Add a new hostname to a trie which root cell is passed as argument.
|
||||||
|
;;
|
||||||
|
(func (export "add")
|
||||||
|
(param $icell i32) ;; index of root cell of the trie
|
||||||
|
(result i32) ;; result: 0 not added, 1 = added
|
||||||
|
(local $lhnchar i32) ;; number of characters left to process in hostname
|
||||||
|
(local $char0 i32) ;; offset to start of character data section
|
||||||
|
(local $vseg i32) ;; integer value describing a segment
|
||||||
|
(local $isegchar0 i32) ;; offset to start of current segment's character data
|
||||||
|
(local $isegchar i32)
|
||||||
|
(local $lsegchar i32) ;; number of character in current segment
|
||||||
|
(local $inext i32) ;; index of next cell to process
|
||||||
|
;;
|
||||||
|
;; let lhnchar = this.buf[255];
|
||||||
|
i32.const 255
|
||||||
|
i32.load8_u
|
||||||
|
tee_local $lhnchar
|
||||||
|
;; if ( lhnchar === 0 ) { return 0; }
|
||||||
|
i32.eqz
|
||||||
|
if
|
||||||
|
i32.const 0
|
||||||
|
return
|
||||||
|
end
|
||||||
|
;; let icell = iroot;
|
||||||
|
get_local $icell
|
||||||
|
i32.const 2
|
||||||
|
i32.shl
|
||||||
|
tee_local $icell
|
||||||
|
;; if ( this.buf32[icell+2] === 0 ) {
|
||||||
|
i32.load offset=8
|
||||||
|
i32.eqz
|
||||||
|
if
|
||||||
|
;;this.buf32[icell+2] = this.addSegment(lhnchar);
|
||||||
|
;; return 1;
|
||||||
|
get_local $icell
|
||||||
|
get_local $lhnchar
|
||||||
|
call $addSegment
|
||||||
|
i32.store offset=8
|
||||||
|
i32.const 1
|
||||||
|
return
|
||||||
|
end
|
||||||
|
;; if (
|
||||||
|
;; (this.buf32[HNBIGTRIE_CHAR0_SLOT] - this.buf32[HNBIGTRIE_TRIE1_SLOT]) < 24 ||
|
||||||
|
;; (this.buf.length - this.buf32[HNBIGTRIE_CHAR1_SLOT]) < 256
|
||||||
|
;; ) {
|
||||||
|
;; this.growBuf();
|
||||||
|
;; }
|
||||||
|
i32.const 264
|
||||||
|
i32.load
|
||||||
|
i32.const 260
|
||||||
|
i32.load
|
||||||
|
i32.sub
|
||||||
|
i32.const 24
|
||||||
|
i32.lt_u
|
||||||
|
if
|
||||||
|
call $growBuf
|
||||||
|
else
|
||||||
|
memory.size
|
||||||
|
i32.const 16
|
||||||
|
i32.shl
|
||||||
|
i32.const 268
|
||||||
|
i32.load
|
||||||
|
i32.sub
|
||||||
|
i32.const 256
|
||||||
|
i32.lt_u
|
||||||
|
if
|
||||||
|
call $growBuf
|
||||||
|
end
|
||||||
|
end
|
||||||
|
;; const char0 = this.buf32[HNBIGTRIE_CHAR0_SLOT];
|
||||||
|
i32.const 264
|
||||||
|
i32.load
|
||||||
|
set_local $char0
|
||||||
|
;; for (;;) {
|
||||||
|
loop $nextSegment
|
||||||
|
;; const v = this.buf32[icell+2];
|
||||||
|
get_local $icell
|
||||||
|
i32.load offset=8
|
||||||
|
tee_local $vseg
|
||||||
|
;; if ( vseg === 0 ) {
|
||||||
|
i32.eqz
|
||||||
|
if
|
||||||
|
get_local $icell
|
||||||
|
i32.load offset=4
|
||||||
|
i32.const 2
|
||||||
|
i32.shl
|
||||||
|
set_local $icell
|
||||||
|
br $nextSegment
|
||||||
|
end
|
||||||
|
;; let isegchar0 = char0 + (vseg & 0x00FFFFFF);
|
||||||
|
get_local $char0
|
||||||
|
get_local $vseg
|
||||||
|
i32.const 0x00FFFFFF
|
||||||
|
i32.and
|
||||||
|
i32.add
|
||||||
|
tee_local $isegchar0
|
||||||
|
;; if ( this.buf[isegchar0] !== this.buf[lhnchar-1] ) {
|
||||||
|
i32.load8_u
|
||||||
|
get_local $lhnchar
|
||||||
|
i32.const -1
|
||||||
|
i32.add
|
||||||
|
i32.load8_u
|
||||||
|
i32.ne
|
||||||
|
if
|
||||||
|
;; inext = this.buf32[icell+0];
|
||||||
|
get_local $icell
|
||||||
|
i32.load
|
||||||
|
i32.const 2
|
||||||
|
i32.shl
|
||||||
|
tee_local $inext
|
||||||
|
;; if ( inext === 0 ) {
|
||||||
|
i32.eqz
|
||||||
|
if
|
||||||
|
;; this.buf32[icell+0] = this.addCell(0, 0, this.addSegment(lhnchar));
|
||||||
|
get_local $icell
|
||||||
|
i32.const 0
|
||||||
|
i32.const 0
|
||||||
|
get_local $lhnchar
|
||||||
|
call $addSegment
|
||||||
|
call $addCell
|
||||||
|
i32.store
|
||||||
|
;; return 1;
|
||||||
i32.const 1
|
i32.const 1
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
i32.const 0
|
;; icell = inext;
|
||||||
return
|
get_local $inext
|
||||||
|
set_local $icell
|
||||||
|
br $nextSegment
|
||||||
end
|
end
|
||||||
br 0
|
;; let isegchar = 1;
|
||||||
|
i32.const 1
|
||||||
|
set_local $isegchar
|
||||||
|
;; lhnchar -= 1;
|
||||||
|
get_local $lhnchar
|
||||||
|
i32.const -1
|
||||||
|
i32.add
|
||||||
|
set_local $lhnchar
|
||||||
|
;; const lsegchar = vseg >>> 24;
|
||||||
|
get_local $vseg
|
||||||
|
i32.const 24
|
||||||
|
i32.shr_u
|
||||||
|
tee_local $lsegchar
|
||||||
|
;; if ( lsegchar !== 1 ) {
|
||||||
|
i32.const 1
|
||||||
|
i32.ne
|
||||||
|
if
|
||||||
|
;; for (;;) {
|
||||||
|
block $mismatch loop
|
||||||
|
;; if ( isegchar === lsegchar ) { break; }
|
||||||
|
get_local $isegchar
|
||||||
|
get_local $lsegchar
|
||||||
|
i32.eq
|
||||||
|
br_if $mismatch
|
||||||
|
get_local $lhnchar
|
||||||
|
i32.eqz
|
||||||
|
br_if $mismatch
|
||||||
|
;; if ( this.buf[isegchar0+isegchar] !== this.buf[lhnchar-1] ) { break; }
|
||||||
|
get_local $isegchar0
|
||||||
|
get_local $isegchar
|
||||||
|
i32.add
|
||||||
|
i32.load8_u
|
||||||
|
get_local $lhnchar
|
||||||
|
i32.const -1
|
||||||
|
i32.add
|
||||||
|
i32.load8_u
|
||||||
|
i32.ne
|
||||||
|
br_if $mismatch
|
||||||
|
;; isegchar += 1;
|
||||||
|
get_local $isegchar
|
||||||
|
i32.const 1
|
||||||
|
i32.add
|
||||||
|
set_local $isegchar
|
||||||
|
;; lhnchar -= 1;
|
||||||
|
get_local $lhnchar
|
||||||
|
i32.const -1
|
||||||
|
i32.add
|
||||||
|
set_local $lhnchar
|
||||||
|
br 0
|
||||||
|
end end
|
||||||
|
end
|
||||||
|
;; if ( isegchar === lsegchar ) {
|
||||||
|
get_local $isegchar
|
||||||
|
get_local $lsegchar
|
||||||
|
i32.eq
|
||||||
|
if
|
||||||
|
;; inext = this.buf32[icell+1];
|
||||||
|
get_local $icell
|
||||||
|
i32.load offset=4
|
||||||
|
i32.const 2
|
||||||
|
i32.shl
|
||||||
|
set_local $inext
|
||||||
|
;; if ( lhnchar === 0 ) {
|
||||||
|
get_local $lhnchar
|
||||||
|
i32.eqz
|
||||||
|
if
|
||||||
|
;; if ( inext === 0 || this.buf32[inext+2] === 0 ) { return 0; }
|
||||||
|
get_local $inext
|
||||||
|
i32.eqz
|
||||||
|
if
|
||||||
|
i32.const 0
|
||||||
|
return
|
||||||
|
end
|
||||||
|
get_local $inext
|
||||||
|
i32.load offset=8
|
||||||
|
i32.eqz
|
||||||
|
if
|
||||||
|
i32.const 0
|
||||||
|
return
|
||||||
|
end
|
||||||
|
;; this.buf32[icell+1] = this.addCell(0, inext, 0);
|
||||||
|
get_local $icell
|
||||||
|
i32.const 0
|
||||||
|
get_local $inext
|
||||||
|
i32.const 2
|
||||||
|
i32.shr_u
|
||||||
|
i32.const 0
|
||||||
|
call $addCell
|
||||||
|
i32.store offset=4
|
||||||
|
else
|
||||||
|
;; if ( inext !== 0 ) {
|
||||||
|
get_local $inext
|
||||||
|
i32.eqz
|
||||||
|
if else
|
||||||
|
;; icell = inext;
|
||||||
|
get_local $inext
|
||||||
|
set_local $icell
|
||||||
|
br $nextSegment
|
||||||
|
end
|
||||||
|
;; inext = this.addCell(0, 0, 0);
|
||||||
|
;; this.buf32[icell+1] = inext;
|
||||||
|
get_local $icell
|
||||||
|
i32.const 0
|
||||||
|
i32.const 0
|
||||||
|
i32.const 0
|
||||||
|
call $addCell
|
||||||
|
tee_local $inext
|
||||||
|
i32.store offset=4
|
||||||
|
;; this.buf32[inext+1] = this.addCell(0, 0, this.addSegment(lhnchar));
|
||||||
|
get_local $inext
|
||||||
|
i32.const 2
|
||||||
|
i32.shl
|
||||||
|
i32.const 0
|
||||||
|
i32.const 0
|
||||||
|
get_local $lhnchar
|
||||||
|
call $addSegment
|
||||||
|
call $addCell
|
||||||
|
i32.store offset=4
|
||||||
|
end
|
||||||
|
else
|
||||||
|
;; isegchar0 -= char0;
|
||||||
|
get_local $icell
|
||||||
|
get_local $isegchar0
|
||||||
|
get_local $char0
|
||||||
|
i32.sub
|
||||||
|
tee_local $isegchar0
|
||||||
|
;; this.buf32[icell+2] = isegchar << 24 | isegchar0;
|
||||||
|
get_local $isegchar
|
||||||
|
i32.const 24
|
||||||
|
i32.shl
|
||||||
|
i32.or
|
||||||
|
i32.store offset=8
|
||||||
|
;; inext = this.addCell(
|
||||||
|
;; 0,
|
||||||
|
;; this.buf32[icell+1],
|
||||||
|
;; lsegchar - isegchar << 24 | isegchar0 + isegchar
|
||||||
|
;; );
|
||||||
|
;; this.buf32[icell+1] = inext;
|
||||||
|
get_local $icell
|
||||||
|
i32.const 0
|
||||||
|
get_local $icell
|
||||||
|
i32.load offset=4
|
||||||
|
get_local $lsegchar
|
||||||
|
get_local $isegchar
|
||||||
|
i32.sub
|
||||||
|
i32.const 24
|
||||||
|
i32.shl
|
||||||
|
get_local $isegchar0
|
||||||
|
get_local $isegchar
|
||||||
|
i32.add
|
||||||
|
i32.or
|
||||||
|
call $addCell
|
||||||
|
tee_local $inext
|
||||||
|
i32.store offset=4
|
||||||
|
;; if ( lhnchar === 0 ) {
|
||||||
|
get_local $lhnchar
|
||||||
|
i32.eqz
|
||||||
|
if
|
||||||
|
;; this.buf32[icell+1] = this.addCell(0, inext, 0);
|
||||||
|
get_local $icell
|
||||||
|
i32.const 0
|
||||||
|
get_local $inext
|
||||||
|
i32.const 0
|
||||||
|
call $addCell
|
||||||
|
i32.store offset=4
|
||||||
|
else
|
||||||
|
;; this.buf32[inext+0] = this.addCell(0, 0, this.addSegment(lhnchar));
|
||||||
|
get_local $inext
|
||||||
|
i32.const 2
|
||||||
|
i32.shl
|
||||||
|
i32.const 0
|
||||||
|
i32.const 0
|
||||||
|
get_local $lhnchar
|
||||||
|
call $addSegment
|
||||||
|
call $addCell
|
||||||
|
i32.store
|
||||||
|
end
|
||||||
|
end
|
||||||
|
;; return 1;
|
||||||
|
i32.const 1
|
||||||
|
return
|
||||||
end
|
end
|
||||||
i32.const 0
|
;;
|
||||||
|
i32.const 1
|
||||||
|
)
|
||||||
|
|
||||||
|
;;
|
||||||
|
;; Private functions
|
||||||
|
;;
|
||||||
|
|
||||||
|
;;
|
||||||
|
;; unsigned int addCell(idown, iright, vseg)
|
||||||
|
;;
|
||||||
|
;; Add a new cell, return cell index.
|
||||||
|
;;
|
||||||
|
(func $addCell
|
||||||
|
(param $idown i32)
|
||||||
|
(param $iright i32)
|
||||||
|
(param $vseg i32)
|
||||||
|
(result i32) ;; result: index of added cell
|
||||||
|
(local $icell i32)
|
||||||
|
;;
|
||||||
|
;; let icell = this.buf32[HNBIGTRIE_TRIE1_SLOT];
|
||||||
|
;; this.buf32[HNBIGTRIE_TRIE1_SLOT] = icell + 12;
|
||||||
|
i32.const 260
|
||||||
|
i32.const 260
|
||||||
|
i32.load
|
||||||
|
tee_local $icell
|
||||||
|
i32.const 12
|
||||||
|
i32.add
|
||||||
|
i32.store
|
||||||
|
;; this.buf32[icell+0] = idown;
|
||||||
|
get_local $icell
|
||||||
|
get_local $idown
|
||||||
|
i32.store
|
||||||
|
;; this.buf32[icell+1] = iright;
|
||||||
|
get_local $icell
|
||||||
|
get_local $iright
|
||||||
|
i32.store offset=4
|
||||||
|
;; this.buf32[icell+2] = v;
|
||||||
|
get_local $icell
|
||||||
|
get_local $vseg
|
||||||
|
i32.store offset=8
|
||||||
|
;; return icell;
|
||||||
|
get_local $icell
|
||||||
|
i32.const 2
|
||||||
|
i32.shr_u
|
||||||
|
)
|
||||||
|
|
||||||
|
;;
|
||||||
|
;; unsigned int addSegment(lsegchar)
|
||||||
|
;;
|
||||||
|
;; Store a segment of characters and return a segment descriptor. The segment
|
||||||
|
;; is created from the character data in the needle buffer.
|
||||||
|
;;
|
||||||
|
(func $addSegment
|
||||||
|
(param $lsegchar i32)
|
||||||
|
(result i32) ;; result: segment descriptor
|
||||||
|
(local $char1 i32) ;; offset to end of character data section
|
||||||
|
(local $isegchar i32) ;; relative offset to first character of segment
|
||||||
|
(local $i i32) ;; iterator
|
||||||
|
;;
|
||||||
|
;; if ( lsegchar === 0 ) { return 0; }
|
||||||
|
get_local $lsegchar
|
||||||
|
i32.eqz
|
||||||
|
if
|
||||||
|
i32.const 0
|
||||||
|
return
|
||||||
|
end
|
||||||
|
;; let char1 = this.buf32[HNBIGTRIE_CHAR1_SLOT];
|
||||||
|
i32.const 268
|
||||||
|
i32.load
|
||||||
|
tee_local $char1
|
||||||
|
;; const isegchar = char1 - this.buf32[HNBIGTRIE_CHAR0_SLOT];
|
||||||
|
i32.const 264
|
||||||
|
i32.load
|
||||||
|
i32.sub
|
||||||
|
set_local $isegchar
|
||||||
|
;; let i = lsegchar;
|
||||||
|
get_local $lsegchar
|
||||||
|
set_local $i
|
||||||
|
;; do {
|
||||||
|
block $endOfSegment loop
|
||||||
|
;; this.buf[char1++] = this.buf[--i];
|
||||||
|
get_local $char1
|
||||||
|
get_local $i
|
||||||
|
i32.const -1
|
||||||
|
i32.add
|
||||||
|
tee_local $i
|
||||||
|
i32.load8_u
|
||||||
|
i32.store8
|
||||||
|
get_local $char1
|
||||||
|
i32.const 1
|
||||||
|
i32.add
|
||||||
|
set_local $char1
|
||||||
|
;; } while ( i !== 0 );
|
||||||
|
get_local $i
|
||||||
|
i32.eqz
|
||||||
|
br_if $endOfSegment
|
||||||
|
br 0
|
||||||
|
end end
|
||||||
|
;; this.buf32[HNBIGTRIE_CHAR1_SLOT] = char1;
|
||||||
|
i32.const 268
|
||||||
|
get_local $char1
|
||||||
|
i32.store
|
||||||
|
;; return (lsegchar << 24) | isegchar;
|
||||||
|
get_local $lsegchar
|
||||||
|
i32.const 24
|
||||||
|
i32.shl
|
||||||
|
get_local $isegchar
|
||||||
|
i32.or
|
||||||
)
|
)
|
||||||
|
|
||||||
;;
|
;;
|
||||||
|
|
Loading…
Reference in New Issue