3rd-gen hntrie, suitable for large set of hostnames

This commit is contained in:
Raymond Hill 2018-12-04 13:02:09 -05:00
parent bf28a83e2d
commit 1b6fea16da
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
9 changed files with 1697 additions and 620 deletions

View File

@ -0,0 +1,271 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body style="font: 14px sans-serif">
<h1>Benchmark of large hostname-lookup from small to large set: Set, HNTrie</h1>
<p><button id="createBenchmark">Creation</button> <button id="lookupBenchmark">Lookup</button></p>
<div id="results-0" style="white-space:pre;font-family:mono"></div>
<div id="results-1" style="white-space:pre;font-family:mono"></div>
<div id="results-2" style="white-space:pre;font-family:mono"></div>
<div id="results-3" style="white-space:pre;font-family:mono"></div>
<div id="results-4" style="white-space:pre;font-family:mono"></div>
<div id="results-5" style="white-space:pre;font-family:mono"></div>
<div id="results-6" style="white-space:pre;font-family:mono"></div>
<script src="https://raw.githack.com/gorhill/uBlock/master/src/js/hntrie.js"></script>
<!-- <script src="../../src/js/hntrie.js"></script> -->
<script src="hostname-pool.js"></script>
<script src="https://cdn.jsdelivr.net/lodash/4.17.2/lodash.min.js"></script>
<script src="https://cdn.jsdelivr.net/platform.js/1.3.3/platform.js"></script>
<script src="https://cdn.jsdelivr.net/benchmarkjs/2.1.2/benchmark.js"></script>
<script>
// Return one entry of the global `hostnamePool`, picked at random.
const randomHostname = function() {
    const index = Math.floor(Math.random() * hostnamePool.length);
    return hostnamePool[index];
};
// Build a random lookup needle from a pooled hostname: everything before the
// last dot is replaced with a random base-36 string (a hostname without a dot
// is kept as is), then roughly half of the time an extra random label of at
// most four characters is prepended.
const randomNeedle = function() {
    const hostname = randomHostname();
    const lastDot = hostname.lastIndexOf('.');
    let needle = lastDot === -1
        ? hostname
        : Math.random().toString(36).slice(2) + hostname.slice(lastDot);
    const withExtraLabel = Math.random() < 0.5;
    if ( withExtraLabel ) {
        const label = Math.random().toString(36).slice(2, 6);
        needle = label + '.' + needle;
    }
    return needle;
};
// Create lists of random hostnames of various sizes: the largest list has as
// many entries as `hostnamePool`, and each following list is a quarter of the
// size of the previous one (integer division), for as long as that size is
// greater than 1. The resulting array is ordered from smallest to largest.
const hostnameLists = (function() {
    const lists = [];
    for ( let size = hostnamePool.length; size > 1; size >>>= 2 ) {
        lists.unshift(Array.from({ length: size }, ( ) => randomHostname()));
    }
    return lists;
})();
/******************************************************************************/
// Create a Set-based dictionary out of an iterable of hostnames.
var setBasedDictCreate = function(hostnames) {
    const dict = new Set(hostnames);
    return dict;
};
// Return true when `needle`, or any of its parent domains obtained by
// repeatedly dropping the leftmost label, is present in the `haystack` Set.
var setBasedDictTest = function(haystack, needle) {
    let candidate = needle;
    while ( haystack.has(candidate) === false ) {
        const dot = candidate.indexOf('.');
        // No label left to drop: nothing matched.
        if ( dot === -1 ) { return false; }
        candidate = candidate.slice(dot + 1);
    }
    return true;
};
/******************************************************************************/
// Two separate trie containers, one per flavor being benchmarked: the helpers
// below fill the first one through the 'addJS' code path and the second one
// through the 'addWASM' code path.
const hnBigTrieJS = new HNTrieContainer();
const hnBigTrieWASM = new HNTrieContainer();
// Create a trie out of `hostnames` in the hnBigTrieJS container, asking for
// the 'addJS' code path.
const trieBasedDictCreateJS = hostnames =>
    hnBigTrieJS.fromIterable(hostnames, 'addJS');
// Look up `needle` in the trie `haystack` through its matchesJS() method and
// return whatever that method returns.
const trieBasedDictTest = (haystack, needle) => haystack.matchesJS(needle);
// Create a trie out of `hostnames` in the hnBigTrieWASM container, asking for
// the 'addWASM' code path.
const trieBasedDictCreateWASM = hostnames =>
    hnBigTrieWASM.fromIterable(hostnames, 'addWASM');
// Look up `needle` in the trie `haystack` through its matchesWASM() method and
// return whatever that method returns.
const trieBasedDictTestWASM = (haystack, needle) => haystack.matchesWASM(needle);
/******************************************************************************/
// Benchmark suites, indexed by the number passed to doBenchmark(). Slot 0 is
// a placeholder so that the first suite pushed lands at index 1.
const gBenchmarks = [ null ];
// Index of the suite currently selected; also used to pick the output slot.
let gWhich;
/******************************************************************************/
// Write `text` to one of the `#results-N` output elements.
//
//   which: output slot. 0 is used as is; any positive value is folded onto
//          the range 1..3.
//   text:  text to append; an empty string clears the element instead.
//
// The text is written through `textContent`: it is plain text, so it must not
// be parsed as markup, and the element's existing content does not need to be
// re-parsed on every append.
function stdout(which, text) {
    const slot = which > 0 ? ((which - 1) % 3) + 1 : which;
    const target = document.querySelector('#results-' + slot);
    if ( text === '' ) {
        target.textContent = '';
    } else {
        target.textContent += text;
    }
}
// Start the benchmark suite stored at index `which` of gBenchmarks.
//
// The header output is reset and filled with the platform description, the
// three result slots are cleared, then the suite is run asynchronously.
//
// The suites are registered asynchronously, so a button may be clicked before
// any suite exists: in that case a short notice is printed and nothing is run,
// rather than throwing on a missing suite.
function doBenchmark(which) {
    stdout(0, '');
    const suite = gBenchmarks[which];
    if ( suite === undefined || suite === null ) {
        stdout(0, 'Benchmarks are not ready yet, please try again.\n');
        return;
    }
    stdout(0, 'Benchmarking, the higher ops/sec the better.\n');
    stdout(0, Benchmark.platform.toString() + '.');
    stdout(0, '\n\n');
    stdout(1, '');
    stdout(2, '');
    stdout(3, '');
    gWhich = which;
    suite.run({ 'async': true });
}
// Report completion of the current suite, then move on to the following one
// in gBenchmarks and run it asynchronously, if there is one.
function nextBenchmark() {
    stdout(gWhich, 'Done.\n\n');
    gWhich += 1;
    const following = gBenchmarks[gWhich];
    if ( !following ) { return; }
    following.run({ 'async': true });
}
// Report completion of the suite currently selected by gWhich.
function exitBenchmark() {
    const doneMessage = 'Done.\n\n';
    stdout(gWhich, doneMessage);
}
/******************************************************************************/
// Build the benchmark suites and append them to gBenchmarks: the first one
// pushed measures dictionary creation, the second one measures hostname
// lookup. Called once both trie containers have reported being ready to use.
//
// The WASM cases are registered only when the containers expose the WASM
// entry points as functions (the other test pages guard the same way);
// presumably those entry points are missing when the WASM module could not be
// loaded, in which case the cases are left out rather than run against a
// missing function.
function initBenchmarks() {
    const canCreateWASM = hnBigTrieWASM.addWASM instanceof Function;
    // The lookup dictionaries are created from hnBigTrieJS, hence this is the
    // container checked for a WASM matcher.
    const canLookupWASM = hnBigTrieJS.matchesWASM instanceof Function;

    // Suite: creation of dictionaries.
    gBenchmarks.push((function() {
        // Dictionaries built by the most recent case.
        let dicts = [];
        // Serialized content of hnBigTrieJS, prepared when the suite starts
        // and fed to the "unserialized" case.
        let bigTrieDictsSerialized;
        // Build one dictionary per hostname list using `fn`.
        const createDict = function(fn) {
            const out = [];
            for ( let i = 0; i < hostnameLists.length; i++ ) {
                out[i] = fn(hostnameLists[i]);
            }
            return out;
        };
        const bms = new Benchmark.Suite();
        bms.add(' - Set-based', function() {
            dicts = createDict(setBasedDictCreate);
        });
        bms.add(' - Trie-based (JS)', function() {
            hnBigTrieJS.reset();
            dicts = createDict(trieBasedDictCreateJS);
        });
        if ( canCreateWASM ) {
            bms.add(' - Trie-based (WASM)', function() {
                hnBigTrieWASM.reset();
                dicts = createDict(trieBasedDictCreateWASM);
            });
        }
        bms.add(' - Trie-based (unserialized)', function() {
            hnBigTrieJS.reset();
            hnBigTrieJS.unserialize(bigTrieDictsSerialized);
        });
        bms
            .on('start', function() {
                hnBigTrieJS.reset();
                createDict(trieBasedDictCreateJS);
                bigTrieDictsSerialized = hnBigTrieJS.serialize();
                stdout(gWhich, '');
                stdout(gWhich, 'Create dictionaries\n');
            })
            .on('cycle', function(event) {
                stdout(gWhich, String(event.target) + '\n');
            })
            .on('complete', function() {
                // Release what the cases left behind.
                dicts = [];
                bigTrieDictsSerialized = undefined;
                exitBenchmark();
            });
        return bms;
    })());

    // Suite: lookup of random needles in every dictionary.
    const lookupCount = 100;
    gBenchmarks.push((function() {
        const bms = new Benchmark.Suite();
        const needles = [];
        let setDicts = [];
        let bigTrieDicts = [];
        let results;
        // Test every needle against every dictionary of `dicts` using `fn`;
        // for each needle the result of the last dictionary tested is kept.
        const lookupDict = function(dicts, fn) {
            for ( let i = 0; i < needles.length; i++ ) {
                const needle = needles[i];
                for ( const dict of dicts ) {
                    results[i] = fn(dict, needle);
                }
            }
        };
        bms.add(' - Set-based', function() {
            lookupDict(setDicts, setBasedDictTest);
        });
        bms.add(' - Trie-based JS', function() {
            lookupDict(bigTrieDicts, trieBasedDictTest);
        });
        if ( canLookupWASM ) {
            bms.add(' - Trie-based WASM', function() {
                lookupDict(bigTrieDicts, trieBasedDictTestWASM);
            });
        }
        bms
            .on('start', function() {
                for ( let i = 0; i < lookupCount; i++ ) {
                    needles[i] = randomNeedle();
                }
                setDicts = [];
                bigTrieDicts = [];
                results = [];
                hnBigTrieJS.reset();
                for ( const hostnameList of hostnameLists ) {
                    setDicts.push(setBasedDictCreate(hostnameList));
                    bigTrieDicts.push(trieBasedDictCreateJS(hostnameList));
                }
                hnBigTrieJS.optimize();
                stdout(gWhich, '');
                stdout(
                    gWhich,
                    'Test ' + lookupCount +
                    ' needles against ' + setDicts.length +
                    ' dictionaries with size between ' + hostnameLists[0].length +
                    ' and ' + hostnameLists[hostnameLists.length-1].length +
                    ' hostnames\n'
                );
            })
            .on('cycle', function(event) {
                stdout(gWhich, String(event.target) + '\n');
            })
            .on('complete', ( ) => {
                // Distinct arrays: the three variables must not end up
                // sharing a single array object.
                setDicts = [];
                bigTrieDicts = [];
                results = [];
                hnBigTrieJS.reset();
                exitBenchmark();
            });
        return bms;
    })());
}
/******************************************************************************/
// Build the suites only after both trie containers report being ready to use.
Promise.all(
    [ hnBigTrieJS, hnBigTrieWASM ].map(container => container.readyToUse())
).then(( ) => {
    initBenchmarks();
});
// Wire each button to the suite index it starts.
for ( const [ buttonId, which ] of [ [ 'createBenchmark', 1 ], [ 'lookupBenchmark', 2 ] ] ) {
    document.getElementById(buttonId).onclick = function() {
        doBenchmark(which);
    };
}
</script>
</body>
</html>

View File

@ -5,7 +5,7 @@
<meta name="viewport" content="width=device-width, initial-scale=1"> <meta name="viewport" content="width=device-width, initial-scale=1">
</head> </head>
<body style="font: 14px sans-serif"> <body style="font: 14px sans-serif">
<h1>Benchmark of hostname-lookup data structures: Set, RegExp, HNTrie</h1> <h1>Benchmark of hostname-lookup from small to medium set: Set, RegExp, HNTrie</h1>
<p><button id="createBenchmark">Creation</button> <button id="lookupBenchmark">Lookup</button></p> <p><button id="createBenchmark">Creation</button> <button id="lookupBenchmark">Lookup</button></p>
<div id="results-0" style="white-space:pre;font-family:mono"></div> <div id="results-0" style="white-space:pre;font-family:mono"></div>
<div id="results-1" style="white-space:pre;font-family:mono"></div> <div id="results-1" style="white-space:pre;font-family:mono"></div>
@ -17,6 +17,7 @@
<script src="https://rawcdn.githack.com/gorhill/uBlock/e83ffde5af29bd44ae529c5a60e2506970e7af34/src/js/hntrie.js"></script> <script src="https://rawcdn.githack.com/gorhill/uBlock/e83ffde5af29bd44ae529c5a60e2506970e7af34/src/js/hntrie.js"></script>
<script src="https://rawcdn.githack.com/gorhill/uBlock/c3b0fd31f64bd7ffecdd282fb1208fe07aac3eb0/src/js/hntrie.js"></script> <script src="https://rawcdn.githack.com/gorhill/uBlock/c3b0fd31f64bd7ffecdd282fb1208fe07aac3eb0/src/js/hntrie.js"></script>
<script src="https://raw.githack.com/gorhill/uBlock/master/src/js/hntrie.js"></script>
<!-- <script src="../../src/js/hntrie.js"></script> --> <!-- <script src="../../src/js/hntrie.js"></script> -->
<script src="hostname-pool.js"></script> <script src="hostname-pool.js"></script>
@ -202,6 +203,27 @@ var trieBasedDictTestWASM = function(haystack, needle) {
/******************************************************************************/ /******************************************************************************/
// 3rd-gen trie containers: one filled through the 'addJS' code path, the other
// through the 'addWASM' code path.
const hnBigTrieJS = new HNTrieContainer();
const hnBigTrieWASM = new HNTrieContainer();
// Create a 3rd-gen trie in hnBigTrieJS from a '|'-separated list of hostnames.
const bigtrieBasedDictCreateJS = function(domainOpt) {
return hnBigTrieJS.fromIterable(domainOpt.split('|'), 'addJS');
}
// Look up `needle` in the trie `haystack` through its matchesJS() method.
const bigtrieBasedDictTestJS = function(haystack, needle) {
return haystack.matchesJS(needle);
};
// Create a 3rd-gen trie in hnBigTrieWASM from a '|'-separated list of
// hostnames.
const bigtrieBasedDictCreateWASM = function(domainOpt) {
return hnBigTrieWASM.fromIterable(domainOpt.split('|'), 'addWASM');
}
// Look up `needle` in the trie `haystack` through its matchesWASM() method.
const bigtrieBasedDictTestWASM = function(haystack, needle) {
return haystack.matchesWASM(needle);
};
/******************************************************************************/
const gBenchmarks = [ null ]; const gBenchmarks = [ null ];
let gWhich; let gWhich;
@ -258,19 +280,23 @@ function initBenchmarks() {
var bms = new Benchmark.Suite(); var bms = new Benchmark.Suite();
bms bms
.add(' - Set-based', function() { .add(' - Set-based', function() {
createDict(setBasedDictCreate); createDict(setBasedDictCreate);
}) })
.add(' - Regex-based', function() { .add(' - Regex-based', function() {
createDict(regexBasedDictCreate); createDict(regexBasedDictCreate);
}) })
.add(' - Trie-based (1st-gen)', function() { .add(' - Trie-based (1st-gen)', function() {
createDict(oldTrieBasedDictCreate); createDict(oldTrieBasedDictCreate);
}) })
.add(' - Trie-based (2nd-gen)', function() { .add(' - Trie-based (2nd-gen)', function() {
hnTrieManager.reset(); hnTrieManager.reset();
createDict(trieBasedDictCreate); createDict(trieBasedDictCreate);
}) })
.add(' - Trie-based JS (3rd-gen)', function() {
hnBigTrieJS.reset();
createDict(bigtrieBasedDictCreateJS);
})
.on('start', function() { .on('start', function() {
dicts = []; dicts = [];
stdout(gWhich, ''); stdout(gWhich, '');
@ -281,6 +307,13 @@ function initBenchmarks() {
}) })
.on('complete', exitBenchmark); .on('complete', exitBenchmark);
if ( hnBigTrieWASM.addWASM !== null ) {
bms.add(' - Trie-based WASM (3rd-gen)', function() {
hnBigTrieWASM.reset();
createDict(bigtrieBasedDictCreateWASM);
})
}
return bms; return bms;
})()); })());
@ -294,6 +327,7 @@ function initBenchmarks() {
let regexDicts; let regexDicts;
let oldTrieDicts; let oldTrieDicts;
let newTrieDicts; let newTrieDicts;
let bigTrieDicts;
let results; let results;
const lookupDict = function(dicts, fn) { const lookupDict = function(dicts, fn) {
@ -315,9 +349,6 @@ function initBenchmarks() {
.add(' - Trie-based (1st-gen)', function() { .add(' - Trie-based (1st-gen)', function() {
lookupDict(oldTrieDicts, oldTrieBasedDictTest); lookupDict(oldTrieDicts, oldTrieBasedDictTest);
}) })
.add(' - Trie-based JS (2nd-gen)', function() {
lookupDict(newTrieDicts, trieBasedDictTest);
})
.on('start', function() { .on('start', function() {
for ( let i = 0; i < lookupCount; i++ ) { for ( let i = 0; i < lookupCount; i++ ) {
needles[i] = randomNeedle(); needles[i] = randomNeedle();
@ -326,6 +357,7 @@ function initBenchmarks() {
regexDicts = []; regexDicts = [];
oldTrieDicts = [] oldTrieDicts = []
newTrieDicts = [] newTrieDicts = []
bigTrieDicts = []
results = []; results = [];
hnTrieManager.reset(); hnTrieManager.reset();
for ( const domainOpt of domainOpts ) { for ( const domainOpt of domainOpts ) {
@ -333,6 +365,7 @@ function initBenchmarks() {
regexDicts.push(regexBasedDictCreate(domainOpt)); regexDicts.push(regexBasedDictCreate(domainOpt));
oldTrieDicts.push(oldTrieBasedDictCreate(domainOpt)); oldTrieDicts.push(oldTrieBasedDictCreate(domainOpt));
newTrieDicts.push(trieBasedDictCreate(domainOpt)); newTrieDicts.push(trieBasedDictCreate(domainOpt));
bigTrieDicts.push(bigtrieBasedDictCreateJS(domainOpt));
} }
stdout(gWhich, ''); stdout(gWhich, '');
@ -347,11 +380,22 @@ function initBenchmarks() {
exitBenchmark(); exitBenchmark();
}); });
bms.add(' - Trie-based JS (2nd-gen)', function() {
lookupDict(newTrieDicts, trieBasedDictTest);
})
if ( hnTrieManager.matchesWASM !== null ) { if ( hnTrieManager.matchesWASM !== null ) {
bms.add(' - Trie-based WASM (2nd-gen)', function() { bms.add(' - Trie-based WASM (2nd-gen)', function() {
lookupDict(newTrieDicts, trieBasedDictTestWASM); lookupDict(newTrieDicts, trieBasedDictTestWASM);
}) })
} }
// The 3rd-gen JS matcher must be measured against the 3rd-gen dictionaries
// (bigTrieDicts, built with bigtrieBasedDictCreateJS), not against the 2nd-gen
// ones held in newTrieDicts.
bms.add(' - Trie-based JS (3rd-gen)', function() {
    lookupDict(bigTrieDicts, bigtrieBasedDictTestJS);
});
if ( hnBigTrieWASM.matchesWASM !== null ) {
bms.add(' - Trie-based WASM (3rd-gen)', function() {
lookupDict(bigTrieDicts, bigtrieBasedDictTestWASM);
})
}
return bms; return bms;
})()); })());
@ -361,6 +405,8 @@ function initBenchmarks() {
Promise.all([ Promise.all([
hnTrieManager.readyToUse(), hnTrieManager.readyToUse(),
hnBigTrieJS.readyToUse(),
hnBigTrieWASM.readyToUse(),
]).then(( ) => { ]).then(( ) => {
initBenchmarks(); initBenchmarks();
}); });

View File

@ -5,10 +5,9 @@
<meta name="viewport" content="width=device-width, initial-scale=1"> <meta name="viewport" content="width=device-width, initial-scale=1">
</head> </head>
<body style="font: 14px sans-serif"> <body style="font: 14px sans-serif">
<h1>HNTrie test</h1> <h1>HNTrieContainer test</h1>
<div><button id="test" type="button">Test!</button></div> <div><button id="test" type="button">Test!</button></div>
<div id="stdout"></div> <div id="stdout"></div>
<script src="https://rawcdn.githack.com/gorhill/uBlock/e83ffde5af29bd44ae529c5a60e2506970e7af34/src/js/hntrie.js"></script>
<script src="https://raw.githack.com/gorhill/uBlock/master/src/js/hntrie.js"></script> <script src="https://raw.githack.com/gorhill/uBlock/master/src/js/hntrie.js"></script>
<!-- <script src="../../src/js/hntrie.js"></script> --> <!-- <script src="../../src/js/hntrie.js"></script> -->
<script src="hostname-pool.js"></script> <script src="hostname-pool.js"></script>
@ -35,7 +34,45 @@ const stdout = function(s) {
/******************************************************************************/ /******************************************************************************/
const testFlavor = function(hostnames, name, matchesFn, hit, miss) { // Dictionary of hostnames
//
// Set-based dictionary of hostnames, used as the reference implementation the
// tries are tested against.
//
//   hostnames: optional iterable of hostnames to add at creation time.
//
// `h` holds the hostname which satisfied the last call to matches(), or an
// empty string when that call did not match.
const FilterHostnameDict = function(hostnames) {
    this.h = '';
    this.dict = new Set();
    if ( hostnames === undefined ) { return; }
    this.fromIterable(hostnames);
};
FilterHostnameDict.prototype = {
    // Add one hostname; returns false when it was already present.
    add(hn) {
        const isNew = this.dict.has(hn) === false;
        if ( isNew ) {
            this.dict.add(hn);
        }
        return isNew;
    },
    // Add every hostname of an iterable; returns the dictionary itself.
    fromIterable(hostnames) {
        for ( const hostname of hostnames ) {
            this.add(hostname);
        }
        return this;
    },
    // Return true when `needle`, or one of its parent domains obtained by
    // repeatedly dropping the leftmost label, is in the dictionary.
    matches(needle) {
        let candidate = needle;
        for (;;) {
            if ( this.dict.has(candidate) ) {
                this.h = candidate;
                return true;
            }
            const dot = candidate.indexOf('.');
            if ( dot === -1 ) {
                this.h = '';
                return false;
            }
            candidate = candidate.slice(dot + 1);
        }
    },
};
/******************************************************************************/
const testFlavor = function(hostnames, name, matchesFn, hitFn) {
stdout('\xA0'); stdout('\xA0');
stdout('Testing ' + name + '...'); stdout('Testing ' + name + '...');
@ -44,25 +81,25 @@ const testFlavor = function(hostnames, name, matchesFn, hit, miss) {
for ( let i = 0; i < hostnames.length; i++ ) { for ( let i = 0; i < hostnames.length; i++ ) {
// Exact hits // Exact hits
let needle = hostnames[i]; let needle = hostnames[i];
if ( matchesFn(needle) !== hit ) { if ( hitFn(matchesFn(needle)) === false ) {
stdout('Exact hits failed: ' + needle); stdout('Exact hits failed: ' + needle);
} }
// Subdomain hits // Subdomain hits
needle = createRandomLabel() + '.' + hostnames[i]; needle = createRandomLabel() + '.' + hostnames[i];
if ( matchesFn(needle) !== hit ) { if ( hitFn(matchesFn(needle)) === false ) {
stdout('Subdomain hits failed: ' + needle); stdout('Subdomain hits failed: ' + needle);
} }
// Misses batch 1 // Misses batch 1
needle = createRandomLabel() + '.com'; needle = createRandomLabel() + '.com';
if ( matchesFn(needle) !== miss ) { if ( hitFn(matchesFn(needle)) !== false ) {
stdout('Misses batch 1: ' + needle); stdout('Misses batch 1: ' + needle);
} }
// Misses batch 2 // Misses batch 2
needle = hostnames[i] + '.' + createRandomLabel(); needle = hostnames[i] + '.' + createRandomLabel();
if ( matchesFn(needle) !== miss ) { if ( hitFn(matchesFn(needle)) !== false ) {
stdout('Misses batch 2: ' + needle); stdout('Misses batch 2: ' + needle);
} }
@ -71,7 +108,7 @@ const testFlavor = function(hostnames, name, matchesFn, hit, miss) {
let pos = needle.lastIndexOf('.'); let pos = needle.lastIndexOf('.');
if ( pos !== -1 ) { if ( pos !== -1 ) {
needle = needle.slice(0, pos) + needle.slice(pos + 1); needle = needle.slice(0, pos) + needle.slice(pos + 1);
if ( matchesFn(needle) !== miss ) { if ( hitFn(matchesFn(needle)) !== false ) {
stdout('Misses batch 3: ' + needle); stdout('Misses batch 3: ' + needle);
} }
} }
@ -87,19 +124,98 @@ const testFlavor = function(hostnames, name, matchesFn, hit, miss) {
); );
}; };
hnTrieManager.readyToUse().then(( ) => { const hnBigTrieJS = new HNTrieContainer();
const oldTrie = HNTrieBuilder.fromIterable(hostnamePool); const hnBigTrieWASM = new HNTrieContainer();
const theTrie = hnTrieManager.fromIterable(hostnamePool); const hnBigTrieUnserialized = new HNTrieContainer();
Promise.all([
hnBigTrieJS.readyToUse(),
hnBigTrieWASM.readyToUse()
]).then(( ) => {
let t0 = performance.now();
const theSet = new FilterHostnameDict(hostnamePool);
let t1 = performance.now();
stdout('\xA0');
stdout(
'Set creation completed in ' +
(t1 - t0).toFixed(2) + ' ms'
);
t0 = performance.now();
const theTrieJS = hnBigTrieJS.fromIterable(hostnamePool, 'addJS');
hnBigTrieJS.optimize();
t1 = performance.now();
stdout('\xA0');
stdout(
'HNTrieContainer creation (JS) completed in ' +
(t1 - t0).toFixed(2) + ' ms'
);
let theTrieWASM;
if ( hnBigTrieWASM.addWASM instanceof Function ) {
t0 = performance.now();
theTrieWASM = hnBigTrieWASM.fromIterable(hostnamePool, 'addWASM');
hnBigTrieWASM.optimize();
t1 = performance.now();
stdout('\xA0');
stdout(
'HNTrieContainer creation (WASM) completed in ' +
(t1 - t0).toFixed(2) + ' ms'
);
const bufJS = theTrieJS.container.buf;
const bufWASM = theTrieWASM.container.buf;
for ( let i = 0; i < bufJS.length; i++ ) {
if ( bufJS[i] !== bufWASM[i] ) {
stdout('theTrieWASM failure at index ' + i);
break;
}
}
}
let selfie = hnBigTrieJS.serialize();
t0 = performance.now();
hnBigTrieUnserialized.unserialize(selfie);
const theTrieUnserialized = hnBigTrieUnserialized.createOne(hnBigTrieJS.compileOne(theTrieJS));
t1 = performance.now();
stdout('\xA0');
stdout(
'HNTrieContainer creation (unserialized) completed in ' +
(t1 - t0).toFixed(2) + ' ms'
);
selfie = undefined;
document.getElementById('test').addEventListener('click', ( ) => { document.getElementById('test').addEventListener('click', ( ) => {
let parent = document.getElementById('stdout'); let parent = document.getElementById('stdout');
while ( parent.childElementCount !== 0 ) { while ( parent.childElementCount !== 0 ) {
parent.removeChild(parent.firstChild); parent.removeChild(parent.firstChild);
} }
testFlavor(hostnamePool, 'Old Trie (JS)', oldTrie.matches.bind(oldTrie), true, false); testFlavor(
testFlavor(hostnamePool, 'New Trie (JS)', theTrie.matchesJS.bind(theTrie), 1, 0); hostnamePool,
if ( hnTrieManager.matchesWASM instanceof Function ) { 'Set (JS)',
testFlavor(hostnamePool, 'New Trie (WASM)', theTrie.matchesWASM.bind(theTrie), 1, 0); theSet.matches.bind(theSet),
r => r
);
testFlavor(
hostnamePool,
'HNTrieContainer (JS)',
theTrieJS.matchesJS.bind(theTrieJS),
r => r >= 0
);
if ( theTrieWASM !== undefined ) {
testFlavor(
hostnamePool,
'HNTrieContainer (WASM)',
theTrieWASM.matchesWASM.bind(theTrieWASM),
r => r >= 0
);
} }
testFlavor(
hostnamePool,
'HNTrieContainer (unserialized)',
theTrieUnserialized.matchesJS.bind(theTrieUnserialized),
r => r >= 0
);
}); });
}); });

View File

@ -10,7 +10,8 @@
<p>Some of the pages below are hosted on <a href="raw.githack.com">raw.githack.com</a> in order to ensure some of the secondary resources can be properly loaded (specifically, the WebAssembly modules, as they <a href="https://github.com/WebAssembly/design/blob/master/Web.md#webassemblyinstantiatestreaming">require to be loaded using same-origin policy</a>).</p> <p>Some of the pages below are hosted on <a href="raw.githack.com">raw.githack.com</a> in order to ensure some of the secondary resources can be properly loaded (specifically, the WebAssembly modules, as they <a href="https://github.com/WebAssembly/design/blob/master/Web.md#webassemblyinstantiatestreaming">require to be loaded using same-origin policy</a>).</p>
<ul> <ul>
<li><a href="https://raw.githack.com/gorhill/uBlock/master/docs/tests/hntrie-test.html">HNTrie: tests</a> <li><a href="https://raw.githack.com/gorhill/uBlock/master/docs/tests/hntrie-test.html">HNTrie: tests</a>
<li><a href="https://raw.githack.com/gorhill/uBlock/master/docs/tests/hnset-benchmark.html">HNTrie: benchmarks</a> <li><a href="https://raw.githack.com/gorhill/uBlock/master/docs/tests/hnset-benchmark.html">HNTrie, small (2) to medium (~1000) set: benchmarks</a>
<li><a href="https://raw.githack.com/gorhill/uBlock/master/docs/tests/hnbigset-benchmark.html">HNTrie, small (2) to large (40,000+) set: benchmarks</a>
</ul> </ul>
</body> </body>
</html> </html>

View File

@ -137,7 +137,7 @@ const µBlock = (function() { // jshint ignore:line
// Read-only // Read-only
systemSettings: { systemSettings: {
compiledMagic: 6, // Increase when compiled format changes compiledMagic: 6, // Increase when compiled format changes
selfieMagic: 6 // Increase when selfie format changes selfieMagic: 7 // Increase when selfie format changes
}, },
restoreBackupSettings: { restoreBackupSettings: {

File diff suppressed because it is too large Load Diff

View File

@ -20,7 +20,7 @@
*/ */
/* jshint bitwise: false */ /* jshint bitwise: false */
/* global punycode, hnTrieManager */ /* global punycode, HNTrieContainer */
'use strict'; 'use strict';
@ -738,42 +738,40 @@ registerFilterClass(FilterRegex);
const FilterOrigin = function() { const FilterOrigin = function() {
}; };
FilterOrigin.prototype.wrapped = { FilterOrigin.prototype = {
compile: function() { wrapped: {
return ''; compile: function() {
return '';
},
logData: function() {
return {
compiled: ''
};
},
match: function() {
return true;
}
},
matchOrigin: function() {
return true;
},
match: function(url, tokenBeg) {
return this.matchOrigin() && this.wrapped.match(url, tokenBeg);
}, },
logData: function() { logData: function() {
return { const out = this.wrapped.logData();
compiled: '' const domainOpt = this.toDomainOpt();
}; out.compiled = [ this.fid, domainOpt, out.compiled ];
if ( out.opts === undefined ) {
out.opts = 'domain=' + domainOpt;
} else {
out.opts += ',domain=' + domainOpt;
}
return out;
},
compile: function() {
return [ this.fid, this.toDomainOpt(), this.wrapped.compile() ];
}, },
match: function() {
return true;
}
};
FilterOrigin.prototype.matchOrigin = function() {
return true;
};
FilterOrigin.prototype.match = function(url, tokenBeg) {
return this.matchOrigin() && this.wrapped.match(url, tokenBeg);
};
FilterOrigin.prototype.logData = function() {
var out = this.wrapped.logData(),
domainOpt = this.toDomainOpt();
out.compiled = [ this.fid, domainOpt, out.compiled ];
if ( out.opts === undefined ) {
out.opts = 'domain=' + domainOpt;
} else {
out.opts += ',domain=' + domainOpt;
}
return out;
};
FilterOrigin.prototype.compile = function() {
return [ this.fid, this.toDomainOpt(), this.wrapped.compile() ];
}; };
// *** start of specialized origin matchers // *** start of specialized origin matchers
@ -853,10 +851,12 @@ FilterOriginHitSet.prototype = Object.create(FilterOrigin.prototype, {
}, },
matchOrigin: { matchOrigin: {
value: function() { value: function() {
if ( hnTrieManager.isValidRef(this.oneOf) === false ) { if ( this.oneOf === null ) {
this.oneOf = hnTrieManager.fromDomainOpt(this.domainOpt); this.oneOf = FilterOrigin.trieContainer.fromIterable(
this.domainOpt.split('|')
);
} }
return this.oneOf.matches(pageHostnameRegister) === 1; return this.oneOf.matches(pageHostnameRegister) !== -1;
} }
}, },
}); });
@ -885,12 +885,12 @@ FilterOriginMissSet.prototype = Object.create(FilterOrigin.prototype, {
}, },
matchOrigin: { matchOrigin: {
value: function() { value: function() {
if ( hnTrieManager.isValidRef(this.noneOf) === false ) { if ( this.noneOf === null ) {
this.noneOf = hnTrieManager.fromDomainOpt( this.noneOf = FilterOrigin.trieContainer.fromIterable(
this.domainOpt.replace(/~/g, '') this.domainOpt.replace(/~/g, '').split('|')
); );
} }
return this.noneOf.matches(pageHostnameRegister) === 0; return this.noneOf.matches(pageHostnameRegister) === -1;
} }
}, },
}); });
@ -926,8 +926,8 @@ FilterOriginMixedSet.prototype = Object.create(FilterOrigin.prototype, {
oneOf.push(hostname); oneOf.push(hostname);
} }
} }
this.oneOf = hnTrieManager.fromIterable(oneOf); this.oneOf = FilterOrigin.trieContainer.fromIterable(oneOf);
this.noneOf = hnTrieManager.fromIterable(noneOf); this.noneOf = FilterOrigin.trieContainer.fromIterable(noneOf);
} }
}, },
toDomainOpt: { toDomainOpt: {
@ -937,12 +937,10 @@ FilterOriginMixedSet.prototype = Object.create(FilterOrigin.prototype, {
}, },
matchOrigin: { matchOrigin: {
value: function() { value: function() {
if ( hnTrieManager.isValidRef(this.oneOf) === false ) { if ( this.oneOf === null ) { this.init(); }
this.init();
}
let needle = pageHostnameRegister; let needle = pageHostnameRegister;
return this.oneOf.matches(needle) === 1 && return this.oneOf.matches(needle) !== -1 &&
this.noneOf.matches(needle) === 0; this.noneOf.matches(needle) === -1;
} }
}, },
}); });
@ -990,6 +988,33 @@ FilterOrigin.load = function(args) {
return f; return f;
}; };
FilterOrigin.trieContainer = (function() {
let trieDetails;
try {
trieDetails = JSON.parse(
vAPI.localStorage.getItem('FilterOrigin.trieDetails')
);
} catch(ex) {
}
return new HNTrieContainer(trieDetails);
})();
FilterOrigin.readyToUse = function() {
return FilterOrigin.trieContainer.readyToUse();
};
FilterOrigin.reset = function() {
return FilterOrigin.trieContainer.reset();
};
FilterOrigin.optimize = function() {
const trieDetails = FilterOrigin.trieContainer.optimize();
vAPI.localStorage.setItem(
'FilterOrigin.trieDetails',
JSON.stringify(trieDetails)
);
};
registerFilterClass(FilterOrigin); registerFilterClass(FilterOrigin);
/******************************************************************************/ /******************************************************************************/
@ -1059,60 +1084,66 @@ FilterDataHolderEntry.load = function(data) {
/******************************************************************************/ /******************************************************************************/
// Dictionary of hostnames // Dictionary of hostnames
//
const FilterHostnameDict = function() { const FilterHostnameDict = function(args) {
this.h = ''; // short-lived register this.h = ''; // short-lived register
this.dict = new Set(); this.dict = FilterHostnameDict.trieContainer.createOne(args);
}; };
Object.defineProperty(FilterHostnameDict.prototype, 'size', { FilterHostnameDict.prototype = {
get: function() { get size() {
return this.dict.size; return this.dict.size;
},
add: function(hn) {
return this.dict.add(hn);
},
match: function() {
const pos = this.dict.matches(requestHostnameRegister);
if ( pos === -1 ) { return false; }
this.h = requestHostnameRegister.slice(pos);
return true;
},
logData: function() {
return {
raw: '||' + this.h + '^',
regex: rawToRegexStr(this.h, 0) + '(?:[^%.0-9a-z_-]|$)',
compiled: this.h
};
},
compile: function() {
return [ this.fid, FilterHostnameDict.trieContainer.compileOne(this.dict) ];
},
};
FilterHostnameDict.trieContainer = (function() {
let trieDetails;
try {
trieDetails = JSON.parse(
vAPI.localStorage.getItem('FilterHostnameDict.trieDetails')
);
} catch(ex) {
} }
}); return new HNTrieContainer(trieDetails);
})();
FilterHostnameDict.prototype.add = function(hn) { FilterHostnameDict.readyToUse = function() {
if ( this.dict.has(hn) === true ) { return false; } return FilterHostnameDict.trieContainer.readyToUse();
this.dict.add(hn);
return true;
}; };
FilterHostnameDict.prototype.remove = function(hn) { FilterHostnameDict.reset = function() {
return this.dict.delete(hn); return FilterHostnameDict.trieContainer.reset();
}; };
FilterHostnameDict.prototype.match = function() { FilterHostnameDict.optimize = function() {
// TODO: mind IP addresses const trieDetails = FilterHostnameDict.trieContainer.optimize();
var pos, vAPI.localStorage.setItem(
hostname = requestHostnameRegister; 'FilterHostnameDict.trieDetails',
while ( this.dict.has(hostname) === false ) { JSON.stringify(trieDetails)
pos = hostname.indexOf('.'); );
if ( pos === -1 ) {
this.h = '';
return false;
}
hostname = hostname.slice(pos + 1);
}
this.h = hostname;
return true;
};
FilterHostnameDict.prototype.logData = function() {
return {
raw: '||' + this.h + '^',
regex: rawToRegexStr(this.h, 0) + '(?:[^%.0-9a-z_-]|$)',
compiled: this.h
};
};
FilterHostnameDict.prototype.compile = function() {
return [ this.fid, Array.from(this.dict) ];
}; };
FilterHostnameDict.load = function(args) { FilterHostnameDict.load = function(args) {
var f = new FilterHostnameDict(); return new FilterHostnameDict(args[1]);
f.dict = new Set(args[1]);
return f;
}; };
registerFilterClass(FilterHostnameDict); registerFilterClass(FilterHostnameDict);
@ -1974,7 +2005,8 @@ FilterContainer.prototype.reset = function() {
this.filterParser.reset(); this.filterParser.reset();
// This will invalidate all hn tries throughout uBO: // This will invalidate all hn tries throughout uBO:
hnTrieManager.reset(); FilterOrigin.reset();
FilterHostnameDict.reset();
// Runtime registers // Runtime registers
this.cbRegister = undefined; this.cbRegister = undefined;
@ -1985,20 +2017,20 @@ FilterContainer.prototype.reset = function() {
/******************************************************************************/ /******************************************************************************/
FilterContainer.prototype.freeze = function() { FilterContainer.prototype.freeze = function() {
let filterPairId = FilterPair.fid, const filterPairId = FilterPair.fid,
filterBucketId = FilterBucket.fid, filterBucketId = FilterBucket.fid,
filterDataHolderId = FilterDataHolder.fid, filterDataHolderId = FilterDataHolder.fid,
redirectTypeValue = typeNameToTypeValue.redirect, redirectTypeValue = typeNameToTypeValue.redirect,
unserialize = µb.CompiledLineIO.unserialize; unserialize = µb.CompiledLineIO.unserialize;
for ( let line of this.goodFilters ) { for ( const line of this.goodFilters ) {
if ( this.badFilters.has(line) ) { if ( this.badFilters.has(line) ) {
this.discardedCount += 1; this.discardedCount += 1;
continue; continue;
} }
let args = unserialize(line); const args = unserialize(line);
let bits = args[0]; const bits = args[0];
// Special cases: delegate to more specialized engines. // Special cases: delegate to more specialized engines.
// Redirect engine. // Redirect engine.
@ -2008,8 +2040,8 @@ FilterContainer.prototype.freeze = function() {
} }
// Plain static filters. // Plain static filters.
let tokenHash = args[1]; const tokenHash = args[1];
let fdata = args[2]; const fdata = args[2];
// Special treatment: data-holding filters are stored separately // Special treatment: data-holding filters are stored separately
// because they require special matching algorithm (unlike other // because they require special matching algorithm (unlike other
@ -2063,6 +2095,8 @@ FilterContainer.prototype.freeze = function() {
this.filterParser.reset(); this.filterParser.reset();
this.goodFilters = new Set(); this.goodFilters = new Set();
FilterOrigin.optimize();
FilterHostnameDict.optimize();
this.frozen = true; this.frozen = true;
}; };
@ -2072,7 +2106,7 @@ FilterContainer.prototype.freeze = function() {
// on asynchronous operations (ex.: when loading a wasm module). // on asynchronous operations (ex.: when loading a wasm module).
FilterContainer.prototype.readyToUse = function() { FilterContainer.prototype.readyToUse = function() {
return hnTrieManager.readyToUse(); return Promise.resolve();
}; };
/******************************************************************************/ /******************************************************************************/
@ -2108,6 +2142,7 @@ FilterContainer.prototype.toSelfie = function() {
allowFilterCount: this.allowFilterCount, allowFilterCount: this.allowFilterCount,
blockFilterCount: this.blockFilterCount, blockFilterCount: this.blockFilterCount,
discardedCount: this.discardedCount, discardedCount: this.discardedCount,
trieContainer: FilterHostnameDict.trieContainer.serialize(),
categories: categoriesToSelfie(this.categories), categories: categoriesToSelfie(this.categories),
dataFilters: dataFiltersToSelfie(this.dataFilters) dataFilters: dataFiltersToSelfie(this.dataFilters)
}; };
@ -2123,6 +2158,7 @@ FilterContainer.prototype.fromSelfie = function(selfie) {
this.allowFilterCount = selfie.allowFilterCount; this.allowFilterCount = selfie.allowFilterCount;
this.blockFilterCount = selfie.blockFilterCount; this.blockFilterCount = selfie.blockFilterCount;
this.discardedCount = selfie.discardedCount; this.discardedCount = selfie.discardedCount;
FilterHostnameDict.trieContainer.unserialize(selfie.trieContainer);
for ( let categoryEntry of selfie.categories ) { for ( let categoryEntry of selfie.categories ) {
let tokenMap = new Map(); let tokenMap = new Map();

Binary file not shown.

View File

@ -25,155 +25,178 @@
;; module start ;; module start
;; ;;
;; (func $log (import "imports" "log") (param i32 i32 i32)) (func $growBuf (import "imports" "growBuf"))
(memory (import "imports" "memory") 1) (memory (import "imports" "memory") 1)
;; Trie container
;;
;; Memory layout, byte offset:
;; 0-254: needle being processed
;; 255: length of needle
;; 256-259: offset to start of trie data section (=> trie0)
;; 260-263: offset to end of trie data section (=> trie1)
;; 264-267: offset to start of character data section (=> char0)
;; 268-271: offset to end of character data section (=> char1)
;; 272: start of trie data section
;;
;; ;;
;; Public functions ;; Public functions
;; ;;
;; ;;
;; unsigned int matches(offset) ;; unsigned int matches(icell)
;; ;;
;; Test whether the currently set needle matches the trie at specified offset. ;; Test whether the currently set needle matches the trie at specified trie
;; ;; offset.
;; Memory layout, byte offset:
;; 0-254: encoded needle (ASCII)
;; 255 : needle length
;; 256- : tries
;; ;;
(func (export "matches") (func (export "matches")
(param $itrie i32) (param $icell i32) ;; offset to root cell of the trie
(result i32) ;; result: 0 = miss, 1 = hit (result i32) ;; result = match index, -1 = miss
(local $ineedle i32) ;; current needle offset (local $char0 i32) ;; offset to first character data
(local $nchar i32) ;; needle char being processed (local $ineedle i32) ;; current needle offset
(local $tchar i32) ;; trie char being processed (local $c i32)
(local $lxtra i32) (local $v i32)
(local $ixtra i32) (local $n i32)
i32.const 255 (local $i0 i32)
(local $i1 i32)
;;
i32.const 264 ;; start of char section is stored at addr 264
i32.load
set_local $char0
;; $icell is an index into an array of 32-bit values
get_local $icell
i32.const 2
i32.shl
set_local $icell
;; let ineedle = this.buf[255];
i32.const 255 ;; length of needle is stored at addr 255
i32.load8_u i32.load8_u
set_local $ineedle set_local $ineedle
loop $nextNeedleChar ;; for (;;) {
block $noSegment loop $nextSegment
;; if ( ineedle === 0 ) { return -1; }
get_local $ineedle
i32.eqz
if
i32.const -1
return
end
;; ineedle -= 1; ;; ineedle -= 1;
get_local $ineedle get_local $ineedle
i32.const -1 i32.const -1
i32.add i32.add
tee_local $ineedle tee_local $ineedle
;; let nchar = ineedle === -1 ? 0 : buf[ineedle]; ;; let c = this.buf[ineedle];
i32.const 0 i32.load8_u
i32.lt_s set_local $c
if ;; for (;;) {
i32.const 0 block $foundSegment loop $findSegment
set_local $nchar ;; v = this.buf32[icell+2];
else get_local $icell
get_local $ineedle i32.load offset=8
tee_local $v
;; i0 = this.char0 + (v & 0x00FFFFFF);
i32.const 0x00FFFFFF
i32.and
get_local $char0
i32.add
tee_local $i0
;; if ( this.buf[i0] === c ) { break; }
i32.load8_u i32.load8_u
set_local $nchar get_local $c
end
block $trieCharEqNeedleChar loop $nextTrieChar
;; let tchar = buf[itrie+8];
get_local $itrie
i32.load8_u offset=8
tee_local $tchar
;; if ( tchar === nchar ) { break; }
get_local $nchar
i32.eq i32.eq
br_if $trieCharEqNeedleChar br_if $foundSegment
;; if ( tchar === 0 && nchar === 0x2E ) { return 1; } ;; icell = this.buf32[icell+0];
get_local $tchar get_local $icell
i32.eqz
if
get_local $nchar
i32.const 0x2E
i32.eq
if
i32.const 1
return
end
end
;; itrie = buf32[itrie >>> 2];
get_local $itrie
i32.load i32.load
tee_local $itrie i32.const 2
;; if ( itrie === 0 ) { return 0; } i32.shl
tee_local $icell
i32.eqz i32.eqz
if if
i32.const 0 i32.const -1
return return
end end
br $nextTrieChar br 0
end end end end
;; if ( nchar === 0 ) { return 1; } ;; let n = v >>> 24;
get_local $nchar get_local $v
i32.eqz i32.const 24
i32.shr_u
tee_local $n
;; if ( n > 1 ) {
i32.const 1
i32.gt_u
if if
i32.const 1 ;; n -= 1;
return get_local $n
end i32.const -1
;; let lxtra = buf[itrie+9]; i32.add
get_local $itrie tee_local $n
i32.load8_u offset=9 ;; if ( n > ineedle ) { return -1; }
tee_local $lxtra
i32.eqz
if else
;; if ( lxtra > ineedle ) { return 0; }
get_local $lxtra
get_local $ineedle get_local $ineedle
i32.gt_u i32.gt_u
if if
i32.const 0 i32.const -1
return return
end end
;; let ixtra = itrie + 10; get_local $i0
get_local $itrie i32.const 1
i32.const 10
i32.add i32.add
tee_local $ixtra tee_local $i0
;; lxtra += ixtra; ;; const i1 = i0 + n;
get_local $lxtra get_local $n
i32.add i32.add
set_local $lxtra set_local $i1
;; do { ;; do {
block $noMoreExtraChars loop loop
;; ineedle -= 1; ;; ineedle -= 1;
get_local $ineedle get_local $ineedle
i32.const -1 i32.const -1
i32.add i32.add
tee_local $ineedle tee_local $ineedle
;; if ( buf[ineedle] !== buf[ixtra] ) { return 0; } ;; if ( this.buf[i0] !== this.buf[ineedle] ) { return -1; }
i32.load8_u i32.load8_u
get_local $ixtra get_local $i0
i32.load8_u i32.load8_u
i32.ne i32.ne
if if
i32.const 0 i32.const -1
return return
end end
;; ixtra += 1; ;; i0 += 1;
get_local $ixtra get_local $i0
i32.const 1 i32.const 1
i32.add i32.add
tee_local $ixtra tee_local $i0
;; while ( ixtra !== lxtra ) { ;; } while ( i0 < i1 );
get_local $lxtra get_local $i1
i32.eq i32.lt_u
br_if $noMoreExtraChars br_if 0
br 0 end
end end
end end
;; itrie = buf32[itrie + 4 >>> 2]; ;; icell = this.buf32[icell+1];
get_local $itrie get_local $icell
i32.load offset=4 i32.load offset=4
tee_local $itrie i32.const 2
;; if ( itrie === 0 ) { i32.shl
tee_local $icell
;; if ( icell === 0 ) { break; }
i32.eqz
br_if $noSegment
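;; if ( this.buf32[icell+2] === 0 ) {
;; NOTE(review): the i32.load just below carries no offset=8, so it reads
;; the 32-bit value at byte offset icell+0 (the first field of the cell),
;; not the segment descriptor at icell+8 this comment refers to -- confirm
;; whether `i32.load offset=8` was intended.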
get_local $icell
i32.load
i32.eqz i32.eqz
if if
;; return ineedle === 0 || buf[ineedle-1] === 0x2E ? 1 : 0; ;; if ( ineedle === 0 || this.buf[ineedle-1] === 0x2E ) {
;; return ineedle;
;; }
get_local $ineedle get_local $ineedle
i32.eqz i32.eqz
if if
i32.const 1 i32.const 0
return return
end end
get_local $ineedle get_local $ineedle
@ -183,15 +206,462 @@
i32.const 0x2E i32.const 0x2E
i32.eq i32.eq
if if
get_local $ineedle
return
end
;; icell = this.buf32[icell+1];
get_local $icell
i32.load offset=4
i32.const 2
i32.shl
set_local $icell
end
br 0
end end
;; return ineedle === 0 || this.buf[ineedle-1] === 0x2E ? ineedle : -1;
get_local $ineedle
i32.eqz
if
i32.const 0
return
end
get_local $ineedle
i32.const -1
i32.add
i32.load8_u
i32.const 0x2E
i32.eq
if
get_local $ineedle
return
end
i32.const -1
)
;;
;; unsigned int add(icell)
;;
;; Add a new hostname to a trie which root cell is passed as argument.
;;
(func (export "add")
(param $icell i32) ;; index of root cell of the trie
(result i32) ;; result: 0 = not added, 1 = added
(local $lhnchar i32) ;; number of characters left to process in hostname
(local $char0 i32) ;; offset to start of character data section
(local $vseg i32) ;; integer value describing a segment
(local $isegchar0 i32) ;; offset to start of current segment's character data
(local $isegchar i32)
(local $lsegchar i32) ;; number of character in current segment
(local $inext i32) ;; index of next cell to process
;;
;; let lhnchar = this.buf[255];
i32.const 255
i32.load8_u
tee_local $lhnchar
;; if ( lhnchar === 0 ) { return 0; }
i32.eqz
if
i32.const 0
return
end
;; let icell = iroot;
get_local $icell
i32.const 2
i32.shl
tee_local $icell
;; if ( this.buf32[icell+2] === 0 ) {
i32.load offset=8
i32.eqz
if
;;this.buf32[icell+2] = this.addSegment(lhnchar);
;; return 1;
get_local $icell
get_local $lhnchar
call $addSegment
i32.store offset=8
i32.const 1
return
end
;; if (
;; (this.buf32[HNBIGTRIE_CHAR0_SLOT] - this.buf32[HNBIGTRIE_TRIE1_SLOT]) < 24 ||
;; (this.buf.length - this.buf32[HNBIGTRIE_CHAR1_SLOT]) < 256
;; ) {
;; this.growBuf();
;; }
i32.const 264
i32.load
i32.const 260
i32.load
i32.sub
i32.const 24
i32.lt_u
if
call $growBuf
else
memory.size
i32.const 16
i32.shl
i32.const 268
i32.load
i32.sub
i32.const 256
i32.lt_u
if
call $growBuf
end
end
;; const char0 = this.buf32[HNBIGTRIE_CHAR0_SLOT];
i32.const 264
i32.load
set_local $char0
;; for (;;) {
loop $nextSegment
;; const vseg = this.buf32[icell+2];
get_local $icell
i32.load offset=8
tee_local $vseg
;; if ( vseg === 0 ) {
i32.eqz
if
get_local $icell
i32.load offset=4
i32.const 2
i32.shl
set_local $icell
br $nextSegment
end
;; let isegchar0 = char0 + (vseg & 0x00FFFFFF);
get_local $char0
get_local $vseg
i32.const 0x00FFFFFF
i32.and
i32.add
tee_local $isegchar0
;; if ( this.buf[isegchar0] !== this.buf[lhnchar-1] ) {
i32.load8_u
get_local $lhnchar
i32.const -1
i32.add
i32.load8_u
i32.ne
if
;; inext = this.buf32[icell+0];
get_local $icell
i32.load
i32.const 2
i32.shl
tee_local $inext
;; if ( inext === 0 ) {
i32.eqz
if
;; this.buf32[icell+0] = this.addCell(0, 0, this.addSegment(lhnchar));
get_local $icell
i32.const 0
i32.const 0
get_local $lhnchar
call $addSegment
call $addCell
i32.store
;; return 1;
i32.const 1 i32.const 1
return return
end end
i32.const 0 ;; icell = inext;
return get_local $inext
set_local $icell
br $nextSegment
end end
br 0 ;; let isegchar = 1;
i32.const 1
set_local $isegchar
;; lhnchar -= 1;
get_local $lhnchar
i32.const -1
i32.add
set_local $lhnchar
;; const lsegchar = vseg >>> 24;
get_local $vseg
i32.const 24
i32.shr_u
tee_local $lsegchar
;; if ( lsegchar !== 1 ) {
i32.const 1
i32.ne
if
;; for (;;) {
block $mismatch loop
;; if ( isegchar === lsegchar ) { break; }
get_local $isegchar
get_local $lsegchar
i32.eq
br_if $mismatch
get_local $lhnchar
i32.eqz
br_if $mismatch
;; if ( this.buf[isegchar0+isegchar] !== this.buf[lhnchar-1] ) { break; }
get_local $isegchar0
get_local $isegchar
i32.add
i32.load8_u
get_local $lhnchar
i32.const -1
i32.add
i32.load8_u
i32.ne
br_if $mismatch
;; isegchar += 1;
get_local $isegchar
i32.const 1
i32.add
set_local $isegchar
;; lhnchar -= 1;
get_local $lhnchar
i32.const -1
i32.add
set_local $lhnchar
br 0
end end
end
;; if ( isegchar === lsegchar ) {
get_local $isegchar
get_local $lsegchar
i32.eq
if
;; inext = this.buf32[icell+1];
get_local $icell
i32.load offset=4
i32.const 2
i32.shl
set_local $inext
;; if ( lhnchar === 0 ) {
get_local $lhnchar
i32.eqz
if
;; if ( inext === 0 || this.buf32[inext+2] === 0 ) { return 0; }
get_local $inext
i32.eqz
if
i32.const 0
return
end
get_local $inext
i32.load offset=8
i32.eqz
if
i32.const 0
return
end
;; this.buf32[icell+1] = this.addCell(0, inext, 0);
get_local $icell
i32.const 0
get_local $inext
i32.const 2
i32.shr_u
i32.const 0
call $addCell
i32.store offset=4
else
;; if ( inext !== 0 ) {
get_local $inext
i32.eqz
if else
;; icell = inext;
get_local $inext
set_local $icell
br $nextSegment
end
;; inext = this.addCell(0, 0, 0);
;; this.buf32[icell+1] = inext;
get_local $icell
i32.const 0
i32.const 0
i32.const 0
call $addCell
tee_local $inext
i32.store offset=4
;; this.buf32[inext+1] = this.addCell(0, 0, this.addSegment(lhnchar));
get_local $inext
i32.const 2
i32.shl
i32.const 0
i32.const 0
get_local $lhnchar
call $addSegment
call $addCell
i32.store offset=4
end
else
;; isegchar0 -= char0;
get_local $icell
get_local $isegchar0
get_local $char0
i32.sub
tee_local $isegchar0
;; this.buf32[icell+2] = isegchar << 24 | isegchar0;
get_local $isegchar
i32.const 24
i32.shl
i32.or
i32.store offset=8
;; inext = this.addCell(
;; 0,
;; this.buf32[icell+1],
;; lsegchar - isegchar << 24 | isegchar0 + isegchar
;; );
;; this.buf32[icell+1] = inext;
get_local $icell
i32.const 0
get_local $icell
i32.load offset=4
get_local $lsegchar
get_local $isegchar
i32.sub
i32.const 24
i32.shl
get_local $isegchar0
get_local $isegchar
i32.add
i32.or
call $addCell
tee_local $inext
i32.store offset=4
;; if ( lhnchar === 0 ) {
get_local $lhnchar
i32.eqz
if
;; this.buf32[icell+1] = this.addCell(0, inext, 0);
get_local $icell
i32.const 0
get_local $inext
i32.const 0
call $addCell
i32.store offset=4
else
;; this.buf32[inext+0] = this.addCell(0, 0, this.addSegment(lhnchar));
get_local $inext
i32.const 2
i32.shl
i32.const 0
i32.const 0
get_local $lhnchar
call $addSegment
call $addCell
i32.store
end
end
;; return 1;
i32.const 1
return
end end
i32.const 0 ;;
i32.const 1
)
;;
;; Private functions
;;
;;
;; unsigned int addCell(idown, iright, vseg)
;;
;; Add a new cell, return cell index.
;;
;; Appends one 12-byte cell made of three consecutive 32-bit fields
;; (idown at +0, iright at +4, vseg at +8) at the byte address currently held
;; in the 32-bit slot at address 260, then advances that slot by 12.
;; The three parameters are stored as-is, without any conversion.
;; The value returned is the cell's byte address shifted right by 2, i.e. an
;; index in units of 32-bit words; callers shift it left by 2 again before
;; using it as a memory address.
;; No capacity check is performed here before writing.
(func $addCell
(param $idown i32)
(param $iright i32)
(param $vseg i32)
(result i32) ;; result: index of added cell
(local $icell i32)
;;
;; let icell = this.buf32[HNBIGTRIE_TRIE1_SLOT];
;; this.buf32[HNBIGTRIE_TRIE1_SLOT] = icell + 12;
;; The first `i32.const 260` is the destination address consumed by the
;; final i32.store; the second one is the address consumed by i32.load.
i32.const 260
i32.const 260
i32.load
tee_local $icell
i32.const 12
i32.add
i32.store
;; this.buf32[icell+0] = idown;
;; From here on $icell is used directly as a byte address.
get_local $icell
get_local $idown
i32.store
;; this.buf32[icell+1] = iright;
get_local $icell
get_local $iright
i32.store offset=4
;; this.buf32[icell+2] = vseg;
get_local $icell
get_local $vseg
i32.store offset=8
;; return icell;
;; Convert the byte address into a 32-bit word index for the caller.
get_local $icell
i32.const 2
i32.shr_u
)
;;
;; unsigned int addSegment(lsegchar)
;;
;; Store a segment of characters and return a segment descriptor. The segment
;; is created from the character data in the needle buffer.
;;
;; Copies $lsegchar bytes out of the low end of memory (addresses
;; $lsegchar-1 down to 0) to the byte address held in the 32-bit slot at
;; address 268, writing them in that descending read order, so the stored
;; bytes are reversed relative to their order in the source buffer.
;; The slot at address 268 is then updated to point just past the copied
;; bytes.
;; Returns 0 without touching memory when $lsegchar is 0. Otherwise returns
;; a descriptor: $lsegchar in the top 8 bits, OR-ed with the position of the
;; first copied byte relative to the value held in the slot at address 264.
;; No capacity check is performed here before writing.
(func $addSegment
(param $lsegchar i32)
(result i32) ;; result: segment descriptor
(local $char1 i32) ;; offset to end of character data section
(local $isegchar i32) ;; relative offset to first character of segment
(local $i i32) ;; iterator
;;
;; if ( lsegchar === 0 ) { return 0; }
get_local $lsegchar
i32.eqz
if
i32.const 0
return
end
;; let char1 = this.buf32[HNBIGTRIE_CHAR1_SLOT];
i32.const 268
i32.load
tee_local $char1
;; const isegchar = char1 - this.buf32[HNBIGTRIE_CHAR0_SLOT];
;; The tee above left char1 on the stack as the minuend of this subtraction.
i32.const 264
i32.load
i32.sub
set_local $isegchar
;; let i = lsegchar;
get_local $lsegchar
set_local $i
;; do {
block $endOfSegment loop
;; this.buf[char1++] = this.buf[--i];
;; $char1 is pushed first as the store address; $i is decremented and used
;; directly as the address of the byte to read.
get_local $char1
get_local $i
i32.const -1
i32.add
tee_local $i
i32.load8_u
i32.store8
get_local $char1
i32.const 1
i32.add
set_local $char1
;; } while ( i !== 0 );
;; Leave the block once i reaches 0; otherwise `br 0` restarts the loop.
get_local $i
i32.eqz
br_if $endOfSegment
br 0
end end
;; this.buf32[HNBIGTRIE_CHAR1_SLOT] = char1;
i32.const 268
get_local $char1
i32.store
;; return (lsegchar << 24) | isegchar;
get_local $lsegchar
i32.const 24
i32.shl
get_local $isegchar
i32.or
) )
;; ;;