2018-11-03 05:58:46 -06:00
|
|
|
<!DOCTYPE html>
|
|
|
|
<html>
|
|
|
|
<head>
|
|
|
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
|
|
|
</head>
|
|
|
|
<body style="font: 14px sans-serif">
|
|
|
|
<h1>Benchmark of hostname-lookup data structures: Set, RegExp, HNTrie</h1>
|
|
|
|
<p><button id="createBenchmark">Creation</button> <button id="lookupBenchmark">Lookup</button></p>
|
|
|
|
<div id="results-0" style="white-space:pre;font-family:monospace"></div>
<div id="results-1" style="white-space:pre;font-family:monospace"></div>
<div id="results-2" style="white-space:pre;font-family:monospace"></div>
<div id="results-3" style="white-space:pre;font-family:monospace"></div>
<div id="results-4" style="white-space:pre;font-family:monospace"></div>
<div id="results-5" style="white-space:pre;font-family:monospace"></div>
<div id="results-6" style="white-space:pre;font-family:monospace"></div>
|
|
|
|
|
|
|
|
<script src="https://rawcdn.githack.com/gorhill/uBlock/e83ffde5af29bd44ae529c5a60e2506970e7af34/src/js/hntrie.js"></script>
|
2018-11-04 13:45:59 -07:00
|
|
|
<script src="https://raw.githack.com/gorhill/uBlock/master/src/js/hntrie.js"></script>
|
2018-11-04 17:12:21 -07:00
|
|
|
<!-- script src="../../src/js/hntrie.js"></script> -->
|
2018-11-04 12:31:15 -07:00
|
|
|
<script src="hostname-pool.js"></script>
|
2018-11-03 05:58:46 -06:00
|
|
|
|
|
|
|
<script src="https://cdn.jsdelivr.net/lodash/4.17.2/lodash.min.js"></script>
|
|
|
|
<script src="https://cdn.jsdelivr.net/platform.js/1.3.3/platform.js"></script>
|
|
|
|
<script src="https://cdn.jsdelivr.net/benchmarkjs/2.1.2/benchmark.js"></script>
|
|
|
|
<script>
|
2018-11-04 12:31:15 -07:00
|
|
|
// Return one entry of the global `hostnamePool` array, picked at random.
// NOTE(review): `hostnamePool` is not defined in this file; presumably it is
// provided by hostname-pool.js -- confirm.
const randomHostname = function() {
    return hostnamePool[Math.floor(Math.random() * hostnamePool.length)];
};
|
|
|
|
|
2018-11-04 12:31:15 -07:00
|
|
|
// Build a hostname to look up in the dictionaries.
//
// Starts from a random pool hostname. When that hostname contains a dot,
// everything before its last dot is replaced with a random base-36 string, so
// only the last label is kept. Then, half of the time, an extra random label
// of at most 4 characters is prepended.
const randomNeedle = function() {
    let needle = randomHostname();
    const pos = needle.lastIndexOf('.');
    if ( pos !== -1 ) {
        needle = Math.random().toString(36).slice(2) + needle.slice(pos);
    }
    if ( Math.random() < 0.5 ) {
        needle = Math.random().toString(36).slice(2, 6) + '.' + needle;
    }
    return needle;
};
|
|
|
|
|
|
|
|
// Create "domain=" sets of all sizes (from 2 to 1024 at most)
//
// Each entry is a '|'-separated string of `n` random hostnames. `n` starts at
// 2 and grows by half of its value (rounded down) after each entry, until it
// would exceed 1024.
const domainOpts = (function() {
    const opts = [];
    for ( let n = 2; n <= 1024; n += (n >>> 1) ) {
        const hostnames = [];
        for ( let i = 0; i < n; i++ ) {
            hostnames.push(randomHostname());
        }
        opts.push(hostnames.join('|'));
    }
    return opts;
})();
|
|
|
|
|
2018-11-03 05:58:46 -06:00
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
// Build a Set/Map-based dictionary from a "domain=" option value.
//
//   domainOpt: '|'-separated hostnames, each optionally prefixed with '~'
//              (negated).
//
// Returns:
//   - a plain string (leading '~' removed) when there is a single hostname;
//   - a Set of hostnames when all hostnames are non-negated;
//   - a Set of hostnames with the '~' removed when all are negated;
//   - a Map of hostname => boolean (false for negated entries) when negated
//     and non-negated hostnames are mixed.
var setBasedDictCreate = function(domainOpt) {
    // Only one hostname
    if ( domainOpt.indexOf('|') === -1 ) {
        return domainOpt.startsWith('~') ? domainOpt.slice(1) : domainOpt;
    }

    // Multiple hostnames: use a dictionary.
    const hostnames = domainOpt.split('|');
    const isNegated = hostname => hostname.startsWith('~');

    // First find out whether we have a homogeneous dictionary
    const hit = hostnames.some(hostname => isNegated(hostname) === false);
    const miss = hostnames.some(hostname => isNegated(hostname));

    // Heterogenous dictionary: this can happen, though VERY rarely.
    // Spotted one occurrence in EasyList Lite (cjxlist.txt):
    //   domain=photobucket.com|~secure.photobucket.com
    if ( hit && miss ) {
        const dict = new Map();
        // Walk from the end so that, for a hostname listed more than once,
        // the occurrence closest to the start of the list is the one kept.
        for ( let i = hostnames.length - 1; i >= 0; i-- ) {
            const hostname = hostnames[i];
            if ( isNegated(hostname) ) {
                dict.set(hostname.slice(1), false);
            } else {
                dict.set(hostname, true);
            }
        }
        return dict;
    }

    // Homogeneous dictionary.
    if ( hit ) {
        return new Set(hostnames);
    }

    // All negated: store the hostnames without their '~' prefix.
    const dict = new Set();
    for ( let i = hostnames.length - 1; i >= 0; i-- ) {
        dict.add(hostnames[i].slice(1));
    }
    return dict;
};
|
|
|
|
|
|
|
|
// Test whether `needle`, or any of its parent domains, is a key of `haystack`.
//
//   haystack: any object with a `has()` method (Set or Map).
//   needle:   hostname to look up.
//
// The hostname is tried as-is first, then with its leftmost label removed,
// and so on until no dot is left. Returns true on the first match, false
// otherwise.
var setBasedDictTest = function(haystack, needle) {
    let hostname = needle;
    for (;;) {
        if ( haystack.has(hostname) ) { return true; }
        const pos = hostname.indexOf('.');
        if ( pos === -1 ) { return false; }
        hostname = hostname.slice(pos + 1);
    }
};
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
// Build a RegExp-based dictionary from a "domain=" option value.
//
//   domainOpt: '|'-separated hostnames, each optionally prefixed with '~'
//              (negated).
//
// Returns:
//   - a plain string (leading '~' removed) when there is a single hostname;
//   - a RegExp matching any listed hostname, or any subdomain of one, when
//     the hostnames are either all non-negated or all negated;
//   - a Map of hostname => boolean (false for negated entries) when negated
//     and non-negated hostnames are mixed.
var regexBasedDictCreate = function(domainOpt) {
    // Only one hostname
    if ( domainOpt.indexOf('|') === -1 ) {
        return domainOpt.startsWith('~') ? domainOpt.slice(1) : domainOpt;
    }

    // Many hostnames.
    // The '|' separators of the option value double as regex alternation;
    // only the dots need to be escaped.
    const compile = function(alternation) {
        const re = new RegExp(
            '(?:^|\\.)(?:' + alternation.replace(/\./g, '\\.') + ')$'
        );
        // NOTE(review): presumably exercises the regex once so that any lazy
        // compilation cost is paid at creation time -- confirm.
        re.exec('');
        return re;
    };

    // Must be in dictionary (none negated).
    if ( domainOpt.indexOf('~') === -1 ) {
        return compile(domainOpt);
    }

    // Must not be in dictionary (all negated).
    if ( reDomainOptAllNegated.test(domainOpt) ) {
        return compile(domainOpt.replace(/~/g, ''));
    }

    // Heterogenous dictionary: this can happen, though VERY rarely.
    // Spotted one occurrence in EasyList Lite (cjxlist.txt):
    //   domain=photobucket.com|~secure.photobucket.com
    const hostnames = domainOpt.split('|');
    const dict = new Map();
    // Walk from the end so that, for a hostname listed more than once, the
    // occurrence closest to the start of the list is the one kept.
    for ( let i = hostnames.length - 1; i >= 0; i-- ) {
        const hostname = hostnames[i];
        if ( hostname.startsWith('~') ) {
            dict.set(hostname.slice(1), false);
        } else {
            dict.set(hostname, true);
        }
    }
    return dict;
};

// Matches an option value made of two or more hostnames, all of them negated.
var reDomainOptAllNegated = /^~(?:[^\|~]+\|~)+[^\|~]+$/;
|
|
|
|
|
|
|
|
// Test `needle` against a RegExp-based dictionary.
//
//   haystack: object with a `test()` method, i.e. a RegExp. The string and
//             Map values which regexBasedDictCreate() returns for
//             single-hostname and mixed option values have no such method.
//   needle:   hostname to look up.
//
// Returns whatever `haystack.test(needle)` returns.
var regexBasedDictTest = function(haystack, needle) {
    return haystack.test(needle);
};
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
// Build a dictionary from a "domain=" option value by delegating to
// `HNTrieBuilder.fromDomainOpt()`.
// NOTE(review): `HNTrieBuilder` is not defined in this file; presumably it
// comes from the commit-pinned hntrie.js loaded by the page -- confirm.
var oldTrieBasedDictCreate = function(domainOpt) {
    return HNTrieBuilder.fromDomainOpt(domainOpt);
};
|
|
|
|
|
|
|
|
// Look up `needle` in a dictionary created by oldTrieBasedDictCreate().
// Returns whatever `haystack.matches(needle)` returns.
var oldTrieBasedDictTest = function(haystack, needle) {
    return haystack.matches(needle);
};
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
// Build a dictionary from a "domain=" option value by delegating to
// `hnTrieManager.fromDomainOpt()`.
// NOTE(review): `hnTrieManager` is not defined in this file; presumably it
// comes from the hntrie.js script loaded by the page -- confirm.
var trieBasedDictCreate = function(domainOpt) {
    return hnTrieManager.fromDomainOpt(domainOpt);
};
|
|
|
|
|
|
|
|
// Look up `needle` in a dictionary created by trieBasedDictCreate(), using
// the dictionary's `matchesJS()` method. Returns whatever that method returns.
var trieBasedDictTest = function(haystack, needle) {
    return haystack.matchesJS(needle);
};
|
|
|
|
|
|
|
|
// Look up `needle` in a dictionary created by trieBasedDictCreate(), using
// the dictionary's `matchesWASM()` method. Returns whatever that method
// returns.
var trieBasedDictTestWASM = function(haystack, needle) {
    return haystack.matchesWASM(needle);
};
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2018-11-04 12:31:15 -07:00
|
|
|
// Registered benchmark suites. Index 0 is a placeholder so that the first
// suite pushed by initBenchmarks() ends up at index 1.
const gBenchmarks = [ null ];

// Index into `gBenchmarks` of the suite currently selected; also passed to
// stdout() to pick the output element.
let gWhich;
|
|
|
|
|
|
|
|
/******************************************************************************/
|
2018-11-03 05:58:46 -06:00
|
|
|
|
|
|
|
// Append `text` to one of the `#results-N` output elements, or clear that
// element when `text` is the empty string.
//
//   which: 0 selects `#results-0`; any positive value is folded into the
//          range 1..3, so that e.g. 4 writes to the same element as 1.
//   text:  text to append; '' clears the element instead.
//
// The text goes through `textContent` rather than `innerHTML`, so benchmark
// output is always shown verbatim and never parsed as markup.
function stdout(which, text) {
    const slot = which > 0 ? ((which - 1) % 3) + 1 : which;
    const output = document.querySelector('#results-' + slot);
    if ( text === '' ) {
        output.textContent = '';
    } else {
        output.textContent += text;
    }
}
|
|
|
|
|
|
|
|
// Reset the output areas, print the header, then start the benchmark suite
// registered at index `which` of `gBenchmarks`.
//
//   which: index of the suite to run (1 and 2 are the indices used by the
//          buttons of this page).
//
// Suites are registered asynchronously by initBenchmarks(). When no suite
// exists yet at that index, a notice is printed instead of failing on
// `undefined.run`.
function doBenchmark(which) {
    const bms = gBenchmarks[which];
    stdout(0, '');
    if ( bms === undefined || bms === null ) {
        stdout(0, 'Benchmarks are not ready yet, please try again.\n');
        return;
    }
    stdout(0, 'Benchmarking, the higher ops/sec the better.\n');
    stdout(0, Benchmark.platform.toString() + '.');
    stdout(0, '\n\n');
    stdout(1, '');
    stdout(2, '');
    stdout(3, '');
    gWhich = which;
    bms.run({ 'async': true });
}
|
|
|
|
|
|
|
|
// Print "Done." for the current suite, advance `gWhich` to the next index,
// and start the suite registered there, if any.
// NOTE(review): nothing in the visible source calls this function; both
// suites use exitBenchmark() as their 'complete' handler.
function nextBenchmark() {
    stdout(gWhich, 'Done.\n\n');
    gWhich += 1;
    const bms = gBenchmarks[gWhich];
    if ( bms ) {
        bms.run({ 'async': true });
    }
}
|
|
|
|
|
|
|
|
// 'complete' handler of the benchmark suites: print "Done." in the output
// area selected by `gWhich`.
function exitBenchmark() {
    stdout(gWhich, 'Done.\n\n');
}
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
// Create the two benchmark suites and append them to `gBenchmarks`:
//   1. "creation": build one dictionary per entry of `domainOpts` with each
//      data structure;
//   2. "lookup": test `lookupCount` random needles against every dictionary
//      of each data structure.
// Both suites report through stdout() and use exitBenchmark() as their
// 'complete' handler.
function initBenchmarks() {
    gBenchmarks.push((function() {
        // Holds the dictionaries built by the benchmarked functions.
        // NOTE(review): never read back; presumably only there so the
        // results of the create calls are not discarded -- confirm.
        let dicts = [];

        const createDict = function(fn) {
            for ( let i = 0; i < domainOpts.length; i++ ) {
                dicts[i] = fn(domainOpts[i]);
            }
        };

        const bms = new Benchmark.Suite();
        bms
            .add(' - Set-based', function() {
                createDict(setBasedDictCreate);
            })
            .add(' - Regex-based', function() {
                createDict(regexBasedDictCreate);
            })
            .add(' - Trie-based (old)', function() {
                createDict(oldTrieBasedDictCreate);
            })
            .add(' - Trie-based', function() {
                // The reset is part of the timed code.
                hnTrieManager.reset();
                createDict(trieBasedDictCreate);
            })
            .on('start', function() {
                dicts = [];
                stdout(gWhich, '');
                stdout(gWhich, 'Create dictionaries\n');
            })
            .on('cycle', function(event) {
                stdout(gWhich, String(event.target) + '\n');
            })
            .on('complete', exitBenchmark);
        return bms;
    })());

    const lookupCount = 100;

    gBenchmarks.push((function() {
        const bms = new Benchmark.Suite();
        const needles = [];

        let setDicts = [];
        let regexDicts = [];
        let oldTrieDicts = [];
        let newTrieDicts = [];
        // NOTE(review): written by lookupDict() but never read; presumably
        // only there so the lookup results are not discarded -- confirm.
        let results = [];

        // Test every needle against every dictionary of `dicts` using `fn`.
        const lookupDict = function(dicts, fn) {
            for ( let i = 0; i < needles.length; i++ ) {
                const needle = needles[i];
                for ( const dict of dicts ) {
                    results[i] = fn(dict, needle);
                }
            }
        };

        bms
            .add(' - Set-based', function() {
                lookupDict(setDicts, setBasedDictTest);
            })
            .add(' - Regex-based', function() {
                lookupDict(regexDicts, regexBasedDictTest);
            })
            .add(' - Trie-based (old)', function() {
                lookupDict(oldTrieDicts, oldTrieBasedDictTest);
            })
            .add(' - Trie-based JS', function() {
                lookupDict(newTrieDicts, trieBasedDictTest);
            })
            .on('start', function() {
                // Fresh needles and fresh dictionaries for every run.
                for ( let i = 0; i < lookupCount; i++ ) {
                    needles[i] = randomNeedle();
                }
                setDicts = [];
                regexDicts = [];
                oldTrieDicts = [];
                newTrieDicts = [];
                hnTrieManager.reset();
                for ( const domainOpt of domainOpts ) {
                    setDicts.push(setBasedDictCreate(domainOpt));
                    regexDicts.push(regexBasedDictCreate(domainOpt));
                    oldTrieDicts.push(oldTrieBasedDictCreate(domainOpt));
                    newTrieDicts.push(trieBasedDictCreate(domainOpt));
                }

                stdout(gWhich, '');
                stdout(gWhich, 'Test ' + lookupCount + ' needles against ' + domainOpts.length + ' dictionaries of hostnames\n');
            })
            .on('cycle', function(event) {
                stdout(gWhich, String(event.target) + '\n');
            })
            .on('complete', exitBenchmark);

        // The WASM variant is benchmarked only when hnTrieManager exposes a
        // non-null `matchesWASM`.
        if ( hnTrieManager.matchesWASM !== null ) {
            bms.add(' - Trie-based WASM', function() {
                lookupDict(newTrieDicts, trieBasedDictTestWASM);
            });
        }

        return bms;
    })());
}
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
// Register the benchmark suites once the promise returned by
// hnTrieManager.readyToUse() resolves.
hnTrieManager.readyToUse().then(( ) => {
    initBenchmarks();
});
|
|
|
|
|
|
|
|
// Wire the two buttons: "Creation" runs suite 1, "Lookup" runs suite 2.
document.getElementById('createBenchmark').onclick = function() {
    doBenchmark(1);
};
document.getElementById('lookupBenchmark').onclick = function() {
    doBenchmark(2);
};
|
|
|
|
</script>
|
|
|
|
</body>
|
|
|
|
</html>
|