diff --git a/src/js/background.js b/src/js/background.js index 347324b4d..5c9fbe364 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -312,13 +312,6 @@ const µBlock = { // jshint ignore:line return super.getTabOrigin(); } - getTabHostname() { - if ( this.tabHostname === undefined ) { - this.tabHostname = hostnameFromURI(this.getTabOrigin()); - } - return super.getTabHostname(); - } - toLogger() { this.tstamp = Date.now(); if ( this.domain === undefined ) { diff --git a/src/js/benchmarks.js b/src/js/benchmarks.js new file mode 100644 index 000000000..4514f28c5 --- /dev/null +++ b/src/js/benchmarks.js @@ -0,0 +1,298 @@ +/******************************************************************************* + + uBlock Origin - a browser extension to block requests. + Copyright (C) 2014-present Raymond Hill + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see {http://www.gnu.org/licenses/}. 
+ + Home: https://github.com/gorhill/uBlock +*/ + +'use strict'; + +/******************************************************************************/ + +import cosmeticFilteringEngine from './cosmetic-filtering.js'; +import globals from './globals.js'; +import io from './assets.js'; +import scriptletFilteringEngine from './scriptlet-filtering.js'; +import staticNetFilteringEngine from './static-net-filtering.js'; +import µb from './background.js'; +import { FilteringContext } from './filtering-context.js'; +import { sessionFirewall } from './dynamic-net-filtering.js'; +import { LineIterator } from './text-iterators.js'; + +import { + domainFromHostname, + entityFromDomain, + hostnameFromURI, +} from './uri-utils.js'; + +/******************************************************************************/ + +// The requests.json.gz file can be downloaded from: +// https://cdn.cliqz.com/adblocking/requests_top500.json.gz +// +// Which is linked from: +// https://whotracks.me/blog/adblockers_performance_study.html +// +// Copy the file into ./tmp/requests.json.gz +// +// If the file is present when you build uBO using `make-[target].sh` from +// the shell, the resulting package will have `./assets/requests.json`, which +// will be looked up by the method below to launch a benchmark session. +// +// From uBO's dev console, launch the benchmark: +// µBlock.benchmarkStaticNetFiltering(); +// +// The usual browser dev tools can be used to obtain useful profiling +// data, i.e. start the profiler, call the benchmark method from the +// console, then stop the profiler when it completes. +// +// Keep in mind that the measurements in the blog post above were obtained +// with ONLY EasyList. The CPU reportedly used was: +// https://www.cpubenchmark.net/cpu.php?cpu=Intel+Core+i7-6600U+%40+2.60GHz&id=2608 +// +// Rename ./tmp/requests.json.gz to something else if you no longer want +// ./assets/requests.json in the build. 
+ +const loadBenchmarkDataset = (( ) => { + let datasetPromise; + let ttlTimer; + + return function() { + if ( ttlTimer !== undefined ) { + globals.clearTimeout(ttlTimer); + ttlTimer = undefined; + } + + globals.setTimeout(( ) => { + ttlTimer = undefined; + datasetPromise = undefined; + }, 5 * 60 * 1000); + + if ( datasetPromise !== undefined ) { + return datasetPromise; + } + + const datasetURL = µb.hiddenSettings.benchmarkDatasetURL; + if ( datasetURL === 'unset' ) { + console.info(`No benchmark dataset available.`); + return Promise.resolve(); + } + console.info(`Loading benchmark dataset...`); + datasetPromise = io.fetchText(datasetURL).then(details => { + console.info(`Parsing benchmark dataset...`); + const requests = []; + const lineIter = new LineIterator(details.content); + while ( lineIter.eot() === false ) { + let request; + try { + request = JSON.parse(lineIter.next()); + } catch(ex) { + } + if ( request instanceof Object === false ) { continue; } + if ( !request.frameUrl || !request.url ) { continue; } + if ( request.cpt === 'document' ) { + request.cpt = 'main_frame'; + } else if ( request.cpt === 'xhr' ) { + request.cpt = 'xmlhttprequest'; + } + requests.push(request); + } + return requests; + }).catch(details => { + console.info(`Not found: ${details.url}`); + datasetPromise = undefined; + }); + + return datasetPromise; + }; +})(); + +/******************************************************************************/ + +// action: 1=test + +µb.benchmarkStaticNetFiltering = async function(options = {}) { + const { target, redirectEngine } = options; + + const requests = await loadBenchmarkDataset(); + if ( Array.isArray(requests) === false || requests.length === 0 ) { + const text = 'No dataset found to benchmark'; + console.info(text); + return text; + } + + console.info(`Benchmarking staticNetFilteringEngine.matchRequest()...`); + + const fctxt = new FilteringContext(); + + if ( typeof target === 'number' ) { + const request = requests[target]; + 
fctxt.setURL(request.url); + fctxt.setDocOriginFromURL(request.frameUrl); + fctxt.setType(request.cpt); + const r = staticNetFilteringEngine.matchRequest(fctxt); + console.info(`Result=${r}:`); + console.info(`\ttype=${fctxt.type}`); + console.info(`\turl=${fctxt.url}`); + console.info(`\tdocOrigin=${fctxt.getDocOrigin()}`); + if ( r !== 0 ) { + console.info(staticNetFilteringEngine.toLogData()); + } + return; + } + + const t0 = globals.performance.now(); + let matchCount = 0; + let blockCount = 0; + let allowCount = 0; + for ( let i = 0; i < requests.length; i++ ) { + const request = requests[i]; + fctxt.setURL(request.url); + fctxt.setDocOriginFromURL(request.frameUrl); + fctxt.setType(request.cpt); + staticNetFilteringEngine.redirectURL = undefined; + const r = staticNetFilteringEngine.matchRequest(fctxt); + matchCount += 1; + if ( r === 1 ) { blockCount += 1; } + else if ( r === 2 ) { allowCount += 1; } + if ( r !== 1 ) { + if ( staticNetFilteringEngine.hasQuery(fctxt) ) { + staticNetFilteringEngine.filterQuery(fctxt, 'queryprune'); + } + if ( fctxt.type === 'main_frame' || fctxt.type === 'sub_frame' ) { + staticNetFilteringEngine.matchAndFetchModifiers(fctxt, 'csp'); + } + staticNetFilteringEngine.matchHeaders(fctxt, []); + } else if ( redirectEngine !== undefined ) { + staticNetFilteringEngine.redirectRequest(redirectEngine, fctxt); + } + } + const t1 = globals.performance.now(); + const dur = t1 - t0; + + const output = [ + 'Benchmarked static network filtering engine:', + `\tEvaluated ${matchCount} match calls in ${dur.toFixed(0)} ms`, + `\tAverage: ${(dur / matchCount).toFixed(3)} ms per request`, + `\tNot blocked: ${matchCount - blockCount - allowCount}`, + `\tBlocked: ${blockCount}`, + `\tUnblocked: ${allowCount}`, + ]; + const s = output.join('\n'); + console.info(s); + return s; +}; + +/******************************************************************************/ + +µb.benchmarkDynamicNetFiltering = async function() { + const requests = await 
loadBenchmarkDataset(); + if ( Array.isArray(requests) === false || requests.length === 0 ) { + console.info('No requests found to benchmark'); + return; + } + console.info(`Benchmarking sessionFirewall.evaluateCellZY()...`); + const fctxt = new FilteringContext(); + const t0 = globals.performance.now(); + for ( const request of requests ) { + fctxt.setURL(request.url); + fctxt.setTabOriginFromURL(request.frameUrl); + fctxt.setType(request.cpt); + sessionFirewall.evaluateCellZY( + fctxt.getTabHostname(), + fctxt.getHostname(), + fctxt.type + ); + } + const t1 = globals.performance.now(); + const dur = t1 - t0; + console.info(`Evaluated ${requests.length} requests in ${dur.toFixed(0)} ms`); + console.info(`\tAverage: ${(dur / requests.length).toFixed(3)} ms per request`); +}; + +/******************************************************************************/ + +µb.benchmarkCosmeticFiltering = async function() { + const requests = await loadBenchmarkDataset(); + if ( Array.isArray(requests) === false || requests.length === 0 ) { + console.info('No requests found to benchmark'); + return; + } + console.info('Benchmarking cosmeticFilteringEngine.retrieveSpecificSelectors()...'); + const details = { + tabId: undefined, + frameId: undefined, + hostname: '', + domain: '', + entity: '', + }; + const options = { + noSpecificCosmeticFiltering: false, + noGenericCosmeticFiltering: false, + }; + let count = 0; + const t0 = globals.performance.now(); + for ( let i = 0; i < requests.length; i++ ) { + const request = requests[i]; + if ( request.cpt !== 'main_frame' ) { continue; } + count += 1; + details.hostname = hostnameFromURI(request.url); + details.domain = domainFromHostname(details.hostname); + details.entity = entityFromDomain(details.domain); + void cosmeticFilteringEngine.retrieveSpecificSelectors(details, options); + } + const t1 = globals.performance.now(); + const dur = t1 - t0; + console.info(`Evaluated ${count} requests in ${dur.toFixed(0)} ms`); + 
console.info(`\tAverage: ${(dur / count).toFixed(3)} ms per request`); +}; + +/******************************************************************************/ + +µb.benchmarkScriptletFiltering = async function() { + const requests = await loadBenchmarkDataset(); + if ( Array.isArray(requests) === false || requests.length === 0 ) { + console.info('No requests found to benchmark'); + return; + } + console.info('Benchmarking scriptletFilteringEngine.retrieve()...'); + const details = { + domain: '', + entity: '', + hostname: '', + tabId: 0, + url: '', + }; + let count = 0; + const t0 = globals.performance.now(); + for ( let i = 0; i < requests.length; i++ ) { + const request = requests[i]; + if ( request.cpt !== 'main_frame' ) { continue; } + count += 1; + details.url = request.url; + details.hostname = hostnameFromURI(request.url); + details.domain = domainFromHostname(details.hostname); + details.entity = entityFromDomain(details.domain); + void scriptletFilteringEngine.retrieve(details); + } + const t1 = globals.performance.now(); + const dur = t1 - t0; + console.info(`Evaluated ${count} requests in ${dur.toFixed(0)} ms`); + console.info(`\tAverage: ${(dur / count).toFixed(3)} ms per request`); +}; + +/******************************************************************************/ diff --git a/src/js/cosmetic-filtering.js b/src/js/cosmetic-filtering.js index 02b01bb64..11280e7ec 100644 --- a/src/js/cosmetic-filtering.js +++ b/src/js/cosmetic-filtering.js @@ -31,12 +31,6 @@ import { StaticExtFilteringSessionDB, } from './static-ext-filtering-db.js'; -import { - domainFromHostname, - entityFromDomain, - hostnameFromURI, -} from './uri-utils.js'; - /******************************************************************************/ const cosmeticSurveyingMissCountMax = @@ -1137,43 +1131,6 @@ FilterContainer.prototype.getFilterCount = function() { /******************************************************************************/ -FilterContainer.prototype.benchmark = async 
function() { - const requests = await µb.loadBenchmarkDataset(); - if ( Array.isArray(requests) === false || requests.length === 0 ) { - console.info('No requests found to benchmark'); - return; - } - console.info('Benchmarking cosmeticFilteringEngine.retrieveSpecificSelectors()...'); - const details = { - tabId: undefined, - frameId: undefined, - hostname: '', - domain: '', - entity: '', - }; - const options = { - noSpecificCosmeticFiltering: false, - noGenericCosmeticFiltering: false, - }; - let count = 0; - const t0 = self.performance.now(); - for ( let i = 0; i < requests.length; i++ ) { - const request = requests[i]; - if ( request.cpt !== 'main_frame' ) { continue; } - count += 1; - details.hostname = hostnameFromURI(request.url); - details.domain = domainFromHostname(details.hostname); - details.entity = entityFromDomain(details.domain); - void this.retrieveSpecificSelectors(details, options); - } - const t1 = self.performance.now(); - const dur = t1 - t0; - console.info(`Evaluated ${count} requests in ${dur.toFixed(0)} ms`); - console.info(`\tAverage: ${(dur / count).toFixed(3)} ms per request`); -}; - -/******************************************************************************/ - const cosmeticFilteringEngine = new FilterContainer(); export default cosmeticFilteringEngine; diff --git a/src/js/dynamic-net-filtering.js b/src/js/dynamic-net-filtering.js index 47e8d3935..926036964 100644 --- a/src/js/dynamic-net-filtering.js +++ b/src/js/dynamic-net-filtering.js @@ -28,10 +28,13 @@ import '../lib/punycode.js'; import globals from './globals.js'; -import µb from './background.js'; -import { domainFromHostname } from './uri-utils.js'; import { LineIterator } from './text-iterators.js'; +import { + decomposeHostname, + domainFromHostname, +} from './uri-utils.js'; + /******************************************************************************/ const punycode = globals.punycode; @@ -266,7 +269,7 @@ const Matrix = class { evaluateCellZ(srcHostname, 
desHostname, type) { - µb.decomposeHostname(srcHostname, this.decomposedSource); + decomposeHostname(srcHostname, this.decomposedSource); this.type = type; const bitOffset = typeBitOffsets[type]; for ( const shn of this.decomposedSource ) { @@ -296,7 +299,7 @@ const Matrix = class { // Precedence: from most specific to least specific // Specific-destination, any party, any type - µb.decomposeHostname(desHostname, this.decomposedDestination); + decomposeHostname(desHostname, this.decomposedDestination); for ( const dhn of this.decomposedDestination ) { if ( dhn === '*' ) { break; } this.y = dhn; @@ -506,32 +509,6 @@ const Matrix = class { this.changed = true; return true; } - - - async benchmark() { - const requests = await µb.loadBenchmarkDataset(); - if ( Array.isArray(requests) === false || requests.length === 0 ) { - console.info('No requests found to benchmark'); - return; - } - console.info(`Benchmarking sessionFirewall.evaluateCellZY()...`); - const fctxt = µb.filteringContext.duplicate(); - const t0 = self.performance.now(); - for ( const request of requests ) { - fctxt.setURL(request.url); - fctxt.setTabOriginFromURL(request.frameUrl); - fctxt.setType(request.cpt); - this.evaluateCellZY( - fctxt.getTabHostname(), - fctxt.getHostname(), - fctxt.type - ); - } - const t1 = self.performance.now(); - const dur = t1 - t0; - console.info(`Evaluated ${requests.length} requests in ${dur.toFixed(0)} ms`); - console.info(`\tAverage: ${(dur / requests.length).toFixed(3)} ms per request`); - } }; Matrix.prototype.intToActionMap = new Map([ diff --git a/src/js/filtering-context.js b/src/js/filtering-context.js index 53d99e80d..f0a9ea8e7 100644 --- a/src/js/filtering-context.js +++ b/src/js/filtering-context.js @@ -282,6 +282,9 @@ const FilteringContext = class { } getTabHostname() { + if ( this.tabHostname === undefined ) { + this.tabHostname = hostnameFromURI(this.getTabOrigin()); + } return this.tabHostname; } diff --git a/src/js/hnswitches.js b/src/js/hnswitches.js 
index fdc08f243..63178ca89 100644 --- a/src/js/hnswitches.js +++ b/src/js/hnswitches.js @@ -28,8 +28,8 @@ import '../lib/punycode.js'; import globals from './globals.js'; -import µb from './background.js'; import { LineIterator } from './text-iterators.js'; +import { decomposeHostname } from './uri-utils.js'; /******************************************************************************/ @@ -228,7 +228,7 @@ HnSwitches.prototype.evaluateZ = function(switchName, hostname) { return false; } this.n = switchName; - µb.decomposeHostname(hostname, this.decomposedSource); + decomposeHostname(hostname, this.decomposedSource); for ( const shn of this.decomposedSource ) { let bits = this.switches.get(shn); if ( bits !== undefined ) { diff --git a/src/js/messaging.js b/src/js/messaging.js index c14d746db..65c6cea9d 100644 --- a/src/js/messaging.js +++ b/src/js/messaging.js @@ -64,6 +64,8 @@ import { sessionURLFiltering, } from './url-net-filtering.js'; +import './benchmarks.js'; + /******************************************************************************/ // https://github.com/uBlockOrigin/uBlock-issues/issues/710 @@ -137,13 +139,8 @@ const onMessage = function(request, sender, callback) { return; case 'sfneBenchmark': - µb.loadBenchmarkDataset().then(requests => { - staticNetFilteringEngine.benchmark( - requests, - { redirectEngine } - ).then(result => { - callback(result); - }); + µb.benchmarkStaticNetFiltering({ redirectEngine }).then(result => { + callback(result); }); return; diff --git a/src/js/scriptlet-filtering.js b/src/js/scriptlet-filtering.js index 73ffca877..8eedb38bc 100644 --- a/src/js/scriptlet-filtering.js +++ b/src/js/scriptlet-filtering.js @@ -430,38 +430,6 @@ scriptletFilteringEngine.fromSelfie = function(selfie) { scriptletDB.fromSelfie(selfie); }; -scriptletFilteringEngine.benchmark = async function() { - const requests = await µb.loadBenchmarkDataset(); - if ( Array.isArray(requests) === false || requests.length === 0 ) { - console.info('No 
requests found to benchmark'); - return; - } - console.info('Benchmarking scriptletFilteringEngine.retrieve()...'); - const details = { - domain: '', - entity: '', - hostname: '', - tabId: 0, - url: '', - }; - let count = 0; - const t0 = self.performance.now(); - for ( let i = 0; i < requests.length; i++ ) { - const request = requests[i]; - if ( request.cpt !== 'main_frame' ) { continue; } - count += 1; - details.url = request.url; - details.hostname = hostnameFromURI(request.url); - details.domain = domainFromHostname(details.hostname); - details.entity = entityFromDomain(details.domain); - void this.retrieve(details); - } - const t1 = self.performance.now(); - const dur = t1 - t0; - console.info(`Evaluated ${count} requests in ${dur.toFixed(0)} ms`); - console.info(`\tAverage: ${(dur / count).toFixed(3)} ms per request`); -}; - /******************************************************************************/ export default scriptletFilteringEngine; diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index 0e539d057..d0eb6aaf2 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -4458,108 +4458,6 @@ FilterContainer.prototype.enableWASM = function(modulePath) { /******************************************************************************/ -// action: 1=test, 2=record - -FilterContainer.prototype.benchmark = async function(requests, options = {}) { - const { action, target, redirectEngine } = options; - - if ( Array.isArray(requests) === false || requests.length === 0 ) { - const text = 'No dataset found to benchmark'; - console.info(text); - return text; - } - - console.info(`Benchmarking staticNetFilteringEngine.matchRequest()...`); - - const fctxt = new FilteringContext(); - - if ( typeof target === 'number' ) { - const request = requests[target]; - fctxt.setURL(request.url); - fctxt.setDocOriginFromURL(request.frameUrl); - fctxt.setType(request.cpt); - const r = this.matchRequest(fctxt); - 
console.info(`Result=${r}:`); - console.info(`\ttype=${fctxt.type}`); - console.info(`\turl=${fctxt.url}`); - console.info(`\tdocOrigin=${fctxt.getDocOrigin()}`); - if ( r !== 0 ) { - console.info(this.toLogData()); - } - return; - } - - let expected, recorded; - if ( action === 1 ) { - try { - expected = JSON.parse( - keyvalStore.getItem('FilterContainer.benchmark.results') - ); - } catch(ex) { - } - } - if ( action === 2 ) { - recorded = []; - } - - const t0 = globals.performance.now(); - let matchCount = 0; - for ( let i = 0; i < requests.length; i++ ) { - const request = requests[i]; - fctxt.setURL(request.url); - fctxt.setDocOriginFromURL(request.frameUrl); - fctxt.setType(request.cpt); - this.redirectURL = undefined; - const r = this.matchRequest(fctxt); - matchCount += 1; - if ( recorded !== undefined ) { recorded.push(r); } - if ( expected !== undefined && r !== expected[i] ) { - console.info(`Mismatch with reference results at ${i}:`); - console.info(`\tExpected ${expected[i]}, got ${r}:`); - console.info(`\ttype=${fctxt.type}`); - console.info(`\turl=${fctxt.url}`); - console.info(`\tdocOrigin=${fctxt.getDocOrigin()}`); - } - if ( r !== 1 ) { - if ( this.hasQuery(fctxt) ) { - this.filterQuery(fctxt, 'queryprune'); - } - if ( fctxt.type === 'main_frame' || fctxt.type === 'sub_frame' ) { - this.matchAndFetchModifiers(fctxt, 'csp'); - } - this.matchHeaders(fctxt, []); - } else if ( redirectEngine !== undefined ) { - this.redirectRequest(redirectEngine, fctxt); - } - } - const t1 = globals.performance.now(); - const dur = t1 - t0; - - if ( recorded !== undefined ) { - keyvalStore.setItem( - 'FilterContainer.benchmark.results', - JSON.stringify(recorded) - ); - } - - const output = [ - 'Benchmarked static network filtering engine:', - `\tEvaluated ${matchCount} match calls in ${dur.toFixed(0)} ms`, - `\tAverage: ${(dur / matchCount).toFixed(3)} ms per request`, - ]; - if ( expected !== undefined ) { - output.push( - `\tBlocked: ${expected.reduce((n,r)=>{return 
r===1?n+1:n;},0)}`, - `\tExcepted: ${expected.reduce((n,r)=>{return r===2?n+1:n;},0)}`, - ); - } - const s = output.join('\n'); - console.info(s); - return s; -}; - -/******************************************************************************/ - FilterContainer.prototype.test = async function(docURL, type, url) { const fctxt = new FilteringContext(); fctxt.setDocOriginFromURL(docURL); diff --git a/src/js/uri-utils.js b/src/js/uri-utils.js index 99cbfdd7c..f371af588 100644 --- a/src/js/uri-utils.js +++ b/src/js/uri-utils.js @@ -54,26 +54,32 @@ const reHostnameFromNetworkURL = /^(?:http|ws|ftp)s?:\/\/([0-9a-z_][0-9a-z._-]*[0-9a-z])(?::\d+)?\//; const reIPAddressNaive = /^\d+\.\d+\.\d+\.\d+$|^\[[\da-zA-Z:]+\]$/; +const reNetworkURI = + /^(?:ftps?|https?|wss?):\/\//; + +// For performance purpose, as simple tests as possible +const reIPv4VeryCoarse = /\.\d+$/; +const reHostnameVeryCoarse = /[g-z_\-]/; /******************************************************************************/ -const domainFromHostname = function(hostname) { +function domainFromHostname(hostname) { return reIPAddressNaive.test(hostname) ? hostname : psl.getDomain(hostname); -}; +} -const domainFromURI = function(uri) { +function domainFromURI(uri) { if ( !uri ) { return ''; } return domainFromHostname(hostnameFromURI(uri)); -}; +} -const entityFromDomain = function(domain) { +function entityFromDomain(domain) { const pos = domain.indexOf('.'); return pos !== -1 ? 
domain.slice(0, pos) + '.*' : ''; -}; +} -const hostnameFromURI = function(uri) { +function hostnameFromURI(uri) { let matches = reCommonHostnameFromURL.exec(uri); if ( matches !== null ) { return matches[1]; } matches = reAuthorityFromURI.exec(uri); @@ -95,27 +101,73 @@ const hostnameFromURI = function(uri) { hostname = punycode.toASCII(hostname.toLowerCase()); } return hostname; -}; +} -const hostnameFromNetworkURL = function(url) { +function hostnameFromNetworkURL(url) { const matches = reHostnameFromNetworkURL.exec(url); return matches !== null ? matches[1] : ''; -}; +} -const originFromURI = function(uri) { +function originFromURI(uri) { const matches = reOriginFromURI.exec(uri); return matches !== null ? matches[0].toLowerCase() : ''; -}; +} -const isNetworkURI = function(uri) { +function isNetworkURI(uri) { return reNetworkURI.test(uri); -}; +} -const reNetworkURI = /^(?:ftps?|https?|wss?):\/\//; +/******************************************************************************/ + +function toBroaderHostname(hostname) { + const pos = hostname.indexOf('.'); + if ( pos !== -1 ) { + return hostname.slice(pos + 1); + } + return hostname !== '*' && hostname !== '' ? '*' : ''; +} + +function toBroaderIPv4Address(ipaddress) { + if ( ipaddress === '*' || ipaddress === '' ) { return ''; } + const pos = ipaddress.lastIndexOf('.'); + if ( pos === -1 ) { return '*'; } + return ipaddress.slice(0, pos); +} + +function toBroaderIPv6Address(ipaddress) { + return ipaddress !== '*' && ipaddress !== '' ? 
'*' : ''; +} + +function decomposeHostname(hostname, out) { + if ( out.length !== 0 && out[0] === hostname ) { + return out; + } + let broadenFn; + if ( reHostnameVeryCoarse.test(hostname) === false ) { + if ( reIPv4VeryCoarse.test(hostname) ) { + broadenFn = toBroaderIPv4Address; + } else if ( hostname.startsWith('[') ) { + broadenFn = toBroaderIPv6Address; + } + } + if ( broadenFn === undefined ) { + broadenFn = toBroaderHostname; + } + out[0] = hostname; + let i = 1; + for (;;) { + hostname = broadenFn(hostname); + if ( hostname === '' ) { break; } + out[i++] = hostname; + } + out.length = i; + return out; +} /******************************************************************************/ export { + decomposeHostname, domainFromHostname, domainFromURI, entityFromDomain, diff --git a/src/js/url-net-filtering.js b/src/js/url-net-filtering.js index 7ee40a3b8..8b4d56a67 100644 --- a/src/js/url-net-filtering.js +++ b/src/js/url-net-filtering.js @@ -23,8 +23,8 @@ /******************************************************************************/ -import µb from './background.js'; import { LineIterator } from './text-iterators.js'; +import { decomposeHostname } from './uri-utils.js'; /******************************************************************************* @@ -200,7 +200,7 @@ URLNetFiltering.prototype.evaluateZ = function(context, target, type) { if ( this.rules.size === 0 ) { return 0; } - µb.decomposeHostname(context, this.decomposedSource); + decomposeHostname(context, this.decomposedSource); for ( let shn of this.decomposedSource ) { this.context = shn; let entries = this.rules.get(shn + ' ' + type); diff --git a/src/js/utils.js b/src/js/utils.js index 4308cdc0e..5e996c684 100644 --- a/src/js/utils.js +++ b/src/js/utils.js @@ -23,9 +23,7 @@ /******************************************************************************/ -import io from './assets.js'; import µb from './background.js'; -import { LineIterator } from './text-iterators.js'; 
/******************************************************************************/ @@ -143,58 +141,6 @@ import { LineIterator } from './text-iterators.js'; /******************************************************************************/ -µb.decomposeHostname = (( ) => { - // For performance purpose, as simple tests as possible - const reHostnameVeryCoarse = /[g-z_-]/; - const reIPv4VeryCoarse = /\.\d+$/; - - const toBroaderHostname = function(hostname) { - const pos = hostname.indexOf('.'); - if ( pos !== -1 ) { - return hostname.slice(pos + 1); - } - return hostname !== '*' && hostname !== '' ? '*' : ''; - }; - - const toBroaderIPv4Address = function(ipaddress) { - if ( ipaddress === '*' || ipaddress === '' ) { return ''; } - const pos = ipaddress.lastIndexOf('.'); - if ( pos === -1 ) { return '*'; } - return ipaddress.slice(0, pos); - }; - - const toBroaderIPv6Address = function(ipaddress) { - return ipaddress !== '*' && ipaddress !== '' ? '*' : ''; - }; - - return function decomposeHostname(hostname, decomposed) { - if ( decomposed.length === 0 || decomposed[0] !== hostname ) { - let broaden; - if ( reHostnameVeryCoarse.test(hostname) === false ) { - if ( reIPv4VeryCoarse.test(hostname) ) { - broaden = toBroaderIPv4Address; - } else if ( hostname.startsWith('[') ) { - broaden = toBroaderIPv6Address; - } - } - if ( broaden === undefined ) { - broaden = toBroaderHostname; - } - decomposed[0] = hostname; - let i = 1; - for (;;) { - hostname = broaden(hostname); - if ( hostname === '' ) { break; } - decomposed[i++] = hostname; - } - decomposed.length = i; - } - return decomposed; - }; -})(); - -/******************************************************************************/ - // TODO: evaluate using TextEncoder/TextDecoder µb.orphanizeString = function(s) { @@ -203,88 +149,6 @@ import { LineIterator } from './text-iterators.js'; /******************************************************************************/ -// The requests.json.gz file can be downloaded from: -// 
https://cdn.cliqz.com/adblocking/requests_top500.json.gz -// -// Which is linked from: -// https://whotracks.me/blog/adblockers_performance_study.html -// -// Copy the file into ./tmp/requests.json.gz -// -// If the file is present when you build uBO using `make-[target].sh` from -// the shell, the resulting package will have `./assets/requests.json`, which -// will be looked-up by the method below to launch a benchmark session. -// -// From uBO's dev console, launch the benchmark: -// µBlock.staticNetFilteringEngine.benchmark(); -// -// The usual browser dev tools can be used to obtain useful profiling -// data, i.e. start the profiler, call the benchmark method from the -// console, then stop the profiler when it completes. -// -// Keep in mind that the measurements at the blog post above where obtained -// with ONLY EasyList. The CPU reportedly used was: -// https://www.cpubenchmark.net/cpu.php?cpu=Intel+Core+i7-6600U+%40+2.60GHz&id=2608 -// -// Rename ./tmp/requests.json.gz to something else if you no longer want -// ./assets/requests.json in the build. 
- -µb.loadBenchmarkDataset = (( ) => { - let datasetPromise; - let ttlTimer; - - return function() { - if ( ttlTimer !== undefined ) { - clearTimeout(ttlTimer); - ttlTimer = undefined; - } - - vAPI.setTimeout(( ) => { - ttlTimer = undefined; - datasetPromise = undefined; - }, 5 * 60 * 1000); - - if ( datasetPromise !== undefined ) { - return datasetPromise; - } - - const datasetURL = µb.hiddenSettings.benchmarkDatasetURL; - if ( datasetURL === 'unset' ) { - console.info(`No benchmark dataset available.`); - return Promise.resolve(); - } - console.info(`Loading benchmark dataset...`); - datasetPromise = io.fetchText(datasetURL).then(details => { - console.info(`Parsing benchmark dataset...`); - const requests = []; - const lineIter = new LineIterator(details.content); - while ( lineIter.eot() === false ) { - let request; - try { - request = JSON.parse(lineIter.next()); - } catch(ex) { - } - if ( request instanceof Object === false ) { continue; } - if ( !request.frameUrl || !request.url ) { continue; } - if ( request.cpt === 'document' ) { - request.cpt = 'main_frame'; - } else if ( request.cpt === 'xhr' ) { - request.cpt = 'xmlhttprequest'; - } - requests.push(request); - } - return requests; - }).catch(details => { - console.info(`Not found: ${details.url}`); - datasetPromise = undefined; - }); - - return datasetPromise; - }; -})(); - -/******************************************************************************/ - µb.fireDOMEvent = function(name) { if ( window instanceof Object &&