2021-08-15 13:47:40 -06:00
|
|
|
/*******************************************************************************
|
|
|
|
|
|
|
|
uBlock Origin - a browser extension to block requests.
|
|
|
|
Copyright (C) 2014-present Raymond Hill
|
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program. If not, see {http://www.gnu.org/licenses/}.
|
|
|
|
|
|
|
|
Home: https://github.com/gorhill/uBlock
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* eslint-disable-next-line no-redeclare */
|
|
|
|
/* globals process */
|
|
|
|
|
|
|
|
'use strict';
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
import { strict as assert } from 'assert';
|
|
|
|
import { createRequire } from 'module';
|
2022-12-22 11:13:38 -07:00
|
|
|
import { readFile, writeFile, mkdir } from 'fs/promises';
|
|
|
|
import { dirname } from 'path';
|
2021-08-15 13:47:40 -06:00
|
|
|
import { fileURLToPath } from 'url';
|
|
|
|
|
|
|
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
|
|
|
|
|
|
import { enableWASM, StaticNetFilteringEngine } from './index.js';
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
const FLAGS = process.argv.slice(2);
|
|
|
|
const COMPARE = FLAGS.includes('compare');
|
|
|
|
const MAXCOST = FLAGS.includes('maxcost');
|
2021-08-29 06:58:20 -06:00
|
|
|
const MEDCOST = FLAGS.includes('medcost');
|
2021-08-15 13:47:40 -06:00
|
|
|
const MINCOST = FLAGS.includes('mincost');
|
2021-08-17 10:48:39 -06:00
|
|
|
const MODIFIERS = FLAGS.includes('modifiers');
|
2021-08-15 13:47:40 -06:00
|
|
|
const RECORD = FLAGS.includes('record');
|
|
|
|
const WASM = FLAGS.includes('wasm');
|
2021-08-29 06:58:20 -06:00
|
|
|
const NEED_RESULTS = COMPARE || MAXCOST || MEDCOST || MINCOST || RECORD;
|
2021-08-15 13:47:40 -06:00
|
|
|
|
2021-08-17 10:48:39 -06:00
|
|
|
// This maps puppeteer types to WebRequest types
|
|
|
|
const WEBREQUEST_OPTIONS = {
|
|
|
|
// Consider document requests as sub_document. This is because the request
|
|
|
|
// dataset does not contain sub_frame or main_frame but only 'document' and
|
|
|
|
// different blockers have different behaviours.
|
|
|
|
document: 'sub_frame',
|
|
|
|
stylesheet: 'stylesheet',
|
|
|
|
image: 'image',
|
|
|
|
media: 'media',
|
|
|
|
font: 'font',
|
|
|
|
script: 'script',
|
|
|
|
xhr: 'xmlhttprequest',
|
|
|
|
fetch: 'xmlhttprequest',
|
|
|
|
websocket: 'websocket',
|
|
|
|
ping: 'ping',
|
|
|
|
// other
|
|
|
|
other: 'other',
|
|
|
|
eventsource: 'other',
|
|
|
|
manifest: 'other',
|
|
|
|
texttrack: 'other',
|
|
|
|
};
|
|
|
|
|
2021-08-15 13:47:40 -06:00
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
function nanoToMilli(bigint) {
|
|
|
|
return (Number(bigint) / 1000000).toFixed(2) + ' ms';
|
|
|
|
}
|
|
|
|
|
|
|
|
function nanoToMicro(bigint) {
|
|
|
|
return (Number(bigint) / 1000).toFixed(2) + ' µs';
|
|
|
|
}
|
|
|
|
|
|
|
|
async function read(path) {
|
2022-12-22 11:13:38 -07:00
|
|
|
return readFile(path, 'utf8');
|
2021-08-15 13:47:40 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
async function write(path, data) {
|
2022-12-22 11:13:38 -07:00
|
|
|
await mkdir(dirname(path), { recursive: true });
|
|
|
|
return writeFile(path, data, 'utf8');
|
2021-08-15 13:47:40 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2021-08-17 10:48:39 -06:00
|
|
|
async function matchRequests(engine, requests) {
|
|
|
|
const results = [];
|
|
|
|
const details = {
|
|
|
|
r: 0,
|
|
|
|
f: undefined,
|
|
|
|
type: '',
|
|
|
|
url: '',
|
|
|
|
originURL: '',
|
|
|
|
t: 0,
|
|
|
|
};
|
|
|
|
|
|
|
|
let notBlockedCount = 0;
|
|
|
|
let blockedCount = 0;
|
|
|
|
let unblockedCount = 0;
|
|
|
|
|
|
|
|
const start = process.hrtime.bigint();
|
|
|
|
|
|
|
|
for ( let i = 0; i < requests.length; i++ ) {
|
|
|
|
const request = requests[i];
|
|
|
|
const reqstart = process.hrtime.bigint();
|
|
|
|
details.type = WEBREQUEST_OPTIONS[request.cpt];
|
|
|
|
details.url = request.url;
|
|
|
|
details.originURL = request.frameUrl;
|
|
|
|
const r = engine.matchRequest(details);
|
|
|
|
if ( r === 0 ) {
|
|
|
|
notBlockedCount += 1;
|
|
|
|
} else if ( r === 1 ) {
|
|
|
|
blockedCount += 1;
|
|
|
|
} else {
|
|
|
|
unblockedCount += 1;
|
|
|
|
}
|
|
|
|
if ( NEED_RESULTS !== true ) { continue; }
|
|
|
|
const reqstop = process.hrtime.bigint();
|
|
|
|
details.r = r;
|
|
|
|
details.f = r !== 0 ? engine.toLogData().raw : undefined;
|
|
|
|
details.t = Math.round(Number(reqstop - reqstart) / 10) / 100;
|
|
|
|
results.push([ i, Object.assign({}, details) ]);
|
|
|
|
}
|
|
|
|
|
|
|
|
const stop = process.hrtime.bigint();
|
|
|
|
|
|
|
|
console.log(`Matched ${requests.length} requests in ${nanoToMilli(stop - start)}`);
|
|
|
|
console.log(`\tNot blocked: ${notBlockedCount} requests`);
|
|
|
|
console.log(`\tBlocked: ${blockedCount} requests`);
|
|
|
|
console.log(`\tUnblocked: ${unblockedCount} requests`);
|
|
|
|
console.log(`\tAverage: ${nanoToMicro((stop - start) / BigInt(requests.length))} per request`);
|
|
|
|
|
|
|
|
if ( RECORD ) {
|
|
|
|
write('data/snfe.json', JSON.stringify(results, null, 2));
|
|
|
|
}
|
|
|
|
|
|
|
|
if ( COMPARE ) {
|
|
|
|
const diffs = await compare(results);
|
|
|
|
if ( Array.isArray(diffs) ) {
|
|
|
|
write('data/snfe.diffs.json', JSON.stringify(diffs, null, 2));
|
|
|
|
}
|
|
|
|
console.log(`Compare: ${diffs.length} requests differ`);
|
|
|
|
}
|
|
|
|
|
|
|
|
if ( MAXCOST ) {
|
|
|
|
const costly = results.slice().sort((a,b) => b[1].t - a[1].t).slice(0, 1000);
|
|
|
|
write('data/snfe.maxcost.json', JSON.stringify(costly, null, 2));
|
|
|
|
}
|
|
|
|
|
2021-08-29 06:58:20 -06:00
|
|
|
if ( MEDCOST ) {
|
|
|
|
const median = results.length >>> 1;
|
|
|
|
const costly = results.slice().sort((a,b) => b[1].t - a[1].t).slice(median - 500, median + 500);
|
|
|
|
write('data/snfe.medcost.json', JSON.stringify(costly, null, 2));
|
|
|
|
}
|
|
|
|
|
2021-08-17 10:48:39 -06:00
|
|
|
if ( MINCOST ) {
|
2021-08-29 06:58:20 -06:00
|
|
|
const costly = results.slice().sort((a,b) => b[1].t - a[1].t).slice(-1000);
|
2021-08-17 10:48:39 -06:00
|
|
|
write('data/snfe.mincost.json', JSON.stringify(costly, null, 2));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-08-15 13:47:40 -06:00
|
|
|
async function compare(results) {
|
|
|
|
let before;
|
|
|
|
try {
|
|
|
|
const raw = await read('data/snfe.json');
|
|
|
|
before = new Map(JSON.parse(raw));
|
|
|
|
} catch(ex) {
|
|
|
|
console.log(ex);
|
|
|
|
console.log('Nothing to compare');
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
const after = new Map(results);
|
|
|
|
const diffs = [];
|
|
|
|
for ( let i = 0; i < results.length; i++ ) {
|
|
|
|
const a = before.get(i);
|
|
|
|
const b = after.get(i);
|
|
|
|
if ( a.r === b.r ) { continue; }
|
2021-09-01 16:26:36 -06:00
|
|
|
diffs.push([ i, {
|
|
|
|
type: a.type,
|
|
|
|
url: a.url,
|
|
|
|
originURL: a.originURL,
|
|
|
|
before: { r: a.r, f: a.f, t: a.t },
|
|
|
|
after: { r: b.r, f: b.f, t: b.t },
|
|
|
|
}]);
|
2021-08-15 13:47:40 -06:00
|
|
|
}
|
|
|
|
return diffs;
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2021-08-17 10:48:39 -06:00
|
|
|
async function matchRequestModifiers(engine, requests) {
|
|
|
|
const results = {
|
|
|
|
'csp': [],
|
|
|
|
'redirect-rule': [],
|
|
|
|
'removeparam': [],
|
|
|
|
};
|
|
|
|
|
|
|
|
const details = {
|
|
|
|
f: undefined,
|
|
|
|
type: '',
|
|
|
|
url: '',
|
|
|
|
originURL: '',
|
|
|
|
t: 0,
|
|
|
|
};
|
|
|
|
|
|
|
|
let modifiedCount = 0;
|
|
|
|
|
|
|
|
const start = process.hrtime.bigint();
|
|
|
|
for ( let i = 0; i < requests.length; i++ ) {
|
|
|
|
const request = requests[i];
|
|
|
|
details.type = WEBREQUEST_OPTIONS[request.cpt];
|
|
|
|
details.url = request.url;
|
|
|
|
details.originURL = request.frameUrl;
|
|
|
|
const r = engine.matchRequest(details);
|
2021-08-17 14:49:43 -06:00
|
|
|
let modified = false;
|
2021-08-17 10:48:39 -06:00
|
|
|
if ( r !== 1 && details.type === 'sub_frame' ) {
|
|
|
|
const reqstart = process.hrtime.bigint();
|
|
|
|
const directives = engine.matchAndFetchModifiers(details, 'csp');
|
|
|
|
if ( directives !== undefined ) {
|
2021-08-17 14:49:43 -06:00
|
|
|
modified = true;
|
2021-08-17 10:48:39 -06:00
|
|
|
if ( NEED_RESULTS ) {
|
|
|
|
const reqstop = process.hrtime.bigint();
|
2021-12-06 06:01:05 -07:00
|
|
|
details.f = directives.map(a => `${a.result}:${a.logData().raw}`).sort();
|
2021-08-17 10:48:39 -06:00
|
|
|
details.t = Math.round(Number(reqstop - reqstart) / 10) / 100;
|
|
|
|
results['csp'].push([ i, Object.assign({}, details) ]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if ( r === 1 ) {
|
|
|
|
const reqstart = process.hrtime.bigint();
|
|
|
|
const directives = engine.matchAndFetchModifiers(details, 'redirect-rule');
|
|
|
|
if ( directives !== undefined ) {
|
2021-08-17 14:49:43 -06:00
|
|
|
modified = true;
|
2021-08-17 10:48:39 -06:00
|
|
|
if ( NEED_RESULTS ) {
|
|
|
|
const reqstop = process.hrtime.bigint();
|
2021-12-06 06:01:05 -07:00
|
|
|
details.f = directives.map(a => `${a.result}:${a.logData().raw}`).sort();
|
2021-08-17 10:48:39 -06:00
|
|
|
details.t = Math.round(Number(reqstop - reqstart) / 10) / 100;
|
|
|
|
results['redirect-rule'].push([ i, Object.assign({}, details) ]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-08-17 14:49:43 -06:00
|
|
|
if ( r !== 1 && engine.hasQuery(details) ) {
|
2021-08-17 10:48:39 -06:00
|
|
|
const reqstart = process.hrtime.bigint();
|
|
|
|
const directives = engine.matchAndFetchModifiers(details, 'removeparam');
|
|
|
|
if ( directives !== undefined ) {
|
2021-08-17 14:49:43 -06:00
|
|
|
modified = true;
|
2021-08-17 10:48:39 -06:00
|
|
|
if ( NEED_RESULTS ) {
|
|
|
|
const reqstop = process.hrtime.bigint();
|
2021-12-06 06:01:05 -07:00
|
|
|
details.f = directives.map(a => `${a.result}:${a.logData().raw}`).sort();
|
2021-08-17 10:48:39 -06:00
|
|
|
details.t = Math.round(Number(reqstop - reqstart) / 10) / 100;
|
|
|
|
results['removeparam'].push([ i, Object.assign({}, details) ]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-08-17 14:49:43 -06:00
|
|
|
if ( modified ) {
|
|
|
|
modifiedCount += 1;
|
|
|
|
}
|
2021-08-17 10:48:39 -06:00
|
|
|
}
|
|
|
|
const stop = process.hrtime.bigint();
|
|
|
|
|
|
|
|
console.log(`Matched-modified ${requests.length} requests in ${nanoToMilli(stop - start)}`);
|
|
|
|
console.log(`\t${modifiedCount} modifiers found`);
|
2021-08-17 14:49:43 -06:00
|
|
|
console.log(`\tAverage: ${nanoToMicro((stop - start) / BigInt(requests.length))} per request`);
|
2021-08-17 10:48:39 -06:00
|
|
|
|
|
|
|
if ( RECORD ) {
|
|
|
|
write('data/snfe.modifiers.json', JSON.stringify(results, null, 2));
|
|
|
|
}
|
|
|
|
|
|
|
|
if ( COMPARE ) {
|
|
|
|
const diffs = await compareModifiers(results);
|
|
|
|
if ( Array.isArray(diffs) ) {
|
|
|
|
write('data/snfe.modifiers.diffs.json', JSON.stringify(diffs, null, 2));
|
|
|
|
}
|
|
|
|
console.log(`Compare: ${diffs.length} modified requests differ`);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
async function compareModifiers(afterResults) {
|
|
|
|
let beforeResults;
|
|
|
|
try {
|
|
|
|
const raw = await read('data/snfe.modifiers.json');
|
|
|
|
beforeResults = JSON.parse(raw);
|
|
|
|
} catch(ex) {
|
|
|
|
console.log(ex);
|
|
|
|
console.log('Nothing to compare');
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
const diffs = [];
|
|
|
|
for ( const modifier of [ 'csp', 'redirect-rule', 'removeparam' ] ) {
|
|
|
|
const before = new Map(beforeResults[modifier]);
|
|
|
|
const after = new Map(afterResults[modifier]);
|
|
|
|
for ( const [ i, b ] of before ) {
|
|
|
|
const a = after.get(i);
|
2021-12-06 06:01:05 -07:00
|
|
|
if ( a !== undefined && JSON.stringify(a.f) === JSON.stringify(b.f) ) { continue; }
|
|
|
|
diffs.push([ i, {
|
|
|
|
type: b.type,
|
|
|
|
url: b.url,
|
|
|
|
originURL: b.originURL,
|
|
|
|
before: { f: b.f, t: b.t },
|
|
|
|
after: a !== undefined ? { f: a.f, t: a.t } : null,
|
|
|
|
}]);
|
2021-08-17 10:48:39 -06:00
|
|
|
}
|
|
|
|
for ( const [ i, a ] of after ) {
|
|
|
|
const b = before.get(i);
|
|
|
|
if ( b !== undefined ) { continue; }
|
2021-12-06 06:01:05 -07:00
|
|
|
diffs.push([ i, {
|
|
|
|
type: a.type,
|
|
|
|
url: a.url,
|
|
|
|
originURL: a.originURL,
|
|
|
|
before: null,
|
|
|
|
after: { f: a.f, t: a.t },
|
|
|
|
}]);
|
2021-08-17 10:48:39 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return diffs;
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2021-08-15 13:47:40 -06:00
|
|
|
async function bench() {
|
|
|
|
if ( WASM ) {
|
|
|
|
try {
|
|
|
|
const result = await enableWASM();
|
|
|
|
if ( result === true ) {
|
|
|
|
console.log('WASM code paths enabled');
|
|
|
|
}
|
|
|
|
} catch(ex) {
|
|
|
|
console.log(ex);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
const require = createRequire(import.meta.url); // jshint ignore:line
|
|
|
|
const requests = await require('./node_modules/scaling-palm-tree/requests.json');
|
|
|
|
const engine = await StaticNetFilteringEngine.create();
|
|
|
|
|
|
|
|
let start = process.hrtime.bigint();
|
|
|
|
await engine.useLists([
|
2023-05-07 07:19:01 -06:00
|
|
|
read('assets/ublock/filters.min.txt')
|
2021-08-15 13:47:40 -06:00
|
|
|
.then(raw => ({ name: 'filters', raw })),
|
2022-12-23 10:55:15 -07:00
|
|
|
read('assets/ublock/badware.txt')
|
|
|
|
.then(raw => ({ name: 'badware', raw })),
|
2023-05-07 07:19:01 -06:00
|
|
|
read('assets/ublock/privacy.min.txt')
|
2021-08-15 13:47:40 -06:00
|
|
|
.then(raw => ({ name: 'privacy', raw })),
|
2022-12-23 10:55:15 -07:00
|
|
|
read('assets/ublock/quick-fixes.txt')
|
|
|
|
.then(raw => ({ name: 'quick-fixes.txt', raw })),
|
2021-08-15 13:47:40 -06:00
|
|
|
read('assets/ublock/unbreak.txt')
|
|
|
|
.then(raw => ({ name: 'unbreak.txt', raw })),
|
2022-12-22 11:13:38 -07:00
|
|
|
read('assets/thirdparties/easylist/easylist.txt')
|
2021-08-15 13:47:40 -06:00
|
|
|
.then(raw => ({ name: 'easylist', raw })),
|
2022-12-22 11:13:38 -07:00
|
|
|
read('assets/thirdparties/easylist/easyprivacy.txt')
|
2021-08-15 13:47:40 -06:00
|
|
|
.then(raw => ({ name: 'easyprivacy', raw })),
|
|
|
|
read('assets/thirdparties/pgl.yoyo.org/as/serverlist')
|
|
|
|
.then(raw => ({ name: 'PGL', raw })),
|
|
|
|
read('assets/thirdparties/urlhaus-filter/urlhaus-filter-online.txt')
|
|
|
|
.then(raw => ({ name: 'urlhaus', raw })),
|
|
|
|
]);
|
|
|
|
let stop = process.hrtime.bigint();
|
|
|
|
console.log(`Filter lists parsed-compiled-loaded in ${nanoToMilli(stop - start)}`);
|
|
|
|
|
|
|
|
// Dry run to let JS engine optimize hot JS code paths
|
2021-08-17 10:48:39 -06:00
|
|
|
for ( let i = 0; i < requests.length; i += 8 ) {
|
2021-08-15 13:47:40 -06:00
|
|
|
const request = requests[i];
|
2021-08-17 10:48:39 -06:00
|
|
|
void engine.matchRequest({
|
|
|
|
type: WEBREQUEST_OPTIONS[request.cpt],
|
|
|
|
url: request.url,
|
|
|
|
originURL: request.frameUrl,
|
|
|
|
});
|
2021-08-15 13:47:40 -06:00
|
|
|
}
|
|
|
|
|
2021-08-17 10:48:39 -06:00
|
|
|
if ( MODIFIERS === false ) {
|
|
|
|
matchRequests(engine, requests);
|
|
|
|
} else {
|
|
|
|
matchRequestModifiers(engine, requests);
|
2021-08-15 13:47:40 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
StaticNetFilteringEngine.release();
|
|
|
|
}
|
|
|
|
|
|
|
|
bench();
|
|
|
|
|
|
|
|
/******************************************************************************/
|