Group all compiling-related code into FilterCompiler() class

In the static network filtering engine (snfe), the
compiling-related code was spread across two classes.
This commit makes it so that all the compiling-related
code is in the FilterCompiler class, whose clear purpose is
to compile raw filters into a form which can be persisted
and later fed to the snfe with no parsing overhead.

To compile raw static network filters, the new approach is:

    const compiler = snfe.createCompiler(parser);

Then for each single raw filter to compile:

    compiler.compile(parser, writer);

The caller is responsible for keeping a reference to the
compiler instance for as long as it is needed. This removes
the need for the clunky code used to keep an instance of
the compiler alive in the snfe.

Additionally, snfe.tokenHistograms() has been moved to
benchmarks.js: it has no dependency on the snfe and is
just a utility function.
This commit is contained in:
Raymond Hill 2021-08-04 15:14:48 -04:00
parent 4ea0d134ad
commit 85c68116bd
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
7 changed files with 395 additions and 430 deletions

View File

@ -27,10 +27,10 @@ import './lib/publicsuffixlist/publicsuffixlist.js';
import './lib/punycode.js';
import globals from './js/globals.js';
import staticNetFilteringEngine from './js/static-net-filtering.js';
import { FilteringContext } from './js/filtering-context.js';
import { LineIterator } from './js/text-utils.js';
import { StaticFilteringParser } from './js/static-filtering-parser.js';
import { staticNetFilteringEngine } from './js/static-net-filtering.js';
import {
CompiledListReader,
@ -42,6 +42,7 @@ import {
function compileList(rawText, writer) {
const lineIter = new LineIterator(rawText);
const parser = new StaticFilteringParser(true);
const compiler = staticNetFilteringEngine.createCompiler(parser);
parser.setMaxTokenLength(staticNetFilteringEngine.MAX_TOKEN_LENGTH);
@ -59,12 +60,12 @@ function compileList(rawText, writer) {
if ( parser.patternHasUnicode() && parser.toASCII() === false ) {
continue;
}
if ( staticNetFilteringEngine.compile(parser, writer) ) { continue; }
if ( staticNetFilteringEngine.error !== undefined ) {
if ( compiler.compile(parser, writer) ) { continue; }
if ( compiler.error !== undefined ) {
console.info(JSON.stringify({
realm: 'message',
type: 'error',
text: staticNetFilteringEngine.error
text: compiler.error
}));
}
}

View File

@ -54,6 +54,7 @@ function loadJSON(path) {
function compileList(rawText, writer, options = {}) {
const lineIter = new LineIterator(rawText);
const parser = new StaticFilteringParser(true);
const compiler = snfe.createCompiler(parser);
const events = Array.isArray(options.events) ? options.events : undefined;
parser.setMaxTokenLength(snfe.MAX_TOKEN_LENGTH);
@ -70,11 +71,11 @@ function compileList(rawText, writer, options = {}) {
if ( parser.patternHasUnicode() && parser.toASCII() === false ) {
continue;
}
if ( snfe.compile(parser, writer) ) { continue; }
if ( snfe.error !== undefined && events !== undefined ) {
if ( compiler.compile(parser, writer) ) { continue; }
if ( compiler.error !== undefined && events !== undefined ) {
options.events.push({
type: 'error',
text: snfe.error
text: compiler.error
});
}
}

View File

@ -199,6 +199,47 @@ const loadBenchmarkDataset = (( ) => {
/******************************************************************************/
// Replays the benchmark dataset through the static network filtering engine
// and logs two histograms of URL tokens (each token truncated to 7
// characters): the 100 tokens most often seen in requests for which
// matchRequest() returned 0 ("Misses"), and the 100 tokens most often seen in
// requests for which it returned 1 ("Hits"). Tokens already listed in the top
// misses are excluded from the hits. Requests yielding any other result are
// not counted.
µb.tokenHistograms = async function() {
    const dataset = await loadBenchmarkDataset();
    if ( Array.isArray(dataset) === false || dataset.length === 0 ) {
        console.info('No requests found to benchmark');
        return;
    }
    console.info(`Computing token histograms...`);

    const fctxt = new FilteringContext();
    const reTokens = /[0-9a-z%]{2,}/g;
    const missCounts = new Map();
    const hitCounts = new Map();

    // Increment the occurrence count of `token` in `counts`.
    const bump = (counts, token) => {
        counts.set(token, (counts.get(token) || 0) + 1);
    };

    for ( const request of dataset ) {
        fctxt.setURL(request.url);
        fctxt.setDocOriginFromURL(request.frameUrl);
        fctxt.setType(request.cpt);
        const result = staticNetFilteringEngine.matchRequest(fctxt);
        // The histogram to feed is the same for every token of a request.
        let counts;
        if ( result === 0 ) {
            counts = missCounts;
        } else if ( result === 1 ) {
            counts = hitCounts;
        }
        const matches = request.url.toLowerCase().matchAll(reTokens);
        for ( const [ keyword ] of matches ) {
            if ( counts === undefined ) { continue; }
            bump(counts, keyword.slice(0, 7));
        }
    }

    // Entries are [ token, count ] pairs, most frequent first.
    const byCountDesc = (a, b) => b[1] - a[1];
    const topmisses = [ ...missCounts ].sort(byCountDesc).slice(0, 100);
    // Tokens common in misses are not reported as hits.
    topmisses.forEach(([ token ]) => {
        hitCounts.delete(token);
    });
    const tophits = [ ...hitCounts ].sort(byCountDesc).slice(0, 100);

    console.info('Misses:', JSON.stringify(topmisses));
    console.info('Hits:', JSON.stringify(tophits));
};
/******************************************************************************/
µb.benchmarkDynamicNetFiltering = async function() {
const requests = await loadBenchmarkDataset();
if ( Array.isArray(requests) === false || requests.length === 0 ) {

View File

@ -137,9 +137,8 @@ const fromNetFilter = async function(rawFilter) {
parser.setMaxTokenLength(staticNetFilteringEngine.MAX_TOKEN_LENGTH);
parser.analyze(rawFilter);
if ( staticNetFilteringEngine.compile(parser, writer) === false ) {
return;
}
const compiler = staticNetFilteringEngine.createCompiler(parser);
if ( compiler.compile(parser, writer) === false ) { return; }
await initWorker();

File diff suppressed because it is too large Load Diff

View File

@ -973,6 +973,7 @@ self.addEventListener('hiddenSettingsChanged', ( ) => {
// https://adblockplus.org/en/filters
const lineIter = new LineIterator(this.preparseDirectives.prune(rawText));
const parser = new StaticFilteringParser({ expertMode });
const compiler = staticNetFilteringEngine.createCompiler(parser);
parser.setMaxTokenLength(staticNetFilteringEngine.MAX_TOKEN_LENGTH);
@ -1000,12 +1001,12 @@ self.addEventListener('hiddenSettingsChanged', ( ) => {
if ( parser.patternHasUnicode() && parser.toASCII() === false ) {
continue;
}
if ( staticNetFilteringEngine.compile(parser, writer) ) { continue; }
if ( staticNetFilteringEngine.error !== undefined ) {
if ( compiler.compile(parser, writer) ) { continue; }
if ( compiler.error !== undefined ) {
logger.writeOne({
realm: 'message',
type: 'error',
text: staticNetFilteringEngine.error
text: compiler.error
});
}
}

View File

@ -231,14 +231,13 @@ const onPopupUpdated = (( ) => {
popunderHostname,
result
) {
if (
fctxt.filter === undefined ||
fctxt.filter !== 'static' ||
fctxt.filter.token === staticNetFilteringEngine.noTokenHash
) {
if ( fctxt.filter === undefined || fctxt.filter !== 'static' ) {
return 0;
}
if ( fctxt.filter.token === staticNetFilteringEngine.dotTokenHash ) {
if ( fctxt.filter.isUntokenized() ) {
return 0;
}
if ( fctxt.filter.isPureHostname() ) {
return result;
}
const re = new RegExp(fctxt.filter.regex, 'i');