mirror of https://github.com/gorhill/uBlock.git
Group all compiling-related code into FilterCompiler() class
In the static network filtering engine (snfe), the compiling-related code was spread across two classes. This commit makes it so that all the compiling-related code is in the FilterCompiler class, whose clear purpose is to compile raw filters into a form which can be persisted and later fed to the snfe with no parsing overhead. To compile raw static network filters, the new approach is: snfe.createCompiler(parser); Then for each single raw filter to compile: compiler.compile(parser, writer); The caller is responsible for keeping a reference to the compiler instance for as long as it is needed. This removes the need for the clunky code used to keep a compiler instance alive in the snfe. Additionally, snfe.tokenHistograms() has been moved to benchmarks.js: it has no dependency on the snfe and is just a utility function.
This commit is contained in:
parent
4ea0d134ad
commit
85c68116bd
|
@ -27,10 +27,10 @@ import './lib/publicsuffixlist/publicsuffixlist.js';
|
|||
import './lib/punycode.js';
|
||||
|
||||
import globals from './js/globals.js';
|
||||
import staticNetFilteringEngine from './js/static-net-filtering.js';
|
||||
import { FilteringContext } from './js/filtering-context.js';
|
||||
import { LineIterator } from './js/text-utils.js';
|
||||
import { StaticFilteringParser } from './js/static-filtering-parser.js';
|
||||
import { staticNetFilteringEngine } from './js/static-net-filtering.js';
|
||||
|
||||
import {
|
||||
CompiledListReader,
|
||||
|
@ -42,6 +42,7 @@ import {
|
|||
function compileList(rawText, writer) {
|
||||
const lineIter = new LineIterator(rawText);
|
||||
const parser = new StaticFilteringParser(true);
|
||||
const compiler = staticNetFilteringEngine.createCompiler(parser);
|
||||
|
||||
parser.setMaxTokenLength(staticNetFilteringEngine.MAX_TOKEN_LENGTH);
|
||||
|
||||
|
@ -59,12 +60,12 @@ function compileList(rawText, writer) {
|
|||
if ( parser.patternHasUnicode() && parser.toASCII() === false ) {
|
||||
continue;
|
||||
}
|
||||
if ( staticNetFilteringEngine.compile(parser, writer) ) { continue; }
|
||||
if ( staticNetFilteringEngine.error !== undefined ) {
|
||||
if ( compiler.compile(parser, writer) ) { continue; }
|
||||
if ( compiler.error !== undefined ) {
|
||||
console.info(JSON.stringify({
|
||||
realm: 'message',
|
||||
type: 'error',
|
||||
text: staticNetFilteringEngine.error
|
||||
text: compiler.error
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -54,6 +54,7 @@ function loadJSON(path) {
|
|||
function compileList(rawText, writer, options = {}) {
|
||||
const lineIter = new LineIterator(rawText);
|
||||
const parser = new StaticFilteringParser(true);
|
||||
const compiler = snfe.createCompiler(parser);
|
||||
const events = Array.isArray(options.events) ? options.events : undefined;
|
||||
|
||||
parser.setMaxTokenLength(snfe.MAX_TOKEN_LENGTH);
|
||||
|
@ -70,11 +71,11 @@ function compileList(rawText, writer, options = {}) {
|
|||
if ( parser.patternHasUnicode() && parser.toASCII() === false ) {
|
||||
continue;
|
||||
}
|
||||
if ( snfe.compile(parser, writer) ) { continue; }
|
||||
if ( snfe.error !== undefined && events !== undefined ) {
|
||||
if ( compiler.compile(parser, writer) ) { continue; }
|
||||
if ( compiler.error !== undefined && events !== undefined ) {
|
||||
options.events.push({
|
||||
type: 'error',
|
||||
text: snfe.error
|
||||
text: compiler.error
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
@ -199,6 +199,47 @@ const loadBenchmarkDataset = (( ) => {
|
|||
|
||||
/******************************************************************************/
|
||||
|
||||
// Replays every request of the benchmark dataset through the static network
// filtering engine and logs two token histograms to the console:
// - "Misses": the 100 most frequent URL tokens among requests for which
//   matchRequest() returned 0;
// - "Hits": the 100 most frequent URL tokens among requests for which
//   matchRequest() returned 1, excluding any token already listed in the
//   top misses.
// A token is a run of two or more characters from [0-9a-z%] found in the
// lowercased request URL, truncated to its first 7 characters.
// Resolves to undefined; all output goes through console.info().
µb.tokenHistograms = async function() {
    const dataset = await loadBenchmarkDataset();
    if ( Array.isArray(dataset) === false || dataset.length === 0 ) {
        console.info('No requests found to benchmark');
        return;
    }

    console.info(`Computing token histograms...`);

    const fctxt = new FilteringContext();
    const missCounts = new Map();
    const hitCounts = new Map();
    const reTokens = /[0-9a-z%]{2,}/g;

    // Increment the occurrence counter of `token` in `counts`.
    const bump = (counts, token) => {
        counts.set(token, (counts.get(token) || 0) + 1);
    };

    for ( const request of dataset ) {
        fctxt.setURL(request.url);
        fctxt.setDocOriginFromURL(request.frameUrl);
        fctxt.setType(request.cpt);
        const outcome = staticNetFilteringEngine.matchRequest(fctxt);
        const lowercasedURL = request.url.toLowerCase();
        for ( const match of lowercasedURL.matchAll(reTokens) ) {
            const token = match[0].slice(0, 7);
            // Any outcome other than 0 or 1 is not counted in either map.
            switch ( outcome ) {
            case 0:
                bump(missCounts, token);
                break;
            case 1:
                bump(hitCounts, token);
                break;
            default:
                break;
            }
        }
    }

    // Rank [ token, count ] pairs by descending count.
    const byCountDescending = (a, b) => b[1] - a[1];

    const topmisses = [ ...missCounts ].sort(byCountDescending).slice(0, 100);

    // Tokens which are frequent misses are dropped from the hit histogram
    // before the top hits are ranked.
    topmisses.forEach(entry => {
        hitCounts.delete(entry[0]);
    });

    const tophits = [ ...hitCounts ].sort(byCountDescending).slice(0, 100);

    console.info('Misses:', JSON.stringify(topmisses));
    console.info('Hits:', JSON.stringify(tophits));
};
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
µb.benchmarkDynamicNetFiltering = async function() {
|
||||
const requests = await loadBenchmarkDataset();
|
||||
if ( Array.isArray(requests) === false || requests.length === 0 ) {
|
||||
|
|
|
@ -137,9 +137,8 @@ const fromNetFilter = async function(rawFilter) {
|
|||
parser.setMaxTokenLength(staticNetFilteringEngine.MAX_TOKEN_LENGTH);
|
||||
parser.analyze(rawFilter);
|
||||
|
||||
if ( staticNetFilteringEngine.compile(parser, writer) === false ) {
|
||||
return;
|
||||
}
|
||||
const compiler = staticNetFilteringEngine.createCompiler(parser);
|
||||
if ( compiler.compile(parser, writer) === false ) { return; }
|
||||
|
||||
await initWorker();
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -973,6 +973,7 @@ self.addEventListener('hiddenSettingsChanged', ( ) => {
|
|||
// https://adblockplus.org/en/filters
|
||||
const lineIter = new LineIterator(this.preparseDirectives.prune(rawText));
|
||||
const parser = new StaticFilteringParser({ expertMode });
|
||||
const compiler = staticNetFilteringEngine.createCompiler(parser);
|
||||
|
||||
parser.setMaxTokenLength(staticNetFilteringEngine.MAX_TOKEN_LENGTH);
|
||||
|
||||
|
@ -1000,12 +1001,12 @@ self.addEventListener('hiddenSettingsChanged', ( ) => {
|
|||
if ( parser.patternHasUnicode() && parser.toASCII() === false ) {
|
||||
continue;
|
||||
}
|
||||
if ( staticNetFilteringEngine.compile(parser, writer) ) { continue; }
|
||||
if ( staticNetFilteringEngine.error !== undefined ) {
|
||||
if ( compiler.compile(parser, writer) ) { continue; }
|
||||
if ( compiler.error !== undefined ) {
|
||||
logger.writeOne({
|
||||
realm: 'message',
|
||||
type: 'error',
|
||||
text: staticNetFilteringEngine.error
|
||||
text: compiler.error
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
@ -231,14 +231,13 @@ const onPopupUpdated = (( ) => {
|
|||
popunderHostname,
|
||||
result
|
||||
) {
|
||||
if (
|
||||
fctxt.filter === undefined ||
|
||||
fctxt.filter !== 'static' ||
|
||||
fctxt.filter.token === staticNetFilteringEngine.noTokenHash
|
||||
) {
|
||||
if ( fctxt.filter === undefined || fctxt.filter !== 'static' ) {
|
||||
return 0;
|
||||
}
|
||||
if ( fctxt.filter.token === staticNetFilteringEngine.dotTokenHash ) {
|
||||
if ( fctxt.filter.isUntokenized() ) {
|
||||
return 0;
|
||||
}
|
||||
if ( fctxt.filter.isPureHostname() ) {
|
||||
return result;
|
||||
}
|
||||
const re = new RegExp(fctxt.filter.regex, 'i');
|
||||
|
|
Loading…
Reference in New Issue