mirror of https://github.com/gorhill/uBlock.git
Remove classes specialized in handling single-wildcarded patterns
It turns out that the various benchmarks show no benefit from compiling filters whose pattern contains a single wildcard character into specialized classes which treat the pattern as two sub-patterns; in fact, the benchmarks show a slight improvement in performance when these patterns are treated as generic ones. This also fixes the following related issue: - https://github.com/uBlockOrigin/uBlock-issues/issues/1207
This commit is contained in:
parent
55fc4ba5e5
commit
68e14793cc
|
@ -175,8 +175,8 @@ const µBlock = { // jshint ignore:line
|
||||||
|
|
||||||
// Read-only
|
// Read-only
|
||||||
systemSettings: {
|
systemSettings: {
|
||||||
compiledMagic: 40, // Increase when compiled format changes
|
compiledMagic: 41, // Increase when compiled format changes
|
||||||
selfieMagic: 40, // Increase when selfie format changes
|
selfieMagic: 41, // Increase when selfie format changes
|
||||||
},
|
},
|
||||||
|
|
||||||
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501
|
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501
|
||||||
|
|
|
@ -151,9 +151,9 @@ const FilteringContext = class {
|
||||||
}
|
}
|
||||||
|
|
||||||
fromDetails({ originURL, url, type }) {
|
fromDetails({ originURL, url, type }) {
|
||||||
this.setDocOriginFromURL(originURL);
|
this.setDocOriginFromURL(originURL)
|
||||||
this.setURL(url);
|
.setURL(url)
|
||||||
this.setType(type);
|
.setType(type);
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -641,6 +641,9 @@ const Parser = class {
|
||||||
this.flavorBits &= ~BITFlavorNetRightAnchor;
|
this.flavorBits &= ~BITFlavorNetRightAnchor;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Pointless trailing caret (when preceded by a wildcard)
|
||||||
|
// TODO
|
||||||
|
//
|
||||||
// Pointless left-hand pattern anchoring
|
// Pointless left-hand pattern anchoring
|
||||||
if (
|
if (
|
||||||
(
|
(
|
||||||
|
|
|
@ -597,92 +597,6 @@ const filterLogData = (idata, details) => {
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
const filterPattern = {
|
|
||||||
compile: function(parsed, units) {
|
|
||||||
if ( parsed.isRegex ) {
|
|
||||||
units.push(FilterRegex.compile(parsed));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const pattern = parsed.pattern;
|
|
||||||
if ( pattern === '*' ) {
|
|
||||||
units.push(FilterTrue.compile());
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if ( parsed.tokenHash === NO_TOKEN_HASH ) {
|
|
||||||
units.push(FilterPatternGeneric.compile(parsed));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if ( parsed.firstWildcardPos === -1 && parsed.firstCaretPos === -1 ) {
|
|
||||||
units.push(FilterPatternPlain.compile(parsed));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (
|
|
||||||
parsed.secondWildcardPos !== -1 ||
|
|
||||||
parsed.secondCaretPos !== -1 ||
|
|
||||||
parsed.firstCaretPos !== -1 && (
|
|
||||||
parsed.firstWildcardPos === -1 ||
|
|
||||||
parsed.firstWildcardPos !== (parsed.firstCaretPos + 1)
|
|
||||||
)
|
|
||||||
) {
|
|
||||||
return this.compileGeneric(parsed, units);
|
|
||||||
}
|
|
||||||
const hasCaretCombo = parsed.firstCaretPos !== -1;
|
|
||||||
const sright = pattern.slice(parsed.firstWildcardPos + 1);
|
|
||||||
const sleft = pattern.slice(
|
|
||||||
0,
|
|
||||||
hasCaretCombo ? parsed.firstCaretPos : parsed.firstWildcardPos
|
|
||||||
);
|
|
||||||
if ( parsed.tokenBeg < parsed.firstWildcardPos ) {
|
|
||||||
parsed.pattern = sleft;
|
|
||||||
units.push(FilterPatternPlain.compile(parsed));
|
|
||||||
parsed.pattern = sright;
|
|
||||||
units.push(FilterPatternRight.compile(parsed, hasCaretCombo));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// parsed.tokenBeg > parsed.firstWildcardPos
|
|
||||||
parsed.pattern = sright;
|
|
||||||
parsed.tokenBeg -= parsed.firstWildcardPos + 1;
|
|
||||||
units.push(FilterPatternPlain.compile(parsed));
|
|
||||||
parsed.pattern = sleft;
|
|
||||||
units.push(FilterPatternLeft.compile(parsed, hasCaretCombo));
|
|
||||||
},
|
|
||||||
compileGeneric: function(parsed, units) {
|
|
||||||
const pattern = parsed.pattern;
|
|
||||||
// Optimize special case: plain pattern with trailing caret
|
|
||||||
if (
|
|
||||||
parsed.firstWildcardPos === -1 &&
|
|
||||||
parsed.firstCaretPos === (pattern.length - 1)
|
|
||||||
) {
|
|
||||||
parsed.pattern = pattern.slice(0, -1);
|
|
||||||
units.push(FilterPatternPlain.compile(parsed));
|
|
||||||
units.push(FilterTrailingSeparator.compile());
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// Use a plain pattern as a first test for whether the generic pattern
|
|
||||||
// needs to be matched.
|
|
||||||
// TODO: inconclusive, investigate more.
|
|
||||||
//let left = parsed.tokenBeg;
|
|
||||||
//while ( left > 0 ) {
|
|
||||||
// const c = pattern.charCodeAt(left-1);
|
|
||||||
// if ( c === 0x2A /* '*' */ || c === 0x5E /* '^' */ ) { break; }
|
|
||||||
// left -= 1;
|
|
||||||
//}
|
|
||||||
//let right = parsed.tokenBeg + parsed.token.length;
|
|
||||||
//while ( right < pattern.length ) {
|
|
||||||
// const c = pattern.charCodeAt(right);
|
|
||||||
// if ( c === 0x2A /* '*' */ || c === 0x5E /* '^' */ ) { break; }
|
|
||||||
// right += 1;
|
|
||||||
//}
|
|
||||||
//parsed.pattern = pattern.slice(left, right);
|
|
||||||
//parsed.tokenBeg -= left;
|
|
||||||
//units.push(FilterPatternPlain.compile(parsed));
|
|
||||||
//parsed.pattern = pattern;
|
|
||||||
units.push(FilterPatternGeneric.compile(parsed));
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
const FilterTrue = class {
|
const FilterTrue = class {
|
||||||
static match() {
|
static match() {
|
||||||
return true;
|
return true;
|
||||||
|
@ -856,151 +770,6 @@ registerFilterClass(FilterPatternPlainX);
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
// https://github.com/gorhill/uBlock/commit/7971b223855d#commitcomment-37077525
|
|
||||||
// Mind that the left part may be empty.
|
|
||||||
|
|
||||||
const FilterPatternLeft = class {
|
|
||||||
static match(idata) {
|
|
||||||
const left = bidiTrie.indexOf(
|
|
||||||
0,
|
|
||||||
$patternMatchLeft,
|
|
||||||
filterData[idata+1],
|
|
||||||
filterData[idata+2]
|
|
||||||
);
|
|
||||||
if ( left === -1 ) { return false; }
|
|
||||||
$patternMatchLeft = left;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static compile(details, ex) {
|
|
||||||
return [
|
|
||||||
ex ? FilterPatternLeftEx.fid : FilterPatternLeft.fid,
|
|
||||||
details.pattern
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
static fromCompiled(args) {
|
|
||||||
const idata = filterDataAllocLen(3);
|
|
||||||
filterData[idata+0] = args[0]; // fid
|
|
||||||
filterData[idata+1] = bidiTrie.storeString(args[1]); // i
|
|
||||||
filterData[idata+2] = args[1].length; // n
|
|
||||||
return idata;
|
|
||||||
}
|
|
||||||
|
|
||||||
static logData(idata, details) {
|
|
||||||
details.pattern.unshift('*');
|
|
||||||
const n = filterData[idata+2];
|
|
||||||
if ( n === 0 ) { return; }
|
|
||||||
const s = bidiTrie.extractString(filterData[idata+1], n);
|
|
||||||
details.pattern.unshift(s);
|
|
||||||
details.regex.unshift(restrFromPlainPattern(s), '.*');
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
registerFilterClass(FilterPatternLeft);
|
|
||||||
|
|
||||||
|
|
||||||
const FilterPatternLeftEx = class extends FilterPatternLeft {
|
|
||||||
static match(idata) {
|
|
||||||
const i = filterData[idata+1];
|
|
||||||
const n = filterData[idata+2];
|
|
||||||
let left = 0;
|
|
||||||
for (;;) {
|
|
||||||
left = bidiTrie.indexOf(
|
|
||||||
left,
|
|
||||||
$patternMatchLeft - 1,
|
|
||||||
i,
|
|
||||||
n
|
|
||||||
);
|
|
||||||
if ( left === -1 ) { return false; }
|
|
||||||
if ( isSeparatorChar(bidiTrie.haystack[left + n]) ) { break; }
|
|
||||||
left += 1;
|
|
||||||
}
|
|
||||||
$patternMatchLeft = left;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static logData(idata, details) {
|
|
||||||
details.pattern.unshift('^*');
|
|
||||||
const n = filterData[idata+2];
|
|
||||||
if ( n === 0 ) { return; }
|
|
||||||
const s = bidiTrie.extractString(filterData[idata+1], n);
|
|
||||||
details.pattern.unshift(s);
|
|
||||||
details.regex.unshift(restrFromPlainPattern(s), restrSeparator, '.*');
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
registerFilterClass(FilterPatternLeftEx);
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
const FilterPatternRight = class {
|
|
||||||
static match(idata) {
|
|
||||||
const n = filterData[idata+2];
|
|
||||||
const right = bidiTrie.lastIndexOf(
|
|
||||||
$patternMatchRight, bidiTrie.haystackLen,
|
|
||||||
filterData[idata+1],
|
|
||||||
n
|
|
||||||
);
|
|
||||||
if ( right === -1 ) { return false; }
|
|
||||||
$patternMatchRight = right + n;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static compile(details, ex) {
|
|
||||||
return [
|
|
||||||
ex ? FilterPatternRightEx.fid : FilterPatternRight.fid,
|
|
||||||
details.pattern
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
static fromCompiled(args) {
|
|
||||||
const idata = filterDataAllocLen(3);
|
|
||||||
filterData[idata+0] = args[0]; // fid
|
|
||||||
filterData[idata+1] = bidiTrie.storeString(args[1]); // i
|
|
||||||
filterData[idata+2] = args[1].length; // n
|
|
||||||
return idata;
|
|
||||||
}
|
|
||||||
|
|
||||||
static logData(idata, details) {
|
|
||||||
const s = bidiTrie.extractString(filterData[idata+1], filterData[idata+2]);
|
|
||||||
details.pattern.push('*', s);
|
|
||||||
details.regex.push('.*', restrFromPlainPattern(s));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
registerFilterClass(FilterPatternRight);
|
|
||||||
|
|
||||||
|
|
||||||
const FilterPatternRightEx = class extends FilterPatternRight {
|
|
||||||
static match(idata) {
|
|
||||||
const n = filterData[idata+2];
|
|
||||||
const left = $patternMatchRight;
|
|
||||||
const right = bidiTrie.lastIndexOf(
|
|
||||||
left + 1,
|
|
||||||
bidiTrie.haystackLen,
|
|
||||||
filterData[idata+1],
|
|
||||||
n
|
|
||||||
);
|
|
||||||
if ( right === -1 ) { return false; }
|
|
||||||
if ( isSeparatorChar(bidiTrie.haystack[left]) === false ) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
$patternMatchRight = right + n;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static logData(idata, details) {
|
|
||||||
const s = bidiTrie.extractString(filterData[idata+1], filterData[idata+2]);
|
|
||||||
details.pattern.push('^*', s);
|
|
||||||
details.regex.push(restrSeparator, '.*', restrFromPlainPattern(s));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
registerFilterClass(FilterPatternRightEx);
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
const FilterPatternGeneric = class {
|
const FilterPatternGeneric = class {
|
||||||
static match(idata) {
|
static match(idata) {
|
||||||
const refs = filterRefs[filterData[idata+2]];
|
const refs = filterRefs[filterData[idata+2]];
|
||||||
|
@ -3395,7 +3164,7 @@ class FilterCompiler {
|
||||||
const leftAnchored = (this.anchor & 0b010) !== 0;
|
const leftAnchored = (this.anchor & 0b010) !== 0;
|
||||||
for ( const entity of entities ) {
|
for ( const entity of entities ) {
|
||||||
const units = [];
|
const units = [];
|
||||||
filterPattern.compile(this, units);
|
this.compilePattern(units);
|
||||||
if ( leftAnchored ) { units.push(FilterAnchorLeft.compile()); }
|
if ( leftAnchored ) { units.push(FilterAnchorLeft.compile()); }
|
||||||
filterOrigin.compile([ entity ], true, units);
|
filterOrigin.compile([ entity ], true, units);
|
||||||
this.compileToAtomicFilter(
|
this.compileToAtomicFilter(
|
||||||
|
@ -3409,7 +3178,7 @@ class FilterCompiler {
|
||||||
const units = [];
|
const units = [];
|
||||||
|
|
||||||
// Pattern
|
// Pattern
|
||||||
filterPattern.compile(this, units);
|
this.compilePattern(units);
|
||||||
|
|
||||||
// Anchor
|
// Anchor
|
||||||
if ( (this.anchor & 0b100) !== 0 ) {
|
if ( (this.anchor & 0b100) !== 0 ) {
|
||||||
|
@ -3493,6 +3262,36 @@ class FilterCompiler {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
compilePattern(units) {
|
||||||
|
if ( this.isRegex ) {
|
||||||
|
units.push(FilterRegex.compile(this));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if ( this.pattern === '*' ) {
|
||||||
|
units.push(FilterTrue.compile());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if ( this.tokenHash === NO_TOKEN_HASH ) {
|
||||||
|
units.push(FilterPatternGeneric.compile(this));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if ( this.firstWildcardPos === -1 && this.firstCaretPos === -1 ) {
|
||||||
|
units.push(FilterPatternPlain.compile(this));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Optimize special case: plain pattern with trailing caret
|
||||||
|
if (
|
||||||
|
this.firstWildcardPos === -1 &&
|
||||||
|
this.firstCaretPos === (this.pattern.length - 1)
|
||||||
|
) {
|
||||||
|
this.pattern = this.pattern.slice(0, -1);
|
||||||
|
units.push(FilterPatternPlain.compile(this));
|
||||||
|
units.push(FilterTrailingSeparator.compile());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
units.push(FilterPatternGeneric.compile(this));
|
||||||
|
}
|
||||||
|
|
||||||
compileToAtomicFilter(fdata, writer) {
|
compileToAtomicFilter(fdata, writer) {
|
||||||
const catBits = this.action | this.party;
|
const catBits = this.action | this.party;
|
||||||
let { typeBits } = this;
|
let { typeBits } = this;
|
||||||
|
@ -3544,8 +3343,8 @@ FilterCompiler.prototype.FILTER_UNSUPPORTED = 2;
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
const FilterContainer = function() {
|
const FilterContainer = function() {
|
||||||
this.compilerVersion = '2';
|
this.compilerVersion = '4';
|
||||||
this.selfieVersion = '3';
|
this.selfieVersion = '4';
|
||||||
|
|
||||||
this.MAX_TOKEN_LENGTH = MAX_TOKEN_LENGTH;
|
this.MAX_TOKEN_LENGTH = MAX_TOKEN_LENGTH;
|
||||||
this.optimizeTaskId = undefined;
|
this.optimizeTaskId = undefined;
|
||||||
|
@ -3697,6 +3496,8 @@ FilterContainer.prototype.freeze = function() {
|
||||||
this.goodFilters.clear();
|
this.goodFilters.clear();
|
||||||
filterArgsToUnit.clear();
|
filterArgsToUnit.clear();
|
||||||
|
|
||||||
|
//this.filterClassHistogram();
|
||||||
|
|
||||||
// Optimizing is not critical for the static network filtering engine to
|
// Optimizing is not critical for the static network filtering engine to
|
||||||
// work properly, so defer this until later to allow for reduced delay to
|
// work properly, so defer this until later to allow for reduced delay to
|
||||||
// readiness when no valid selfie is available.
|
// readiness when no valid selfie is available.
|
||||||
|
@ -3716,8 +3517,6 @@ FilterContainer.prototype.optimize = function(throttle = 0) {
|
||||||
this.optimizeTaskId = undefined;
|
this.optimizeTaskId = undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
//this.filterClassHistogram();
|
|
||||||
|
|
||||||
const later = throttle => {
|
const later = throttle => {
|
||||||
this.optimizeTaskId = queueTask(( ) => {
|
this.optimizeTaskId = queueTask(( ) => {
|
||||||
this.optimizeTaskId = undefined;
|
this.optimizeTaskId = undefined;
|
||||||
|
|
Loading…
Reference in New Issue