mirror of https://github.com/gorhill/uBlock.git
Remove classes specialized in handling single-wildcarded patterns
Turns out the various benchmarks show no benefit when compiling filters whose pattern contains a single wildcard character into specialized classes which treat the pattern as two sub-patterns; there is actually a slight improvement in performance, as per benchmarks, when treating these patterns as generic ones. This also fixes the following related issue: - https://github.com/uBlockOrigin/uBlock-issues/issues/1207
This commit is contained in:
parent
55fc4ba5e5
commit
68e14793cc
|
@ -175,8 +175,8 @@ const µBlock = { // jshint ignore:line
|
|||
|
||||
// Read-only
|
||||
systemSettings: {
|
||||
compiledMagic: 40, // Increase when compiled format changes
|
||||
selfieMagic: 40, // Increase when selfie format changes
|
||||
compiledMagic: 41, // Increase when compiled format changes
|
||||
selfieMagic: 41, // Increase when selfie format changes
|
||||
},
|
||||
|
||||
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501
|
||||
|
|
|
@ -151,9 +151,9 @@ const FilteringContext = class {
|
|||
}
|
||||
|
||||
fromDetails({ originURL, url, type }) {
|
||||
this.setDocOriginFromURL(originURL);
|
||||
this.setURL(url);
|
||||
this.setType(type);
|
||||
this.setDocOriginFromURL(originURL)
|
||||
.setURL(url)
|
||||
.setType(type);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
|
|
@ -641,6 +641,9 @@ const Parser = class {
|
|||
this.flavorBits &= ~BITFlavorNetRightAnchor;
|
||||
}
|
||||
}
|
||||
// Pointless trailing caret (when preceded by a wildcard)
|
||||
// TODO
|
||||
//
|
||||
// Pointless left-hand pattern anchoring
|
||||
if (
|
||||
(
|
||||
|
|
|
@ -597,92 +597,6 @@ const filterLogData = (idata, details) => {
|
|||
|
||||
/******************************************************************************/
|
||||
|
||||
const filterPattern = {
|
||||
compile: function(parsed, units) {
|
||||
if ( parsed.isRegex ) {
|
||||
units.push(FilterRegex.compile(parsed));
|
||||
return;
|
||||
}
|
||||
const pattern = parsed.pattern;
|
||||
if ( pattern === '*' ) {
|
||||
units.push(FilterTrue.compile());
|
||||
return;
|
||||
}
|
||||
if ( parsed.tokenHash === NO_TOKEN_HASH ) {
|
||||
units.push(FilterPatternGeneric.compile(parsed));
|
||||
return;
|
||||
}
|
||||
if ( parsed.firstWildcardPos === -1 && parsed.firstCaretPos === -1 ) {
|
||||
units.push(FilterPatternPlain.compile(parsed));
|
||||
return;
|
||||
}
|
||||
if (
|
||||
parsed.secondWildcardPos !== -1 ||
|
||||
parsed.secondCaretPos !== -1 ||
|
||||
parsed.firstCaretPos !== -1 && (
|
||||
parsed.firstWildcardPos === -1 ||
|
||||
parsed.firstWildcardPos !== (parsed.firstCaretPos + 1)
|
||||
)
|
||||
) {
|
||||
return this.compileGeneric(parsed, units);
|
||||
}
|
||||
const hasCaretCombo = parsed.firstCaretPos !== -1;
|
||||
const sright = pattern.slice(parsed.firstWildcardPos + 1);
|
||||
const sleft = pattern.slice(
|
||||
0,
|
||||
hasCaretCombo ? parsed.firstCaretPos : parsed.firstWildcardPos
|
||||
);
|
||||
if ( parsed.tokenBeg < parsed.firstWildcardPos ) {
|
||||
parsed.pattern = sleft;
|
||||
units.push(FilterPatternPlain.compile(parsed));
|
||||
parsed.pattern = sright;
|
||||
units.push(FilterPatternRight.compile(parsed, hasCaretCombo));
|
||||
return;
|
||||
}
|
||||
// parsed.tokenBeg > parsed.firstWildcardPos
|
||||
parsed.pattern = sright;
|
||||
parsed.tokenBeg -= parsed.firstWildcardPos + 1;
|
||||
units.push(FilterPatternPlain.compile(parsed));
|
||||
parsed.pattern = sleft;
|
||||
units.push(FilterPatternLeft.compile(parsed, hasCaretCombo));
|
||||
},
|
||||
compileGeneric: function(parsed, units) {
|
||||
const pattern = parsed.pattern;
|
||||
// Optimize special case: plain pattern with trailing caret
|
||||
if (
|
||||
parsed.firstWildcardPos === -1 &&
|
||||
parsed.firstCaretPos === (pattern.length - 1)
|
||||
) {
|
||||
parsed.pattern = pattern.slice(0, -1);
|
||||
units.push(FilterPatternPlain.compile(parsed));
|
||||
units.push(FilterTrailingSeparator.compile());
|
||||
return;
|
||||
}
|
||||
// Use a plain pattern as a first test for whether the generic pattern
|
||||
// needs to be matched.
|
||||
// TODO: inconclusive, investigate more.
|
||||
//let left = parsed.tokenBeg;
|
||||
//while ( left > 0 ) {
|
||||
// const c = pattern.charCodeAt(left-1);
|
||||
// if ( c === 0x2A /* '*' */ || c === 0x5E /* '^' */ ) { break; }
|
||||
// left -= 1;
|
||||
//}
|
||||
//let right = parsed.tokenBeg + parsed.token.length;
|
||||
//while ( right < pattern.length ) {
|
||||
// const c = pattern.charCodeAt(right);
|
||||
// if ( c === 0x2A /* '*' */ || c === 0x5E /* '^' */ ) { break; }
|
||||
// right += 1;
|
||||
//}
|
||||
//parsed.pattern = pattern.slice(left, right);
|
||||
//parsed.tokenBeg -= left;
|
||||
//units.push(FilterPatternPlain.compile(parsed));
|
||||
//parsed.pattern = pattern;
|
||||
units.push(FilterPatternGeneric.compile(parsed));
|
||||
},
|
||||
};
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
const FilterTrue = class {
|
||||
static match() {
|
||||
return true;
|
||||
|
@ -856,151 +770,6 @@ registerFilterClass(FilterPatternPlainX);
|
|||
|
||||
/******************************************************************************/
|
||||
|
||||
// https://github.com/gorhill/uBlock/commit/7971b223855d#commitcomment-37077525
|
||||
// Mind that the left part may be empty.
|
||||
|
||||
const FilterPatternLeft = class {
|
||||
static match(idata) {
|
||||
const left = bidiTrie.indexOf(
|
||||
0,
|
||||
$patternMatchLeft,
|
||||
filterData[idata+1],
|
||||
filterData[idata+2]
|
||||
);
|
||||
if ( left === -1 ) { return false; }
|
||||
$patternMatchLeft = left;
|
||||
return true;
|
||||
}
|
||||
|
||||
static compile(details, ex) {
|
||||
return [
|
||||
ex ? FilterPatternLeftEx.fid : FilterPatternLeft.fid,
|
||||
details.pattern
|
||||
];
|
||||
}
|
||||
|
||||
static fromCompiled(args) {
|
||||
const idata = filterDataAllocLen(3);
|
||||
filterData[idata+0] = args[0]; // fid
|
||||
filterData[idata+1] = bidiTrie.storeString(args[1]); // i
|
||||
filterData[idata+2] = args[1].length; // n
|
||||
return idata;
|
||||
}
|
||||
|
||||
static logData(idata, details) {
|
||||
details.pattern.unshift('*');
|
||||
const n = filterData[idata+2];
|
||||
if ( n === 0 ) { return; }
|
||||
const s = bidiTrie.extractString(filterData[idata+1], n);
|
||||
details.pattern.unshift(s);
|
||||
details.regex.unshift(restrFromPlainPattern(s), '.*');
|
||||
}
|
||||
};
|
||||
|
||||
registerFilterClass(FilterPatternLeft);
|
||||
|
||||
|
||||
const FilterPatternLeftEx = class extends FilterPatternLeft {
|
||||
static match(idata) {
|
||||
const i = filterData[idata+1];
|
||||
const n = filterData[idata+2];
|
||||
let left = 0;
|
||||
for (;;) {
|
||||
left = bidiTrie.indexOf(
|
||||
left,
|
||||
$patternMatchLeft - 1,
|
||||
i,
|
||||
n
|
||||
);
|
||||
if ( left === -1 ) { return false; }
|
||||
if ( isSeparatorChar(bidiTrie.haystack[left + n]) ) { break; }
|
||||
left += 1;
|
||||
}
|
||||
$patternMatchLeft = left;
|
||||
return true;
|
||||
}
|
||||
|
||||
static logData(idata, details) {
|
||||
details.pattern.unshift('^*');
|
||||
const n = filterData[idata+2];
|
||||
if ( n === 0 ) { return; }
|
||||
const s = bidiTrie.extractString(filterData[idata+1], n);
|
||||
details.pattern.unshift(s);
|
||||
details.regex.unshift(restrFromPlainPattern(s), restrSeparator, '.*');
|
||||
}
|
||||
};
|
||||
|
||||
registerFilterClass(FilterPatternLeftEx);
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
const FilterPatternRight = class {
|
||||
static match(idata) {
|
||||
const n = filterData[idata+2];
|
||||
const right = bidiTrie.lastIndexOf(
|
||||
$patternMatchRight, bidiTrie.haystackLen,
|
||||
filterData[idata+1],
|
||||
n
|
||||
);
|
||||
if ( right === -1 ) { return false; }
|
||||
$patternMatchRight = right + n;
|
||||
return true;
|
||||
}
|
||||
|
||||
static compile(details, ex) {
|
||||
return [
|
||||
ex ? FilterPatternRightEx.fid : FilterPatternRight.fid,
|
||||
details.pattern
|
||||
];
|
||||
}
|
||||
|
||||
static fromCompiled(args) {
|
||||
const idata = filterDataAllocLen(3);
|
||||
filterData[idata+0] = args[0]; // fid
|
||||
filterData[idata+1] = bidiTrie.storeString(args[1]); // i
|
||||
filterData[idata+2] = args[1].length; // n
|
||||
return idata;
|
||||
}
|
||||
|
||||
static logData(idata, details) {
|
||||
const s = bidiTrie.extractString(filterData[idata+1], filterData[idata+2]);
|
||||
details.pattern.push('*', s);
|
||||
details.regex.push('.*', restrFromPlainPattern(s));
|
||||
}
|
||||
};
|
||||
|
||||
registerFilterClass(FilterPatternRight);
|
||||
|
||||
|
||||
const FilterPatternRightEx = class extends FilterPatternRight {
|
||||
static match(idata) {
|
||||
const n = filterData[idata+2];
|
||||
const left = $patternMatchRight;
|
||||
const right = bidiTrie.lastIndexOf(
|
||||
left + 1,
|
||||
bidiTrie.haystackLen,
|
||||
filterData[idata+1],
|
||||
n
|
||||
);
|
||||
if ( right === -1 ) { return false; }
|
||||
if ( isSeparatorChar(bidiTrie.haystack[left]) === false ) {
|
||||
return false;
|
||||
}
|
||||
$patternMatchRight = right + n;
|
||||
return true;
|
||||
}
|
||||
|
||||
static logData(idata, details) {
|
||||
const s = bidiTrie.extractString(filterData[idata+1], filterData[idata+2]);
|
||||
details.pattern.push('^*', s);
|
||||
details.regex.push(restrSeparator, '.*', restrFromPlainPattern(s));
|
||||
}
|
||||
};
|
||||
|
||||
registerFilterClass(FilterPatternRightEx);
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
const FilterPatternGeneric = class {
|
||||
static match(idata) {
|
||||
const refs = filterRefs[filterData[idata+2]];
|
||||
|
@ -3395,7 +3164,7 @@ class FilterCompiler {
|
|||
const leftAnchored = (this.anchor & 0b010) !== 0;
|
||||
for ( const entity of entities ) {
|
||||
const units = [];
|
||||
filterPattern.compile(this, units);
|
||||
this.compilePattern(units);
|
||||
if ( leftAnchored ) { units.push(FilterAnchorLeft.compile()); }
|
||||
filterOrigin.compile([ entity ], true, units);
|
||||
this.compileToAtomicFilter(
|
||||
|
@ -3409,7 +3178,7 @@ class FilterCompiler {
|
|||
const units = [];
|
||||
|
||||
// Pattern
|
||||
filterPattern.compile(this, units);
|
||||
this.compilePattern(units);
|
||||
|
||||
// Anchor
|
||||
if ( (this.anchor & 0b100) !== 0 ) {
|
||||
|
@ -3493,6 +3262,36 @@ class FilterCompiler {
|
|||
}
|
||||
}
|
||||
|
||||
compilePattern(units) {
|
||||
if ( this.isRegex ) {
|
||||
units.push(FilterRegex.compile(this));
|
||||
return;
|
||||
}
|
||||
if ( this.pattern === '*' ) {
|
||||
units.push(FilterTrue.compile());
|
||||
return;
|
||||
}
|
||||
if ( this.tokenHash === NO_TOKEN_HASH ) {
|
||||
units.push(FilterPatternGeneric.compile(this));
|
||||
return;
|
||||
}
|
||||
if ( this.firstWildcardPos === -1 && this.firstCaretPos === -1 ) {
|
||||
units.push(FilterPatternPlain.compile(this));
|
||||
return;
|
||||
}
|
||||
// Optimize special case: plain pattern with trailing caret
|
||||
if (
|
||||
this.firstWildcardPos === -1 &&
|
||||
this.firstCaretPos === (this.pattern.length - 1)
|
||||
) {
|
||||
this.pattern = this.pattern.slice(0, -1);
|
||||
units.push(FilterPatternPlain.compile(this));
|
||||
units.push(FilterTrailingSeparator.compile());
|
||||
return;
|
||||
}
|
||||
units.push(FilterPatternGeneric.compile(this));
|
||||
}
|
||||
|
||||
compileToAtomicFilter(fdata, writer) {
|
||||
const catBits = this.action | this.party;
|
||||
let { typeBits } = this;
|
||||
|
@ -3544,8 +3343,8 @@ FilterCompiler.prototype.FILTER_UNSUPPORTED = 2;
|
|||
/******************************************************************************/
|
||||
|
||||
const FilterContainer = function() {
|
||||
this.compilerVersion = '2';
|
||||
this.selfieVersion = '3';
|
||||
this.compilerVersion = '4';
|
||||
this.selfieVersion = '4';
|
||||
|
||||
this.MAX_TOKEN_LENGTH = MAX_TOKEN_LENGTH;
|
||||
this.optimizeTaskId = undefined;
|
||||
|
@ -3697,6 +3496,8 @@ FilterContainer.prototype.freeze = function() {
|
|||
this.goodFilters.clear();
|
||||
filterArgsToUnit.clear();
|
||||
|
||||
//this.filterClassHistogram();
|
||||
|
||||
// Optimizing is not critical for the static network filtering engine to
|
||||
// work properly, so defer this until later to allow for reduced delay to
|
||||
// readiness when no valid selfie is available.
|
||||
|
@ -3716,8 +3517,6 @@ FilterContainer.prototype.optimize = function(throttle = 0) {
|
|||
this.optimizeTaskId = undefined;
|
||||
}
|
||||
|
||||
//this.filterClassHistogram();
|
||||
|
||||
const later = throttle => {
|
||||
this.optimizeTaskId = queueTask(( ) => {
|
||||
this.optimizeTaskId = undefined;
|
||||
|
|
Loading…
Reference in New Issue