Remove classes specialized in handling single-wildcarded patterns

Turns out the various benchmarks show no benefits when compiling
filters whose pattern contains a single wildcard character into
specialized classes which treat the pattern as two sub-patterns,
and actually there is a slight improvement in performance as per
benchmarks when treating these patterns as generic ones.

This also fixes the following related issue:
- https://github.com/uBlockOrigin/uBlock-issues/issues/1207
This commit is contained in:
Raymond Hill 2021-12-06 12:03:52 -05:00
parent 55fc4ba5e5
commit 68e14793cc
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
4 changed files with 44 additions and 242 deletions

View File

@ -175,8 +175,8 @@ const µBlock = { // jshint ignore:line
// Read-only // Read-only
systemSettings: { systemSettings: {
compiledMagic: 40, // Increase when compiled format changes compiledMagic: 41, // Increase when compiled format changes
selfieMagic: 40, // Increase when selfie format changes selfieMagic: 41, // Increase when selfie format changes
}, },
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501 // https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501

View File

@ -151,9 +151,9 @@ const FilteringContext = class {
} }
fromDetails({ originURL, url, type }) { fromDetails({ originURL, url, type }) {
this.setDocOriginFromURL(originURL); this.setDocOriginFromURL(originURL)
this.setURL(url); .setURL(url)
this.setType(type); .setType(type);
return this; return this;
} }

View File

@ -641,6 +641,9 @@ const Parser = class {
this.flavorBits &= ~BITFlavorNetRightAnchor; this.flavorBits &= ~BITFlavorNetRightAnchor;
} }
} }
// Pointless trailing caret (when preceded by a wildcard)
// TODO
//
// Pointless left-hand pattern anchoring // Pointless left-hand pattern anchoring
if ( if (
( (

View File

@ -597,92 +597,6 @@ const filterLogData = (idata, details) => {
/******************************************************************************/ /******************************************************************************/
// Translates the pattern part of a parsed network filter into one or more
// compiled filter units, which are appended to the `units` array.
const filterPattern = {
    // Select the unit(s) used to match `parsed.pattern`:
    // - regex-based filters       => FilterRegex
    // - the lone `*` pattern      => FilterTrue
    // - filters without a token   => FilterPatternGeneric
    // - no wildcard, no caret     => FilterPatternPlain
    // - exactly one wildcard, optionally immediately preceded by the only
    //   caret                     => a plain unit for the side holding the
    //                                token + a left/right unit for the
    //                                other side
    // - everything else           => compileGeneric()
    // Side effect: `parsed.pattern` and `parsed.tokenBeg` may be overwritten
    // and are not restored before returning.
    compile(parsed, units) {
        if ( parsed.isRegex ) {
            units.push(FilterRegex.compile(parsed));
            return;
        }
        const fullPattern = parsed.pattern;
        if ( fullPattern === '*' ) {
            units.push(FilterTrue.compile());
            return;
        }
        if ( parsed.tokenHash === NO_TOKEN_HASH ) {
            units.push(FilterPatternGeneric.compile(parsed));
            return;
        }
        const wildcardAt = parsed.firstWildcardPos;
        const caretAt = parsed.firstCaretPos;
        const hasCaret = caretAt !== -1;
        if ( wildcardAt === -1 && hasCaret === false ) {
            units.push(FilterPatternPlain.compile(parsed));
            return;
        }
        // The pattern can be split in two only when there is a single
        // wildcard, and the caret (if any) is unique and sits right before
        // that wildcard.
        const splittable =
            parsed.secondWildcardPos === -1 &&
            parsed.secondCaretPos === -1 && (
                hasCaret === false ||
                wildcardAt !== -1 && wildcardAt === (caretAt + 1)
            );
        if ( splittable === false ) {
            return this.compileGeneric(parsed, units);
        }
        const rightPart = fullPattern.slice(wildcardAt + 1);
        const leftPart = fullPattern.slice(0, hasCaret ? caretAt : wildcardAt);
        if ( parsed.tokenBeg < wildcardAt ) {
            // Token is in the left part: the left part is the plain unit.
            parsed.pattern = leftPart;
            units.push(FilterPatternPlain.compile(parsed));
            parsed.pattern = rightPart;
            units.push(FilterPatternRight.compile(parsed, hasCaret));
            return;
        }
        // Token is in the right part: the right part is the plain unit, and
        // the token position is made relative to the start of that part.
        parsed.pattern = rightPart;
        parsed.tokenBeg -= wildcardAt + 1;
        units.push(FilterPatternPlain.compile(parsed));
        parsed.pattern = leftPart;
        units.push(FilterPatternLeft.compile(parsed, hasCaret));
    },
    // Fallback for patterns which can't be split around a single wildcard.
    // Side effect: in the trailing-caret case `parsed.pattern` is left
    // without its last character.
    compileGeneric(parsed, units) {
        const { pattern } = parsed;
        // Optimize special case: plain pattern with trailing caret
        const plainWithTrailingCaret =
            parsed.firstWildcardPos === -1 &&
            parsed.firstCaretPos === (pattern.length - 1);
        if ( plainWithTrailingCaret ) {
            parsed.pattern = pattern.slice(0, -1);
            units.push(FilterPatternPlain.compile(parsed));
            units.push(FilterTrailingSeparator.compile());
            return;
        }
        // TODO: inconclusive, investigate more -- an experiment was to first
        // push a FilterPatternPlain unit built from the wildcard-free and
        // caret-free segment surrounding the token, to be used as a quick
        // first test before the generic pattern is evaluated.
        units.push(FilterPatternGeneric.compile(parsed));
    },
};
/******************************************************************************/
const FilterTrue = class { const FilterTrue = class {
static match() { static match() {
return true; return true;
@ -856,151 +770,6 @@ registerFilterClass(FilterPatternPlainX);
/******************************************************************************/ /******************************************************************************/
// https://github.com/gorhill/uBlock/commit/7971b223855d#commitcomment-37077525
// Filter unit for the plain sub-pattern found on the left-hand side of a
// wildcard. Mind that this left-hand sub-pattern may be an empty string.
//
// Layout in filterData:
//   idata+0: fid
//   idata+1: i, value returned by bidiTrie.storeString() for the sub-pattern
//   idata+2: n, length of the sub-pattern
const FilterPatternLeft = class {
    // Search for the sub-pattern using bidiTrie.indexOf() with 0 and
    // $patternMatchLeft as the first two arguments. On success,
    // $patternMatchLeft is set to the returned position.
    static match(idata) {
        const needleOffset = filterData[idata+1];
        const needleLen = filterData[idata+2];
        const found = bidiTrie.indexOf(
            0,
            $patternMatchLeft,
            needleOffset,
            needleLen
        );
        if ( found !== -1 ) {
            $patternMatchLeft = found;
            return true;
        }
        return false;
    }
    // `ex` truthy selects the FilterPatternLeftEx flavor.
    static compile(details, ex) {
        const fid = ex ? FilterPatternLeftEx.fid : FilterPatternLeft.fid;
        return [ fid, details.pattern ];
    }
    // Store the compiled representation into filterData, return its index.
    static fromCompiled(args) {
        const idata = filterDataAllocLen(3);
        const str = args[1];
        filterData[idata+0] = args[0];                      // fid
        filterData[idata+1] = bidiTrie.storeString(str);    // i
        filterData[idata+2] = str.length;                   // n
        return idata;
    }
    // Prepend this unit's contribution to the logged pattern and regex
    // parts. An empty sub-pattern contributes only the wildcard.
    static logData(idata, details) {
        details.pattern.unshift('*');
        const needleLen = filterData[idata+2];
        if ( needleLen !== 0 ) {
            const needle = bidiTrie.extractString(
                filterData[idata+1],
                needleLen
            );
            details.pattern.unshift(needle);
            details.regex.unshift(restrFromPlainPattern(needle), '.*');
        }
    }
};
registerFilterClass(FilterPatternLeft);
// Flavor of FilterPatternLeft used when the left-hand sub-pattern is
// followed by `^*` rather than just `*`: the character found right after a
// candidate match must be a separator character.
const FilterPatternLeftEx = class extends FilterPatternLeft {
    // Try successive occurrences of the sub-pattern until one is immediately
    // followed by a separator character. On success, $patternMatchLeft is
    // set to the position of that occurrence.
    static match(idata) {
        const needleOffset = filterData[idata+1];
        const needleLen = filterData[idata+2];
        let pos = 0;
        while ( true ) {
            pos = bidiTrie.indexOf(
                pos,
                $patternMatchLeft - 1,
                needleOffset,
                needleLen
            );
            if ( pos === -1 ) { return false; }
            if ( isSeparatorChar(bidiTrie.haystack[pos + needleLen]) ) {
                $patternMatchLeft = pos;
                return true;
            }
            // Not followed by a separator: resume searching one
            // position further.
            pos += 1;
        }
    }
    // Prepend this unit's contribution to the logged pattern and regex
    // parts. An empty sub-pattern contributes only `^*` to the pattern.
    static logData(idata, details) {
        details.pattern.unshift('^*');
        const needleLen = filterData[idata+2];
        if ( needleLen !== 0 ) {
            const needle = bidiTrie.extractString(
                filterData[idata+1],
                needleLen
            );
            details.pattern.unshift(needle);
            details.regex.unshift(
                restrFromPlainPattern(needle),
                restrSeparator,
                '.*'
            );
        }
    }
};
registerFilterClass(FilterPatternLeftEx);
/******************************************************************************/
// Filter unit for the plain sub-pattern found on the right-hand side of a
// wildcard.
//
// Layout in filterData:
//   idata+0: fid
//   idata+1: i, value returned by bidiTrie.storeString() for the sub-pattern
//   idata+2: n, length of the sub-pattern
const FilterPatternRight = class {
    // Search for the sub-pattern using bidiTrie.lastIndexOf() with
    // $patternMatchRight and bidiTrie.haystackLen as the first two
    // arguments. On success, $patternMatchRight is set to the position
    // right past the end of the match.
    static match(idata) {
        const needleLen = filterData[idata+2];
        const found = bidiTrie.lastIndexOf(
            $patternMatchRight,
            bidiTrie.haystackLen,
            filterData[idata+1],
            needleLen
        );
        if ( found !== -1 ) {
            $patternMatchRight = found + needleLen;
            return true;
        }
        return false;
    }
    // `ex` truthy selects the FilterPatternRightEx flavor.
    static compile(details, ex) {
        const fid = ex ? FilterPatternRightEx.fid : FilterPatternRight.fid;
        return [ fid, details.pattern ];
    }
    // Store the compiled representation into filterData, return its index.
    static fromCompiled(args) {
        const idata = filterDataAllocLen(3);
        const str = args[1];
        filterData[idata+0] = args[0];                      // fid
        filterData[idata+1] = bidiTrie.storeString(str);    // i
        filterData[idata+2] = str.length;                   // n
        return idata;
    }
    // Append this unit's contribution to the logged pattern and regex parts.
    static logData(idata, details) {
        const needle = bidiTrie.extractString(
            filterData[idata+1],
            filterData[idata+2]
        );
        details.pattern.push('*', needle);
        details.regex.push('.*', restrFromPlainPattern(needle));
    }
};
registerFilterClass(FilterPatternRight);
// Flavor of FilterPatternRight used when the right-hand sub-pattern is
// preceded by `^*` rather than just `*`: the character found at the position
// held by $patternMatchRight must be a separator character.
const FilterPatternRightEx = class extends FilterPatternRight {
    // The sub-pattern is searched starting one position past
    // $patternMatchRight, leaving room for the separator character. On
    // success, $patternMatchRight is set to the position right past the end
    // of the match.
    static match(idata) {
        const needleLen = filterData[idata+2];
        const separatorPos = $patternMatchRight;
        const found = bidiTrie.lastIndexOf(
            separatorPos + 1,
            bidiTrie.haystackLen,
            filterData[idata+1],
            needleLen
        );
        if ( found === -1 ) { return false; }
        const separatorOk = isSeparatorChar(bidiTrie.haystack[separatorPos]);
        if ( separatorOk === false ) { return false; }
        $patternMatchRight = found + needleLen;
        return true;
    }
    // Append this unit's contribution to the logged pattern and regex parts.
    static logData(idata, details) {
        const needle = bidiTrie.extractString(
            filterData[idata+1],
            filterData[idata+2]
        );
        details.pattern.push('^*', needle);
        details.regex.push(
            restrSeparator,
            '.*',
            restrFromPlainPattern(needle)
        );
    }
};
registerFilterClass(FilterPatternRightEx);
/******************************************************************************/
const FilterPatternGeneric = class { const FilterPatternGeneric = class {
static match(idata) { static match(idata) {
const refs = filterRefs[filterData[idata+2]]; const refs = filterRefs[filterData[idata+2]];
@ -3395,7 +3164,7 @@ class FilterCompiler {
const leftAnchored = (this.anchor & 0b010) !== 0; const leftAnchored = (this.anchor & 0b010) !== 0;
for ( const entity of entities ) { for ( const entity of entities ) {
const units = []; const units = [];
filterPattern.compile(this, units); this.compilePattern(units);
if ( leftAnchored ) { units.push(FilterAnchorLeft.compile()); } if ( leftAnchored ) { units.push(FilterAnchorLeft.compile()); }
filterOrigin.compile([ entity ], true, units); filterOrigin.compile([ entity ], true, units);
this.compileToAtomicFilter( this.compileToAtomicFilter(
@ -3409,7 +3178,7 @@ class FilterCompiler {
const units = []; const units = [];
// Pattern // Pattern
filterPattern.compile(this, units); this.compilePattern(units);
// Anchor // Anchor
if ( (this.anchor & 0b100) !== 0 ) { if ( (this.anchor & 0b100) !== 0 ) {
@ -3493,6 +3262,36 @@ class FilterCompiler {
} }
} }
compilePattern(units) {
if ( this.isRegex ) {
units.push(FilterRegex.compile(this));
return;
}
if ( this.pattern === '*' ) {
units.push(FilterTrue.compile());
return;
}
if ( this.tokenHash === NO_TOKEN_HASH ) {
units.push(FilterPatternGeneric.compile(this));
return;
}
if ( this.firstWildcardPos === -1 && this.firstCaretPos === -1 ) {
units.push(FilterPatternPlain.compile(this));
return;
}
// Optimize special case: plain pattern with trailing caret
if (
this.firstWildcardPos === -1 &&
this.firstCaretPos === (this.pattern.length - 1)
) {
this.pattern = this.pattern.slice(0, -1);
units.push(FilterPatternPlain.compile(this));
units.push(FilterTrailingSeparator.compile());
return;
}
units.push(FilterPatternGeneric.compile(this));
}
compileToAtomicFilter(fdata, writer) { compileToAtomicFilter(fdata, writer) {
const catBits = this.action | this.party; const catBits = this.action | this.party;
let { typeBits } = this; let { typeBits } = this;
@ -3544,8 +3343,8 @@ FilterCompiler.prototype.FILTER_UNSUPPORTED = 2;
/******************************************************************************/ /******************************************************************************/
const FilterContainer = function() { const FilterContainer = function() {
this.compilerVersion = '2'; this.compilerVersion = '4';
this.selfieVersion = '3'; this.selfieVersion = '4';
this.MAX_TOKEN_LENGTH = MAX_TOKEN_LENGTH; this.MAX_TOKEN_LENGTH = MAX_TOKEN_LENGTH;
this.optimizeTaskId = undefined; this.optimizeTaskId = undefined;
@ -3697,6 +3496,8 @@ FilterContainer.prototype.freeze = function() {
this.goodFilters.clear(); this.goodFilters.clear();
filterArgsToUnit.clear(); filterArgsToUnit.clear();
//this.filterClassHistogram();
// Optimizing is not critical for the static network filtering engine to // Optimizing is not critical for the static network filtering engine to
// work properly, so defer this until later to allow for reduced delay to // work properly, so defer this until later to allow for reduced delay to
// readiness when no valid selfie is available. // readiness when no valid selfie is available.
@ -3716,8 +3517,6 @@ FilterContainer.prototype.optimize = function(throttle = 0) {
this.optimizeTaskId = undefined; this.optimizeTaskId = undefined;
} }
//this.filterClassHistogram();
const later = throttle => { const later = throttle => {
this.optimizeTaskId = queueTask(( ) => { this.optimizeTaskId = queueTask(( ) => {
this.optimizeTaskId = undefined; this.optimizeTaskId = undefined;