Fine-tune static network filtering engine code

Refactored the heuristics so that sets of origin-related
filter units are collated into a hostname trie, and
for better reuse of existing classes.

Generalized the pre-test idea for buckets of filters, such
that in addition to origin-related filter units, there is
now a class to collate regex-based pattern-related units
into a new pre-test bucket class, FilterBucketIfRegexHits,
in order to test with a single regex test whether there is
a chance of a hit in the underlying bucket of filters.
Instances of these are rare, but at the time of this commit I found
that this occurs with the AdGuard France filter list.

Fine-tuned the "SNFE: Dump" output -- this new ability to
see the internal details of the SNFE has been really key
to finding/fixing issues during refactoring.
This commit is contained in:
Raymond Hill 2021-12-16 09:12:43 -05:00
parent fb0de0cc9c
commit df56fc55d2
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
2 changed files with 350 additions and 259 deletions

View File

@ -174,8 +174,8 @@ const µBlock = { // jshint ignore:line
// Read-only // Read-only
systemSettings: { systemSettings: {
compiledMagic: 43, // Increase when compiled format changes compiledMagic: 44, // Increase when compiled format changes
selfieMagic: 43, // Increase when selfie format changes selfieMagic: 44, // Increase when selfie format changes
}, },
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501 // https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501

View File

@ -194,19 +194,18 @@ let $patternMatchRight = 0;
let $isBlockImportant = false; let $isBlockImportant = false;
const $docEntity = { const $docEntity = {
entity: undefined, entity: '',
last: '',
compute() { compute() {
if ( this.entity === undefined ) { if ( this.last !== $docHostname ) {
this.last = $docHostname;
const pos = $docDomain.indexOf('.'); const pos = $docDomain.indexOf('.');
this.entity = pos !== -1 this.entity = pos !== -1
? $docHostname.slice(0, pos - $docDomain.length) ? `${$docHostname.slice(0, pos - $docDomain.length)}.*`
: ''; : '';
} }
return this.entity; return this.entity;
}, },
reset() {
this.entity = undefined;
},
}; };
const $httpHeaders = { const $httpHeaders = {
@ -561,6 +560,14 @@ const filterGetDomainOpt = (idata, out) => {
out.push(domainOpt); out.push(domainOpt);
}; };
// Return the regex source string backing the filter unit at `idata`.
//
// - idata: index of the filter unit in `filterData`; the value stored at
//   `idata+0` selects the unit's class in `filterClasses`.
// - out:   optional array; when provided, the pattern is appended to it and
//   nothing is returned.
//
// Returns `undefined` when the unit's class exposes no regex pattern. An
// undefined pattern is never appended to `out`, so a collector only ever
// receives usable values.
const filterGetRegexPattern = (idata, out) => {
    const fc = filterClasses[filterData[idata+0]];
    // `hasRegexPattern` advertises the capability, but `getRegexPattern` is
    // the method actually invoked below: require both.
    if ( fc.hasRegexPattern === undefined ) { return; }
    if ( fc.getRegexPattern === undefined ) { return; }
    const reStr = fc.getRegexPattern(idata);
    if ( out === undefined ) { return reStr; }
    // A class may advertise the capability yet have no pattern for this
    // particular unit: do not pollute the collector with `undefined`.
    if ( reStr === undefined ) { return; }
    out.push(reStr);
};
const filterIsBidiTrieable = idata => { const filterIsBidiTrieable = idata => {
const fc = filterClasses[filterData[idata+0]]; const fc = filterClasses[filterData[idata+0]];
if ( fc.isBidiTrieable === undefined ) { return false; } if ( fc.isBidiTrieable === undefined ) { return false; }
@ -591,6 +598,13 @@ const filterLogData = (idata, details) => {
fc.logData(idata, details); fc.logData(idata, details);
}; };
// Ask the class of the filter unit at `idata` for its dump details.
// Yields `undefined` when that class does not implement `dumpInfo`.
const filterDumpInfo = idata => {
    const unitClass = filterGetClass(idata);
    return unitClass.dumpInfo !== undefined
        ? unitClass.dumpInfo(idata)
        : undefined;
};
/******************************************************************************/ /******************************************************************************/
const FilterPatternAny = class { const FilterPatternAny = class {
@ -772,18 +786,24 @@ registerFilterClass(FilterPatternPlainX);
/******************************************************************************/ /******************************************************************************/
const FilterPatternGeneric = class { const FilterPatternGeneric = class {
static hasRegexPattern() {
return true;
}
static getRegexPattern(idata) {
return restrFromGenericPattern(
bidiTrie.extractString(
filterData[idata+1],
filterData[idata+2]
),
filterData[idata+3]
);
}
static match(idata) { static match(idata) {
const refs = filterRefs[filterData[idata+4]]; const refs = filterRefs[filterData[idata+4]];
if ( refs.$re === null ) { if ( refs.$re === null ) {
refs.$re = new RegExp( refs.$re = new RegExp(this.getRegexPattern(idata));
restrFromGenericPattern(
bidiTrie.extractString(
filterData[idata+1],
filterData[idata+2]
),
filterData[idata+3]
)
);
} }
return refs.$re.test($requestURL); return refs.$re.test($requestURL);
} }
@ -1018,14 +1038,22 @@ registerFilterClass(FilterTrailingSeparator);
/******************************************************************************/ /******************************************************************************/
const FilterRegex = class { const FilterRegex = class {
static hasRegexPattern() {
return true;
}
static getRegexPattern(idata) {
return bidiTrie.extractString(
filterData[idata+1],
filterData[idata+2]
);
}
static match(idata) { static match(idata) {
const refs = filterRefs[filterData[idata+4]]; const refs = filterRefs[filterData[idata+4]];
if ( refs.$re === null ) { if ( refs.$re === null ) {
refs.$re = new RegExp( refs.$re = new RegExp(
bidiTrie.extractString( this.getRegexPattern(idata),
filterData[idata+1],
filterData[idata+2]
),
filterData[idata+3] === 0 ? '' : 'i' filterData[idata+3] === 0 ? '' : 'i'
); );
} }
@ -1178,82 +1206,96 @@ const domainOptNormalizer = domainOpt => {
/******************************************************************************/ /******************************************************************************/
// The optimal "class" is picked according to the content of the // The optimal class is picked according to the content of the `domain=`
// `domain=` filter option. // filter option.
function compileDomainOpt(iterable, prepend, units) {
const FilterOrigin = class { const hostnameHits = [];
compile(domainOptList, prepend, units) { const hostnameMisses = [];
const hostnameHits = []; const entityHits = [];
const hostnameMisses = []; const entityMisses = [];
const entityHits = []; for ( const s of iterable ) {
const entityMisses = []; const len = s.length;
for ( const s of domainOptList ) { const beg = len > 1 && s.charCodeAt(0) === 0x7E ? 1 : 0;
const len = s.length; if ( len <= beg ) { continue; }
const beg = len > 1 && s.charCodeAt(0) === 0x7E ? 1 : 0; if ( s.endsWith('.*') === false ) {
const end = len > 2 && if ( beg === 0 ) {
s.charCodeAt(len - 1) === 0x2A /* '*' */ && hostnameHits.push(s);
s.charCodeAt(len - 2) === 0x2E /* '.' */
? len - 2 : len;
if ( end <= beg ) { continue; }
if ( end === len ) {
if ( beg === 0 ) {
hostnameHits.push(s);
} else {
hostnameMisses.push(s.slice(1));
}
} else { } else {
if ( beg === 0 ) { hostnameMisses.push(s.slice(1));
entityHits.push(s.slice(0, -2));
} else {
entityMisses.push(s.slice(1, -2));
}
}
}
const compiledHit = [];
if ( entityHits.length !== 0 ) {
for ( const entity of entityHits ) {
compiledHit.push(FilterOriginEntityHit.compile(entity));
}
}
if ( hostnameHits.length === 1 ) {
compiledHit.push(FilterOriginHit.compile(hostnameHits[0]));
} else if ( hostnameHits.length > 1 ) {
compiledHit.push(FilterOriginHitSet.compile(hostnameHits.join('|')));
}
if ( compiledHit.length > 1 ) {
compiledHit[0] = FilterOriginHitAny.compile(compiledHit.slice());
compiledHit.length = 1;
}
const compiledMiss = [];
if ( entityMisses.length !== 0 ) {
for ( const entity of entityMisses ) {
compiledMiss.push(FilterOriginEntityMiss.compile(entity));
}
}
if ( hostnameMisses.length === 1 ) {
compiledMiss.push(FilterOriginMiss.compile(hostnameMisses[0]));
} else if ( hostnameMisses.length > 1 ) {
compiledMiss.push(FilterOriginMissSet.compile(hostnameMisses.join('|')));
}
if ( prepend ) {
if ( compiledHit.length !== 0 ) {
units.unshift(compiledHit[0]);
}
if ( compiledMiss.length !== 0 ) {
units.unshift(...compiledMiss);
} }
} else if ( beg === 0 ) {
entityHits.push(s);
} else { } else {
if ( compiledMiss.length !== 0 ) { entityMisses.push(s.slice(1));
units.push(...compiledMiss);
}
if ( compiledHit.length !== 0 ) {
units.push(compiledHit[0]);
}
} }
} }
}; const toTrie = [];
let trieWhich = 0b00;
const filterOrigin = new FilterOrigin(); if ( hostnameHits.length > 1 ) {
toTrie.push(...hostnameHits);
hostnameHits.length = 0;
trieWhich |= 0b01;
}
if ( entityHits.length > 1 ) {
toTrie.push(...entityHits);
entityHits.length = 0;
trieWhich |= 0b10;
}
const compiledHit = [];
if ( toTrie.length !== 0 ) {
compiledHit.push(
FilterOriginHitSet.compile(toTrie.sort(), trieWhich)
);
}
for ( const hn of hostnameHits ) {
compiledHit.push(FilterOriginHit.compile(hn));
}
for ( const hn of entityHits ) {
compiledHit.push(FilterOriginEntityHit.compile(hn));
}
if ( compiledHit.length > 1 ) {
compiledHit[0] = FilterOriginHitAny.compile(compiledHit.slice());
compiledHit.length = 1;
}
toTrie.length = trieWhich = 0;
if ( hostnameMisses.length > 1 ) {
toTrie.push(...hostnameMisses);
hostnameMisses.length = 0;
trieWhich |= 0b01;
}
if ( entityMisses.length > 1 ) {
toTrie.push(...entityMisses);
entityMisses.length = 0;
trieWhich |= 0b10;
}
const compiledMiss = [];
if ( toTrie.length !== 0 ) {
compiledMiss.push(
FilterOriginMissSet.compile(toTrie.sort(), trieWhich)
);
}
for ( const hn of hostnameMisses ) {
compiledMiss.push(FilterOriginMiss.compile(hn));
}
for ( const hn of entityMisses ) {
compiledMiss.push(FilterOriginEntityMiss.compile(hn));
}
if ( prepend ) {
if ( compiledHit.length !== 0 ) {
units.unshift(compiledHit[0]);
}
if ( compiledMiss.length !== 0 ) {
units.unshift(...compiledMiss);
}
} else {
if ( compiledMiss.length !== 0 ) {
units.push(...compiledMiss);
}
if ( compiledHit.length !== 0 ) {
units.push(compiledHit[0]);
}
}
}
/******************************************************************************/ /******************************************************************************/
@ -1328,9 +1370,9 @@ const FilterOriginHitSet = class {
// The `domainOpt` value may be in either the allocated refs or the trie, // The `domainOpt` value may be in either the allocated refs or the trie,
// never in both at the same time. // never in both at the same time.
static getDomainOpt(idata) { static getDomainOpt(idata) {
const itrie = filterData[idata+1]; const itrie = filterData[idata+2];
if ( itrie === 0 ) { if ( itrie === 0 ) {
return filterRefs[filterData[idata+3]].domainOpt; return filterRefs[filterData[idata+4]].domainOpt;
} }
return domainOptNormalizer( return domainOptNormalizer(
Array.from(origHNTrieContainer.trieIterator(itrie)).join('|') Array.from(origHNTrieContainer.trieIterator(itrie)).join('|')
@ -1342,63 +1384,66 @@ const FilterOriginHitSet = class {
} }
static match(idata) { static match(idata) {
if ( this.matchSameAsLast(idata) === false ) { const refs = filterRefs[filterData[idata+4]];
filterRefs[filterData[idata+3]].$last = $docHostname; if ( $docHostname === refs.$last ) {
const oneOf = filterData[idata+1] || this.toTrie(idata); return filterData[idata+3] !== -1;
// Warning: The trie must be created at this point
filterData[idata+2] = origHNTrieContainer
.setNeedle($docHostname)
.matches(oneOf);
} }
return filterData[idata+2] !== -1; refs.$last = $docHostname;
const which = filterData[idata+1];
const itrie = filterData[idata+2] || this.toTrie(idata);
let lastResult = -1;
if ( (which & 0b01) !== 0 ) {
lastResult = origHNTrieContainer
.setNeedle($docHostname)
.matches(itrie);
}
if ( lastResult === -1 && (which & 0b10) !== 0 ) {
lastResult = origHNTrieContainer
.setNeedle($docEntity.compute())
.matches(itrie);
}
return (filterData[idata+3] = lastResult) !== -1;
} }
static matchSameAsLast(idata) { static create(domainOpt, which = 0b11) {
return $docHostname === filterRefs[filterData[idata+3]].$last; const idata = filterDataAllocLen(5);
}
static create(domainOpt) {
const idata = filterDataAllocLen(4);
filterData[idata+0] = FilterOriginHitSet.fid; filterData[idata+0] = FilterOriginHitSet.fid;
filterData[idata+1] = 0; // oneOf filterData[idata+1] = which;
filterData[idata+2] = -1; // $lastResult filterData[idata+2] = 0; // itrie
filterData[idata+3] = filterRefAdd({ filterData[idata+3] = -1; // $lastResult
domainOpt, filterData[idata+4] = filterRefAdd({ domainOpt, $last: '' });
$last: '',
});
return idata; return idata;
} }
static compile(domainOpt) { static compile(hostnames, which) {
return [ return [
FilterOriginHitSet.fid, FilterOriginHitSet.fid,
domainOptNormalizer(domainOpt), hostnames.join('|'),
which
]; ];
} }
static fromCompiled(args) { static fromCompiled(args) {
const idata = filterDataAllocLen(4); const idata = filterDataAllocLen(5);
filterData[idata+0] = args[0]; // fid filterData[idata+0] = args[0]; // fid
filterData[idata+1] = 0; // oneOf filterData[idata+1] = args[2]; // which
filterData[idata+2] = -1; // $lastResult filterData[idata+2] = 0; // itrie
filterData[idata+3] = filterRefAdd({ filterData[idata+3] = -1; // $lastResult
domainOpt: args[1], filterData[idata+4] = filterRefAdd({ domainOpt: args[1], $last: '' });
$last: '',
});
return idata; return idata;
} }
static toTrie(idata) { static toTrie(idata) {
const refs = filterRefs[filterData[idata+3]]; const refs = filterRefs[filterData[idata+4]];
const oneOf = filterData[idata+1] = origHNTrieContainer.createTrie( const itrie = filterData[idata+2] = origHNTrieContainer.createTrie(
domainOptIterator.reset(refs.domainOpt) domainOptIterator.reset(refs.domainOpt)
); );
refs.domainOpt = ''; refs.domainOpt = '';
return oneOf; return itrie;
} }
static getTrie(idata) { static getTrie(idata) {
return filterData[idata+1]; return filterData[idata+2];
} }
static keyFromArgs(args) { static keyFromArgs(args) {
@ -1410,7 +1455,7 @@ const FilterOriginHitSet = class {
} }
static dumpInfo(idata) { static dumpInfo(idata) {
return this.getDomainOpt(idata); return `0b${filterData[idata+1].toString(2)} ${this.getDomainOpt(idata)}`;
} }
}; };
@ -1427,10 +1472,11 @@ const FilterOriginMissSet = class extends FilterOriginHitSet {
return super.match(idata) === false; return super.match(idata) === false;
} }
static compile(domainOpt) { static compile(hostnames, which) {
return [ return [
FilterOriginMissSet.fid, FilterOriginMissSet.fid,
domainOptNormalizer(domainOpt), hostnames.join('|'),
which
]; ];
} }
@ -1449,108 +1495,42 @@ registerFilterClass(FilterOriginMissSet);
/******************************************************************************/ /******************************************************************************/
const FilterOriginEntityHit = class { const FilterOriginEntityHit = class extends FilterOriginHit {
static getDomainOpt(idata) {
return `${filterRefs[filterData[idata+1]]}.*`;
}
static hasOriginHit() {
return true;
}
static match(idata) { static match(idata) {
const entity = $docEntity.compute(); return origHNTrieContainer.matchesHostname(
if ( entity === '' ) { return false; } $docEntity.compute(),
const thisEntity = filterRefs[filterData[idata+1]]; filterData[idata+1],
const offset = entity.length - thisEntity.length; filterData[idata+2]
if ( offset < 0 ) { return false; } );
if ( entity.charCodeAt(offset) !== thisEntity.charCodeAt(0) ) {
return false;
}
if ( entity.endsWith(thisEntity) === false ) { return false; }
return offset === 0 || entity.charCodeAt(offset-1) === 0x2E /* '.' */;
} }
static compile(entity) { static compile(entity) {
return [ FilterOriginEntityHit.fid, entity ]; return [ FilterOriginEntityHit.fid, entity ];
} }
static fromCompiled(args) {
const idata = filterDataAllocLen(2);
filterData[idata+0] = args[0]; // fid
filterData[idata+1] = filterRefAdd(args[1]); // entity
return idata;
}
static logData(idata, details) {
details.domains.push(this.getDomainOpt(idata));
}
static dumpInfo(idata) {
return this.getDomainOpt(idata);
}
}; };
registerFilterClass(FilterOriginEntityHit); registerFilterClass(FilterOriginEntityHit);
/******************************************************************************/ /******************************************************************************/
const FilterOriginEntityMiss = class extends FilterOriginEntityHit { const FilterOriginEntityMiss = class extends FilterOriginMiss {
static hasOriginHit() {
return false;
}
static match(idata) { static match(idata) {
return super.match(idata) === false; return origHNTrieContainer.matchesHostname(
$docEntity.compute(),
filterData[idata+1],
filterData[idata+2]
) === false;
} }
static compile(entity) { static compile(entity) {
return [ FilterOriginEntityMiss.fid, entity ]; return [ FilterOriginEntityMiss.fid, entity ];
} }
static logData(idata, details) {
details.domains.push(`~${this.getDomainOpt(idata)}`);
}
}; };
registerFilterClass(FilterOriginEntityMiss); registerFilterClass(FilterOriginEntityMiss);
/******************************************************************************/ /******************************************************************************/
const FilterOriginHitSetTest = class extends FilterOriginHitSet {
static match(idata) {
const ihitset = filterData[idata+1];
if ( this.matchSameAsLast(ihitset) === false ) {
filterData[idata+3] =
super.match(ihitset) ||
filterData[idata+2] !== 0 &&
origHNTrieContainer
.setNeedle(`${$docEntity.compute()}.*`)
.matches(super.getTrie(ihitset)) !== -1
? 1
: 0;
}
return filterData[idata+3] !== 0;
}
static create(domainOpt) {
const idata = filterDataAllocLen(4);
filterData[idata+0] = FilterOriginHitSetTest.fid;
filterData[idata+1] = FilterOriginHitSet.create(domainOpt); // ihitset
filterData[idata+2] = domainOpt.includes('.*') ? 1 : 0; // hasEntity
filterData[idata+3] = 0; // $lastResult
return idata;
}
static dumpInfo(idata) {
return super.dumpInfo(filterData[idata+1]);
}
};
registerFilterClass(FilterOriginHitSetTest);
/******************************************************************************/
const FilterModifier = class { const FilterModifier = class {
static getModifierType(idata) { static getModifierType(idata) {
return filterData[idata+2]; return filterData[idata+2];
@ -1785,9 +1765,23 @@ const FilterCompositeAll = class extends FilterCollection {
static hasOriginHit(idata) { static hasOriginHit(idata) {
return this.forEach(idata, iunit => { return this.forEach(idata, iunit => {
if ( filterHasOriginHit(iunit) === true ) { if ( filterHasOriginHit(iunit) === true ) { return true; }
return true; }) || false;
} }
static hasRegexPattern(idata) {
return this.forEach(idata, iunit => {
const fc = filterGetClass(iunit);
if ( fc.hasRegexPattern === undefined ) { return; }
if ( fc.hasRegexPattern(iunit) === true ) { return true; }
}) || false;
}
static getRegexPattern(idata) {
return this.forEach(idata, iunit => {
const fc = filterGetClass(iunit);
if ( fc.getRegexPattern === undefined ) { return; }
return fc.getRegexPattern(iunit);
}); });
} }
@ -2169,13 +2163,30 @@ const FilterBucket = class extends FilterCollection {
this.unshift(idata, iplaintrie); this.unshift(idata, iplaintrie);
} }
} }
if ( filterData[idata+2] >= 10 && (optimizeBits & 0b10) !== 0 ) { if ( filterData[idata+2] >= 5 && (optimizeBits & 0b10) !== 0 ) {
const ioriginhit = this.optimizeOriginHitTests(idata); const ioptimized = this.optimizeMatch(
if ( ioriginhit !== 0 ) { idata,
FilterBucketIfOriginHits,
5
);
if ( ioptimized !== 0 ) {
const icollection = filterData[idata+1]; const icollection = filterData[idata+1];
const i = filterData[icollection+1]; const i = filterData[icollection+1];
if ( i === 0 ) { return ioriginhit; } if ( i === 0 ) { return ioptimized; }
this.unshift(idata, ioriginhit); this.unshift(idata, ioptimized);
}
}
if ( filterData[idata+2] >= 5 && (optimizeBits & 0b10) !== 0 ) {
const ioptimized = this.optimizeMatch(
idata,
FilterBucketIfRegexHits,
5
);
if ( ioptimized !== 0 ) {
const icollection = filterData[idata+1];
const i = filterData[icollection+1];
if ( i === 0 ) { return ioptimized; }
this.unshift(idata, ioptimized);
} }
} }
return 0; return 0;
@ -2213,25 +2224,24 @@ const FilterBucket = class extends FilterCollection {
return iplaintrie; return iplaintrie;
} }
static optimizeOriginHitTests(idata) { static optimizeMatch(idata, fc, min) {
let candidateCount = 0;
const isrccollection = filterData[idata+1]; const isrccollection = filterData[idata+1];
const shouldPreTest = this.forEach(isrccollection, iunit => { const candidates = [];
if ( filterHasOriginHit(iunit) !== true ) { return; } this.forEach(idata, iunit => {
candidateCount += 1; if ( fc.canCoallesce(iunit) === false ) { return; }
if ( candidateCount >= 10 ) { return true; } candidates.push(iunit);
}); });
if ( shouldPreTest !== true ) { return 0; } if ( candidates.length < min ) { return 0; }
const idesbucket = FilterBucket.create(); const idesbucket = FilterBucket.create();
const idescollection = filterData[idesbucket+1]; const idescollection = filterData[idesbucket+1];
const domainOpts = []; let coallesced;
let isrcseq = filterData[isrccollection+1]; let isrcseq = filterData[isrccollection+1];
let iprev = 0; let iprev = 0;
for (;;) { for (;;) {
const iunit = filterData[isrcseq+0]; const iunit = filterData[isrcseq+0];
const inext = filterData[isrcseq+1]; const inext = filterData[isrcseq+1];
if ( filterHasOriginHit(iunit) === true ) { if ( candidates.includes(iunit) ) {
filterGetDomainOpt(iunit, domainOpts); coallesced = fc.coallesce(iunit, coallesced);
// move the sequence slot to new bucket // move the sequence slot to new bucket
filterData[isrcseq+1] = filterData[idescollection+1]; filterData[isrcseq+1] = filterData[idescollection+1];
filterData[idescollection+1] = isrcseq; filterData[idescollection+1] = isrcseq;
@ -2248,8 +2258,7 @@ const FilterBucket = class extends FilterCollection {
if ( inext === 0 ) { break; } if ( inext === 0 ) { break; }
isrcseq = inext; isrcseq = inext;
} }
const ioriginhitset = FilterOriginHitSetTest.create(domainOpts.join('|')); return fc.create(coallesced, idesbucket);
return FilterBucketOfOriginHits.create(ioriginhitset, idesbucket);
} }
static dumpInfo(idata) { static dumpInfo(idata) {
@ -2261,36 +2270,122 @@ registerFilterClass(FilterBucket);
/******************************************************************************/ /******************************************************************************/
const FilterBucketOfOriginHits = class extends FilterBucket { // Filter bucket objects which have a pre-test method before being treated
// as a plain filter bucket -- the pre-test method should be fast as it is
// used to avoid having to iterate through the content of the filter bucket.
const FilterBucketIf = class extends FilterBucket {
static getCount(idata) { static getCount(idata) {
return super.getCount(filterData[idata+2]); return super.getCount(filterData[idata+1]);
} }
static forEach(idata, fn) { static forEach(idata, fn) {
return super.forEach(filterData[idata+2], fn); return super.forEach(filterData[idata+1], fn);
} }
static match(idata) { static match(idata) {
return filterMatch(filterData[idata+1]) && return this.preTest(idata) && super.match(filterData[idata+1]);
filterMatch(filterData[idata+2]);
} }
static matchAndFetchModifiers(idata, env) { static matchAndFetchModifiers(idata, env) {
if ( filterMatch(filterData[idata+1]) ) { if ( this.preTest(idata) ) {
super.matchAndFetchModifiers(filterData[idata+2], env); super.matchAndFetchModifiers(filterData[idata+1], env);
} }
} }
static create(ioriginhitset, ibucket) { static create(fid, ibucket, itest) {
const idata = filterDataAllocLen(3); const idata = filterDataAllocLen(3);
filterData[idata+0] = FilterBucketOfOriginHits.fid; filterData[idata+0] = fid;
filterData[idata+1] = ioriginhitset; // originHitSet filterData[idata+1] = ibucket;
filterData[idata+2] = ibucket; // collection filterData[idata+2] = itest;
return idata; return idata;
} }
static logData(idata, details) {
filterLogData(filterData[idata+1], details);
}
}; };
registerFilterClass(FilterBucketOfOriginHits); registerFilterClass(FilterBucketIf);
/******************************************************************************/
// Filter bucket guarded by a single origin test: the test unit stored at
// idata+2 is evaluated by preTest(), and FilterBucketIf only looks into the
// bucket when that test passes.
const FilterBucketIfOriginHits = class extends FilterBucketIf {
    // Evaluate the origin test unit stored at idata+2.
    static preTest(idata) {
        return filterMatch(filterData[idata+2]);
    }

    // Whether the unit can be moved under the shared origin pre-test.
    // Always a strict boolean: the optimizer only skips a unit when this
    // returns exactly `false`, so anything else must not leak through.
    static canCoallesce(iunit) {
        return filterHasOriginHit(iunit) === true;
    }

    // Add the hostname(s) of the unit's domain option to the `coallesced`
    // set (created on first call), and return that set.
    static coallesce(iunit, coallesced = new Set()) {
        const domainOpt = filterGetDomainOpt(iunit);
        if ( domainOpt.includes('|') ) {
            for ( const hn of domainOptIterator.reset(domainOpt) ) {
                coallesced.add(hn);
            }
        } else {
            coallesced.add(domainOpt);
        }
        return coallesced;
    }

    // Compile the collected hostnames into one origin test unit and wrap
    // `ibucket` with it.
    static create(coallesced, ibucket) {
        const units = [];
        compileDomainOpt(coallesced, false, units);
        // NOTE(review): with `prepend` false, compileDomainOpt() appends
        // miss units ahead of the hit unit, so taking units[0] relies on
        // `coallesced` holding no negated (`~`) entries -- confirm.
        const ihittest = filterFromCompiled(units[0]);
        const ipretest = super.create(
            FilterBucketIfOriginHits.fid,
            ibucket,
            ihittest
        );
        return ipretest;
    }
};

registerFilterClass(FilterBucketIfOriginHits);
/******************************************************************************/
// Filter bucket guarded by a single regex test: the RegExp referenced at
// idata+2 is the union of the regex patterns of the units in the bucket,
// and FilterBucketIf only looks into the bucket when that regex hits.
const FilterBucketIfRegexHits = class extends FilterBucketIf {
    // Test the combined regex against the raw request URL.
    static preTest(idata) {
        return filterRefs[filterData[idata+2]].test($requestURLRaw);
    }

    // Whether the unit is regex-backed and can be moved under the shared
    // regex pre-test. Always a strict boolean.
    static canCoallesce(iunit) {
        const fc = filterGetClass(iunit);
        return fc.hasRegexPattern !== undefined &&
            fc.hasRegexPattern(iunit) === true;
    }

    // Add the unit's regex pattern to the `coallesced` set (created on
    // first call), and return that set.
    static coallesce(iunit, coallesced = new Set()) {
        coallesced.add(filterGetRegexPattern(iunit));
        return coallesced;
    }

    // Join the collected patterns into one case-insensitive alternation
    // and wrap `ibucket` with it.
    //
    // The pre-test is only an optimization, it must never reject a URL
    // which a filter in the bucket would match. Joining patterns renumbers
    // their capture groups, so a back-reference in one pattern could end up
    // pointing at a group from another pattern; the joined source could
    // also fail to compile. In both cases fall back to an always-matching
    // regex, which makes the bucket behave as if it had no pre-test.
    static create(coallesced, ibucket) {
        const reString = Array.from(coallesced).join('|');
        let re;
        if ( /\\(?:[1-9]|k<)/.test(reString) === false ) {
            try {
                re = new RegExp(reString, 'i');
            } catch(ex) {
                // Deliberate: handled by the always-matching fallback below
            }
        }
        if ( re === undefined ) {
            re = new RegExp('', 'i');
        }
        return super.create(
            FilterBucketIfRegexHits.fid,
            ibucket,
            filterRefAdd(re)
        );
    }

    // Source of the combined pre-test regex.
    static dumpInfo(idata) {
        return filterRefs[filterData[idata+2]].source;
    }
};

registerFilterClass(FilterBucketIfRegexHits);
/******************************************************************************/ /******************************************************************************/
@ -3246,7 +3341,7 @@ class FilterCompiler {
const units = []; const units = [];
this.compilePattern(units); this.compilePattern(units);
if ( leftAnchored ) { units.push(FilterAnchorLeft.compile()); } if ( leftAnchored ) { units.push(FilterAnchorLeft.compile()); }
filterOrigin.compile([ entity ], true, units); compileDomainOpt([ entity ], true, units);
this.compileToAtomicFilter( this.compileToAtomicFilter(
FilterCompositeAll.compile(units), FilterCompositeAll.compile(units),
writer writer
@ -3286,7 +3381,7 @@ class FilterCompiler {
// Origin // Origin
if ( this.domainOpt !== '' ) { if ( this.domainOpt !== '' ) {
filterOrigin.compile( compileDomainOpt(
this.domainOptList, this.domainOptList,
units.length !== 0 && filterClasses[units[0][0]].isSlow === true, units.length !== 0 && filterClasses[units[0][0]].isSlow === true,
units units
@ -3407,7 +3502,7 @@ FilterCompiler.prototype.FILTER_UNSUPPORTED = 2;
/******************************************************************************/ /******************************************************************************/
const FilterContainer = function() { const FilterContainer = function() {
this.compilerVersion = '5'; this.compilerVersion = '6';
this.selfieVersion = '6'; this.selfieVersion = '6';
this.MAX_TOKEN_LENGTH = MAX_TOKEN_LENGTH; this.MAX_TOKEN_LENGTH = MAX_TOKEN_LENGTH;
@ -3566,7 +3661,7 @@ FilterContainer.prototype.freeze = function() {
this.optimizeTaskId = queueTask(( ) => { this.optimizeTaskId = queueTask(( ) => {
this.optimizeTaskId = undefined; this.optimizeTaskId = undefined;
this.optimize(10); this.optimize(30);
}, 2000); }, 2000);
}; };
@ -3638,11 +3733,13 @@ FilterContainer.prototype.optimize = function(throttle = 0) {
default: default:
break; break;
} }
if ( throttle > 0 && (Date.now() - t0) > 48 ) { if ( throttle > 0 && (Date.now() - t0) > 40 ) {
return later(throttle - 1); return later(throttle - 1);
} }
} }
filterArgsToUnit.clear();
// Here we do not optimize origHNTrieContainer because many origin-related // Here we do not optimize origHNTrieContainer because many origin-related
// tries are instantiated on demand. // tries are instantiated on demand.
keyvalStore.setItem( keyvalStore.setItem(
@ -3831,7 +3928,6 @@ FilterContainer.prototype.matchAndFetchModifiers = function(
$requestURLRaw = fctxt.url; $requestURLRaw = fctxt.url;
$docHostname = fctxt.getDocHostname(); $docHostname = fctxt.getDocHostname();
$docDomain = fctxt.getDocDomain(); $docDomain = fctxt.getDocDomain();
$docEntity.reset();
$requestHostname = fctxt.getHostname(); $requestHostname = fctxt.getHostname();
$requestTypeValue = (typeBits & TypeBitsMask) >>> TypeBitsOffset; $requestTypeValue = (typeBits & TypeBitsMask) >>> TypeBitsOffset;
@ -4140,7 +4236,6 @@ FilterContainer.prototype.matchRequestReverse = function(type, url) {
// These registers will be used by various filters // These registers will be used by various filters
$docHostname = $requestHostname = hostnameFromNetworkURL(url); $docHostname = $requestHostname = hostnameFromNetworkURL(url);
$docDomain = domainFromHostname($docHostname); $docDomain = domainFromHostname($docHostname);
$docEntity.reset();
// Exception filters // Exception filters
if ( this.realmMatchString(AllowAction, typeBits, FirstParty) ) { if ( this.realmMatchString(AllowAction, typeBits, FirstParty) ) {
@ -4200,7 +4295,6 @@ FilterContainer.prototype.matchRequest = function(fctxt, modifiers = 0) {
// These registers will be used by various filters // These registers will be used by various filters
$docHostname = fctxt.getDocHostname(); $docHostname = fctxt.getDocHostname();
$docDomain = fctxt.getDocDomain(); $docDomain = fctxt.getDocDomain();
$docEntity.reset();
$requestHostname = fctxt.getHostname(); $requestHostname = fctxt.getHostname();
$requestTypeValue = (typeBits & TypeBitsMask) >>> TypeBitsOffset; $requestTypeValue = (typeBits & TypeBitsMask) >>> TypeBitsOffset;
$isBlockImportant = false; $isBlockImportant = false;
@ -4235,7 +4329,6 @@ FilterContainer.prototype.matchHeaders = function(fctxt, headers) {
// These registers will be used by various filters // These registers will be used by various filters
$docHostname = fctxt.getDocHostname(); $docHostname = fctxt.getDocHostname();
$docDomain = fctxt.getDocDomain(); $docDomain = fctxt.getDocDomain();
$docEntity.reset();
$requestHostname = fctxt.getHostname(); $requestHostname = fctxt.getHostname();
$requestTypeValue = (typeBits & TypeBitsMask) >>> TypeBitsOffset; $requestTypeValue = (typeBits & TypeBitsMask) >>> TypeBitsOffset;
$httpHeaders.init(headers); $httpHeaders.init(headers);
@ -4484,12 +4577,6 @@ FilterContainer.prototype.dump = function() {
[ EMPTY_TOKEN_HASH, 'EMPTY_TOKEN_HASH' ], [ EMPTY_TOKEN_HASH, 'EMPTY_TOKEN_HASH' ],
]); ]);
const dumpInfo = (idata, options) => {
const fc = filterClasses[filterData[idata+0]];
if ( fc.dumpInfo === undefined ) { return; }
return fc.dumpInfo(idata, options);
};
const out = []; const out = [];
const toOutput = (depth, line) => { const toOutput = (depth, line) => {
@ -4500,7 +4587,7 @@ FilterContainer.prototype.dump = function() {
const dumpUnit = (idata, out, depth = 0) => { const dumpUnit = (idata, out, depth = 0) => {
const fc = filterGetClass(idata); const fc = filterGetClass(idata);
fcCounts.set(fc.name, (fcCounts.get(fc.name) || 0) + 1); fcCounts.set(fc.name, (fcCounts.get(fc.name) || 0) + 1);
const info = dumpInfo(idata) || ''; const info = filterDumpInfo(idata) || '';
toOutput(depth, info !== '' ? `${fc.name}: ${info}` : fc.name); toOutput(depth, info !== '' ? `${fc.name}: ${info}` : fc.name);
switch ( fc ) { switch ( fc ) {
case FilterBucket: case FilterBucket:
@ -4511,9 +4598,13 @@ FilterContainer.prototype.dump = function() {
}); });
break; break;
} }
case FilterBucketOfOriginHits: { case FilterBucketIfOriginHits: {
dumpUnit(filterData[idata+1], out, depth+1);
dumpUnit(filterData[idata+2], out, depth+1); dumpUnit(filterData[idata+2], out, depth+1);
dumpUnit(filterData[idata+1], out, depth+1);
break;
}
case FilterBucketIfRegexHits: {
dumpUnit(filterData[idata+1], out, depth+1);
break; break;
} }
default: default:
@ -4554,7 +4645,7 @@ FilterContainer.prototype.dump = function() {
const ths = thConstants.has(th) const ths = thConstants.has(th)
? thConstants.get(th) ? thConstants.get(th)
: `0x${th.toString(16)}`; : `0x${th.toString(16)}`;
toOutput(4, `th: ${ths}`); toOutput(4, `+ th: ${ths}`);
dumpUnit(iunit, out, 5); dumpUnit(iunit, out, 5);
} }
} }