Exclude data type (i.e. `csp=`) from bidi-trie

We need a `matchAll()` method on the bidi-trie before
we can store filters of type `data` in it.

Related issue:
- https://github.com/uBlockOrigin/uBlock-issues/issues/761

Related commit:
- 7971b22385
This commit is contained in:
Raymond Hill 2019-10-22 18:14:49 -04:00
parent 2681209828
commit 30393fdcf1
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
2 changed files with 81 additions and 24 deletions

View File

@ -161,6 +161,8 @@ const toNormalizedType = {
const BlockImportant = BlockAction | Important; const BlockImportant = BlockAction | Important;
const typeValueFromCatBits = catBits => (catBits >>> 4) & 0b11111;
/******************************************************************************/ /******************************************************************************/
// See the following as short-lived registers, used during evaluation. They are // See the following as short-lived registers, used during evaluation. They are
@ -183,12 +185,10 @@ let $requestTypeBit = 0;
const restrSeparator = '(?:[^%.0-9a-z_-]|$)'; const restrSeparator = '(?:[^%.0-9a-z_-]|$)';
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
const reEscape = /[.*+?^${}()|[\]\\]/g;
// Convert a plain string (devoid of special characters) into a regex. // Convert a plain string (devoid of special characters) into a regex.
const restrFromPlainPattern = function(s) { const restrFromPlainPattern = s => s.replace(reEscape, '\\$&');
return s.replace(restrFromPlainPattern.escape, '\\$&');
};
restrFromPlainPattern.escape = /[.*+?^${}()|[\]\\]/g;
const restrFromGenericPattern = function(s, anchor = 0) { const restrFromGenericPattern = function(s, anchor = 0) {
let reStr = s.replace(restrFromGenericPattern.rePlainChars, '\\$&') let reStr = s.replace(restrFromGenericPattern.rePlainChars, '\\$&')
@ -270,9 +270,7 @@ const CHAR_CLASS_SEPARATOR = 0b00000001;
} }
} }
const isSeparatorChar = function(c) { const isSeparatorChar = c => (charClassMap[c] & CHAR_CLASS_SEPARATOR) !== 0;
return (charClassMap[c] & CHAR_CLASS_SEPARATOR) !== 0;
};
/******************************************************************************/ /******************************************************************************/
@ -494,8 +492,8 @@ registerFilterClass(FilterTrue);
const FilterPatternPlain = class { const FilterPatternPlain = class {
constructor(i, n) { constructor(i, n) {
this.i = i; this.i = i | 0;
this.n = n; this.n = n | 0;
} }
match() { match() {
@ -596,8 +594,8 @@ const FilterPatternPlainX = class extends FilterPatternPlain {
const FilterPatternLeft = class { const FilterPatternLeft = class {
constructor(i, n) { constructor(i, n) {
this.i = i; this.i = i | 0;
this.n = n; this.n = n | 0;
} }
match() { match() {
@ -682,8 +680,8 @@ registerFilterClass(FilterPatternLeftEx);
const FilterPatternRight = class { const FilterPatternRight = class {
constructor(i, n) { constructor(i, n) {
this.i = i; this.i = i | 0;
this.n = n; this.n = n | 0;
} }
match() { match() {
@ -1497,8 +1495,8 @@ const FilterDataHolderResult = class {
const FilterCollection = class { const FilterCollection = class {
constructor(i = 0, n = 0) { constructor(i = 0, n = 0) {
this.i = i; this.i = i | 0;
this.n = n; this.n = n | 0;
} }
get size() { get size() {
@ -1796,6 +1794,47 @@ registerFilterClass(FilterHTTPJustOrigin);
/******************************************************************************/ /******************************************************************************/
const FilterPlainTrie = class {
constructor(trie) {
this.plainTrie = trie;
}
match() {
if ( this.plainTrie.matches($tokenBeg) ) {
this.$matchedUnit = this.plainTrie.$iu;
return true;
}
return false;
}
matchAndFetchData(/* type, out */) {
// TODO
}
logData(details) {
const s = $requestURL.slice(this.plainTrie.$l, this.plainTrie.$r);
details.pattern.push(s);
details.regex.push(restrFromPlainPattern(s));
if ( this.$matchedUnit !== -1 ) {
filterUnits[this.$matchedUnit].logData(details);
}
}
toSelfie() {
return [ this.fid, bidiTrie.compileOne(this.plainTrie) ];
}
static fromSelfie(args) {
return new FilterPlainTrie(bidiTrie.createOne(args[1]));
}
};
FilterPlainTrie.prototype.$matchedUnit = 0;
registerFilterClass(FilterPlainTrie);
/******************************************************************************/
const FilterBucket = class extends FilterCollection { const FilterBucket = class extends FilterCollection {
match() { match() {
if ( this.plainTrie !== null ) { if ( this.plainTrie !== null ) {
@ -1847,6 +1886,7 @@ const FilterBucket = class extends FilterCollection {
} }
optimize() { optimize() {
if ( this.n < 3 ) { return; }
const units = filterUnits; const units = filterUnits;
const trieables = new Set(); const trieables = new Set();
let i = this.i; let i = this.i;
@ -1858,7 +1898,7 @@ const FilterBucket = class extends FilterCollection {
i = filterSequences[i+1]; i = filterSequences[i+1];
if ( i === 0 ) { break; } if ( i === 0 ) { break; }
} }
if ( trieables.size <= 2 ) { return; } if ( trieables.size < 3 ) { return; }
if ( this.plainTrie === null ) { if ( this.plainTrie === null ) {
this.plainTrie = bidiTrie.createOne(); this.plainTrie = bidiTrie.createOne();
} }
@ -1880,6 +1920,9 @@ const FilterBucket = class extends FilterCollection {
if ( inext === 0 ) { break; } if ( inext === 0 ) { break; }
i = inext; i = inext;
} }
if ( this.i === 0 ) {
return new FilterPlainTrie(this.plainTrie);
}
} }
_addToTrie(iunit) { _addToTrie(iunit) {
@ -2707,11 +2750,17 @@ FilterContainer.prototype.freeze = function() {
this.badFilters.clear(); this.badFilters.clear();
this.goodFilters.clear(); this.goodFilters.clear();
for ( const bucket of this.categories.values() ) { // Skip 'data' type since bidi-trie does not (yet) support matchAll().
const dataTypeValue = typeValueFromCatBits(typeNameToTypeValue['data']);
for ( const [ catBits, bucket ] of this.categories ) {
if ( typeValueFromCatBits(catBits) === dataTypeValue ) { continue; }
for ( const iunit of bucket.values() ) { for ( const iunit of bucket.values() ) {
const f = units[iunit]; const f = units[iunit];
if ( f instanceof FilterBucket === false ) { continue; } if ( f instanceof FilterBucket === false ) { continue; }
f.optimize(); const g = f.optimize();
if ( g !== undefined ) {
units[iunit] = g;
}
} }
} }
@ -3589,8 +3638,9 @@ FilterContainer.prototype.filterClassHistogram = function() {
for ( const fclass of filterClasses ) { for ( const fclass of filterClasses ) {
filterClassDetails.set(fclass.fid, { name: fclass.name, count: 0, }); filterClassDetails.set(fclass.fid, { name: fclass.name, count: 0, });
} }
// Artificial classes to report content of tries // Artificial classes to report content counts
filterClassDetails.set(1000, { name: 'FilterPlainTrie', count: 0, }); filterClassDetails.set(1000, { name: 'FilterPlainTrie Content', count: 0, });
filterClassDetails.set(1001, { name: 'FilterHostnameDict Content', count: 0, });
const countFilter = function(f) { const countFilter = function(f) {
if ( f instanceof Object === false ) { return; } if ( f instanceof Object === false ) { return; }
@ -3600,17 +3650,21 @@ FilterContainer.prototype.filterClassHistogram = function() {
for ( const f of filterUnits ) { for ( const f of filterUnits ) {
if ( f === null ) { continue; } if ( f === null ) { continue; }
countFilter(f); countFilter(f);
if ( f instanceof FilterBucket ) { if ( f instanceof FilterCollection ) {
let i = f.i; let i = f.i;
while ( i !== 0 ) { while ( i !== 0 ) {
countFilter(filterUnits[filterSequences[i+0]]); countFilter(filterUnits[filterSequences[i+0]]);
i = filterSequences[i+1]; i = filterSequences[i+1];
} }
if ( f.plainTrie !== null ) { if ( f.plainTrie ) {
filterClassDetails.get(1000).count += f.plainTrie.size; filterClassDetails.get(1000).count += f.plainTrie.size;
} }
continue; continue;
} }
if ( f instanceof FilterHostnameDict ) {
filterClassDetails.get(1001).count += f.size;
continue;
}
if ( f instanceof FilterComposite ) { if ( f instanceof FilterComposite ) {
let i = f.i; let i = f.i;
while ( i !== 0 ) { while ( i !== 0 ) {
@ -3619,6 +3673,10 @@ FilterContainer.prototype.filterClassHistogram = function() {
} }
continue; continue;
} }
if ( f instanceof FilterPlainTrie ) {
filterClassDetails.get(1000).count += f.plainTrie.size;
continue;
}
} }
const results = Array.from(filterClassDetails.values()).sort((a, b) => { const results = Array.from(filterClassDetails.values()).sort((a, b) => {
return b.count - a.count; return b.count - a.count;

View File

@ -101,9 +101,8 @@
*/ */
const PAGE_SIZE = 65536*2; const PAGE_SIZE = 65536*2;
// i32 / i8
const HAYSTACK_START = 0; const HAYSTACK_START = 0;
const HAYSTACK_SIZE = 2048; const HAYSTACK_SIZE = 2048; // i32 / i8
const HAYSTACK_SIZE_SLOT = HAYSTACK_SIZE >>> 2; // 512 / 2048 const HAYSTACK_SIZE_SLOT = HAYSTACK_SIZE >>> 2; // 512 / 2048
const TRIE0_SLOT = HAYSTACK_SIZE_SLOT + 1; // 512 / 2052 const TRIE0_SLOT = HAYSTACK_SIZE_SLOT + 1; // 512 / 2052
const TRIE1_SLOT = HAYSTACK_SIZE_SLOT + 2; // 513 / 2056 const TRIE1_SLOT = HAYSTACK_SIZE_SLOT + 2; // 513 / 2056