mirror of https://github.com/gorhill/uBlock.git
Add support for entity-matching in `domain=` filter option
Related issue: - https://github.com/uBlockOrigin/uBlock-issues/issues/1008 This commit adds support for entity-matching in the filter option `domain=`. Example: pattern$domain=google.* The `*` above is meant to match any suffix from the Public Suffix List. The semantics are exactly the same as those of the already existing entity-matching support in static extended filtering: - https://github.com/gorhill/uBlock/wiki/Static-filter-syntax#entity Additionally, in this commit: Fix cases where "just-origin" filters of the form `|http*://` were erroneously normalized to `|http://`. The proper normalization of `|http*://` is `*`. Add support for storing hostname strings in the character buffer of a hntrie container. As of commit time, there are 5,544 instances of FilterOriginHit and 732 instances of FilterOriginMiss, filters which require storing/matching a single hostname string. Those strings are now stored in the character buffer of the already existing origin-related hntrie container. (The same approach is used for plain patterns which are not part of a bidi-trie.)
This commit is contained in:
parent
56a3aff857
commit
3c67d2b89f
|
@ -138,8 +138,8 @@ const µBlock = (( ) => { // jshint ignore:line
|
|||
|
||||
// Read-only
|
||||
systemSettings: {
|
||||
compiledMagic: 27, // Increase when compiled format changes
|
||||
selfieMagic: 26, // Increase when selfie format changes
|
||||
compiledMagic: 28, // Increase when compiled format changes
|
||||
selfieMagic: 28, // Increase when selfie format changes
|
||||
},
|
||||
|
||||
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501
|
||||
|
|
|
@ -407,6 +407,49 @@ const HNTrieContainer = class {
|
|||
return true;
|
||||
}
|
||||
|
||||
// The following *Hostname() methods can be used to store hostname strings
|
||||
// outside the trie. This is useful to store/match hostnames which are
|
||||
// not part of a collection, and yet still benefit from storing the strings
|
||||
// into a trie container's character buffer.
|
||||
// TODO: WASM version of matchesHostname()
|
||||
|
||||
// Copy the character codes of `hn` into this container's character buffer,
// outside of any trie, and return the position at which they were written.
// - `hn`: hostname to store; only its last 255 characters are kept when it
//   is longer than that.
// - Returns the start of the stored string relative to buf32[CHAR0_SLOT].
//   The stored length is not returned, the caller has to keep track of it.
//   NOTE(review): for an input longer than 255 characters the stored length
//   is 255, not hn.length -- callers should account for that.
storeHostname(hn) {
|
||||
let n = hn.length;
|
||||
if ( n > 255 ) {
|
||||
hn = hn.slice(-255);
|
||||
n = 255;
|
||||
}
|
||||
// Fewer than `n` bytes left past the current write position: grow first.
// NOTE(review): growBuf() is not visible here; presumably its second
// argument is the number of extra character bytes required -- confirm.
if ( (this.buf.length - this.buf32[CHAR1_SLOT]) < n ) {
|
||||
this.growBuf(0, n);
|
||||
}
|
||||
// buf32[CHAR1_SLOT] is where the next character gets written; move it past
// the string being stored.
const offset = this.buf32[CHAR1_SLOT];
|
||||
this.buf32[CHAR1_SLOT] = offset + n;
|
||||
const buf8 = this.buf;
|
||||
for ( let i = 0; i < n; i++ ) {
|
||||
buf8[offset+i] = hn.charCodeAt(i);
|
||||
}
|
||||
return offset - this.buf32[CHAR0_SLOT];
|
||||
}
|
||||
|
||||
// Read back as a string the `n` bytes found at position `i`, where `i` is
// relative to buf32[CHAR0_SLOT] (the kind of value storeHostname() returns).
// The bytes are decoded with a default TextDecoder, i.e. as UTF-8.
extractHostname(i, n) {
|
||||
const textDecoder = new TextDecoder();
|
||||
const offset = this.buf32[CHAR0_SLOT] + i;
|
||||
return textDecoder.decode(this.buf.subarray(offset, offset + n));
|
||||
}
|
||||
|
||||
// Tell whether the `n`-byte string stored at position `i` (relative to
// buf32[CHAR0_SLOT]) is equal to `hn`, or is the tail of `hn` right after
// a '.'.
// NOTE(review): setNeedle() is not visible here; the code below assumes it
// leaves the characters of `hn` at the start of `buf` and the number of
// characters at buf[255] -- confirm.
matchesHostname(hn, i, n) {
|
||||
this.setNeedle(hn);
|
||||
const buf8 = this.buf;
|
||||
const hr = buf8[255];
|
||||
// The stored string is longer than the needle: cannot match.
if ( n > hr ) { return false; }
|
||||
// `hl`: where the last `n` characters of the needle start.
// `nl`: where the stored string starts.
const hl = hr - n;
|
||||
const nl = this.buf32[CHAR0_SLOT] + i;
|
||||
for ( let j = 0; j < n; j++ ) {
|
||||
if ( buf8[nl+j] !== buf8[hl+j] ) { return false; }
|
||||
}
|
||||
// Same length means an exact match; otherwise the matched tail must start
// at a label boundary.
return n === hr || hn.charCodeAt(hl-1) === 0x2E /* '.' */;
|
||||
}
|
||||
|
||||
async enableWASM() {
|
||||
if ( typeof WebAssembly !== 'object' ) { return false; }
|
||||
if ( this.wasmMemory instanceof WebAssembly.Memory ) { return true; }
|
||||
|
|
|
@ -173,12 +173,26 @@ const typeValueFromCatBits = catBits => (catBits >>> 4) & 0b11111;
|
|||
let $requestURL = '';
|
||||
let $requestHostname = '';
|
||||
let $docHostname = '';
|
||||
let $docDomain = '';
|
||||
let $tokenBeg = 0;
|
||||
let $patternMatchLeft = 0;
|
||||
let $patternMatchRight = 0;
|
||||
|
||||
// EXPERIMENT: $requestTypeBit
|
||||
let $requestTypeBit = 0;
|
||||
// Lazily-computed, cached "entity" of the current document hostname.
// `entity` stays undefined until compute() is called, and reset() puts it
// back to undefined so that the next compute() starts from the then-current
// $docHostname/$docDomain.
const $docEntity = {
|
||||
entity: undefined,
|
||||
compute() {
|
||||
if ( this.entity === undefined ) {
|
||||
const pos = $docDomain.indexOf('.');
|
||||
// Remove from the end of $docHostname as many characters as $docDomain has
// from its first '.' onward, e.g. hostname 'www.example.com' with domain
// 'example.com' gives 'www.example'. The entity is the empty string when
// $docDomain contains no '.'.
// NOTE(review): assumes $docHostname ends with $docDomain -- confirm.
this.entity = pos !== -1
|
||||
? $docHostname.slice(0, pos - $docDomain.length)
|
||||
: '';
|
||||
}
|
||||
return this.entity;
|
||||
},
|
||||
reset() {
|
||||
this.entity = undefined;
|
||||
},
|
||||
};
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
|
@ -1072,39 +1086,6 @@ registerFilterClass(FilterTrailingSeparator);
|
|||
|
||||
/******************************************************************************/
|
||||
|
||||
const FilterType = class {
|
||||
constructor(bits) {
|
||||
this.typeBits = bits;
|
||||
}
|
||||
|
||||
match() {
|
||||
return (this.typeBits & $requestTypeBit) !== 0;
|
||||
}
|
||||
|
||||
logData() {
|
||||
}
|
||||
|
||||
toSelfie() {
|
||||
return [ this.fid, this.typeBits ];
|
||||
}
|
||||
|
||||
static compile(details) {
|
||||
return [ FilterType.fid, details.typeBits & allNetworkTypesBits ];
|
||||
}
|
||||
|
||||
static fromCompiled(args) {
|
||||
return new FilterType(args[1]);
|
||||
}
|
||||
|
||||
static fromSelfie(args) {
|
||||
return new FilterType(args[1]);
|
||||
}
|
||||
};
|
||||
|
||||
registerFilterClass(FilterType);
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
const FilterRegex = class {
|
||||
constructor(s) {
|
||||
this.s = s;
|
||||
|
@ -1162,60 +1143,79 @@ registerFilterClass(FilterRegex);
|
|||
// The optimal "class" is picked according to the content of the
|
||||
// `domain=` filter option.
|
||||
|
||||
const filterOrigin = new (class {
|
||||
const filterOrigin = (( ) => {
|
||||
const FilterOrigin = class {
|
||||
constructor() {
|
||||
this.trieContainer = new µb.HNTrieContainer();
|
||||
}
|
||||
|
||||
compile(details, prepend, units) {
|
||||
const domainOpt = details.domainOpt;
|
||||
let compiledMiss, compiledHit;
|
||||
// One hostname
|
||||
if ( domainOpt.indexOf('|') === -1 ) {
|
||||
// Must be a miss
|
||||
if ( domainOpt.charCodeAt(0) === 0x7E /* '~' */ ) {
|
||||
compiledMiss = FilterOriginMiss.compile(domainOpt);
|
||||
compile(domainOpt, prepend, units) {
|
||||
const hostnameHits = [];
|
||||
const hostnameMisses = [];
|
||||
const entityHits = [];
|
||||
const entityMisses = [];
|
||||
for ( const s of FilterParser.domainOptIterator(domainOpt) ) {
|
||||
const len = s.length;
|
||||
const beg = len > 1 && s.charCodeAt(0) === 0x7E ? 1 : 0;
|
||||
const end = len > 2 &&
|
||||
s.charCodeAt(len - 1) === 0x2A /* '*' */ &&
|
||||
s.charCodeAt(len - 2) === 0x2E /* '.' */
|
||||
? len - 2 : len;
|
||||
if ( end <= beg ) { continue; }
|
||||
if ( end === len ) {
|
||||
if ( beg === 0 ) {
|
||||
hostnameHits.push(s);
|
||||
} else {
|
||||
hostnameMisses.push(s.slice(1));
|
||||
}
|
||||
// Must be a hit
|
||||
else {
|
||||
compiledHit = FilterOriginHit.compile(domainOpt);
|
||||
} else {
|
||||
if ( beg === 0 ) {
|
||||
entityHits.push(s.slice(0, -2));
|
||||
} else {
|
||||
entityMisses.push(s.slice(1, -2));
|
||||
}
|
||||
}
|
||||
// Many hostnames.
|
||||
// Must be in set (none negated).
|
||||
else if ( domainOpt.indexOf('~') === -1 ) {
|
||||
compiledHit = FilterOriginHitSet.compile(domainOpt);
|
||||
}
|
||||
// Must not be in set (all negated).
|
||||
else if ( /^~(?:[^|~]+\|~)+[^|~]+$/.test(domainOpt) ) {
|
||||
compiledMiss = FilterOriginMissSet.compile(domainOpt);
|
||||
const compiledHit = [];
|
||||
if ( entityHits.length !== 0 ) {
|
||||
for ( const entity of entityHits ) {
|
||||
compiledHit.push(FilterOriginEntityHit.compile(entity));
|
||||
}
|
||||
// Must be in one set, but not in the other.
|
||||
else {
|
||||
const hostnames = domainOpt.split('|');
|
||||
const missSet = hostnames.filter(hn => {
|
||||
if ( hn.charCodeAt(0) === 0x7E /* '~' */ ) {
|
||||
return hn;
|
||||
}
|
||||
});
|
||||
const hitSet = hostnames.filter(hn => {
|
||||
if ( hn.charCodeAt(0) !== 0x7E /* '~' */ ) {
|
||||
return hn;
|
||||
if ( hostnameHits.length === 1 ) {
|
||||
compiledHit.push(FilterOriginHit.compile(hostnameHits[0]));
|
||||
} else if ( hostnameHits.length > 1 ) {
|
||||
compiledHit.push(FilterOriginHitSet.compile(hostnameHits.join('|')));
|
||||
}
|
||||
});
|
||||
compiledMiss = missSet.length === 1
|
||||
? FilterOriginMiss.compile(missSet[0])
|
||||
: FilterOriginMissSet.compile(missSet.join('|'));
|
||||
compiledHit = hitSet.length === 1
|
||||
? FilterOriginHit.compile(hitSet[0])
|
||||
: FilterOriginHitSet.compile(hitSet.join('|'));
|
||||
if ( compiledHit.length > 1 ) {
|
||||
compiledHit[0] = [ FilterCompositeAny.compile(compiledHit.slice()) ];
|
||||
compiledHit.length = 1;
|
||||
}
|
||||
const compiledMiss = [];
|
||||
if ( entityMisses.length !== 0 ) {
|
||||
for ( const entity of entityMisses ) {
|
||||
compiledMiss.push(FilterOriginEntityMiss.compile(entity));
|
||||
}
|
||||
}
|
||||
if ( hostnameMisses.length === 1 ) {
|
||||
compiledMiss.push(FilterOriginMiss.compile(hostnameMisses[0]));
|
||||
} else if ( hostnameMisses.length > 1 ) {
|
||||
compiledMiss.push(FilterOriginMissSet.compile(hostnameMisses.join('|')));
|
||||
}
|
||||
if ( prepend ) {
|
||||
if ( compiledHit ) { units.unshift(compiledHit); }
|
||||
if ( compiledMiss ) { units.unshift(compiledMiss); }
|
||||
if ( compiledHit.length !== 0 ) {
|
||||
units.unshift(compiledHit[0]);
|
||||
}
|
||||
if ( compiledMiss.length !== 0 ) {
|
||||
units.unshift(...compiledMiss);
|
||||
}
|
||||
} else {
|
||||
if ( compiledMiss ) { units.push(compiledMiss); }
|
||||
if ( compiledHit ) { units.push(compiledHit); }
|
||||
if ( compiledMiss.length !== 0 ) {
|
||||
units.push(...compiledMiss);
|
||||
}
|
||||
if ( compiledHit.length !== 0 ) {
|
||||
units.push(compiledHit[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1241,45 +1241,51 @@ const filterOrigin = new (class {
|
|||
|
||||
fromSelfie() {
|
||||
}
|
||||
};
|
||||
return new FilterOrigin();
|
||||
})();
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
const FilterOriginHit = class {
|
||||
constructor(hostname) {
|
||||
this.hostname = hostname;
|
||||
constructor(i, n) {
|
||||
this.i = i;
|
||||
this.n = n;
|
||||
}
|
||||
|
||||
match() {
|
||||
const haystack = $docHostname;
|
||||
const needle = this.hostname;
|
||||
const offset = haystack.length - needle.length;
|
||||
if ( offset < 0 ) { return false; }
|
||||
if ( haystack.charCodeAt(offset) !== needle.charCodeAt(0) ) {
|
||||
return false;
|
||||
}
|
||||
if ( haystack.endsWith(needle) === false ) { return false; }
|
||||
return offset === 0 || haystack.charCodeAt(offset-1) === 0x2E /* '.' */;
|
||||
return filterOrigin.trieContainer.matchesHostname(
|
||||
$docHostname,
|
||||
this.i,
|
||||
this.n
|
||||
);
|
||||
}
|
||||
|
||||
toSelfie() {
|
||||
return [ this.fid, this.hostname ];
|
||||
return [ this.fid, this.i, this.n ];
|
||||
}
|
||||
|
||||
logData(details) {
|
||||
details.domains.push(this.hostname);
|
||||
details.domains.push(this.getHostname());
|
||||
}
|
||||
|
||||
static compile(domainOpt) {
|
||||
return [ FilterOriginHit.fid, domainOpt ];
|
||||
getHostname() {
|
||||
return filterOrigin.trieContainer.extractHostname(this.i, this.n);
|
||||
}
|
||||
|
||||
static compile(hostname) {
|
||||
return [ FilterOriginHit.fid, hostname ];
|
||||
}
|
||||
|
||||
static fromCompiled(args) {
|
||||
return new FilterOriginHit(args[1]);
|
||||
return new FilterOriginHit(
|
||||
filterOrigin.trieContainer.storeHostname(args[1]),
|
||||
args[1].length
|
||||
);
|
||||
}
|
||||
|
||||
static fromSelfie(args) {
|
||||
return new FilterOriginHit(args[1]);
|
||||
return new FilterOriginHit(args[1], args[2]);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1287,43 +1293,28 @@ registerFilterClass(FilterOriginHit);
|
|||
|
||||
/******************************************************************************/
|
||||
|
||||
const FilterOriginMiss = class {
|
||||
constructor(hostname) {
|
||||
this.hostname = hostname.slice(1);
|
||||
}
|
||||
|
||||
const FilterOriginMiss = class extends FilterOriginHit {
|
||||
match() {
|
||||
const haystack = $docHostname;
|
||||
if ( haystack.endsWith(this.hostname) ) {
|
||||
const offset = haystack.length - this.hostname.length;
|
||||
if (
|
||||
offset === 0 ||
|
||||
haystack.charCodeAt(offset-1) === 0x2E /* '.' */
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
return super.match() === false;
|
||||
}
|
||||
|
||||
logData(details) {
|
||||
details.domains.push(`~${this.hostname}`);
|
||||
details.domains.push(`~${this.getHostname()}`);
|
||||
}
|
||||
|
||||
toSelfie() {
|
||||
return [ this.fid, `~${this.hostname}` ];
|
||||
}
|
||||
|
||||
static compile(domainOpt) {
|
||||
return [ FilterOriginMiss.fid, domainOpt ];
|
||||
static compile(hostname) {
|
||||
return [ FilterOriginMiss.fid, hostname ];
|
||||
}
|
||||
|
||||
static fromCompiled(args) {
|
||||
return new FilterOriginMiss(args[1]);
|
||||
return new FilterOriginMiss(
|
||||
filterOrigin.trieContainer.storeHostname(args[1]),
|
||||
args[1].length
|
||||
);
|
||||
}
|
||||
|
||||
static fromSelfie(args) {
|
||||
return new FilterOriginMiss(args[1]);
|
||||
return new FilterOriginMiss(args[1], args[2]);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1342,7 +1333,7 @@ const FilterOriginHitSet = class {
|
|||
match() {
|
||||
if ( this.oneOf === null ) {
|
||||
this.oneOf = filterOrigin.trieContainer.fromIterable(
|
||||
this.domainOpt.split('|')
|
||||
FilterParser.domainOptIterator(this.domainOpt)
|
||||
);
|
||||
}
|
||||
return this.oneOf.matches($docHostname) !== -1;
|
||||
|
@ -1383,35 +1374,15 @@ registerFilterClass(FilterOriginHitSet);
|
|||
|
||||
/******************************************************************************/
|
||||
|
||||
const FilterOriginMissSet = class {
|
||||
constructor(domainOpt, noneOf = null) {
|
||||
this.domainOpt = domainOpt;
|
||||
this.noneOf = noneOf !== null
|
||||
? filterOrigin.trieContainer.createOne(noneOf)
|
||||
: null;
|
||||
}
|
||||
|
||||
const FilterOriginMissSet = class extends FilterOriginHitSet {
|
||||
match() {
|
||||
if ( this.noneOf === null ) {
|
||||
this.noneOf = filterOrigin.trieContainer.fromIterable(
|
||||
this.domainOpt.replace(/~/g, '').split('|')
|
||||
);
|
||||
}
|
||||
return this.noneOf.matches($docHostname) === -1;
|
||||
return super.match() === false;
|
||||
}
|
||||
|
||||
logData(details) {
|
||||
details.domains.push(this.domainOpt);
|
||||
}
|
||||
|
||||
toSelfie() {
|
||||
return [
|
||||
this.fid,
|
||||
this.domainOpt,
|
||||
this.noneOf !== null
|
||||
? filterOrigin.trieContainer.compileOne(this.noneOf)
|
||||
: null
|
||||
];
|
||||
details.domains.push(
|
||||
'~' + this.domainOpt.replace('|', '|~')
|
||||
);
|
||||
}
|
||||
|
||||
static compile(domainOpt) {
|
||||
|
@ -1435,6 +1406,74 @@ registerFilterClass(FilterOriginMissSet);
|
|||
|
||||
/******************************************************************************/
|
||||
|
||||
// Filter unit for one non-negated entity entry (`entity.*`) of a `domain=`
// option. It matches when the entity computed by $docEntity is
// `this.entity`, or ends with '.' followed by `this.entity`.
const FilterOriginEntityHit = class {
|
||||
// `entity`: the entry without its trailing '.*', as logData() appends it
// back.
constructor(entity) {
|
||||
this.entity = entity;
|
||||
}
|
||||
|
||||
match() {
|
||||
const entity = $docEntity.compute();
|
||||
// $docEntity yields '' when no entity could be derived: never a match.
if ( entity === '' ) { return false; }
|
||||
const offset = entity.length - this.entity.length;
|
||||
if ( offset < 0 ) { return false; }
|
||||
// Compare a single character first, before the full endsWith() test.
if ( entity.charCodeAt(offset) !== this.entity.charCodeAt(0) ) {
|
||||
return false;
|
||||
}
|
||||
if ( entity.endsWith(this.entity) === false ) { return false; }
|
||||
// Either the whole entity matched, or the match starts right after a '.'.
return offset === 0 || entity.charCodeAt(offset-1) === 0x2E /* '.' */;
|
||||
}
|
||||
|
||||
// NOTE(review): `fid` is not assigned in this class; presumably
// registerFilterClass() provides it -- confirm.
toSelfie() {
|
||||
return [ this.fid, this.entity ];
|
||||
}
|
||||
|
||||
// Add this entry, in its `entity.*` form, to `details.domains`.
logData(details) {
|
||||
details.domains.push(`${this.entity}.*`);
|
||||
}
|
||||
|
||||
// Compiled form: [ class id, entity ].
static compile(entity) {
|
||||
return [ FilterOriginEntityHit.fid, entity ];
|
||||
}
|
||||
|
||||
// `args` is the array produced by compile().
static fromCompiled(args) {
|
||||
return new FilterOriginEntityHit(args[1]);
|
||||
}
|
||||
|
||||
// `args` is the array produced by toSelfie().
static fromSelfie(args) {
|
||||
return new FilterOriginEntityHit(args[1]);
|
||||
}
|
||||
};
|
||||
|
||||
registerFilterClass(FilterOriginEntityHit);
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
// Filter unit for one negated entity entry (`~entity.*`) of a `domain=`
// option: matches exactly when FilterOriginEntityHit would not.
const FilterOriginEntityMiss = class extends FilterOriginEntityHit {
|
||||
match() {
|
||||
return super.match() === false;
|
||||
}
|
||||
|
||||
// Add this entry, in its `~entity.*` form, to `details.domains`.
logData(details) {
|
||||
details.domains.push(`~${this.entity}.*`);
|
||||
}
|
||||
|
||||
// Compiled form: [ class id, entity ] -- the entity is stored without the
// leading '~' and without the trailing '.*', both are added by logData().
static compile(entity) {
|
||||
return [ FilterOriginEntityMiss.fid, entity ];
|
||||
}
|
||||
|
||||
// `args` is the array produced by compile().
static fromCompiled(args) {
|
||||
return new FilterOriginEntityMiss(args[1]);
|
||||
}
|
||||
|
||||
// `args` is the array produced by the inherited toSelfie().
static fromSelfie(args) {
|
||||
return new FilterOriginEntityMiss(args[1]);
|
||||
}
|
||||
};
|
||||
|
||||
registerFilterClass(FilterOriginEntityMiss);
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
const FilterDataHolder = class {
|
||||
constructor(dataType, data) {
|
||||
this.dataType = dataType;
|
||||
|
@ -1549,6 +1588,12 @@ const FilterCollection = class {
|
|||
} while ( i !== 0 );
|
||||
}
|
||||
|
||||
// Let every unit of this collection add its own log details to `details`.
// NOTE(review): forEach() is defined outside this view; presumably it calls
// back with the index into filterUnits of each unit -- confirm.
logData(details) {
|
||||
this.forEach(iunit => {
|
||||
filterUnits[iunit].logData(details);
|
||||
});
|
||||
}
|
||||
|
||||
toSelfie() {
|
||||
return [ this.fid, this.i ];
|
||||
}
|
||||
|
@ -1580,7 +1625,36 @@ const FilterCollection = class {
|
|||
|
||||
/******************************************************************************/
|
||||
|
||||
const FilterComposite = class extends FilterCollection {
|
||||
// Collection of filter units which matches as soon as any one of its units
// matches.
const FilterCompositeAny = class extends FilterCollection {
|
||||
match() {
|
||||
const sequences = filterSequences;
|
||||
const units = filterUnits;
|
||||
// Walk the chain starting at this.i: sequences[i+0] is the index of a unit
// in filterUnits, sequences[i+1] is the position of the next entry, and 0
// ends the chain.
let i = this.i;
|
||||
while ( i !== 0 ) {
|
||||
if ( units[sequences[i+0]].match() ) { return true; }
|
||||
i = sequences[i+1];
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// The three factories below defer to FilterCollection, passing this class
// as first argument.
static compile(fdata) {
|
||||
return FilterCollection.compile(FilterCompositeAny, fdata);
|
||||
}
|
||||
|
||||
static fromCompiled(args) {
|
||||
return FilterCollection.fromCompiled(FilterCompositeAny, args);
|
||||
}
|
||||
|
||||
static fromSelfie(args) {
|
||||
return FilterCollection.fromSelfie(FilterCompositeAny, args);
|
||||
}
|
||||
};
|
||||
|
||||
registerFilterClass(FilterCompositeAny);
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
const FilterCompositeAll = class extends FilterCollection {
|
||||
match() {
|
||||
const sequences = filterSequences;
|
||||
const units = filterUnits;
|
||||
|
@ -1622,26 +1696,20 @@ const FilterComposite = class extends FilterCollection {
|
|||
return details;
|
||||
}
|
||||
|
||||
logData(details) {
|
||||
this.forEach(iunit => {
|
||||
filterUnits[iunit].logData(details);
|
||||
});
|
||||
}
|
||||
|
||||
static compile(fdata) {
|
||||
return FilterCollection.compile(FilterComposite, fdata);
|
||||
return FilterCollection.compile(FilterCompositeAll, fdata);
|
||||
}
|
||||
|
||||
static fromCompiled(args) {
|
||||
return FilterCollection.fromCompiled(FilterComposite, args);
|
||||
return FilterCollection.fromCompiled(FilterCompositeAll, args);
|
||||
}
|
||||
|
||||
static fromSelfie(args) {
|
||||
return FilterCollection.fromSelfie(FilterComposite, args);
|
||||
return FilterCollection.fromSelfie(FilterCompositeAll, args);
|
||||
}
|
||||
};
|
||||
|
||||
registerFilterClass(FilterComposite);
|
||||
registerFilterClass(FilterCompositeAll);
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
|
@ -2001,7 +2069,7 @@ const FilterBucket = class extends FilterCollection {
|
|||
filterUnits[iunit] = null;
|
||||
return;
|
||||
}
|
||||
// FilterComposite is assumed here, i.e. with conditions.
|
||||
// FilterCompositeAll is assumed here, i.e. with conditions.
|
||||
if ( f.n === 1 ) {
|
||||
filterUnits[iunit] = null;
|
||||
iunit = filterSequences[f.i];
|
||||
|
@ -2037,7 +2105,7 @@ const FilterParser = class {
|
|||
this.cantWebsocket = vAPI.cantWebsocket;
|
||||
this.domainOpt = '';
|
||||
this.noTokenHash = urlTokenizer.noTokenHash;
|
||||
this.reBadDomainOptChars = /[*+?^${}()[\]\\]/;
|
||||
this.reBadDomainOptChars = /[+?^${}()[\]\\]/;
|
||||
this.reHostnameRule1 = /^\w[\w.-]*[a-z]$/i;
|
||||
this.reHostnameRule2 = /^\w[\w.-]*[a-z]\^?$/i;
|
||||
this.reCanTrimCarets1 = /^[^*]*$/;
|
||||
|
@ -2651,6 +2719,47 @@ const FilterParser = class {
|
|||
) &&
|
||||
this.domainOpt.indexOf('~') === -1;
|
||||
}
|
||||
|
||||
// Tell whether `s`, one entry of a `domain=` option, is an entity: more
// than two characters long and ending with '.*'.
domainIsEntity(s) {
|
||||
const l = s.length;
|
||||
return l > 2 &&
|
||||
s.charCodeAt(l-1) === 0x2A /* '*' */ &&
|
||||
s.charCodeAt(l-2) === 0x2E /* '.' */;
|
||||
}
|
||||
|
||||
// Return an iterator over the '|'-separated entries of `domainOpt`, usable
// in a for...of loop.
static domainOptIterator(domainOpt) {
|
||||
return new FilterParser.DomainOptIterator(domainOpt);
|
||||
}
|
||||
};
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
// Iterator over the '|'-separated entries of a `domain=` option string.
// The instance is at once the iterable, the iterator and the result object:
// next() updates `value`/`done` on the instance and returns the instance.
// Entries are yielded as is, an empty entry comes out as ''.
FilterParser.DomainOptIterator = class {
|
||||
constructor(domainOpt) {
|
||||
this.domainOpt = domainOpt;
|
||||
// Position of the next entry in `domainOpt`, -1 once the last entry has
// been returned.
this.i = 0;
|
||||
this.value = undefined;
|
||||
this.done = false;
|
||||
}
|
||||
next() {
|
||||
if ( this.i === -1 ) {
|
||||
this.value = undefined;
|
||||
this.done = true;
|
||||
return this;
|
||||
}
|
||||
let pos = this.domainOpt.indexOf('|', this.i);
|
||||
if ( pos !== -1 ) {
|
||||
this.value = this.domainOpt.slice(this.i, pos);
|
||||
this.i = pos + 1;
|
||||
} else {
|
||||
// No separator left: what remains is the last entry.
this.value = this.domainOpt.slice(this.i);
|
||||
this.i = -1;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
[Symbol.iterator]() {
|
||||
return this;
|
||||
}
|
||||
};
|
||||
|
||||
/******************************************************************************/
|
||||
|
@ -3013,37 +3122,50 @@ FilterContainer.prototype.compile = function(raw, writer) {
|
|||
|
||||
parsed.makeToken();
|
||||
|
||||
const units = [];
|
||||
|
||||
// Special pattern/option cases:
|
||||
// - `*$domain=...`
|
||||
// - `|http://$domain=...`
|
||||
// - `|https://$domain=...`
|
||||
// The semantic of "just-origin" filters is that contrary to normal
|
||||
// filters, the original filter is split into as many filters as there
|
||||
// are entries in the `domain=` option.
|
||||
if ( parsed.isJustOrigin() ) {
|
||||
const hostnames = parsed.domainOpt.split('|');
|
||||
if ( parsed.f === '*' ) {
|
||||
const tokenHash = parsed.tokenHash;
|
||||
if ( parsed.f === '*' || parsed.f.startsWith('http*') ) {
|
||||
parsed.tokenHash = this.anyTokenHash;
|
||||
} else if /* 'https:' */ ( parsed.f.startsWith('https') ) {
|
||||
parsed.tokenHash = this.anyHTTPSTokenHash;
|
||||
} else /* 'http:' */ {
|
||||
parsed.tokenHash = this.anyHTTPTokenHash;
|
||||
}
|
||||
for ( const hn of hostnames ) {
|
||||
const entities = [];
|
||||
for ( const hn of FilterParser.domainOptIterator(parsed.domainOpt) ) {
|
||||
if ( parsed.domainIsEntity(hn) === false ) {
|
||||
this.compileToAtomicFilter(parsed, hn, writer);
|
||||
} else {
|
||||
entities.push(hn);
|
||||
}
|
||||
}
|
||||
if ( entities.length === 0 ) { return true; }
|
||||
parsed.tokenHash = tokenHash;
|
||||
const leftAnchored = (parsed.anchor & 0b010) !== 0;
|
||||
for ( const entity of entities ) {
|
||||
const units = [];
|
||||
filterPattern.compile(parsed, units);
|
||||
if ( leftAnchored ) { units.push(FilterAnchorLeft.compile()); }
|
||||
filterOrigin.compile(entity, true, units);
|
||||
this.compileToAtomicFilter(
|
||||
parsed, FilterCompositeAll.compile(units), writer
|
||||
);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
const units = [];
|
||||
|
||||
// Pattern
|
||||
filterPattern.compile(parsed, units);
|
||||
|
||||
// Type
|
||||
// EXPERIMENT: $requestTypeBit
|
||||
//if ( (parsed.typeBits & allNetworkTypesBits) !== 0 ) {
|
||||
// units.unshift(FilterType.compile(parsed));
|
||||
// parsed.typeBits &= ~allNetworkTypesBits;
|
||||
//}
|
||||
|
||||
// Anchor
|
||||
if ( (parsed.anchor & 0b100) !== 0 ) {
|
||||
if ( parsed.isPureHostname ) {
|
||||
|
@ -3061,7 +3183,7 @@ FilterContainer.prototype.compile = function(raw, writer) {
|
|||
// Origin
|
||||
if ( parsed.domainOpt !== '' ) {
|
||||
filterOrigin.compile(
|
||||
parsed,
|
||||
parsed.domainOpt,
|
||||
units.length !== 0 && filterClasses[units[0][0]].isSlow === true,
|
||||
units
|
||||
);
|
||||
|
@ -3079,7 +3201,7 @@ FilterContainer.prototype.compile = function(raw, writer) {
|
|||
|
||||
const fdata = units.length === 1
|
||||
? units[0]
|
||||
: FilterComposite.compile(units);
|
||||
: FilterCompositeAll.compile(units);
|
||||
|
||||
this.compileToAtomicFilter(parsed, fdata, writer);
|
||||
|
||||
|
@ -3211,6 +3333,8 @@ FilterContainer.prototype.realmMatchAndFetchData = function(
|
|||
FilterContainer.prototype.matchAndFetchData = function(fctxt, type) {
|
||||
$requestURL = urlTokenizer.setURL(fctxt.url);
|
||||
$docHostname = fctxt.getDocHostname();
|
||||
$docDomain = fctxt.getDocDomain();
|
||||
$docEntity.reset();
|
||||
$requestHostname = fctxt.getHostname();
|
||||
|
||||
const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty;
|
||||
|
@ -3399,7 +3523,9 @@ FilterContainer.prototype.matchStringReverse = function(type, url) {
|
|||
this.$filterUnit = 0;
|
||||
|
||||
// These registers will be used by various filters
|
||||
$docHostname = $requestHostname = µb.URI.hostnameFromURI(url);
|
||||
$docHostname = $requestHostname = vAPI.hostnameFromNetworkURL(url);
|
||||
$docDomain = vAPI.domainFromHostname($docHostname);
|
||||
$docEntity.reset();
|
||||
|
||||
// Exception filters
|
||||
if ( this.realmMatchString(AllowAction, typeBits, FirstParty) ) {
|
||||
|
@ -3431,8 +3557,6 @@ FilterContainer.prototype.matchString = function(fctxt, modifiers = 0) {
|
|||
modifiers |= 0b0001;
|
||||
}
|
||||
}
|
||||
// EXPERIMENT: $requestTypeBit
|
||||
//$requestTypeBit = 1 << ((typeValue >>> 4) - 1);
|
||||
if ( (modifiers & 0b0001) !== 0 ) {
|
||||
if ( typeValue === undefined ) { return 0; }
|
||||
typeValue |= 0x80000000;
|
||||
|
@ -3446,6 +3570,8 @@ FilterContainer.prototype.matchString = function(fctxt, modifiers = 0) {
|
|||
|
||||
// These registers will be used by various filters
|
||||
$docHostname = fctxt.getDocHostname();
|
||||
$docDomain = fctxt.getDocDomain();
|
||||
$docEntity.reset();
|
||||
$requestHostname = fctxt.getHostname();
|
||||
|
||||
// Important block filters.
|
||||
|
@ -3666,7 +3792,7 @@ FilterContainer.prototype.bucketHistogram = function() {
|
|||
|
||||
"FilterHostnameDict" Content => 60772}
|
||||
"FilterPatternPlain" => 26432}
|
||||
"FilterComposite" => 17125}
|
||||
"FilterCompositeAll" => 17125}
|
||||
"FilterPlainTrie Content" => 13519}
|
||||
"FilterAnchorHnLeft" => 11931}
|
||||
"FilterOriginHit" => 5524}
|
||||
|
@ -3729,7 +3855,7 @@ FilterContainer.prototype.filterClassHistogram = function() {
|
|||
filterClassDetails.get(1001).count += f.size;
|
||||
continue;
|
||||
}
|
||||
if ( f instanceof FilterComposite ) {
|
||||
if ( f instanceof FilterCompositeAll ) {
|
||||
let i = f.i;
|
||||
while ( i !== 0 ) {
|
||||
countFilter(filterUnits[filterSequences[i+0]]);
|
||||
|
|
Loading…
Reference in New Issue