Add support for `cname` type and `denyallow` option

This concerns the static network filtering engine.

Related issue:
- https://github.com/uBlockOrigin/uBlock-issues/issues/943

* * *

New static network filter type: `cname`

By default, network requests which are the result of
resolving a canonical name are subject to filtering.
This filtering can be bypassed by creating exception
filters using the `cname` option. For example:

    @@*$cname

The filter above tells the network filtering engine
to except network requests which fulfill all the
following conditions:

- network request is blocked
- network request is that of an unaliased hostname

Filter list authors are discouraged from using
exception filters of `cname` type, unless there is no
other practical solution, such that the maintenance
burden becomes the greater issue. Of course, such
exception filters should be as narrow as possible,
i.e. apply to a specific domain, etc.

* * *

New static network filter option: `denyallow`

The purpose of `denyallow` is to bring a
default-deny/allow-exceptionally ability into the static
network filtering arsenal. Example of usage:

    *$3p,script, \
        denyallow=x.com|y.com \
        domain=a.com|b.com

The above filter tells the network filtering engine that
when the context is `a.com` or `b.com`, block all
3rd-party scripts except those from `x.com` and `y.com`.

Essentially, the new `denyallow` option makes it easier
to implement default-deny/allow-exceptionally in static
filter lists, whereas before this had to be done with
unwieldy regular expressions[1], or through the mix of
broadly blocking filters along with exception filters[2].

[1] https://hg.adblockplus.org/ruadlist/rev/f362910bc9a0

[2] Typically filters whose patterns are of the
    form `|http*://`
This commit is contained in:
Raymond Hill 2020-03-15 12:23:25 -04:00
parent 84d4111c05
commit c3bc2c741d
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
6 changed files with 172 additions and 41 deletions

View File

@ -130,8 +130,8 @@ const µBlock = (( ) => { // jshint ignore:line
// Read-only
systemSettings: {
compiledMagic: 23, // Increase when compiled format changes
selfieMagic: 25, // Increase when selfie format changes
compiledMagic: 26, // Increase when compiled format changes
selfieMagic: 26, // Increase when selfie format changes
},
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501

View File

@ -81,11 +81,9 @@
this.setDocOriginFromURL(details.documentUrl);
} else {
const pageStore = µBlock.pageStoreFromTabId(this.tabId);
const docStore = pageStore && pageStore.frames.get(this.docId);
const docStore = pageStore && pageStore.getFrame(this.docId);
if ( docStore ) {
this.docOrigin = undefined;
this.docHostname = docStore.pageHostname;
this.docDomain = docStore.pageDomain;
this.setDocOriginFromURL(docStore.rawURL);
} else {
this.setDocOrigin(this.tabOrigin);
}

View File

@ -532,8 +532,8 @@ const retrieveContentScriptParameters = function(senderDetails, request) {
// `generichide` must be evaluated in the frame context.
if ( noCosmeticFiltering === false ) {
const genericHide =
µb.staticNetFilteringEngine.matchStringElementHide(
'generic',
µb.staticNetFilteringEngine.matchStringReverse(
'generichide',
request.url
);
response.noGenericCosmeticFiltering = genericHide === 2;
@ -559,8 +559,8 @@ const retrieveContentScriptParameters = function(senderDetails, request) {
// Add support for `specifichide`.
if ( noCosmeticFiltering === false ) {
const specificHide =
µb.staticNetFilteringEngine.matchStringElementHide(
'specific',
µb.staticNetFilteringEngine.matchStringReverse(
'specifichide',
request.url
);
response.noSpecificCosmeticFiltering = specificHide === 2;

View File

@ -151,10 +151,7 @@ NetFilteringResultCache.prototype.shelfLife = 15000;
/******************************************************************************/
// Frame stores are used solely to associate a URL with a frame id. The
// name `pageHostname` is used because of historical reasons. A more
// appropriate name is `frameHostname` -- something to do in a future
// refactoring.
// Frame stores are used solely to associate a URL with a frame id.
// To mitigate memory churning
const frameStoreJunkyard = [];
@ -166,15 +163,19 @@ const FrameStore = class {
}
init(frameURL) {
const µburi = µb.URI;
this.pageHostname = µburi.hostnameFromURI(frameURL);
this.pageDomain =
µburi.domainFromHostname(this.pageHostname) || this.pageHostname;
this.exceptCname = undefined;
this.rawURL = frameURL;
if ( frameURL !== undefined ) {
this.hostname = vAPI.hostnameFromURI(frameURL);
this.domain =
vAPI.domainFromHostname(this.hostname) || this.hostname;
}
return this;
}
dispose() {
this.pageHostname = this.pageDomain = '';
this.exceptCname = undefined;
this.rawURL = this.hostname = this.domain = '';
if ( frameStoreJunkyard.length < frameStoreJunkyardMax ) {
frameStoreJunkyard.push(this);
}
@ -239,7 +240,6 @@ const PageStore = class {
this.rawURL = tabContext.rawURL;
this.hostnameToCountMap = new Map();
this.contentLastModified = 0;
this.frames = new Map();
this.logData = undefined;
this.perLoadBlockedRequestCount = 0;
this.perLoadAllowedRequestCount = 0;
@ -250,6 +250,9 @@ const PageStore = class {
this.internalRedirectionCount = 0;
this.extraData.clear();
this.frames = new Map();
this.setFrame(0, tabContext.rawURL);
// The current filtering context is cloned because:
// - We may be called with or without the current context having been
// initialized.
@ -303,6 +306,7 @@ const PageStore = class {
// As part of https://github.com/chrisaljoudi/uBlock/issues/405
// URL changed, force a re-evaluation of filtering switch
this.rawURL = tabContext.rawURL;
this.setFrame(0, this.rawURL);
return this;
}
@ -541,9 +545,22 @@ const PageStore = class {
// Static filtering has lowest precedence.
if ( result === 0 || result === 3 ) {
result = µb.staticNetFilteringEngine.matchString(fctxt);
if ( result !== 0 && µb.logger.enabled ) {
fctxt.filter = µb.staticNetFilteringEngine.toLogData();
const snfe = µb.staticNetFilteringEngine;
result = snfe.matchString(fctxt);
if ( result !== 0 ) {
if ( µb.logger.enabled ) {
fctxt.filter = snfe.toLogData();
}
// https://github.com/uBlockOrigin/uBlock-issues/issues/943
// Blanket-except blocked aliased canonical hostnames?
if (
result === 1 &&
fctxt.aliasURL !== undefined &&
snfe.isBlockImportant() === false &&
this.shouldExceptCname(fctxt)
) {
return 2;
}
}
}
@ -646,6 +663,40 @@ const PageStore = class {
return 1;
}
shouldExceptCname(fctxt) {
let exceptCname;
let frameStore;
if ( fctxt.docId !== undefined ) {
frameStore = this.getFrame(fctxt.docId);
if ( frameStore instanceof Object ) {
exceptCname = frameStore.exceptCname;
}
}
if ( exceptCname === undefined ) {
const result = µb.staticNetFilteringEngine.matchStringReverse(
'cname',
frameStore instanceof Object
? frameStore.rawURL
: fctxt.getDocOrigin()
);
if ( result === 2 ) {
exceptCname = µb.logger.enabled
? µb.staticNetFilteringEngine.toLogData()
: true;
} else {
exceptCname = false;
}
if ( frameStore instanceof Object ) {
frameStore.exceptCname = exceptCname;
}
}
if ( exceptCname === false ) { return false; }
if ( exceptCname instanceof Object ) {
fctxt.setFilter(exceptCname);
}
return true;
}
getBlockedResources(request, response) {
const normalURL = µb.normalizePageURL(this.tabId, request.frameURL);
const resources = request.resources;

View File

@ -172,13 +172,13 @@ const fromCosmeticFilter = async function(details) {
domain: µBlock.URI.domainFromHostname(hostname),
hostname: hostname,
ignoreGeneric:
µBlock.staticNetFilteringEngine.matchStringElementHide(
'generic',
µBlock.staticNetFilteringEngine.matchStringReverse(
'generichide',
details.url
) === 2,
ignoreSpecific:
µBlock.staticNetFilteringEngine.matchStringElementHide(
'specific',
µBlock.staticNetFilteringEngine.matchStringReverse(
'specifichide',
details.url
) === 2,
rawFilter: details.rawFilter

View File

@ -75,10 +75,11 @@ const typeNameToTypeValue = {
'specifichide': 16 << 4,
'inline-font': 17 << 4,
'inline-script': 18 << 4,
'data': 19 << 4, // special: a generic data holder
'redirect': 20 << 4,
'webrtc': 21 << 4,
'unsupported': 22 << 4,
'cname': 19 << 4,
'data': 20 << 4, // special: a generic data holder
'redirect': 21 << 4,
'webrtc': 22 << 4,
'unsupported': 23 << 4,
};
const otherTypeBitValue = typeNameToTypeValue.other;
@ -119,10 +120,11 @@ const typeValueToTypeName = {
16: 'specifichide',
17: 'inline-font',
18: 'inline-script',
19: 'data',
20: 'redirect',
21: 'webrtc',
22: 'unsupported',
19: 'cname',
20: 'data',
21: 'redirect',
22: 'webrtc',
23: 'unsupported',
};
// https://github.com/gorhill/uBlock/issues/1493
@ -130,6 +132,7 @@ const typeValueToTypeName = {
const toNormalizedType = {
'all': 'all',
'beacon': 'ping',
'cname': 'cname',
'css': 'stylesheet',
'data': 'data',
'doc': 'main_frame',
@ -220,8 +223,16 @@ const toLogDataInternal = function(categoryBits, tokenHash, iunit) {
const pattern = [];
const regex = [];
const options = [];
const denyallow = [];
const domains = [];
const logData = { pattern, regex, domains, options, isRegex: false };
const logData = {
pattern,
regex,
denyallow,
domains,
options,
isRegex: false,
};
filterUnits[iunit].logData(logData);
if ( categoryBits & 0x002 ) {
logData.options.unshift('important');
@ -246,6 +257,9 @@ const toLogDataInternal = function(categoryBits, tokenHash, iunit) {
if ( categoryBits & 0x001 ) {
raw = '@@' + raw;
}
if ( denyallow.length !== 0 ) {
options.push(`denyallow=${denyallow.join('|')}`);
}
if ( domains.length !== 0 ) {
options.push(`domain=${domains.join('|')}`);
}
@ -273,6 +287,10 @@ const isSeparatorChar = c => (charClassMap[c] & CHAR_CLASS_SEPARATOR) !== 0;
/******************************************************************************/
// TODO: Unify [ string instance, string usage instance ] pairs
/******************************************************************************/
let filterUnits = [ null ];
let filterSequences = new Uint32Array(131072);
@ -1697,6 +1715,50 @@ registerFilterClass(FilterHostnameDict);
/******************************************************************************/
// Filter unit implementing the `denyallow=` option.
//
// `s` is the raw `|`-separated hostname list as found in the filter, kept
// for logging and serialization. `hndict` is a hostname trie created from
// `FilterHostnameDict.trieContainer`.
const FilterDenyAllow = class {
    // `trieArgs` is passed as-is to `trieContainer.createOne()`; it is
    // undefined when the unit is built from compiled data.
    constructor(s, trieArgs) {
        this.s = s;
        this.hndict = FilterHostnameDict.trieContainer.createOne(trieArgs);
    }

    // The unit matches when the lookup of the current request hostname in
    // the trie yields -1 (presumably "not found" -- confirm against the
    // trie container implementation).
    match() {
        const lookup = this.hndict.matches($requestHostname);
        return lookup === -1;
    }

    // Report the raw hostname list in the `denyallow` array of `details`.
    logData(details) {
        details.denyallow.push(this.s);
    }

    // Serialized form: [ class id, raw hostname list, compiled trie ].
    toSelfie() {
        const { fid, s, hndict } = this;
        const compiledTrie = FilterHostnameDict.trieContainer.compileOne(hndict);
        return [ fid, s, compiledTrie ];
    }

    // Compiled form: [ class id, raw hostname list ].
    static compile(details) {
        const { denyallow } = details;
        return [ FilterDenyAllow.fid, denyallow ];
    }

    // Build a unit from its compiled form, add every non-empty hostname of
    // the list to the trie, and return the unit's index in `filterUnits`.
    static unitFromCompiled(args) {
        const hostnameList = args[1];
        const unit = new FilterDenyAllow(hostnameList);
        hostnameList
            .split('|')
            .filter(hostname => hostname !== '')
            .forEach(hostname => { unit.hndict.add(hostname); });
        filterUnits.push(unit);
        return filterUnits.length - 1;
    }

    // Rebuild a unit from the array produced by `toSelfie()`.
    static fromSelfie(args) {
        const [ , s, trieArgs ] = args;
        return new FilterDenyAllow(s, trieArgs);
    }
};
registerFilterClass(FilterDenyAllow);
/******************************************************************************/
// Dictionary of hostnames for filters whose only purpose is to match
// the document origin.
@ -2126,6 +2188,7 @@ const FilterParser = class {
this.party = AnyParty;
this.fopts = '';
this.domainOpt = '';
this.denyallow = '';
this.isPureHostname = false;
this.isRegex = false;
this.raw = '';
@ -2184,7 +2247,7 @@ const FilterParser = class {
}
}
parseDomainOption(s) {
parseHostnameList(s) {
if ( this.reHasUnicode.test(s) ) {
const hostnames = s.split('|');
let i = hostnames.length;
@ -2222,13 +2285,21 @@ const FilterParser = class {
// Detect and discard filter if domain option contains nonsensical
// characters.
if ( opt.startsWith('domain=') ) {
this.domainOpt = this.parseDomainOption(opt.slice(7));
this.domainOpt = this.parseHostnameList(opt.slice(7));
if ( this.domainOpt === '' ) {
this.unsupported = true;
break;
}
continue;
}
if ( opt.startsWith('denyallow=') ) {
this.denyallow = this.parseHostnameList(opt.slice(10));
if ( this.denyallow === '' ) {
this.unsupported = true;
break;
}
continue;
}
if ( opt === 'important' ) {
this.important = Important;
continue;
@ -2374,7 +2445,7 @@ const FilterParser = class {
this.unsupported = true;
return this;
}
this.parseOptions(s.slice(pos + 1));
this.parseOptions(s.slice(pos + 1).trim());
if ( this.unsupported ) { return this; }
s = s.slice(0, pos);
}
@ -3015,6 +3086,11 @@ FilterContainer.prototype.compile = function(raw, writer) {
);
}
// Deny-allow
if ( parsed.denyallow !== '' ) {
units.push(FilterDenyAllow.compile(parsed));
}
// Data
if ( parsed.dataType !== undefined ) {
units.push(FilterDataHolder.compile(parsed));
@ -3334,8 +3410,8 @@ FilterContainer.prototype.realmMatchString = function(
// https://www.reddit.com/r/uBlockOrigin/comments/d6vxzj/
// Add support for `specifichide`.
FilterContainer.prototype.matchStringElementHide = function(type, url) {
const typeBits = typeNameToTypeValue[`${type}hide`] | 0x80000000;
FilterContainer.prototype.matchStringReverse = function(type, url) {
const typeBits = typeNameToTypeValue[type] | 0x80000000;
// Prime tokenizer: we get a normalized URL in return.
$requestURL = urlTokenizer.setURL(url);
@ -3425,6 +3501,12 @@ FilterContainer.prototype.toLogData = function() {
/******************************************************************************/
// Returns true when every bit of `BlockImportant` is set in `$catbits`
// (presumably the category bits of the last matched filter -- confirm
// against the matching code).
FilterContainer.prototype.isBlockImportant = function() {
    const maskedBits = this.$catbits & BlockImportant;
    return maskedBits === BlockImportant;
};
/******************************************************************************/
// Returns the number of accepted filters minus the number of discarded ones.
FilterContainer.prototype.getFilterCount = function() {
    const { acceptedCount, discardedCount } = this;
    return acceptedCount - discardedCount;
};