Add support for procedural :not to HTML filtering

Related issue: <https://github.com/gorhill/uBlock/issues/3683>

Additionally, improve compile-time error reporting in the logger
This commit is contained in:
Raymond Hill 2018-12-15 10:46:17 -05:00
parent 01599b9653
commit 261ef8c510
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
6 changed files with 164 additions and 160 deletions

View File

@ -361,25 +361,6 @@ let FilterContainer = function() {
this.reEscapeSequence = /\\([0-9A-Fa-f]+ |.)/g;
this.reSimpleHighGeneric1 = /^[a-z]*\[[^[]+]$/;
this.reHighMedium = /^\[href\^="https?:\/\/([^"]{8})[^"]*"\]$/;
this.reNeedHostname = new RegExp([
'^',
'(?:',
[
'.+?:has',
'.+?:has-text',
'.+?:if',
'.+?:if-not',
'.+?:matches-css(?:-before|-after)?',
'.*?:xpath',
'.+?:style',
'.+?:-abp-contains', // ABP-specific for `:has-text`
'.+?:-abp-has', // ABP-specific for `:if`
'.+?:contains' // Adguard-specific for `:has-text`
].join('|'),
')',
'\\(.+\\)',
'$'
].join(''));
this.selectorCache = new Map();
this.selectorCachePruneDelay = 10 * 60 * 1000; // 10 minutes
@ -584,66 +565,56 @@ FilterContainer.prototype.compileGenericHideSelector = function(
writer
) {
const selector = parsed.suffix;
const type = selector.charCodeAt(0);
let key;
// For some selectors, it is mandatory to have a hostname or entity:
// ##.foo:-abp-contains(...)
// ##.foo:-abp-has(...)
// ##.foo:contains(...)
// ##.foo:has(...)
// ##.foo:has-text(...)
// ##.foo:if(...)
// ##.foo:if-not(...)
// ##.foo:matches-css(...)
// ##.foo:matches-css-after(...)
// ##.foo:matches-css-before(...)
// ##:xpath(...)
// ##.foo:style(...)
if ( this.reNeedHostname.test(selector) ) {
if ( type === 0x23 /* '#' */ ) {
key = this.keyFromSelector(selector);
// Simple selector-based CSS rule: no need to test for whether the
// selector is valid, the regex took care of this. Most generic
// selectors fall into that category.
// - ###ad-bigbox
if ( key === selector ) {
writer.push([ 0, key.slice(1) ]);
return;
}
} else if ( type === 0x2E /* '.' */ ) {
key = this.keyFromSelector(selector);
// Simple selector-based CSS rule: no need to test for whether the
// selector is valid, the regex took care of this. Most generic
// selectors fall into that category.
// - ##.ads-bigbox
if ( key === selector ) {
writer.push([ 2, key.slice(1) ]);
return;
}
}
const compiled = µb.staticExtFilteringEngine.compileSelector(selector);
// Invalid cosmetic filter, possible reasons:
// - Bad syntax
// - Procedural filters (can't be generic): the compiled version of
// a procedural selector is NEVER equal to its raw version.
if ( compiled === undefined || compiled !== selector ) {
const who = writer.properties.get('assetKey') || '?';
µb.logger.writeOne({
error: 'Cosmetic filtering invalid generic filter: ##' + selector
error: `Invalid generic cosmetic filter in ${who} : ##${selector}`
});
return;
}
let type = selector.charCodeAt(0);
if ( type === 0x23 /* '#' */ ) {
const key = this.keyFromSelector(selector);
if ( key === undefined ) { return; }
// Simple selector-based CSS rule: no need to test for whether the
// selector is valid, the regex took care of this. Most generic
// selectors fall into that category.
if ( key === selector ) {
writer.push([ 0 /* lg */, key.slice(1) ]);
// Complex selector-based CSS rule:
// - ###tads + div + .c
// - ##.rscontainer > .ellip
if ( key !== undefined ) {
writer.push([
type === 0x23 /* '#' */ ? 1 : 3,
key.slice(1),
selector ]
);
return;
}
// Complex selector-based CSS rule.
if ( µb.staticExtFilteringEngine.compileSelector(selector) !== undefined ) {
writer.push([ 1 /* lg+ */, key.slice(1), selector ]);
}
return;
}
if ( type === 0x2E /* '.' */ ) {
const key = this.keyFromSelector(selector);
if ( key === undefined ) { return; }
// Simple selector-based CSS rule: no need to test for whether the
// selector is valid, the regex took care of this. Most generic
// selectors fall into that category.
if ( key === selector ) {
writer.push([ 2 /* lg */, key.slice(1) ]);
return;
}
// Complex selector-based CSS rule.
if ( µb.staticExtFilteringEngine.compileSelector(selector) !== undefined ) {
writer.push([ 3 /* lg+ */, key.slice(1), selector ]);
}
return;
}
const compiled = µb.staticExtFilteringEngine.compileSelector(selector);
if ( compiled === undefined ) { return; }
// TODO: Detect and error on procedural cosmetic filters.
// https://github.com/gorhill/uBlock/issues/909
// Anything which contains a plain id/class selector can be classified
@ -651,9 +622,8 @@ FilterContainer.prototype.compileGenericHideSelector = function(
const matches = this.rePlainSelectorEx.exec(selector);
if ( matches !== null ) {
const key = matches[1] || matches[2];
type = key.charCodeAt(0);
writer.push([
type === 0x23 ? 1 : 3 /* lg+ */,
key.charCodeAt(0) === 0x23 /* '#' */ ? 1 : 3,
key.slice(1),
selector
]);
@ -685,7 +655,13 @@ FilterContainer.prototype.compileGenericUnhideSelector = function(
) {
// Procedural cosmetic filters are acceptable as generic exception filters.
let compiled = µb.staticExtFilteringEngine.compileSelector(parsed.suffix);
if ( compiled === undefined ) { return; }
if ( compiled === undefined ) {
const who = writer.properties.get('assetKey') || '?';
µb.logger.writeOne({
error: `Invalid cosmetic filter in ${who} : #@#${parsed.suffix}`
});
return;
}
// https://github.com/chrisaljoudi/uBlock/issues/497
// All generic exception filters are put in the same bucket: they are
@ -708,7 +684,13 @@ FilterContainer.prototype.compileSpecificSelector = function(
}
let compiled = µb.staticExtFilteringEngine.compileSelector(parsed.suffix);
if ( compiled === undefined ) { return; }
if ( compiled === undefined ) {
const who = writer.properties.get('assetKey') || '?';
µb.logger.writeOne({
error: `Invalid cosmetic filter in ${who} : ##${parsed.suffix}`
});
return;
}
let hash = µb.staticExtFilteringEngine.compileHostnameToHash(hostname);

View File

@ -24,16 +24,24 @@
/******************************************************************************/
µBlock.htmlFilteringEngine = (function() {
const api = {};
const µb = µBlock;
const pselectors = new Map();
const duplicates = new Set();
const µb = µBlock,
pselectors = new Map(),
duplicates = new Set();
let filterDB = new µb.staticExtFilteringEngine.HostnameBasedDB(),
acceptedCount = 0,
discardedCount = 0,
docRegister;
const api = {
get acceptedCount() {
return acceptedCount;
},
get discardedCount() {
return discardedCount;
}
};
const PSelectorHasTextTask = function(task) {
let arg0 = task[1], arg1;
if ( Array.isArray(task[1]) ) {
@ -42,8 +50,8 @@
this.needle = new RegExp(arg0, arg1);
};
PSelectorHasTextTask.prototype.exec = function(input) {
let output = [];
for ( let node of input ) {
const output = [];
for ( const node of input ) {
if ( this.needle.test(node.textContent) ) {
output.push(node);
}
@ -61,8 +69,8 @@
}
});
PSelectorIfTask.prototype.exec = function(input) {
let output = [];
for ( let node of input ) {
const output = [];
for ( const node of input ) {
if ( this.pselector.test(node) === this.target ) {
output.push(node);
}
@ -81,10 +89,10 @@
this.xpe = task[1];
};
PSelectorXpathTask.prototype.exec = function(input) {
let output = [],
xpe = docRegister.createExpression(this.xpe, null),
xpr = null;
for ( let node of input ) {
const output = [];
const xpe = docRegister.createExpression(this.xpe, null);
let xpr = null;
for ( const node of input ) {
xpr = xpe.evaluate(
node,
XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
@ -92,7 +100,7 @@
);
let j = xpr.snapshotLength;
while ( j-- ) {
node = xpr.snapshotItem(j);
const node = xpr.snapshotItem(j);
if ( node.nodeType === 1 ) {
output.push(node);
}
@ -108,6 +116,7 @@
[ ':has-text', PSelectorHasTextTask ],
[ ':if', PSelectorIfTask ],
[ ':if-not', PSelectorIfNotTask ],
[ ':not', PSelectorIfNotTask ],
[ ':xpath', PSelectorXpathTask ]
]);
}
@ -115,13 +124,13 @@
this.selector = o.selector;
this.tasks = [];
if ( !o.tasks ) { return; }
for ( let task of o.tasks ) {
let ctor = this.operatorToTaskMap.get(task[0]);
for ( const task of o.tasks ) {
const ctor = this.operatorToTaskMap.get(task[0]);
if ( ctor === undefined ) {
this.invalid = true;
break;
}
let pselector = new ctor(task);
const pselector = new ctor(task);
if ( pselector instanceof PSelectorIfTask && pselector.invalid ) {
this.invalid = true;
break;
@ -132,7 +141,7 @@
PSelector.prototype.operatorToTaskMap = undefined;
PSelector.prototype.invalid = false;
PSelector.prototype.prime = function(input) {
let root = input || docRegister;
const root = input || docRegister;
if ( this.selector !== '' ) {
return root.querySelectorAll(this.selector);
}
@ -141,7 +150,7 @@
PSelector.prototype.exec = function(input) {
if ( this.invalid ) { return []; }
let nodes = this.prime(input);
for ( let task of this.tasks ) {
for ( const task of this.tasks ) {
if ( nodes.length === 0 ) { break; }
nodes = task.exec(nodes);
}
@ -149,10 +158,12 @@
};
PSelector.prototype.test = function(input) {
if ( this.invalid ) { return false; }
let nodes = this.prime(input), AA = [ null ], aa;
for ( let node of nodes ) {
AA[0] = node; aa = AA;
for ( var task of this.tasks ) {
const nodes = this.prime(input);
const AA = [ null ];
for ( const node of nodes ) {
AA[0] = node;
let aa = AA;
for ( const task of this.tasks ) {
aa = task.exec(aa);
if ( aa.length === 0 ) { break; }
}
@ -182,11 +193,11 @@
pselector = new PSelector(JSON.parse(selector));
pselectors.set(selector, pselector);
}
let nodes = pselector.exec(),
i = nodes.length,
const nodes = pselector.exec();
let i = nodes.length,
modified = false;
while ( i-- ) {
let node = nodes[i];
const node = nodes[i];
if ( node.parentNode !== null ) {
node.parentNode.removeChild(node);
modified = true;
@ -199,11 +210,11 @@
};
const applyCSSSelector = function(details, selector) {
let nodes = docRegister.querySelectorAll(selector),
i = nodes.length,
const nodes = docRegister.querySelectorAll(selector);
let i = nodes.length,
modified = false;
while ( i-- ) {
let node = nodes[i];
const node = nodes[i];
if ( node.parentNode !== null ) {
node.parentNode.removeChild(node);
modified = true;
@ -228,16 +239,22 @@
};
api.compile = function(parsed, writer) {
let selector = parsed.suffix.slice(1).trim(),
compiled = µb.staticExtFilteringEngine.compileSelector(selector);
if ( compiled === undefined ) { return; }
const selector = parsed.suffix.slice(1).trim();
const compiled = µb.staticExtFilteringEngine.compileSelector(selector);
if ( compiled === undefined ) {
const who = writer.properties.get('assetKey') || '?';
µb.logger.writeOne({
error: `Invalid HTML filter in ${who} : ##${selector}`
});
return;
}
// 1002 = html filtering
writer.select(1002);
// TODO: Mind negated hostnames, they are currently discarded.
for ( let hn of parsed.hostnames ) {
for ( const hn of parsed.hostnames ) {
if ( hn.charCodeAt(0) === 0x7E /* '~' */ ) { continue; }
let hash = µb.staticExtFilteringEngine.compileHostnameToHash(hn);
if ( parsed.exception ) {
@ -261,13 +278,13 @@
while ( reader.next() ) {
acceptedCount += 1;
let fingerprint = reader.fingerprint();
const fingerprint = reader.fingerprint();
if ( duplicates.has(fingerprint) ) {
discardedCount += 1;
continue;
}
duplicates.add(fingerprint);
let args = reader.args();
const args = reader.args();
filterDB.add(args[1], {
type: args[0],
hostname: args[2],
@ -335,7 +352,7 @@
api.apply = function(doc, details) {
docRegister = doc;
let modified = false;
for ( let entry of details.selectors ) {
for ( const entry of details.selectors ) {
if ( entry.type === 64 ) {
if ( applyCSSSelector(details, entry.selector) ) {
modified = true;
@ -360,19 +377,6 @@
pselectors.clear();
};
Object.defineProperties(api, {
acceptedCount: {
get: function() {
return acceptedCount;
}
},
discardedCount: {
get: function() {
return discardedCount;
}
}
});
return api;
})();

View File

@ -668,10 +668,6 @@
if ( (compiled = compileProceduralSelector(raw)) ) {
return compiled;
}
µb.logger.writeOne({
error: 'Cosmetic filtering invalid filter: ' + raw
});
};
return entryPoint;

View File

@ -2189,8 +2189,9 @@ FilterContainer.prototype.compile = function(raw, writer) {
// Ignore filters with unsupported options
if ( parsed.unsupported ) {
const who = writer.properties.get('assetKey') || '?';
µb.logger.writeOne({
error: 'Network filtering invalid filter: ' + raw
error: `Invalid network filter in ${who}: ${raw}`
});
return false;
}

View File

@ -407,34 +407,39 @@
µBlock.appendUserFilters = function(filters) {
if ( filters.length === 0 ) { return; }
var µb = this;
var onSaved = function() {
var compiledFilters = µb.compileFilters(filters),
snfe = µb.staticNetFilteringEngine,
cfe = µb.cosmeticFilteringEngine,
acceptedCount = snfe.acceptedCount + cfe.acceptedCount,
discardedCount = snfe.discardedCount + cfe.discardedCount;
µb.applyCompiledFilters(compiledFilters, true);
var entry = µb.availableFilterLists[µb.userFiltersPath],
deltaEntryCount = snfe.acceptedCount + cfe.acceptedCount - acceptedCount,
deltaEntryUsedCount = deltaEntryCount - (snfe.discardedCount + cfe.discardedCount - discardedCount);
const onSaved = ( ) => {
const compiledFilters = this.compileFilters(
filters,
{ assetKey: this.userFiltersPath }
);
const snfe = this.staticNetFilteringEngine;
const cfe = this.cosmeticFilteringEngine;
const acceptedCount = snfe.acceptedCount + cfe.acceptedCount;
const discardedCount = snfe.discardedCount + cfe.discardedCount;
this.applyCompiledFilters(compiledFilters, true);
const entry = this.availableFilterLists[this.userFiltersPath];
const deltaEntryCount =
snfe.acceptedCount +
cfe.acceptedCount - acceptedCount;
const deltaEntryUsedCount =
deltaEntryCount -
(snfe.discardedCount + cfe.discardedCount - discardedCount);
entry.entryCount += deltaEntryCount;
entry.entryUsedCount += deltaEntryUsedCount;
vAPI.storage.set({ 'availableFilterLists': µb.availableFilterLists });
µb.staticNetFilteringEngine.freeze();
µb.redirectEngine.freeze();
µb.staticExtFilteringEngine.freeze();
µb.selfieManager.destroy();
vAPI.storage.set({ 'availableFilterLists': this.availableFilterLists });
this.staticNetFilteringEngine.freeze();
this.redirectEngine.freeze();
this.staticExtFilteringEngine.freeze();
this.selfieManager.destroy();
};
var onLoaded = function(details) {
const onLoaded = details => {
if ( details.error ) { return; }
// https://github.com/chrisaljoudi/uBlock/issues/976
// If we reached this point, the filter most likely needs to be
// added: do not try to be too smart, as trying to avoid
// duplicates at this point may lead to more issues.
µb.saveUserFilters(details.content.trim() + '\n\n' + filters.trim(), onSaved);
this.saveUserFilters(details.content.trim() + '\n\n' + filters.trim(), onSaved);
};
this.loadUserFilters(onLoaded);
@ -704,7 +709,10 @@
var onCompiledListLoaded2 = function(details) {
if ( details.content === '' ) {
details.content = µb.compileFilters(rawContent);
details.content = µb.compileFilters(
rawContent,
{ assetKey: assetKey }
);
µb.assets.put(compiledPath, details.content);
}
rawContent = undefined;
@ -786,19 +794,27 @@
/******************************************************************************/
µBlock.compileFilters = function(rawText) {
µBlock.compileFilters = function(rawText, details) {
let writer = new this.CompiledLineIO.Writer();
// Populate the writer with information potentially useful to the
// client compilers.
if ( details ) {
if ( details.assetKey ) {
writer.properties.set('assetKey', details.assetKey);
}
}
// Useful references:
// https://adblockplus.org/en/filter-cheatsheet
// https://adblockplus.org/en/filters
let staticNetFilteringEngine = this.staticNetFilteringEngine,
staticExtFilteringEngine = this.staticExtFilteringEngine,
reIsWhitespaceChar = /\s/,
reMaybeLocalIp = /^[\d:f]/,
reIsLocalhostRedirect = /\s+(?:0\.0\.0\.0|broadcasthost|localhost|local|ip6-\w+)\b/,
reLocalIp = /^(?:0\.0\.0\.0|127\.0\.0\.1|::1|fe80::1%lo0)/,
lineIter = new this.LineIterator(this.processDirectives(rawText));
const staticNetFilteringEngine = this.staticNetFilteringEngine;
const staticExtFilteringEngine = this.staticExtFilteringEngine;
const reIsWhitespaceChar = /\s/;
const reMaybeLocalIp = /^[\d:f]/;
const reIsLocalhostRedirect = /\s+(?:0\.0\.0\.0|broadcasthost|localhost|local|ip6-\w+)\b/;
const reLocalIp = /^(?:0\.0\.0\.0|127\.0\.0\.1|::1|fe80::1%lo0)/;
const lineIter = new this.LineIterator(this.processDirectives(rawText));
while ( lineIter.eot() === false ) {
// rhill 2014-04-18: The trim is important here, as without it there
@ -808,7 +824,7 @@
if ( line.length === 0 ) { continue; }
// Strip comments
let c = line.charAt(0);
const c = line.charAt(0);
if ( c === '!' || c === '[' ) { continue; }
// Parse or skip cosmetic filters
@ -827,7 +843,7 @@
// Don't remove:
// ...#blah blah blah
// because some ABP filters use the `#` character (URL fragment)
let pos = line.indexOf('#');
const pos = line.indexOf('#');
if ( pos !== -1 && reIsWhitespaceChar.test(line.charAt(pos - 1)) ) {
line = line.slice(0, pos).trim();
}
@ -1259,7 +1275,10 @@
);
this.assets.put(
'compiled/' + details.assetKey,
this.compileFilters(details.content)
this.compileFilters(
details.content,
{ assetKey: details.assetKey }
)
);
}
} else {

View File

@ -234,8 +234,9 @@
this.io = µBlock.CompiledLineIO;
this.blockId = undefined;
this.block = undefined;
this.blocks = new Map();
this.stringifier = this.io.serialize;
this.blocks = new Map();
this.properties = new Map();
},
Reader: function(raw, blockId) {
@ -246,6 +247,7 @@
this.line = '';
this.parser = this.io.unserialize;
this.blocks = new Map();
this.properties = new Map();
let reBlockStart = new RegExp(
'^' + this.io.blockStartPrefix + '(\\d+)\\n',
'gm'