From 261ef8c510fd91ead57948d1f7793a7a5e2a25fd Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Sat, 15 Dec 2018 10:46:17 -0500 Subject: [PATCH] Add support for procedural :not to HTML filtering Related issue: Additionally, improve compile-time error reporting in the logger --- src/js/cosmetic-filtering.js | 132 ++++++++++++++------------------- src/js/html-filtering.js | 100 +++++++++++++------------ src/js/static-ext-filtering.js | 4 - src/js/static-net-filtering.js | 3 +- src/js/storage.js | 81 ++++++++++++-------- src/js/utils.js | 4 +- 6 files changed, 164 insertions(+), 160 deletions(-) diff --git a/src/js/cosmetic-filtering.js b/src/js/cosmetic-filtering.js index 3b9d0f184..f2754a5e6 100644 --- a/src/js/cosmetic-filtering.js +++ b/src/js/cosmetic-filtering.js @@ -361,25 +361,6 @@ let FilterContainer = function() { this.reEscapeSequence = /\\([0-9A-Fa-f]+ |.)/g; this.reSimpleHighGeneric1 = /^[a-z]*\[[^[]+]$/; this.reHighMedium = /^\[href\^="https?:\/\/([^"]{8})[^"]*"\]$/; - this.reNeedHostname = new RegExp([ - '^', - '(?:', - [ - '.+?:has', - '.+?:has-text', - '.+?:if', - '.+?:if-not', - '.+?:matches-css(?:-before|-after)?', - '.*?:xpath', - '.+?:style', - '.+?:-abp-contains', // ABP-specific for `:has-text` - '.+?:-abp-has', // ABP-specific for `:if` - '.+?:contains' // Adguard-specific for `:has-text` - ].join('|'), - ')', - '\\(.+\\)', - '$' - ].join('')); this.selectorCache = new Map(); this.selectorCachePruneDelay = 10 * 60 * 1000; // 10 minutes @@ -584,76 +565,65 @@ FilterContainer.prototype.compileGenericHideSelector = function( writer ) { const selector = parsed.suffix; + const type = selector.charCodeAt(0); + let key; - // For some selectors, it is mandatory to have a hostname or entity: - // ##.foo:-abp-contains(...) - // ##.foo:-abp-has(...) - // ##.foo:contains(...) - // ##.foo:has(...) - // ##.foo:has-text(...) - // ##.foo:if(...) - // ##.foo:if-not(...) - // ##.foo:matches-css(...) - // ##.foo:matches-css-after(...) - // ##.foo:matches-css-before(...) - // ##:xpath(...) - // ##.foo:style(...) - if ( this.reNeedHostname.test(selector) ) { + if ( type === 0x23 /* '#' */ ) { + key = this.keyFromSelector(selector); + // Simple selector-based CSS rule: no need to test for whether the + // selector is valid, the regex took care of this. Most generic + // selector falls into that category. + // - ###ad-bigbox + if ( key === selector ) { + writer.push([ 0, key.slice(1) ]); + return; + } + } else if ( type === 0x2E /* '.' */ ) { + key = this.keyFromSelector(selector); + // Simple selector-based CSS rule: no need to test for whether the + // selector is valid, the regex took care of this. Most generic + // selector falls into that category. + // - ##.ads-bigbox + if ( key === selector ) { + writer.push([ 2, key.slice(1) ]); + return; + } + } + + const compiled = µb.staticExtFilteringEngine.compileSelector(selector); + + // Invalid cosmetic filter, possible reasons: + // - Bad syntax + // - Procedural filters (can't be generic): the compiled version of + // a procedural selector is NEVER equal to its raw version. + if ( compiled === undefined || compiled !== selector ) { + const who = writer.properties.get('assetKey') || '?'; µb.logger.writeOne({ - error: 'Cosmetic filtering – invalid generic filter: ##' + selector + error: `Invalid generic cosmetic filter in ${who} : ##${selector}` }); return; } - let type = selector.charCodeAt(0); - - if ( type === 0x23 /* '#' */ ) { - const key = this.keyFromSelector(selector); - if ( key === undefined ) { return; } - // Simple selector-based CSS rule: no need to test for whether the - // selector is valid, the regex took care of this. Most generic - // selector falls into that category. - if ( key === selector ) { - writer.push([ 0 /* lg */, key.slice(1) ]); - return; - } - // Complex selector-based CSS rule. - if ( µb.staticExtFilteringEngine.compileSelector(selector) !== undefined ) { - writer.push([ 1 /* lg+ */, key.slice(1), selector ]); - } + // Complex selector-based CSS rule: + // - ###tads + div + .c + // - ##.rscontainer > .ellip + if ( key !== undefined ) { + writer.push([ + type === 0x23 /* '#' */ ? 1 : 3, + key.slice(1), + selector ] + ); return; } - if ( type === 0x2E /* '.' */ ) { - const key = this.keyFromSelector(selector); - if ( key === undefined ) { return; } - // Simple selector-based CSS rule: no need to test for whether the - // selector is valid, the regex took care of this. Most generic - // selector falls into that category. - if ( key === selector ) { - writer.push([ 2 /* lg */, key.slice(1) ]); - return; - } - // Complex selector-based CSS rule. - if ( µb.staticExtFilteringEngine.compileSelector(selector) !== undefined ) { - writer.push([ 3 /* lg+ */, key.slice(1), selector ]); - } - return; - } - - const compiled = µb.staticExtFilteringEngine.compileSelector(selector); - if ( compiled === undefined ) { return; } - // TODO: Detect and error on procedural cosmetic filters. - // https://github.com/gorhill/uBlock/issues/909 // Anything which contains a plain id/class selector can be classified // as a low generic cosmetic filter. const matches = this.rePlainSelectorEx.exec(selector); if ( matches !== null ) { const key = matches[1] || matches[2]; - type = key.charCodeAt(0); writer.push([ - type === 0x23 ? 1 : 3 /* lg+ */, + key.charCodeAt(0) === 0x23 /* '#' */ ? 1 : 3, key.slice(1), selector ]); @@ -685,7 +655,13 @@ FilterContainer.prototype.compileGenericUnhideSelector = function( ) { // Procedural cosmetic filters are acceptable as generic exception filters. let compiled = µb.staticExtFilteringEngine.compileSelector(parsed.suffix); - if ( compiled === undefined ) { return; } + if ( compiled === undefined ) { + const who = writer.properties.get('assetKey') || '?'; + µb.logger.writeOne({ + error: `Invalid cosmetic filter in ${who} : #@#${parsed.suffix}` + }); + return; + } // https://github.com/chrisaljoudi/uBlock/issues/497 // All generic exception filters are put in the same bucket: they are @@ -708,7 +684,13 @@ FilterContainer.prototype.compileSpecificSelector = function( } let compiled = µb.staticExtFilteringEngine.compileSelector(parsed.suffix); - if ( compiled === undefined ) { return; } + if ( compiled === undefined ) { + const who = writer.properties.get('assetKey') || '?'; + µb.logger.writeOne({ + error: `Invalid cosmetic filter in ${who} : ##${parsed.suffix}` + }); + return; + } let hash = µb.staticExtFilteringEngine.compileHostnameToHash(hostname); diff --git a/src/js/html-filtering.js b/src/js/html-filtering.js index b27e09cd2..0863eec8e 100644 --- a/src/js/html-filtering.js +++ b/src/js/html-filtering.js @@ -24,16 +24,24 @@ /******************************************************************************/ µBlock.htmlFilteringEngine = (function() { - const api = {}; + const µb = µBlock; + const pselectors = new Map(); + const duplicates = new Set(); - const µb = µBlock, - pselectors = new Map(), - duplicates = new Set(); let filterDB = new µb.staticExtFilteringEngine.HostnameBasedDB(), acceptedCount = 0, discardedCount = 0, docRegister; + const api = { + get acceptedCount() { + return acceptedCount; + }, + get discardedCount() { + return discardedCount; + } + }; + const PSelectorHasTextTask = function(task) { let arg0 = task[1], arg1; if ( Array.isArray(task[1]) ) { @@ -42,8 +50,8 @@ this.needle = new RegExp(arg0, arg1); }; PSelectorHasTextTask.prototype.exec = function(input) { - let output = []; - for ( let node of input ) { + const output = []; + for ( const node of input ) { if ( this.needle.test(node.textContent) ) { output.push(node); } @@ -61,8 +69,8 @@ } }); PSelectorIfTask.prototype.exec = function(input) { - let output = []; - for ( let node of input ) { + const output = []; + for ( const node of input ) { if ( this.pselector.test(node) === this.target ) { output.push(node); } @@ -81,10 +89,10 @@ this.xpe = task[1]; }; PSelectorXpathTask.prototype.exec = function(input) { - let output = [], - xpe = docRegister.createExpression(this.xpe, null), - xpr = null; - for ( let node of input ) { + const output = []; + const xpe = docRegister.createExpression(this.xpe, null); + let xpr = null; + for ( const node of input ) { xpr = xpe.evaluate( node, XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, @@ -92,7 +100,7 @@ ); let j = xpr.snapshotLength; while ( j-- ) { - node = xpr.snapshotItem(j); + const node = xpr.snapshotItem(j); if ( node.nodeType === 1 ) { output.push(node); } @@ -108,6 +116,7 @@ [ ':has-text', PSelectorHasTextTask ], [ ':if', PSelectorIfTask ], [ ':if-not', PSelectorIfNotTask ], + [ ':not', PSelectorIfNotTask ], [ ':xpath', PSelectorXpathTask ] ]); } @@ -115,13 +124,13 @@ this.selector = o.selector; this.tasks = []; if ( !o.tasks ) { return; } - for ( let task of o.tasks ) { - let ctor = this.operatorToTaskMap.get(task[0]); + for ( const task of o.tasks ) { + const ctor = this.operatorToTaskMap.get(task[0]); if ( ctor === undefined ) { this.invalid = true; break; } - let pselector = new ctor(task); + const pselector = new ctor(task); if ( pselector instanceof PSelectorIfTask && pselector.invalid ) { this.invalid = true; break; @@ -132,7 +141,7 @@ PSelector.prototype.operatorToTaskMap = undefined; PSelector.prototype.invalid = false; PSelector.prototype.prime = function(input) { - let root = input || docRegister; + const root = input || docRegister; if ( this.selector !== '' ) { return root.querySelectorAll(this.selector); } @@ -141,7 +150,7 @@ PSelector.prototype.exec = function(input) { if ( this.invalid ) { return []; } let nodes = this.prime(input); - for ( let task of this.tasks ) { + for ( const task of this.tasks ) { if ( nodes.length === 0 ) { break; } nodes = task.exec(nodes); } @@ -149,10 +158,12 @@ }; PSelector.prototype.test = function(input) { if ( this.invalid ) { return false; } - let nodes = this.prime(input), AA = [ null ], aa; - for ( let node of nodes ) { - AA[0] = node; aa = AA; - for ( var task of this.tasks ) { + const nodes = this.prime(input); + const AA = [ null ]; + for ( const node of nodes ) { + AA[0] = node; + let aa = AA; + for ( const task of this.tasks ) { aa = task.exec(aa); if ( aa.length === 0 ) { break; } } @@ -182,11 +193,11 @@ pselector = new PSelector(JSON.parse(selector)); pselectors.set(selector, pselector); } - let nodes = pselector.exec(), - i = nodes.length, + const nodes = pselector.exec(); + let i = nodes.length, modified = false; while ( i-- ) { - let node = nodes[i]; + const node = nodes[i]; if ( node.parentNode !== null ) { node.parentNode.removeChild(node); modified = true; @@ -199,11 +210,11 @@ }; const applyCSSSelector = function(details, selector) { - let nodes = docRegister.querySelectorAll(selector), - i = nodes.length, + const nodes = docRegister.querySelectorAll(selector); + let i = nodes.length, modified = false; while ( i-- ) { - let node = nodes[i]; + const node = nodes[i]; if ( node.parentNode !== null ) { node.parentNode.removeChild(node); modified = true; @@ -228,16 +239,22 @@ }; api.compile = function(parsed, writer) { - let selector = parsed.suffix.slice(1).trim(), - compiled = µb.staticExtFilteringEngine.compileSelector(selector); - if ( compiled === undefined ) { return; } + const selector = parsed.suffix.slice(1).trim(); + const compiled = µb.staticExtFilteringEngine.compileSelector(selector); + if ( compiled === undefined ) { + const who = writer.properties.get('assetKey') || '?'; + µb.logger.writeOne({ + error: `Invalid HTML filter in ${who} : ##${selector}` + }); + return; + } // 1002 = html filtering writer.select(1002); // TODO: Mind negated hostnames, they are currently discarded. - for ( let hn of parsed.hostnames ) { + for ( const hn of parsed.hostnames ) { if ( hn.charCodeAt(0) === 0x7E /* '~' */ ) { continue; } let hash = µb.staticExtFilteringEngine.compileHostnameToHash(hn); if ( parsed.exception ) { @@ -261,13 +278,13 @@ while ( reader.next() ) { acceptedCount += 1; - let fingerprint = reader.fingerprint(); + const fingerprint = reader.fingerprint(); if ( duplicates.has(fingerprint) ) { discardedCount += 1; continue; } duplicates.add(fingerprint); - let args = reader.args(); + const args = reader.args(); filterDB.add(args[1], { type: args[0], hostname: args[2], @@ -335,7 +352,7 @@ api.apply = function(doc, details) { docRegister = doc; let modified = false; - for ( let entry of details.selectors ) { + for ( const entry of details.selectors ) { if ( entry.type === 64 ) { if ( applyCSSSelector(details, entry.selector) ) { modified = true; @@ -360,19 +377,6 @@ pselectors.clear(); }; - Object.defineProperties(api, { - acceptedCount: { - get: function() { - return acceptedCount; - } - }, - discardedCount: { - get: function() { - return discardedCount; - } - } - }); - return api; })(); diff --git a/src/js/static-ext-filtering.js b/src/js/static-ext-filtering.js index abfa85c14..161ac7696 100644 --- a/src/js/static-ext-filtering.js +++ b/src/js/static-ext-filtering.js @@ -668,10 +668,6 @@ if ( (compiled = compileProceduralSelector(raw)) ) { return compiled; } - - µb.logger.writeOne({ - error: 'Cosmetic filtering – invalid filter: ' + raw - }); }; return entryPoint; diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index 30b788639..b78239e5a 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -2189,8 +2189,9 @@ FilterContainer.prototype.compile = function(raw, writer) { // Ignore filters with unsupported options if ( parsed.unsupported ) { + const who = writer.properties.get('assetKey') || '?'; µb.logger.writeOne({ - error: 'Network filtering – invalid filter: ' + raw + error: `Invalid network filter in ${who}: ${raw}` }); return false; } diff --git a/src/js/storage.js b/src/js/storage.js index 4faca6795..9bc873428 100644 --- a/src/js/storage.js +++ b/src/js/storage.js @@ -407,34 +407,39 @@ µBlock.appendUserFilters = function(filters) { if ( filters.length === 0 ) { return; } - var µb = this; - - var onSaved = function() { - var compiledFilters = µb.compileFilters(filters), - snfe = µb.staticNetFilteringEngine, - cfe = µb.cosmeticFilteringEngine, - acceptedCount = snfe.acceptedCount + cfe.acceptedCount, - discardedCount = snfe.discardedCount + cfe.discardedCount; - µb.applyCompiledFilters(compiledFilters, true); - var entry = µb.availableFilterLists[µb.userFiltersPath], - deltaEntryCount = snfe.acceptedCount + cfe.acceptedCount - acceptedCount, - deltaEntryUsedCount = deltaEntryCount - (snfe.discardedCount + cfe.discardedCount - discardedCount); + const onSaved = ( ) => { + const compiledFilters = this.compileFilters( + filters, + { assetKey: this.userFiltersPath } + ); + const snfe = this.staticNetFilteringEngine; + const cfe = this.cosmeticFilteringEngine; + const acceptedCount = snfe.acceptedCount + cfe.acceptedCount; + const discardedCount = snfe.discardedCount + cfe.discardedCount; + this.applyCompiledFilters(compiledFilters, true); + const entry = this.availableFilterLists[this.userFiltersPath]; + const deltaEntryCount = + snfe.acceptedCount + + cfe.acceptedCount - acceptedCount; + const deltaEntryUsedCount = + deltaEntryCount - + (snfe.discardedCount + cfe.discardedCount - discardedCount); entry.entryCount += deltaEntryCount; entry.entryUsedCount += deltaEntryUsedCount; - vAPI.storage.set({ 'availableFilterLists': µb.availableFilterLists }); - µb.staticNetFilteringEngine.freeze(); - µb.redirectEngine.freeze(); - µb.staticExtFilteringEngine.freeze(); - µb.selfieManager.destroy(); + vAPI.storage.set({ 'availableFilterLists': this.availableFilterLists }); + this.staticNetFilteringEngine.freeze(); + this.redirectEngine.freeze(); + this.staticExtFilteringEngine.freeze(); + this.selfieManager.destroy(); }; - var onLoaded = function(details) { + const onLoaded = details => { if ( details.error ) { return; } // https://github.com/chrisaljoudi/uBlock/issues/976 // If we reached this point, the filter quite probably needs to be // added for sure: do not try to be too smart, trying to avoid // duplicates at this point may lead to more issues. - µb.saveUserFilters(details.content.trim() + '\n\n' + filters.trim(), onSaved); + this.saveUserFilters(details.content.trim() + '\n\n' + filters.trim(), onSaved); }; this.loadUserFilters(onLoaded); @@ -704,7 +709,10 @@ var onCompiledListLoaded2 = function(details) { if ( details.content === '' ) { - details.content = µb.compileFilters(rawContent); + details.content = µb.compileFilters( + rawContent, + { assetKey: assetKey } + ); µb.assets.put(compiledPath, details.content); } rawContent = undefined; @@ -786,19 +794,27 @@ /******************************************************************************/ -µBlock.compileFilters = function(rawText) { +µBlock.compileFilters = function(rawText, details) { let writer = new this.CompiledLineIO.Writer(); + // Populate the writer with information potentially useful to the + // client compilers. + if ( details ) { + if ( details.assetKey ) { + writer.properties.set('assetKey', details.assetKey); + } + } + // Useful references: // https://adblockplus.org/en/filter-cheatsheet // https://adblockplus.org/en/filters - let staticNetFilteringEngine = this.staticNetFilteringEngine, - staticExtFilteringEngine = this.staticExtFilteringEngine, - reIsWhitespaceChar = /\s/, - reMaybeLocalIp = /^[\d:f]/, - reIsLocalhostRedirect = /\s+(?:0\.0\.0\.0|broadcasthost|localhost|local|ip6-\w+)\b/, - reLocalIp = /^(?:0\.0\.0\.0|127\.0\.0\.1|::1|fe80::1%lo0)/, - lineIter = new this.LineIterator(this.processDirectives(rawText)); + const staticNetFilteringEngine = this.staticNetFilteringEngine; + const staticExtFilteringEngine = this.staticExtFilteringEngine; + const reIsWhitespaceChar = /\s/; + const reMaybeLocalIp = /^[\d:f]/; + const reIsLocalhostRedirect = /\s+(?:0\.0\.0\.0|broadcasthost|localhost|local|ip6-\w+)\b/; + const reLocalIp = /^(?:0\.0\.0\.0|127\.0\.0\.1|::1|fe80::1%lo0)/; + const lineIter = new this.LineIterator(this.processDirectives(rawText)); while ( lineIter.eot() === false ) { // rhill 2014-04-18: The trim is important here, as without it there @@ -808,7 +824,7 @@ if ( line.length === 0 ) { continue; } // Strip comments - let c = line.charAt(0); + const c = line.charAt(0); if ( c === '!' || c === '[' ) { continue; } // Parse or skip cosmetic filters @@ -827,7 +843,7 @@ // Don't remove: // ...#blah blah blah // because some ABP filters uses the `#` character (URL fragment) - let pos = line.indexOf('#'); + const pos = line.indexOf('#'); if ( pos !== -1 && reIsWhitespaceChar.test(line.charAt(pos - 1)) ) { line = line.slice(0, pos).trim(); } @@ -1259,7 +1275,10 @@ ); this.assets.put( 'compiled/' + details.assetKey, - this.compileFilters(details.content) + this.compileFilters( + details.content, + { assetKey: details.assetKey } + ) ); } } else { diff --git a/src/js/utils.js b/src/js/utils.js index e5d166f58..a9193e78b 100644 --- a/src/js/utils.js +++ b/src/js/utils.js @@ -234,8 +234,9 @@ this.io = µBlock.CompiledLineIO; this.blockId = undefined; this.block = undefined; - this.blocks = new Map(); this.stringifier = this.io.serialize; + this.blocks = new Map(); + this.properties = new Map(); }, Reader: function(raw, blockId) { @@ -246,6 +247,7 @@ this.line = ''; this.parser = this.io.unserialize; this.blocks = new Map(); + this.properties = new Map(); let reBlockStart = new RegExp( '^' + this.io.blockStartPrefix + '(\\d+)\\n', 'gm'