From 76d70102f069856bffac7cd27dc40500c3bb9563 Mon Sep 17 00:00:00 2001
From: Raymond Hill
Date: Fri, 2 Dec 2022 15:43:04 -0500
Subject: [PATCH] Add pseudo-operator `:matches-attr()`

Related issue:
- https://github.com/uBlockOrigin/uBlock-issues/issues/2329

The supported syntax is exactly as per AdGuard's documentation:
- https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#extended-css-matches-attr

Though recommended, the quotes are not mandatory in uBO if
the argument does not cause the parser to fail and if there
are no ambiguities.

Additionally, improved the code to better unquote pseudo-operator
arguments, and to bring it closer to how AdGuard does it as
per documentation. When using quotes, `"` and `\` should be
escaped to preserve these characters in the unquoted version
of the argument.

Additionally, it is now possible to have `:has-text()` match
the empty string by just quoting the empty string:

    ...##foo:has-text("")
---
Review amendments (the blob ids on the `index` lines below still
refer to the patch as originally submitted):
- unquoteString(): always report the stop position as `i`; check for
  end of input before reading, so that a lone quote character cannot
  loop forever; push the backslash as a char code, not as a string.
- compileMatchAttrArgument(): reject trailing characters after a
  quoted value, as compileText() and compileXpathExpression() do.
- PSelectorMatchesAttrTask: emit a matching node only once.
- Added comments to the new helpers.

 src/js/contentscript-extra.js     | 40 ++++++++++++++++
 src/js/static-filtering-parser.js | 88 ++++++++++++++++++++++++++-----
 2 files changed, 116 insertions(+), 12 deletions(-)

diff --git a/src/js/contentscript-extra.js b/src/js/contentscript-extra.js
index ac3d204c9..277c8f27a 100644
--- a/src/js/contentscript-extra.js
+++ b/src/js/contentscript-extra.js
@@ -34,6 +34,25 @@ const nonVisualElements = {
     style: true,
 };
 
+// Build a RegExp from a filter argument:
+// - the empty string yields a regex which matches any input;
+// - a non-empty double-quoted argument is unquoted first (`\\`, `\"`);
+// - `/.../` or `/.../i` is used as regex source, with the optional flag;
+// - anything else is matched as literal text, case-insensitively, and
+//   against the whole input when `exact` is true.
+const regexFromString = (s, exact = false) => {
+    if ( s === '' ) { return /^/; }
+    if ( /^".+"$/.test(s) ) {
+        s = s.slice(1,-1).replace(/\\(\\|")/g, '$1');
+    }
+    const match = /^\/(.+)\/([i]?)$/.exec(s);
+    if ( match !== null ) {
+        return new RegExp(match[1], match[2] || undefined);
+    }
+    const reStr = s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+    return new RegExp(exact ? `^${reStr}$` : reStr, 'i');
+};
+
 // 'P' stands for 'Procedural'
 
 class PSelectorTask {
@@ -85,6 +104,26 @@ class PSelectorIfNotTask extends PSelectorIfTask {
 }
 PSelectorIfNotTask.prototype.target = false;
 
+// Task for the `:matches-attr()` operator: a node passes when one of its
+// attributes matches both the name pattern and the value pattern.
+class PSelectorMatchesAttrTask extends PSelectorTask {
+    constructor(task) {
+        super();
+        this.reAttr = regexFromString(task[1].attr, true);
+        this.reValue = regexFromString(task[1].value, true);
+    }
+    transpose(node, output) {
+        const attrs = node.getAttributeNames();
+        for ( const attr of attrs ) {
+            if ( this.reAttr.test(attr) === false ) { continue; }
+            if ( this.reValue.test(node.getAttribute(attr)) === false ) { continue; }
+            output.push(node);
+            // One matching attribute is enough: emit the node only once.
+            break;
+        }
+    }
+}
+
 class PSelectorMatchesCSSTask extends PSelectorTask {
     constructor(task) {
         super();
@@ -350,6 +389,7 @@ class PSelector {
                 [ 'has-text', PSelectorHasTextTask ],
                 [ 'if', PSelectorIfTask ],
                 [ 'if-not', PSelectorIfNotTask ],
+                [ 'matches-attr', PSelectorMatchesAttrTask ],
                 [ 'matches-css', PSelectorMatchesCSSTask ],
                 [ 'matches-css-after', PSelectorMatchesCSSAfterTask ],
                 [ 'matches-css-before', PSelectorMatchesCSSBeforeTask ],
diff --git a/src/js/static-filtering-parser.js b/src/js/static-filtering-parser.js
index e227802fc..af3fec451 100644
--- a/src/js/static-filtering-parser.js
+++ b/src/js/static-filtering-parser.js
@@ -1361,6 +1361,7 @@ Parser.prototype.SelectorCompiler = class {
             'has-text',
             'if',
             'if-not',
+            'matches-attr',
             'matches-css',
             'matches-css-after',
             'matches-css-before',
@@ -1855,6 +1856,8 @@ Parser.prototype.SelectorCompiler = class {
             return this.compileSelector(arg);
         case 'if-not':
             return this.compileSelector(arg);
+        case 'matches-attr':
+            return this.compileMatchAttrArgument(arg);
         case 'matches-css':
             return this.compileCSSDeclaration(arg);
         case 'matches-css-after':
@@ -1894,11 +1897,68 @@ Parser.prototype.SelectorCompiler = class {
         return false;
     }
 
-    extractArg(s) {
-        if ( /^(['"]).+\1$/.test(s) ) {
-            s = s.slice(1, -1);
+    // Unquote the leading quoted string in `s`. Returns `{ s, i }`: the
+    // unquoted text, and the index in the input where parsing stopped.
+    // An input not starting with a quote is returned whole. Inside quotes,
+    // a backslash escapes the quote character or another backslash; a
+    // backslash before any other character is kept as-is.
+    unquoteString(s) {
+        const end = s.length;
+        if ( end === 0 ) {
+            return { s: '', i: end };
         }
-        return s.replace(/\\(['"])/g, '$1');
+        if ( /^['"]/.test(s) === false ) {
+            return { s, i: end };
+        }
+        const quote = s.charCodeAt(0);
+        const out = [];
+        let i = 1, c = 0;
+        for (;;) {
+            if ( i === end ) { break; }
+            c = s.charCodeAt(i);
+            if ( c === quote ) {
+                i += 1;
+                break;
+            }
+            if ( c === 0x5C /* '\\' */ ) {
+                i += 1;
+                if ( i === end ) { break; }
+                c = s.charCodeAt(i);
+                if ( c !== 0x5C && c !== quote ) {
+                    out.push(0x5C);
+                }
+            }
+            out.push(c);
+            i += 1;
+        }
+        return { s: String.fromCharCode(...out), i };
+    }
+
+    // Parse the argument of `:matches-attr()`: `attr` or `attr=value`,
+    // where the name and the value may each be quoted. Returns
+    // `{ attr, value }`, or undefined when the argument is empty or malformed.
+    compileMatchAttrArgument(s) {
+        if ( s === '' ) { return; }
+        let attr = '', value = '';
+        let r = this.unquoteString(s);
+        if ( r.i === s.length ) {
+            const pos = r.s.indexOf('=');
+            if ( pos === -1 ) {
+                attr = r.s;
+            } else {
+                attr = r.s.slice(0, pos);
+                value = r.s.slice(pos + 1);
+            }
+        } else {
+            attr = r.s;
+            if ( s.charCodeAt(r.i) !== 0x3D /* '=' */ ) { return; }
+            const rest = s.slice(r.i+1);
+            r = this.unquoteString(rest);
+            if ( r.i !== rest.length ) { return; }
+            value = r.s;
+        }
+        if ( attr === '' ) { return; }
+        return { attr, value };
     }
 
     // When dealing with literal text, we must first eat _some_
@@ -1906,8 +1966,9 @@ Parser.prototype.SelectorCompiler = class {
     // Remove potentially present quotes before processing.
     compileText(s) {
         if ( s === '' ) { return; }
-        s = this.extractArg(s);
-        const match = this.reParseRegexLiteral.exec(s);
+        const r = this.unquoteString(s);
+        if ( r.i !== s.length ) { return; }
+        const match = this.reParseRegexLiteral.exec(r.s);
         let regexDetails;
         if ( match !== null ) {
             regexDetails = match[1];
@@ -1915,10 +1976,12 @@ Parser.prototype.SelectorCompiler = class {
             if ( match[2] ) {
                 regexDetails = [ regexDetails, match[2] ];
             }
+        } else if ( r.s === '' ) {
+            regexDetails = '^$';
         } else {
-            regexDetails = s.replace(this.reEatBackslashes, '$1')
-                            .replace(this.reEscapeRegex, '\\$&');
-            this.regexToRawValue.set(regexDetails, s);
+            regexDetails = r.s.replace(this.reEatBackslashes, '$1')
+                              .replace(this.reEscapeRegex, '\\$&');
+            this.regexToRawValue.set(regexDetails, r.s);
         }
         return regexDetails;
     }
@@ -2010,13 +2073,14 @@ Parser.prototype.SelectorCompiler = class {
     }
 
     compileXpathExpression(s) {
-        s = this.extractArg(s);
+        const r = this.unquoteString(s);
+        if ( r.i !== s.length ) { return; }
         try {
-            self.document.createExpression(s, null);
+            self.document.createExpression(r.s, null);
         } catch (e) {
             return;
         }
-        return s;
+        return r.s;
     }
 };
 