Add pseudo-operator `:matches-attr()`

Related issue:
- https://github.com/uBlockOrigin/uBlock-issues/issues/2329

The supported syntax is exactly as per AdGuard's documentation:
- https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#extended-css-matches-attr

Though recommended, the quotes are not mandatory in uBO if
the argument does not cause the parser to fail and if there
are no ambiguities.

Additionally, improved the code to better unquote pseudo-operator
arguments, and to bring it closer to how AdGuard does it as per
documentation. When using quotes, `"` and `\` should be escaped
to preserve these characters in the unquoted version of the
argument.

Additionally, it is now possible to have `:has-text()` match the
empty string by just quoting the empty string:

    ...##foo:has-text("")
This commit is contained in:
Raymond Hill 2022-12-02 15:43:04 -05:00
parent 6140e55589
commit 76d70102f0
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
2 changed files with 96 additions and 12 deletions

View File

@ -34,6 +34,19 @@ const nonVisualElements = {
style: true,
};
// Build a RegExp from a filter argument string.
//
// - The empty string yields a regex which matches anything.
// - A double-quoted string is unquoted first, with `\\` and `\"` unescaped.
// - A `/.../` or `/.../i` string is used as a regex literal; `exact` is
//   ignored in that case.
// - Anything else is matched as case-insensitive literal text, anchored at
//   both ends when `exact` is true.
const regexFromString = (s, exact = false) => {
    if ( s === '' ) { return /^/; }
    let text = s;
    if ( /^".+"$/.test(text) ) {
        text = text.slice(1,-1).replace(/\\(\\|")/g, '$1');
    }
    const literal = /^\/(.+)\/([i]?)$/.exec(text);
    if ( literal !== null ) {
        const [ , pattern, flags ] = literal;
        return new RegExp(pattern, flags || undefined);
    }
    // Plain text: neutralize every regex metacharacter.
    const escaped = text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const source = exact ? `^${escaped}$` : escaped;
    return new RegExp(source, 'i');
};
// 'P' stands for 'Procedural'
class PSelectorTask {
@ -85,6 +98,22 @@ class PSelectorIfNotTask extends PSelectorIfTask {
}
PSelectorIfNotTask.prototype.target = false;
// Procedural task for `:matches-attr()`: keeps a node when at least one of
// its attributes has a name matching `reAttr` and a value matching `reValue`.
class PSelectorMatchesAttrTask extends PSelectorTask {
    // `task[1]` is expected to be the compiled argument, an object with
    // `attr` and `value` string properties. Both are converted to regexes
    // with the `exact` flag set.
    constructor(task) {
        super();
        this.reAttr = regexFromString(task[1].attr, true);
        this.reValue = regexFromString(task[1].value, true);
    }
    // Append `node` to `output` if any attribute name/value pair matches.
    transpose(node, output) {
        for ( const attr of node.getAttributeNames() ) {
            if ( this.reAttr.test(attr) === false ) { continue; }
            if ( this.reValue.test(node.getAttribute(attr)) === false ) { continue; }
            output.push(node);
            // One matching attribute is enough: stop here so that a node
            // with several matching attributes is not pushed repeatedly.
            return;
        }
    }
}
class PSelectorMatchesCSSTask extends PSelectorTask {
constructor(task) {
super();
@ -350,6 +379,7 @@ class PSelector {
[ 'has-text', PSelectorHasTextTask ],
[ 'if', PSelectorIfTask ],
[ 'if-not', PSelectorIfNotTask ],
[ 'matches-attr', PSelectorMatchesAttrTask ],
[ 'matches-css', PSelectorMatchesCSSTask ],
[ 'matches-css-after', PSelectorMatchesCSSAfterTask ],
[ 'matches-css-before', PSelectorMatchesCSSBeforeTask ],

View File

@ -1361,6 +1361,7 @@ Parser.prototype.SelectorCompiler = class {
'has-text',
'if',
'if-not',
'matches-attr',
'matches-css',
'matches-css-after',
'matches-css-before',
@ -1855,6 +1856,8 @@ Parser.prototype.SelectorCompiler = class {
return this.compileSelector(arg);
case 'if-not':
return this.compileSelector(arg);
case 'matches-attr':
return this.compileMatchAttrArgument(arg);
case 'matches-css':
return this.compileCSSDeclaration(arg);
case 'matches-css-after':
@ -1894,11 +1897,58 @@ Parser.prototype.SelectorCompiler = class {
return false;
}
extractArg(s) {
if ( /^(['"]).+\1$/.test(s) ) {
s = s.slice(1, -1);
unquoteString(s) {
const end = s.length;
if ( end === 0 ) {
return { s: '', end };
}
return s.replace(/\\(['"])/g, '$1');
if ( /^['"]/.test(s) === false ) {
return { s, i: end };
}
const quote = s.charCodeAt(0);
const out = [];
let i = 1, c = 0;
for (;;) {
c = s.charCodeAt(i);
if ( c === quote ) {
i += 1;
break;
}
if ( c === 0x5C /* '\\' */ ) {
i += 1;
if ( i === end ) { break; }
c = s.charCodeAt(i);
if ( c !== 0x5C && c !== quote ) {
out.push('\\');
}
}
out.push(c);
i += 1;
if ( i === end ) { break; }
}
return { s: String.fromCharCode(...out), i };
}
compileMatchAttrArgument(s) {
if ( s === '' ) { return; }
let attr = '', value = '';
let r = this.unquoteString(s);
if ( r.i === s.length ) {
const pos = r.s.indexOf('=');
if ( pos === -1 ) {
attr = r.s;
} else {
attr = r.s.slice(0, pos);
value = r.s.slice(pos + 1);
}
} else {
attr = r.s;
if ( s.charCodeAt(r.i) !== 0x3D ) { return; }
r = this.unquoteString(s.slice(r.i+1));
value = r.s;
}
if ( attr === '' ) { return; }
return { attr, value };
}
// When dealing with literal text, we must first eat _some_
@ -1906,8 +1956,9 @@ Parser.prototype.SelectorCompiler = class {
// Remove potentially present quotes before processing.
compileText(s) {
if ( s === '' ) { return; }
s = this.extractArg(s);
const match = this.reParseRegexLiteral.exec(s);
const r = this.unquoteString(s);
if ( r.i !== s.length ) { return; }
const match = this.reParseRegexLiteral.exec(r.s);
let regexDetails;
if ( match !== null ) {
regexDetails = match[1];
@ -1915,10 +1966,12 @@ Parser.prototype.SelectorCompiler = class {
if ( match[2] ) {
regexDetails = [ regexDetails, match[2] ];
}
} else if ( r.s === '' ) {
regexDetails = '^$';
} else {
regexDetails = s.replace(this.reEatBackslashes, '$1')
.replace(this.reEscapeRegex, '\\$&');
this.regexToRawValue.set(regexDetails, s);
regexDetails = r.s.replace(this.reEatBackslashes, '$1')
.replace(this.reEscapeRegex, '\\$&');
this.regexToRawValue.set(regexDetails, r.s);
}
return regexDetails;
}
@ -2010,13 +2063,14 @@ Parser.prototype.SelectorCompiler = class {
}
compileXpathExpression(s) {
s = this.extractArg(s);
const r = this.unquoteString(s);
if ( r.i !== s.length ) { return; }
try {
self.document.createExpression(s, null);
self.document.createExpression(r.s, null);
} catch (e) {
return;
}
return s;
return r.s;
}
};