Add support for regex-based values for `domain=`/`from=`/`to=` options

Related discussion:
- https://github.com/uBlockOrigin/uBlock-issues/discussions/2234

Example of usage:

    @@*$ghide,domain=/img[a-z]{3,5}\.buzz/

Regex-based domain values can be negated just like plain or
entity-based values:

    *$domain=~/regex.../

This new syntax does not apply to static extended filters.
This commit is contained in:
Raymond Hill 2023-01-30 17:00:26 -05:00
parent d88ec51b63
commit b1de8d3fe4
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
4 changed files with 158 additions and 28 deletions

View File

@ -176,8 +176,8 @@ const µBlock = { // jshint ignore:line
// Read-only
systemSettings: {
compiledMagic: 54, // Increase when compiled format changes
selfieMagic: 54, // Increase when selfie format changes
compiledMagic: 55, // Increase when compiled format changes
selfieMagic: 55, // Increase when selfie format changes
},
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501

View File

@ -1620,11 +1620,11 @@ export class AstFilterParser {
);
switch ( nodeOptionType ) {
case NODE_TYPE_NET_OPTION_NAME_DENYALLOW:
this.linkDown(next, this.parseDomainList(next, '|'), 0b0000);
this.linkDown(next, this.parseDomainList(next, '|'), 0b00000);
break;
case NODE_TYPE_NET_OPTION_NAME_FROM:
case NODE_TYPE_NET_OPTION_NAME_TO:
this.linkDown(next, this.parseDomainList(next, '|', 0b1010));
this.linkDown(next, this.parseDomainList(next, '|', 0b11010));
break;
default:
break;
@ -1642,7 +1642,7 @@ export class AstFilterParser {
return this.getNodeTransform(valueNode);
}
parseDomainList(parent, separator, mode = 0b0000) {
parseDomainList(parent, separator, mode = 0b00000) {
const parentBeg = this.nodes[parent+NODE_BEG_INDEX];
const parentEnd = this.nodes[parent+NODE_END_INDEX];
const containerNode = this.allocTypedNode(
@ -1668,9 +1668,7 @@ export class AstFilterParser {
end = s.indexOf(separator, beg);
} else {
end = s.indexOf('/', beg+1);
end = end !== -1
? s.indexOf(separator, end+1)
: s.indexOf(separator, beg);
end = s.indexOf(separator, end !== -1 ? end+1 : beg);
}
if ( end === -1 ) { end = listEnd; }
if ( end !== beg ) {
@ -1683,8 +1681,9 @@ export class AstFilterParser {
prev = this.linkRight(prev, domainNode);
} else {
domainNode = 0;
if ( this.interactive && separatorNode !== 0 ) {
if ( separatorNode !== 0 ) {
this.addNodeFlags(separatorNode, NODE_FLAG_ERROR);
this.addFlags(AST_FLAG_HAS_ERROR);
}
}
if ( s.charCodeAt(end) === separatorCode ) {
@ -1696,14 +1695,20 @@ export class AstFilterParser {
parentBeg + end
);
prev = this.linkRight(prev, separatorNode);
if ( this.interactive && domainNode === 0 ) {
if ( domainNode === 0 ) {
this.addNodeFlags(separatorNode, NODE_FLAG_ERROR);
this.addFlags(AST_FLAG_HAS_ERROR);
}
} else {
separatorNode = 0;
}
beg = end;
}
// Dangling separator node
if ( separatorNode !== 0 ) {
this.addNodeFlags(separatorNode, NODE_FLAG_ERROR);
this.addFlags(AST_FLAG_HAS_ERROR);
}
this.linkDown(containerNode, this.throwHeadNode(listNode));
return containerNode;
}
@ -1724,12 +1729,13 @@ export class AstFilterParser {
}
if ( beg !== parentEnd ) {
next = this.allocTypedNode(NODE_TYPE_OPTION_VALUE_DOMAIN, beg, parentEnd);
const hn = this.normalizeHostnameValue(this.getNodeString(next), mode);
const hn = this.normalizeDomainValue(this.getNodeString(next), mode);
if ( hn !== undefined ) {
if ( hn !== '' ) {
this.setNodeTransform(next, hn);
} else {
this.addNodeFlags(parent, NODE_FLAG_ERROR);
this.addFlags(AST_FLAG_HAS_ERROR);
}
}
if ( head === 0 ) {
@ -1737,10 +1743,32 @@ export class AstFilterParser {
} else {
this.linkRight(head, next);
}
} else {
this.addNodeFlags(parent, NODE_FLAG_ERROR);
this.addFlags(AST_FLAG_HAS_ERROR);
}
return head;
}
// mode bits:
// 0b00001: can use wildcard at any position
// 0b00010: can use entity-based hostnames
// 0b00100: can use single wildcard
// 0b01000: can be negated
// 0b10000: can be a regex
normalizeDomainValue(s, modeBits) {
if ( (modeBits & 0b10000) === 0 ||
s.length <= 2 ||
s.charCodeAt(0) !== 0x2F /* / */ ||
exCharCodeAt(s, -1) !== 0x2F /* / */
) {
return this.normalizeHostnameValue(s, modeBits);
}
const source = this.normalizeRegexPattern(s);
if ( source === '' ) { return ''; }
return `/${source}/`;
}
parseExt(parent, anchorBeg, anchorLen) {
const parentBeg = this.nodes[parent+NODE_BEG_INDEX];
const parentEnd = this.nodes[parent+NODE_END_INDEX];
@ -1756,7 +1784,7 @@ export class AstFilterParser {
);
this.addFlags(AST_FLAG_HAS_OPTIONS);
this.addNodeToRegister(NODE_TYPE_EXT_OPTIONS, next);
this.linkDown(next, this.parseDomainList(next, ',', 0b1110));
this.linkDown(next, this.parseDomainList(next, ',', 0b01110));
prev = this.linkRight(prev, next);
}
next = this.allocTypedNode(
@ -2276,7 +2304,6 @@ export class AstFilterParser {
// 0b00010: can use entity-based hostnames
// 0b00100: can use single wildcard
// 0b01000: can be negated
// 0b10000: can be a regex
//
// returns:
// undefined: no normalization needed, use original hostname

View File

@ -1492,21 +1492,22 @@ const compileDomainOpt = (ctors, iterable, prepend, units) => {
const hostnameMisses = [];
const entityHits = [];
const entityMisses = [];
const regexHits = [];
const regexMisses = [];
for ( const s of iterable ) {
const len = s.length;
const beg = len > 1 && s.charCodeAt(0) === 0x7E /* '~' */ ? 1 : 0;
if ( len <= beg ) { continue; }
if ( s.endsWith('.*') === false ) {
if ( beg === 0 ) {
hostnameHits.push(s);
} else {
hostnameMisses.push(s.slice(1));
}
} else if ( beg === 0 ) {
entityHits.push(s);
} else {
entityMisses.push(s.slice(1));
if ( s.charCodeAt(beg) === 0x2F /* / */ ) {
if ( beg === 0 ) { regexHits.push(s); continue; }
regexMisses.push(s); continue;
}
if ( s.endsWith('.*') === false ) {
if ( beg === 0 ) { hostnameHits.push(s); continue; }
hostnameMisses.push(s.slice(1)); continue;
}
if ( beg === 0 ) { entityHits.push(s); continue; }
entityMisses.push(s.slice(1)); continue;
}
const toTrie = [];
let trieWhich = 0b00;
@ -1532,6 +1533,9 @@ const compileDomainOpt = (ctors, iterable, prepend, units) => {
for ( const hn of entityHits ) {
compiledHit.push(ctors[1].compile(hn));
}
for ( const hn of regexHits ) {
compiledHit.push(ctors[3].compile(hn));
}
if ( compiledHit.length > 1 ) {
compiledHit[0] = FilterDomainHitAny.compile(compiledHit.slice());
compiledHit.length = 1;
@ -1550,14 +1554,17 @@ const compileDomainOpt = (ctors, iterable, prepend, units) => {
const compiledMiss = [];
if ( toTrie.length !== 0 ) {
compiledMiss.push(
ctors[5].compile(toTrie.sort(), trieWhich)
ctors[6].compile(toTrie.sort(), trieWhich)
);
}
for ( const hn of hostnameMisses ) {
compiledMiss.push(ctors[3].compile(hn));
compiledMiss.push(ctors[4].compile(hn));
}
for ( const hn of entityMisses ) {
compiledMiss.push(ctors[4].compile(hn));
compiledMiss.push(ctors[5].compile(hn));
}
// Negated regex-based values belong with the other "miss" units: pushing them
// into the hit list would make them be evaluated as part of the hit set
// instead of as exclusions.
for ( const hn of regexMisses ) {
    compiledMiss.push(ctors[7].compile(hn));
}
if ( prepend ) {
if ( compiledHit.length !== 0 ) {
@ -1749,6 +1756,47 @@ class FilterDomainHitSet {
/******************************************************************************/
// Base filter unit for regex-based domain option values.
//
// Layout in `filterData`:
//   [idata+0]: filter class id (fid)
//   [idata+1]: index into `filterRefs` of `{ restr, $re }`, where `restr` is
//              the regex literal as a string (including the enclosing
//              slashes) and `$re` is the lazily created RegExp instance.
//
// Subclasses are expected to provide `getMatchTarget()` (the string tested
// against the regex) and `dnrConditionName` (the rule condition property
// `dnrFromCompiled()` appends to).
class FilterDomainRegexHit {
    // Returns the regex literal string stored for this unit.
    static getDomainOpt(idata) {
        return filterRefs[filterData[idata+1]].restr;
    }

    // Tests the match target against the regex, instantiating the RegExp
    // on first use only.
    static match(idata) {
        const details = filterRefs[filterData[idata+1]];
        if ( details.$re === null ) {
            // Strip the enclosing slashes to get the regex source.
            const source = details.restr.slice(1,-1);
            details.$re = new RegExp(source);
        }
        const target = this.getMatchTarget();
        return details.$re.test(target);
    }

    // Compiled form: [ fid, regex literal string ].
    static compile(restr) {
        const compiled = [ this.fid, restr ];
        return compiled;
    }

    // Materializes a compiled unit into `filterData`/`filterRefs` and
    // returns the index of the unit in `filterData`.
    static fromCompiled(args) {
        const idata = filterDataAllocLen(2);
        filterData[idata+0] = args[0]; // fid
        const lazy = { restr: args[1], $re: null };
        filterData[idata+1] = filterRefAdd(lazy);
        return idata;
    }

    // Appends the regex literal string to the rule condition property named
    // by `this.dnrConditionName`, creating the condition object and the
    // array as needed.
    static dnrFromCompiled(args, rule) {
        rule.condition = rule.condition || {};
        const name = this.dnrConditionName;
        const { condition } = rule;
        if ( condition[name] === undefined ) {
            condition[name] = [];
        }
        condition[name].push(args[1]);
    }

    static dumpInfo(idata) {
        return this.getDomainOpt(idata);
    }
}
/******************************************************************************/
// Implement the following filter option:
// - domain=
// - from=
@ -1845,20 +1893,44 @@ class FilterFromDomainMissSet extends FilterFromDomainHitSet {
}
}
// Regex-based `from=`/`domain=` value: the regex is tested against
// `$docHostname`.
class FilterFromRegexHit extends FilterDomainRegexHit {
    static getMatchTarget() {
        return $docHostname;
    }

    // Condition property read by the inherited dnrFromCompiled(). Without
    // it, `this.dnrConditionName` is undefined and values end up under the
    // literal key "undefined" in the rule condition.
    static get dnrConditionName() {
        return 'initiatorDomains';
    }

    static logData(idata, details) {
        details.fromDomains.push(`${this.getDomainOpt(idata)}`);
    }
}

// Negated regex-based `from=`/`domain=` value: matches when the regex does
// NOT match `$docHostname`.
class FilterFromRegexMiss extends FilterFromRegexHit {
    static match(idata) {
        return super.match(idata) === false;
    }

    // Must override the name inherited from the Hit class, otherwise negated
    // values would be reported as non-negated ones.
    static get dnrConditionName() {
        return 'excludedInitiatorDomains';
    }

    static logData(idata, details) {
        details.fromDomains.push(`~${this.getDomainOpt(idata)}`);
    }
}
// Register every filter unit class implementing the from=/domain= option,
// including the two regex-based ones.
// NOTE(review): registerFilterClass() is defined outside this view; if it
// assigns class ids sequentially, the order of these calls is significant --
// confirm before reordering.
registerFilterClass(FilterFromDomainHit);
registerFilterClass(FilterFromDomainMiss);
registerFilterClass(FilterFromEntityHit);
registerFilterClass(FilterFromEntityMiss);
registerFilterClass(FilterFromDomainHitSet);
registerFilterClass(FilterFromDomainMissSet);
registerFilterClass(FilterFromRegexHit);
registerFilterClass(FilterFromRegexMiss);
// Positional table of the from= unit classes: the four "hit" classes come
// first (indices 0-3), followed by the four "miss" classes in the same order
// (indices 4-7).
// Presumably handed to compileDomainOpt() as `ctors`, which addresses the
// classes by numeric index (e.g. ctors[3] for regex hits, ctors[7] for regex
// misses) -- so entries must not be reordered, and new entries must be
// mirrored in toOptClasses.
const fromOptClasses = [
FilterFromDomainHit,
FilterFromEntityHit,
FilterFromDomainHitSet,
FilterFromRegexHit,
FilterFromDomainMiss,
FilterFromEntityMiss,
FilterFromDomainMissSet,
FilterFromRegexMiss,
];
const compileFromDomainOpt = (...args) => {
@ -1946,20 +2018,44 @@ class FilterToDomainMissSet extends FilterToDomainHitSet {
}
}
// Regex-based `to=` value: the regex is tested against `$requestHostname`.
class FilterToRegexHit extends FilterDomainRegexHit {
    static getMatchTarget() {
        return $requestHostname;
    }

    // Condition property read by the inherited dnrFromCompiled(). Without
    // it, `this.dnrConditionName` is undefined and values end up under the
    // literal key "undefined" in the rule condition.
    static get dnrConditionName() {
        return 'requestDomains';
    }

    static logData(idata, details) {
        details.toDomains.push(`${this.getDomainOpt(idata)}`);
    }
}

// Negated regex-based `to=` value: matches when the regex does NOT match
// `$requestHostname`.
class FilterToRegexMiss extends FilterToRegexHit {
    static match(idata) {
        return super.match(idata) === false;
    }

    // Must override the name inherited from the Hit class, otherwise negated
    // values would be reported as non-negated ones.
    static get dnrConditionName() {
        return 'excludedRequestDomains';
    }

    static logData(idata, details) {
        details.toDomains.push(`~${this.getDomainOpt(idata)}`);
    }
}
// Register every filter unit class implementing the to= option, including
// the two regex-based ones.
// NOTE(review): registerFilterClass() is defined outside this view; if it
// assigns class ids sequentially, the order of these calls is significant --
// confirm before reordering.
registerFilterClass(FilterToDomainHit);
registerFilterClass(FilterToDomainMiss);
registerFilterClass(FilterToEntityHit);
registerFilterClass(FilterToEntityMiss);
registerFilterClass(FilterToDomainHitSet);
registerFilterClass(FilterToDomainMissSet);
registerFilterClass(FilterToRegexHit);
registerFilterClass(FilterToRegexMiss);
// Positional table of the to= unit classes: the four "hit" classes come
// first (indices 0-3), followed by the four "miss" classes in the same order
// (indices 4-7).
// Presumably handed to compileDomainOpt() as `ctors`, which addresses the
// classes by numeric index (e.g. ctors[3] for regex hits, ctors[7] for regex
// misses) -- so entries must not be reordered, and the layout must stay in
// sync with fromOptClasses.
const toOptClasses = [
FilterToDomainHit,
FilterToEntityHit,
FilterToDomainHitSet,
FilterToRegexHit,
FilterToDomainMiss,
FilterToEntityMiss,
FilterToDomainMissSet,
FilterToRegexMiss,
];
const compileToDomainOpt = (...args) => {
@ -3678,7 +3774,7 @@ class FilterCompiler {
isJustOrigin() {
if ( this.optionUnitBits !== this.FROM_BIT ) { return false; }
if ( this.isRegex ) { return false; }
if ( this.fromDomainOpt.includes('~') ) { return false; }
if ( /[\/~]/.test(this.fromDomainOpt) ) { return false; }
if ( this.pattern === '*' ) { return true; }
if ( this.anchor !== 0b010 ) { return false; }
if ( /^(?:http[s*]?:(?:\/\/)?)$/.test(this.pattern) ) { return true; }

View File

@ -1030,7 +1030,14 @@ self.addEventListener('hiddenSettingsChanged', ( ) => {
parser.parse(line);
if ( parser.isFilter() === false ) { continue; }
if ( parser.hasError() ) { continue; }
if ( parser.hasError() ) {
logger.writeOne({
realm: 'message',
type: 'error',
text: `Invalid filter: ${parser.raw}`
});
continue;
}
if ( parser.isExtendedFilter() ) {
staticExtFilteringEngine.compile(parser, writer);