mirror of https://github.com/gorhill/uBlock.git
Add support for regex-based values for `domain=`/`from=`/`to=` options
Related discussion:
- https://github.com/uBlockOrigin/uBlock-issues/discussions/2234

Example of usage:

    @@*$ghide,domain=/img[a-z]{3,5}\.buzz/

Regex-based domain values can be negated just like plain or entity-based values:

    *$domain=~/regex.../

This new syntax does not apply to static extended filters.
This commit is contained in:
parent
d88ec51b63
commit
b1de8d3fe4
|
@ -176,8 +176,8 @@ const µBlock = { // jshint ignore:line
|
|||
|
||||
// Read-only
|
||||
systemSettings: {
|
||||
compiledMagic: 54, // Increase when compiled format changes
|
||||
selfieMagic: 54, // Increase when selfie format changes
|
||||
compiledMagic: 55, // Increase when compiled format changes
|
||||
selfieMagic: 55, // Increase when selfie format changes
|
||||
},
|
||||
|
||||
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501
|
||||
|
|
|
@ -1620,11 +1620,11 @@ export class AstFilterParser {
|
|||
);
|
||||
switch ( nodeOptionType ) {
|
||||
case NODE_TYPE_NET_OPTION_NAME_DENYALLOW:
|
||||
this.linkDown(next, this.parseDomainList(next, '|'), 0b0000);
|
||||
this.linkDown(next, this.parseDomainList(next, '|'), 0b00000);
|
||||
break;
|
||||
case NODE_TYPE_NET_OPTION_NAME_FROM:
|
||||
case NODE_TYPE_NET_OPTION_NAME_TO:
|
||||
this.linkDown(next, this.parseDomainList(next, '|', 0b1010));
|
||||
this.linkDown(next, this.parseDomainList(next, '|', 0b11010));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
@ -1642,7 +1642,7 @@ export class AstFilterParser {
|
|||
return this.getNodeTransform(valueNode);
|
||||
}
|
||||
|
||||
parseDomainList(parent, separator, mode = 0b0000) {
|
||||
parseDomainList(parent, separator, mode = 0b00000) {
|
||||
const parentBeg = this.nodes[parent+NODE_BEG_INDEX];
|
||||
const parentEnd = this.nodes[parent+NODE_END_INDEX];
|
||||
const containerNode = this.allocTypedNode(
|
||||
|
@ -1668,9 +1668,7 @@ export class AstFilterParser {
|
|||
end = s.indexOf(separator, beg);
|
||||
} else {
|
||||
end = s.indexOf('/', beg+1);
|
||||
end = end !== -1
|
||||
? s.indexOf(separator, end+1)
|
||||
: s.indexOf(separator, beg);
|
||||
end = s.indexOf(separator, end !== -1 ? end+1 : beg);
|
||||
}
|
||||
if ( end === -1 ) { end = listEnd; }
|
||||
if ( end !== beg ) {
|
||||
|
@ -1683,8 +1681,9 @@ export class AstFilterParser {
|
|||
prev = this.linkRight(prev, domainNode);
|
||||
} else {
|
||||
domainNode = 0;
|
||||
if ( this.interactive && separatorNode !== 0 ) {
|
||||
if ( separatorNode !== 0 ) {
|
||||
this.addNodeFlags(separatorNode, NODE_FLAG_ERROR);
|
||||
this.addFlags(AST_FLAG_HAS_ERROR);
|
||||
}
|
||||
}
|
||||
if ( s.charCodeAt(end) === separatorCode ) {
|
||||
|
@ -1696,14 +1695,20 @@ export class AstFilterParser {
|
|||
parentBeg + end
|
||||
);
|
||||
prev = this.linkRight(prev, separatorNode);
|
||||
if ( this.interactive && domainNode === 0 ) {
|
||||
if ( domainNode === 0 ) {
|
||||
this.addNodeFlags(separatorNode, NODE_FLAG_ERROR);
|
||||
this.addFlags(AST_FLAG_HAS_ERROR);
|
||||
}
|
||||
} else {
|
||||
separatorNode = 0;
|
||||
}
|
||||
beg = end;
|
||||
}
|
||||
// Dangling separator node
|
||||
if ( separatorNode !== 0 ) {
|
||||
this.addNodeFlags(separatorNode, NODE_FLAG_ERROR);
|
||||
this.addFlags(AST_FLAG_HAS_ERROR);
|
||||
}
|
||||
this.linkDown(containerNode, this.throwHeadNode(listNode));
|
||||
return containerNode;
|
||||
}
|
||||
|
@ -1724,12 +1729,13 @@ export class AstFilterParser {
|
|||
}
|
||||
if ( beg !== parentEnd ) {
|
||||
next = this.allocTypedNode(NODE_TYPE_OPTION_VALUE_DOMAIN, beg, parentEnd);
|
||||
const hn = this.normalizeHostnameValue(this.getNodeString(next), mode);
|
||||
const hn = this.normalizeDomainValue(this.getNodeString(next), mode);
|
||||
if ( hn !== undefined ) {
|
||||
if ( hn !== '' ) {
|
||||
this.setNodeTransform(next, hn);
|
||||
} else {
|
||||
this.addNodeFlags(parent, NODE_FLAG_ERROR);
|
||||
this.addFlags(AST_FLAG_HAS_ERROR);
|
||||
}
|
||||
}
|
||||
if ( head === 0 ) {
|
||||
|
@ -1737,10 +1743,32 @@ export class AstFilterParser {
|
|||
} else {
|
||||
this.linkRight(head, next);
|
||||
}
|
||||
} else {
|
||||
this.addNodeFlags(parent, NODE_FLAG_ERROR);
|
||||
this.addFlags(AST_FLAG_HAS_ERROR);
|
||||
}
|
||||
return head;
|
||||
}
|
||||
|
||||
// mode bits:
|
||||
// 0b00001: can use wildcard at any position
|
||||
// 0b00010: can use entity-based hostnames
|
||||
// 0b00100: can use single wildcard
|
||||
// 0b01000: can be negated
|
||||
// 0b10000: can be a regex
|
||||
normalizeDomainValue(s, modeBits) {
|
||||
if ( (modeBits & 0b10000) === 0 ||
|
||||
s.length <= 2 ||
|
||||
s.charCodeAt(0) !== 0x2F /* / */ ||
|
||||
exCharCodeAt(s, -1) !== 0x2F /* / */
|
||||
) {
|
||||
return this.normalizeHostnameValue(s, modeBits);
|
||||
}
|
||||
const source = this.normalizeRegexPattern(s);
|
||||
if ( source === '' ) { return ''; }
|
||||
return `/${source}/`;
|
||||
}
|
||||
|
||||
parseExt(parent, anchorBeg, anchorLen) {
|
||||
const parentBeg = this.nodes[parent+NODE_BEG_INDEX];
|
||||
const parentEnd = this.nodes[parent+NODE_END_INDEX];
|
||||
|
@ -1756,7 +1784,7 @@ export class AstFilterParser {
|
|||
);
|
||||
this.addFlags(AST_FLAG_HAS_OPTIONS);
|
||||
this.addNodeToRegister(NODE_TYPE_EXT_OPTIONS, next);
|
||||
this.linkDown(next, this.parseDomainList(next, ',', 0b1110));
|
||||
this.linkDown(next, this.parseDomainList(next, ',', 0b01110));
|
||||
prev = this.linkRight(prev, next);
|
||||
}
|
||||
next = this.allocTypedNode(
|
||||
|
@ -2276,7 +2304,6 @@ export class AstFilterParser {
|
|||
// 0b00010: can use entity-based hostnames
|
||||
// 0b00100: can use single wildcard
|
||||
// 0b01000: can be negated
|
||||
// 0b10000: can be a regex
|
||||
//
|
||||
// returns:
|
||||
// undefined: no normalization needed, use original hostname
|
||||
|
|
|
@ -1492,21 +1492,22 @@ const compileDomainOpt = (ctors, iterable, prepend, units) => {
|
|||
const hostnameMisses = [];
|
||||
const entityHits = [];
|
||||
const entityMisses = [];
|
||||
const regexHits = [];
|
||||
const regexMisses = [];
|
||||
for ( const s of iterable ) {
|
||||
const len = s.length;
|
||||
const beg = len > 1 && s.charCodeAt(0) === 0x7E /* '~' */ ? 1 : 0;
|
||||
if ( len <= beg ) { continue; }
|
||||
if ( s.endsWith('.*') === false ) {
|
||||
if ( beg === 0 ) {
|
||||
hostnameHits.push(s);
|
||||
} else {
|
||||
hostnameMisses.push(s.slice(1));
|
||||
}
|
||||
} else if ( beg === 0 ) {
|
||||
entityHits.push(s);
|
||||
} else {
|
||||
entityMisses.push(s.slice(1));
|
||||
if ( s.charCodeAt(beg) === 0x2F /* / */ ) {
|
||||
if ( beg === 0 ) { regexHits.push(s); continue; }
|
||||
regexMisses.push(s); continue;
|
||||
}
|
||||
if ( s.endsWith('.*') === false ) {
|
||||
if ( beg === 0 ) { hostnameHits.push(s); continue; }
|
||||
hostnameMisses.push(s.slice(1)); continue;
|
||||
}
|
||||
if ( beg === 0 ) { entityHits.push(s); continue; }
|
||||
entityMisses.push(s.slice(1)); continue;
|
||||
}
|
||||
const toTrie = [];
|
||||
let trieWhich = 0b00;
|
||||
|
@ -1532,6 +1533,9 @@ const compileDomainOpt = (ctors, iterable, prepend, units) => {
|
|||
for ( const hn of entityHits ) {
|
||||
compiledHit.push(ctors[1].compile(hn));
|
||||
}
|
||||
for ( const hn of regexHits ) {
|
||||
compiledHit.push(ctors[3].compile(hn));
|
||||
}
|
||||
if ( compiledHit.length > 1 ) {
|
||||
compiledHit[0] = FilterDomainHitAny.compile(compiledHit.slice());
|
||||
compiledHit.length = 1;
|
||||
|
@ -1550,14 +1554,17 @@ const compileDomainOpt = (ctors, iterable, prepend, units) => {
|
|||
const compiledMiss = [];
|
||||
if ( toTrie.length !== 0 ) {
|
||||
compiledMiss.push(
|
||||
ctors[5].compile(toTrie.sort(), trieWhich)
|
||||
ctors[6].compile(toTrie.sort(), trieWhich)
|
||||
);
|
||||
}
|
||||
for ( const hn of hostnameMisses ) {
|
||||
compiledMiss.push(ctors[3].compile(hn));
|
||||
compiledMiss.push(ctors[4].compile(hn));
|
||||
}
|
||||
for ( const hn of entityMisses ) {
|
||||
compiledMiss.push(ctors[4].compile(hn));
|
||||
compiledMiss.push(ctors[5].compile(hn));
|
||||
}
|
||||
for ( const hn of regexMisses ) {
|
||||
compiledHit.push(ctors[7].compile(hn));
|
||||
}
|
||||
if ( prepend ) {
|
||||
if ( compiledHit.length !== 0 ) {
|
||||
|
@ -1749,6 +1756,47 @@ class FilterDomainHitSet {
|
|||
|
||||
/******************************************************************************/
|
||||
|
||||
// Base class for filter units which test a hostname against a regex-based
// domain option value.
//
// Storage layout, as written by fromCompiled():
//   filterData[idata+0]: fid
//   filterData[idata+1]: index into filterRefs of { restr, $re }
// where `restr` is the regex literal string, slashes included, and `$re` is
// the RegExp instance, created on first call to match().
//
// Subclasses are expected to provide getMatchTarget(); `fid` and
// `dnrConditionName` are read from `this` but not defined here.
class FilterDomainRegexHit {
    // Returns the regex literal string stored for this unit.
    static getDomainOpt(idata) {
        return filterRefs[filterData[idata+1]].restr;
    }

    // Returns whether the regex matches the string from getMatchTarget().
    static match(idata) {
        const details = filterRefs[filterData[idata+1]];
        let re = details.$re;
        if ( re === null ) {
            // Strip the enclosing slashes to get the regex source
            re = details.$re = new RegExp(details.restr.slice(1,-1));
        }
        return re.test(this.getMatchTarget());
    }

    // Compiled form: [ fid, regex literal string ]
    static compile(restr) {
        const fid = this.fid;
        return [ fid, restr ];
    }

    // Allocates two filterData slots for a compiled unit and returns the
    // index of the first one.
    static fromCompiled(args) {
        const idata = filterDataAllocLen(2);
        filterData[idata+0] = args[0];  // fid
        filterData[idata+1] = filterRefAdd({ restr: args[1], $re: null });
        return idata;
    }

    // Appends the regex literal string to the array found at
    // rule.condition[this.dnrConditionName], creating the condition object
    // and the array as needed.
    static dnrFromCompiled(args, rule) {
        rule.condition = rule.condition || {};
        const { condition } = rule;
        const key = this.dnrConditionName;
        if ( condition[key] === undefined ) {
            condition[key] = [];
        }
        condition[key].push(args[1]);
    }

    static dumpInfo(idata) {
        return this.getDomainOpt(idata);
    }
}
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
// Implement the following filter option:
|
||||
// - domain=
|
||||
// - from=
|
||||
|
@ -1845,20 +1893,44 @@ class FilterFromDomainMissSet extends FilterFromDomainHitSet {
|
|||
}
|
||||
}
|
||||
|
||||
// Regex-based domain unit whose match target is $docHostname.
class FilterFromRegexHit extends FilterDomainRegexHit {
    static getMatchTarget() {
        return $docHostname;
    }

    // Reports the regex literal string in details.fromDomains.
    static logData(idata, details) {
        const regexLiteral = `${this.getDomainOpt(idata)}`;
        details.fromDomains.push(regexLiteral);
    }
}
|
||||
|
||||
// Negated counterpart of FilterFromRegexHit: matches when the parent class
// reports no match.
class FilterFromRegexMiss extends FilterFromRegexHit {
    static match(idata) {
        const isHit = super.match(idata);
        return isHit === false;
    }

    // Reports the regex literal string, prefixed with `~`, in
    // details.fromDomains.
    static logData(idata, details) {
        const negated = `~${this.getDomainOpt(idata)}`;
        details.fromDomains.push(negated);
    }
}
|
||||
|
||||
// Registration order is kept exactly as-is.
// NOTE(review): presumably the order determines each class's `fid` --
// confirm in registerFilterClass() before reordering.
registerFilterClass(FilterFromDomainHit);
registerFilterClass(FilterFromDomainMiss);
registerFilterClass(FilterFromEntityHit);
registerFilterClass(FilterFromEntityMiss);
registerFilterClass(FilterFromDomainHitSet);
registerFilterClass(FilterFromDomainMissSet);
registerFilterClass(FilterFromRegexHit);
registerFilterClass(FilterFromRegexMiss);

// Positions in this array are significant: compileDomainOpt() picks classes
// from its `ctors` argument by numeric index (e.g. ctors[3] for regex hits,
// ctors[7] for regex misses).
// NOTE(review): presumably this array is what gets passed as `ctors` --
// confirm in compileFromDomainOpt().
const fromOptClasses = [
    FilterFromDomainHit,        // 0
    FilterFromEntityHit,        // 1
    FilterFromDomainHitSet,     // 2
    FilterFromRegexHit,         // 3
    FilterFromDomainMiss,       // 4
    FilterFromEntityMiss,       // 5
    FilterFromDomainMissSet,    // 6
    FilterFromRegexMiss,        // 7
];
|
||||
|
||||
const compileFromDomainOpt = (...args) => {
|
||||
|
@ -1946,20 +2018,44 @@ class FilterToDomainMissSet extends FilterToDomainHitSet {
|
|||
}
|
||||
}
|
||||
|
||||
// Regex-based domain unit whose match target is $requestHostname.
class FilterToRegexHit extends FilterDomainRegexHit {
    static getMatchTarget() {
        return $requestHostname;
    }

    // Reports the regex literal string in details.toDomains.
    static logData(idata, details) {
        const regexLiteral = `${this.getDomainOpt(idata)}`;
        details.toDomains.push(regexLiteral);
    }
}
|
||||
|
||||
// Negated counterpart of FilterToRegexHit: matches when the parent class
// reports no match.
class FilterToRegexMiss extends FilterToRegexHit {
    static match(idata) {
        const isHit = super.match(idata);
        return isHit === false;
    }

    // Reports the regex literal string, prefixed with `~`, in
    // details.toDomains.
    static logData(idata, details) {
        const negated = `~${this.getDomainOpt(idata)}`;
        details.toDomains.push(negated);
    }
}
|
||||
|
||||
// Registration order is kept exactly as-is.
// NOTE(review): presumably the order determines each class's `fid` --
// confirm in registerFilterClass() before reordering.
registerFilterClass(FilterToDomainHit);
registerFilterClass(FilterToDomainMiss);
registerFilterClass(FilterToEntityHit);
registerFilterClass(FilterToEntityMiss);
registerFilterClass(FilterToDomainHitSet);
registerFilterClass(FilterToDomainMissSet);
registerFilterClass(FilterToRegexHit);
registerFilterClass(FilterToRegexMiss);

// Positions in this array are significant: compileDomainOpt() picks classes
// from its `ctors` argument by numeric index (e.g. ctors[3] for regex hits,
// ctors[7] for regex misses).
// NOTE(review): presumably this array is what gets passed as `ctors` --
// confirm in compileToDomainOpt().
const toOptClasses = [
    FilterToDomainHit,          // 0
    FilterToEntityHit,          // 1
    FilterToDomainHitSet,       // 2
    FilterToRegexHit,           // 3
    FilterToDomainMiss,         // 4
    FilterToEntityMiss,         // 5
    FilterToDomainMissSet,      // 6
    FilterToRegexMiss,          // 7
];
|
||||
|
||||
const compileToDomainOpt = (...args) => {
|
||||
|
@ -3678,7 +3774,7 @@ class FilterCompiler {
|
|||
isJustOrigin() {
|
||||
if ( this.optionUnitBits !== this.FROM_BIT ) { return false; }
|
||||
if ( this.isRegex ) { return false; }
|
||||
if ( this.fromDomainOpt.includes('~') ) { return false; }
|
||||
if ( /[\/~]/.test(this.fromDomainOpt) ) { return false; }
|
||||
if ( this.pattern === '*' ) { return true; }
|
||||
if ( this.anchor !== 0b010 ) { return false; }
|
||||
if ( /^(?:http[s*]?:(?:\/\/)?)$/.test(this.pattern) ) { return true; }
|
||||
|
|
|
@ -1030,7 +1030,14 @@ self.addEventListener('hiddenSettingsChanged', ( ) => {
|
|||
parser.parse(line);
|
||||
|
||||
if ( parser.isFilter() === false ) { continue; }
|
||||
if ( parser.hasError() ) { continue; }
|
||||
if ( parser.hasError() ) {
|
||||
logger.writeOne({
|
||||
realm: 'message',
|
||||
type: 'error',
|
||||
text: `Invalid filter: ${parser.raw}`
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( parser.isExtendedFilter() ) {
|
||||
staticExtFilteringEngine.compile(parser, writer);
|
||||
|
|
Loading…
Reference in New Issue