mirror of https://github.com/gorhill/uBlock.git
Add support for regex-based values as target domain for static extended filters
Related discussion:
- https://github.com/uBlockOrigin/uBlock-issues/discussions/2234
Example of usage:
/img[a-z]{3,5}\.buzz/##+js(nowoif)
Use sparingly, when no other solution is practical from a maintenance point
of view -- keeping in mind that uBO has to iterate through all the regex-based
values, unlike plain hostname or entity-based values, which are mere lookups.
Related commit:
- b1de8d3fe4
This commit is contained in:
parent
c455490cf1
commit
81498474d6
|
@ -811,31 +811,33 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Retrieve filters with a non-empty hostname
|
// Retrieve filters with a non-empty hostname
|
||||||
|
const retrieveSets = [ specificSet, exceptionSet, proceduralSet, exceptionSet ];
|
||||||
|
const discardSets = [ dummySet, exceptionSet ];
|
||||||
this.specificFilters.retrieve(
|
this.specificFilters.retrieve(
|
||||||
hostname,
|
hostname,
|
||||||
options.noSpecificCosmeticFiltering !== true
|
options.noSpecificCosmeticFiltering ? discardSets : retrieveSets,
|
||||||
? [ specificSet, exceptionSet, proceduralSet, exceptionSet ]
|
|
||||||
: [ dummySet, exceptionSet ],
|
|
||||||
1
|
1
|
||||||
);
|
);
|
||||||
// Retrieve filters with an empty hostname
|
// Retrieve filters with a regex-based hostname value
|
||||||
this.specificFilters.retrieve(
|
this.specificFilters.retrieve(
|
||||||
hostname,
|
hostname,
|
||||||
options.noGenericCosmeticFiltering !== true
|
options.noSpecificCosmeticFiltering ? discardSets : retrieveSets,
|
||||||
? [ specificSet, exceptionSet, proceduralSet, exceptionSet ]
|
3
|
||||||
: [ dummySet, exceptionSet ],
|
|
||||||
2
|
|
||||||
);
|
);
|
||||||
// Retrieve filters with a non-empty entity
|
// Retrieve filters with an entity-based hostname value
|
||||||
if ( request.entity !== '' ) {
|
if ( request.entity !== '' ) {
|
||||||
this.specificFilters.retrieve(
|
this.specificFilters.retrieve(
|
||||||
`${hostname.slice(0, -request.domain.length)}${request.entity}`,
|
`${hostname.slice(0, -request.domain.length)}${request.entity}`,
|
||||||
options.noSpecificCosmeticFiltering !== true
|
options.noSpecificCosmeticFiltering ? discardSets : retrieveSets,
|
||||||
? [ specificSet, exceptionSet, proceduralSet, exceptionSet ]
|
|
||||||
: [ dummySet, exceptionSet ],
|
|
||||||
1
|
1
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
// Retrieve filters with an empty hostname
|
||||||
|
this.specificFilters.retrieve(
|
||||||
|
hostname,
|
||||||
|
options.noGenericCosmeticFiltering ? discardSets : retrieveSets,
|
||||||
|
2
|
||||||
|
);
|
||||||
|
|
||||||
if ( exceptionSet.size !== 0 ) {
|
if ( exceptionSet.size !== 0 ) {
|
||||||
out.exceptionFilters = Array.from(exceptionSet);
|
out.exceptionFilters = Array.from(exceptionSet);
|
||||||
|
|
|
@ -27,7 +27,6 @@ import logger from './logger.js';
|
||||||
import µb from './background.js';
|
import µb from './background.js';
|
||||||
import { sessionFirewall } from './filtering-engines.js';
|
import { sessionFirewall } from './filtering-engines.js';
|
||||||
import { StaticExtFilteringHostnameDB } from './static-ext-filtering-db.js';
|
import { StaticExtFilteringHostnameDB } from './static-ext-filtering-db.js';
|
||||||
import * as sfp from './static-filtering-parser.js';
|
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
|
@ -315,9 +314,6 @@ htmlFilteringEngine.freeze = function() {
|
||||||
|
|
||||||
htmlFilteringEngine.compile = function(parser, writer) {
|
htmlFilteringEngine.compile = function(parser, writer) {
|
||||||
const isException = parser.isException();
|
const isException = parser.isException();
|
||||||
const root = parser.getBranchFromType(sfp.NODE_TYPE_EXT_PATTERN_HTML);
|
|
||||||
const headerName = parser.getNodeString(root);
|
|
||||||
|
|
||||||
const { raw, compiled } = parser.result;
|
const { raw, compiled } = parser.result;
|
||||||
if ( compiled === undefined ) {
|
if ( compiled === undefined ) {
|
||||||
const who = writer.properties.get('name') || '?';
|
const who = writer.properties.get('name') || '?';
|
||||||
|
@ -380,19 +376,13 @@ htmlFilteringEngine.retrieve = function(details) {
|
||||||
const plains = new Set();
|
const plains = new Set();
|
||||||
const procedurals = new Set();
|
const procedurals = new Set();
|
||||||
const exceptions = new Set();
|
const exceptions = new Set();
|
||||||
|
const retrieveSets = [ plains, exceptions, procedurals, exceptions ];
|
||||||
|
|
||||||
filterDB.retrieve(
|
filterDB.retrieve(hostname, retrieveSets);
|
||||||
hostname,
|
|
||||||
[ plains, exceptions, procedurals, exceptions ]
|
|
||||||
);
|
|
||||||
const entity = details.entity !== ''
|
const entity = details.entity !== ''
|
||||||
? `${hostname.slice(0, -details.domain.length)}${details.entity}`
|
? `${hostname.slice(0, -details.domain.length)}${details.entity}`
|
||||||
: '*';
|
: '*';
|
||||||
filterDB.retrieve(
|
filterDB.retrieve(entity, retrieveSets, 1);
|
||||||
entity,
|
|
||||||
[ plains, exceptions, procedurals, exceptions ],
|
|
||||||
1
|
|
||||||
);
|
|
||||||
|
|
||||||
if ( plains.size === 0 && procedurals.size === 0 ) { return; }
|
if ( plains.size === 0 && procedurals.size === 0 ) { return; }
|
||||||
|
|
||||||
|
|
|
@ -150,9 +150,14 @@ const fromExtendedFilter = function(details) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const hostnameMatches = hn => {
|
const hostnameMatches = hn => {
|
||||||
return hn === '' ||
|
if ( hn === '' ) { return true; }
|
||||||
reHostname.test(hn) ||
|
if ( hn.charCodeAt(0) === 0x2F /* / */ ) {
|
||||||
reEntity !== undefined && reEntity.test(hn);
|
return (new RegExp(hn.slice(1,-1))).test(hostname);
|
||||||
|
}
|
||||||
|
if ( reHostname.test(hn) ) { return true; }
|
||||||
|
if ( reEntity === undefined ) { return false; }
|
||||||
|
if ( reEntity.test(hn) ) { return true; }
|
||||||
|
return false;
|
||||||
};
|
};
|
||||||
|
|
||||||
const response = Object.create(null);
|
const response = Object.create(null);
|
||||||
|
|
|
@ -29,6 +29,8 @@ const StaticExtFilteringHostnameDB = class {
|
||||||
this.timer = undefined;
|
this.timer = undefined;
|
||||||
this.strToIdMap = new Map();
|
this.strToIdMap = new Map();
|
||||||
this.hostnameToSlotIdMap = new Map();
|
this.hostnameToSlotIdMap = new Map();
|
||||||
|
this.regexToSlotIdMap = new Map();
|
||||||
|
this.regexMap = new Map();
|
||||||
// Array of integer pairs
|
// Array of integer pairs
|
||||||
this.hostnameSlots = [];
|
this.hostnameSlots = [];
|
||||||
// Array of strings (selectors and pseudo-selectors)
|
// Array of strings (selectors and pseudo-selectors)
|
||||||
|
@ -51,9 +53,16 @@ const StaticExtFilteringHostnameDB = class {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const strId = iStr << this.nBits | bits;
|
const strId = iStr << this.nBits | bits;
|
||||||
let iHn = this.hostnameToSlotIdMap.get(hn);
|
const hnIsNotRegex = hn.charCodeAt(0) !== 0x2F /* / */;
|
||||||
|
let iHn = hnIsNotRegex
|
||||||
|
? this.hostnameToSlotIdMap.get(hn)
|
||||||
|
: this.regexToSlotIdMap.get(hn);
|
||||||
if ( iHn === undefined ) {
|
if ( iHn === undefined ) {
|
||||||
|
if ( hnIsNotRegex ) {
|
||||||
this.hostnameToSlotIdMap.set(hn, this.hostnameSlots.length);
|
this.hostnameToSlotIdMap.set(hn, this.hostnameSlots.length);
|
||||||
|
} else {
|
||||||
|
this.regexToSlotIdMap.set(hn, this.hostnameSlots.length);
|
||||||
|
}
|
||||||
this.hostnameSlots.push(strId, 0);
|
this.hostnameSlots.push(strId, 0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -67,9 +76,11 @@ const StaticExtFilteringHostnameDB = class {
|
||||||
|
|
||||||
clear() {
|
clear() {
|
||||||
this.hostnameToSlotIdMap.clear();
|
this.hostnameToSlotIdMap.clear();
|
||||||
|
this.regexToSlotIdMap.clear();
|
||||||
this.hostnameSlots.length = 0;
|
this.hostnameSlots.length = 0;
|
||||||
this.strSlots.length = 0;
|
this.strSlots.length = 0;
|
||||||
this.strToIdMap.clear();
|
this.strToIdMap.clear();
|
||||||
|
this.regexMap.clear();
|
||||||
this.size = 0;
|
this.size = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -92,39 +103,55 @@ const StaticExtFilteringHostnameDB = class {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// modifiers = 1: return only specific items
|
// modifiers = 0: all items
|
||||||
// modifiers = 2: return only generic items
|
// modifiers = 1: only specific items
|
||||||
|
// modifiers = 2: only generic items
|
||||||
|
// modifiers = 3: only regex-based items
|
||||||
//
|
//
|
||||||
retrieve(hostname, out, modifiers = 0) {
|
retrieve(hostname, out, modifiers = 0) {
|
||||||
if ( modifiers === 2 ) {
|
let hn = hostname;
|
||||||
hostname = '';
|
if ( modifiers === 2 ) { hn = ''; }
|
||||||
}
|
|
||||||
const mask = out.length - 1; // out.length must be power of two
|
const mask = out.length - 1; // out.length must be power of two
|
||||||
for (;;) {
|
for (;;) {
|
||||||
let iHn = this.hostnameToSlotIdMap.get(hostname);
|
let iHn = this.hostnameToSlotIdMap.get(hn);
|
||||||
if ( iHn !== undefined ) {
|
if ( iHn !== undefined ) {
|
||||||
do {
|
do {
|
||||||
const strId = this.hostnameSlots[iHn+0];
|
const strId = this.hostnameSlots[iHn+0];
|
||||||
out[strId & mask].add(
|
out[strId & mask].add(this.strSlots[strId >>> this.nBits]);
|
||||||
this.strSlots[strId >>> this.nBits]
|
|
||||||
);
|
|
||||||
iHn = this.hostnameSlots[iHn+1];
|
iHn = this.hostnameSlots[iHn+1];
|
||||||
} while ( iHn !== 0 );
|
} while ( iHn !== 0 );
|
||||||
}
|
}
|
||||||
if ( hostname === '' ) { break; }
|
if ( hn === '' ) { break; }
|
||||||
const pos = hostname.indexOf('.');
|
const pos = hn.indexOf('.');
|
||||||
if ( pos === -1 ) {
|
if ( pos === -1 ) {
|
||||||
if ( modifiers === 1 ) { break; }
|
if ( modifiers === 1 ) { break; }
|
||||||
hostname = '';
|
hn = '';
|
||||||
} else {
|
} else {
|
||||||
hostname = hostname.slice(pos + 1);
|
hn = hn.slice(pos + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if ( modifiers !== 0 && modifiers !== 3 ) { return; }
|
||||||
|
// TODO: consider using a combined regex to test once for whether
|
||||||
|
// iterating is worth it.
|
||||||
|
for ( const restr of this.regexToSlotIdMap.keys() ) {
|
||||||
|
let re = this.regexMap.get(restr);
|
||||||
|
if ( re === undefined ) {
|
||||||
|
this.regexMap.set(restr, (re = new RegExp(restr.slice(1,-1))));
|
||||||
|
}
|
||||||
|
if ( re.test(hostname) === false ) { continue; }
|
||||||
|
let iHn = this.regexToSlotIdMap.get(restr);
|
||||||
|
do {
|
||||||
|
const strId = this.hostnameSlots[iHn+0];
|
||||||
|
out[strId & mask].add(this.strSlots[strId >>> this.nBits]);
|
||||||
|
iHn = this.hostnameSlots[iHn+1];
|
||||||
|
} while ( iHn !== 0 );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
toSelfie() {
|
toSelfie() {
|
||||||
return {
|
return {
|
||||||
hostnameToSlotIdMap: Array.from(this.hostnameToSlotIdMap),
|
hostnameToSlotIdMap: Array.from(this.hostnameToSlotIdMap),
|
||||||
|
regexToSlotIdMap: Array.from(this.regexToSlotIdMap),
|
||||||
hostnameSlots: this.hostnameSlots,
|
hostnameSlots: this.hostnameSlots,
|
||||||
strSlots: this.strSlots,
|
strSlots: this.strSlots,
|
||||||
size: this.size
|
size: this.size
|
||||||
|
@ -134,6 +161,10 @@ const StaticExtFilteringHostnameDB = class {
|
||||||
fromSelfie(selfie) {
|
fromSelfie(selfie) {
|
||||||
if ( selfie === undefined ) { return; }
|
if ( selfie === undefined ) { return; }
|
||||||
this.hostnameToSlotIdMap = new Map(selfie.hostnameToSlotIdMap);
|
this.hostnameToSlotIdMap = new Map(selfie.hostnameToSlotIdMap);
|
||||||
|
// Regex-based lookup available in uBO 1.47.0 and above
|
||||||
|
if ( Array.isArray(selfie.regexToSlotIdMap) ) {
|
||||||
|
this.regexToSlotIdMap = new Map(selfie.regexToSlotIdMap);
|
||||||
|
}
|
||||||
this.hostnameSlots = selfie.hostnameSlots;
|
this.hostnameSlots = selfie.hostnameSlots;
|
||||||
this.strSlots = selfie.strSlots;
|
this.strSlots = selfie.strSlots;
|
||||||
this.size = selfie.size;
|
this.size = selfie.size;
|
||||||
|
|
|
@ -1066,8 +1066,7 @@ export class AstFilterParser {
|
||||||
realBad = true;
|
realBad = true;
|
||||||
break;
|
break;
|
||||||
case NODE_TYPE_NET_OPTION_NAME_WEBRTC:
|
case NODE_TYPE_NET_OPTION_NAME_WEBRTC:
|
||||||
bad = true;
|
realBad = true;
|
||||||
realBad = isNegated || hasValue;
|
|
||||||
break;
|
break;
|
||||||
case NODE_TYPE_NET_PATTERN:
|
case NODE_TYPE_NET_PATTERN:
|
||||||
realBad = this.hasOptions() === false &&
|
realBad = this.hasOptions() === false &&
|
||||||
|
@ -1784,7 +1783,7 @@ export class AstFilterParser {
|
||||||
);
|
);
|
||||||
this.addFlags(AST_FLAG_HAS_OPTIONS);
|
this.addFlags(AST_FLAG_HAS_OPTIONS);
|
||||||
this.addNodeToRegister(NODE_TYPE_EXT_OPTIONS, next);
|
this.addNodeToRegister(NODE_TYPE_EXT_OPTIONS, next);
|
||||||
this.linkDown(next, this.parseDomainList(next, ',', 0b01110));
|
this.linkDown(next, this.parseDomainList(next, ',', 0b11110));
|
||||||
prev = this.linkRight(prev, next);
|
prev = this.linkRight(prev, next);
|
||||||
}
|
}
|
||||||
next = this.allocTypedNode(
|
next = this.allocTypedNode(
|
||||||
|
|
Loading…
Reference in New Issue