mirror of https://github.com/gorhill/uBlock.git
Add support for regex-based values as target domain for static extended filters
Related discussion:
- https://github.com/uBlockOrigin/uBlock-issues/discussions/2234
Example of usage:
/img[a-z]{3,5}\.buzz/##+js(nowoif)
Use sparingly, when no other solution is practical from a maintenance point
of view -- keeping in mind that uBO has to iterate through all the regex-based
values, unlike plain hostname or entity-based values, which are mere lookups.
Related commit:
- b1de8d3fe4
This commit is contained in:
parent
c455490cf1
commit
81498474d6
|
@ -811,31 +811,33 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
|
|||
}
|
||||
|
||||
// Retrieve filters with a non-empty hostname
|
||||
const retrieveSets = [ specificSet, exceptionSet, proceduralSet, exceptionSet ];
|
||||
const discardSets = [ dummySet, exceptionSet ];
|
||||
this.specificFilters.retrieve(
|
||||
hostname,
|
||||
options.noSpecificCosmeticFiltering !== true
|
||||
? [ specificSet, exceptionSet, proceduralSet, exceptionSet ]
|
||||
: [ dummySet, exceptionSet ],
|
||||
options.noSpecificCosmeticFiltering ? discardSets : retrieveSets,
|
||||
1
|
||||
);
|
||||
// Retrieve filters with an empty hostname
|
||||
// Retrieve filters with a regex-based hostname value
|
||||
this.specificFilters.retrieve(
|
||||
hostname,
|
||||
options.noGenericCosmeticFiltering !== true
|
||||
? [ specificSet, exceptionSet, proceduralSet, exceptionSet ]
|
||||
: [ dummySet, exceptionSet ],
|
||||
2
|
||||
options.noSpecificCosmeticFiltering ? discardSets : retrieveSets,
|
||||
3
|
||||
);
|
||||
// Retrieve filters with a non-empty entity
|
||||
// Retrieve filters with an entity-based hostname value
|
||||
if ( request.entity !== '' ) {
|
||||
this.specificFilters.retrieve(
|
||||
`${hostname.slice(0, -request.domain.length)}${request.entity}`,
|
||||
options.noSpecificCosmeticFiltering !== true
|
||||
? [ specificSet, exceptionSet, proceduralSet, exceptionSet ]
|
||||
: [ dummySet, exceptionSet ],
|
||||
options.noSpecificCosmeticFiltering ? discardSets : retrieveSets,
|
||||
1
|
||||
);
|
||||
}
|
||||
// Retrieve filters with an empty hostname
|
||||
this.specificFilters.retrieve(
|
||||
hostname,
|
||||
options.noGenericCosmeticFiltering ? discardSets : retrieveSets,
|
||||
2
|
||||
);
|
||||
|
||||
if ( exceptionSet.size !== 0 ) {
|
||||
out.exceptionFilters = Array.from(exceptionSet);
|
||||
|
|
|
@ -27,7 +27,6 @@ import logger from './logger.js';
|
|||
import µb from './background.js';
|
||||
import { sessionFirewall } from './filtering-engines.js';
|
||||
import { StaticExtFilteringHostnameDB } from './static-ext-filtering-db.js';
|
||||
import * as sfp from './static-filtering-parser.js';
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
|
@ -315,9 +314,6 @@ htmlFilteringEngine.freeze = function() {
|
|||
|
||||
htmlFilteringEngine.compile = function(parser, writer) {
|
||||
const isException = parser.isException();
|
||||
const root = parser.getBranchFromType(sfp.NODE_TYPE_EXT_PATTERN_HTML);
|
||||
const headerName = parser.getNodeString(root);
|
||||
|
||||
const { raw, compiled } = parser.result;
|
||||
if ( compiled === undefined ) {
|
||||
const who = writer.properties.get('name') || '?';
|
||||
|
@ -380,19 +376,13 @@ htmlFilteringEngine.retrieve = function(details) {
|
|||
const plains = new Set();
|
||||
const procedurals = new Set();
|
||||
const exceptions = new Set();
|
||||
const retrieveSets = [ plains, exceptions, procedurals, exceptions ];
|
||||
|
||||
filterDB.retrieve(
|
||||
hostname,
|
||||
[ plains, exceptions, procedurals, exceptions ]
|
||||
);
|
||||
filterDB.retrieve(hostname, retrieveSets);
|
||||
const entity = details.entity !== ''
|
||||
? `${hostname.slice(0, -details.domain.length)}${details.entity}`
|
||||
: '*';
|
||||
filterDB.retrieve(
|
||||
entity,
|
||||
[ plains, exceptions, procedurals, exceptions ],
|
||||
1
|
||||
);
|
||||
filterDB.retrieve(entity, retrieveSets, 1);
|
||||
|
||||
if ( plains.size === 0 && procedurals.size === 0 ) { return; }
|
||||
|
||||
|
|
|
@ -150,9 +150,14 @@ const fromExtendedFilter = function(details) {
|
|||
}
|
||||
|
||||
const hostnameMatches = hn => {
|
||||
return hn === '' ||
|
||||
reHostname.test(hn) ||
|
||||
reEntity !== undefined && reEntity.test(hn);
|
||||
if ( hn === '' ) { return true; }
|
||||
if ( hn.charCodeAt(0) === 0x2F /* / */ ) {
|
||||
return (new RegExp(hn.slice(1,-1))).test(hostname);
|
||||
}
|
||||
if ( reHostname.test(hn) ) { return true; }
|
||||
if ( reEntity === undefined ) { return false; }
|
||||
if ( reEntity.test(hn) ) { return true; }
|
||||
return false;
|
||||
};
|
||||
|
||||
const response = Object.create(null);
|
||||
|
|
|
@ -29,6 +29,8 @@ const StaticExtFilteringHostnameDB = class {
|
|||
this.timer = undefined;
|
||||
this.strToIdMap = new Map();
|
||||
this.hostnameToSlotIdMap = new Map();
|
||||
this.regexToSlotIdMap = new Map();
|
||||
this.regexMap = new Map();
|
||||
// Array of integer pairs
|
||||
this.hostnameSlots = [];
|
||||
// Array of strings (selectors and pseudo-selectors)
|
||||
|
@ -51,9 +53,16 @@ const StaticExtFilteringHostnameDB = class {
|
|||
}
|
||||
}
|
||||
const strId = iStr << this.nBits | bits;
|
||||
let iHn = this.hostnameToSlotIdMap.get(hn);
|
||||
const hnIsNotRegex = hn.charCodeAt(0) !== 0x2F /* / */;
|
||||
let iHn = hnIsNotRegex
|
||||
? this.hostnameToSlotIdMap.get(hn)
|
||||
: this.regexToSlotIdMap.get(hn);
|
||||
if ( iHn === undefined ) {
|
||||
if ( hnIsNotRegex ) {
|
||||
this.hostnameToSlotIdMap.set(hn, this.hostnameSlots.length);
|
||||
} else {
|
||||
this.regexToSlotIdMap.set(hn, this.hostnameSlots.length);
|
||||
}
|
||||
this.hostnameSlots.push(strId, 0);
|
||||
return;
|
||||
}
|
||||
|
@ -67,9 +76,11 @@ const StaticExtFilteringHostnameDB = class {
|
|||
|
||||
clear() {
|
||||
this.hostnameToSlotIdMap.clear();
|
||||
this.regexToSlotIdMap.clear();
|
||||
this.hostnameSlots.length = 0;
|
||||
this.strSlots.length = 0;
|
||||
this.strToIdMap.clear();
|
||||
this.regexMap.clear();
|
||||
this.size = 0;
|
||||
}
|
||||
|
||||
|
@ -92,39 +103,55 @@ const StaticExtFilteringHostnameDB = class {
|
|||
);
|
||||
}
|
||||
|
||||
// modifiers = 1: return only specific items
|
||||
// modifiers = 2: return only generic items
|
||||
// modifiers = 0: all items
|
||||
// modifiers = 1: only specific items
|
||||
// modifiers = 2: only generic items
|
||||
// modifiers = 3: only regex-based items
|
||||
//
|
||||
retrieve(hostname, out, modifiers = 0) {
|
||||
if ( modifiers === 2 ) {
|
||||
hostname = '';
|
||||
}
|
||||
let hn = hostname;
|
||||
if ( modifiers === 2 ) { hn = ''; }
|
||||
const mask = out.length - 1; // out.length must be power of two
|
||||
for (;;) {
|
||||
let iHn = this.hostnameToSlotIdMap.get(hostname);
|
||||
let iHn = this.hostnameToSlotIdMap.get(hn);
|
||||
if ( iHn !== undefined ) {
|
||||
do {
|
||||
const strId = this.hostnameSlots[iHn+0];
|
||||
out[strId & mask].add(
|
||||
this.strSlots[strId >>> this.nBits]
|
||||
);
|
||||
out[strId & mask].add(this.strSlots[strId >>> this.nBits]);
|
||||
iHn = this.hostnameSlots[iHn+1];
|
||||
} while ( iHn !== 0 );
|
||||
}
|
||||
if ( hostname === '' ) { break; }
|
||||
const pos = hostname.indexOf('.');
|
||||
if ( hn === '' ) { break; }
|
||||
const pos = hn.indexOf('.');
|
||||
if ( pos === -1 ) {
|
||||
if ( modifiers === 1 ) { break; }
|
||||
hostname = '';
|
||||
hn = '';
|
||||
} else {
|
||||
hostname = hostname.slice(pos + 1);
|
||||
hn = hn.slice(pos + 1);
|
||||
}
|
||||
}
|
||||
if ( modifiers !== 0 && modifiers !== 3 ) { return; }
|
||||
// TODO: consider using a combined regex to test once for whether
|
||||
// iterating is worth it.
|
||||
for ( const restr of this.regexToSlotIdMap.keys() ) {
|
||||
let re = this.regexMap.get(restr);
|
||||
if ( re === undefined ) {
|
||||
this.regexMap.set(restr, (re = new RegExp(restr.slice(1,-1))));
|
||||
}
|
||||
if ( re.test(hostname) === false ) { continue; }
|
||||
let iHn = this.regexToSlotIdMap.get(restr);
|
||||
do {
|
||||
const strId = this.hostnameSlots[iHn+0];
|
||||
out[strId & mask].add(this.strSlots[strId >>> this.nBits]);
|
||||
iHn = this.hostnameSlots[iHn+1];
|
||||
} while ( iHn !== 0 );
|
||||
}
|
||||
}
|
||||
|
||||
toSelfie() {
|
||||
return {
|
||||
hostnameToSlotIdMap: Array.from(this.hostnameToSlotIdMap),
|
||||
regexToSlotIdMap: Array.from(this.regexToSlotIdMap),
|
||||
hostnameSlots: this.hostnameSlots,
|
||||
strSlots: this.strSlots,
|
||||
size: this.size
|
||||
|
@ -134,6 +161,10 @@ const StaticExtFilteringHostnameDB = class {
|
|||
fromSelfie(selfie) {
|
||||
if ( selfie === undefined ) { return; }
|
||||
this.hostnameToSlotIdMap = new Map(selfie.hostnameToSlotIdMap);
|
||||
// Regex-based lookup available in uBO 1.47.0 and above
|
||||
if ( Array.isArray(selfie.regexToSlotIdMap) ) {
|
||||
this.regexToSlotIdMap = new Map(selfie.regexToSlotIdMap);
|
||||
}
|
||||
this.hostnameSlots = selfie.hostnameSlots;
|
||||
this.strSlots = selfie.strSlots;
|
||||
this.size = selfie.size;
|
||||
|
|
|
@ -1066,8 +1066,7 @@ export class AstFilterParser {
|
|||
realBad = true;
|
||||
break;
|
||||
case NODE_TYPE_NET_OPTION_NAME_WEBRTC:
|
||||
bad = true;
|
||||
realBad = isNegated || hasValue;
|
||||
realBad = true;
|
||||
break;
|
||||
case NODE_TYPE_NET_PATTERN:
|
||||
realBad = this.hasOptions() === false &&
|
||||
|
@ -1784,7 +1783,7 @@ export class AstFilterParser {
|
|||
);
|
||||
this.addFlags(AST_FLAG_HAS_OPTIONS);
|
||||
this.addNodeToRegister(NODE_TYPE_EXT_OPTIONS, next);
|
||||
this.linkDown(next, this.parseDomainList(next, ',', 0b01110));
|
||||
this.linkDown(next, this.parseDomainList(next, ',', 0b11110));
|
||||
prev = this.linkRight(prev, next);
|
||||
}
|
||||
next = this.allocTypedNode(
|
||||
|
|
Loading…
Reference in New Issue