Add support for regex-based values as target domain for static extended filters

Related discussion:
- https://github.com/uBlockOrigin/uBlock-issues/discussions/2234

Example of usage:

    /img[a-z]{3,5}\.buzz/##+js(nowoif)

Use sparingly, when no other solution is practical from a maintenance point
of view -- keeping in mind that uBO has to iterate through all the regex-based
values, unlike plain hostname or entity-based values which are mere lookups.

Related commit:
- b1de8d3fe4
This commit is contained in:
Raymond Hill 2023-01-31 14:15:13 -05:00
parent c455490cf1
commit 81498474d6
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
5 changed files with 73 additions and 46 deletions

View File

@ -811,31 +811,33 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
}
// Retrieve filters with a non-empty hostname
const retrieveSets = [ specificSet, exceptionSet, proceduralSet, exceptionSet ];
const discardSets = [ dummySet, exceptionSet ];
this.specificFilters.retrieve(
hostname,
options.noSpecificCosmeticFiltering !== true
? [ specificSet, exceptionSet, proceduralSet, exceptionSet ]
: [ dummySet, exceptionSet ],
options.noSpecificCosmeticFiltering ? discardSets : retrieveSets,
1
);
// Retrieve filters with an empty hostname
// Retrieve filters with a regex-based hostname value
this.specificFilters.retrieve(
hostname,
options.noGenericCosmeticFiltering !== true
? [ specificSet, exceptionSet, proceduralSet, exceptionSet ]
: [ dummySet, exceptionSet ],
2
options.noSpecificCosmeticFiltering ? discardSets : retrieveSets,
3
);
// Retrieve filters with a non-empty entity
// Retrieve filters with an entity-based hostname value
if ( request.entity !== '' ) {
this.specificFilters.retrieve(
`${hostname.slice(0, -request.domain.length)}${request.entity}`,
options.noSpecificCosmeticFiltering !== true
? [ specificSet, exceptionSet, proceduralSet, exceptionSet ]
: [ dummySet, exceptionSet ],
options.noSpecificCosmeticFiltering ? discardSets : retrieveSets,
1
);
}
// Retrieve filters with an empty hostname
this.specificFilters.retrieve(
hostname,
options.noGenericCosmeticFiltering ? discardSets : retrieveSets,
2
);
if ( exceptionSet.size !== 0 ) {
out.exceptionFilters = Array.from(exceptionSet);

View File

@ -27,7 +27,6 @@ import logger from './logger.js';
import µb from './background.js';
import { sessionFirewall } from './filtering-engines.js';
import { StaticExtFilteringHostnameDB } from './static-ext-filtering-db.js';
import * as sfp from './static-filtering-parser.js';
/******************************************************************************/
@ -315,9 +314,6 @@ htmlFilteringEngine.freeze = function() {
htmlFilteringEngine.compile = function(parser, writer) {
const isException = parser.isException();
const root = parser.getBranchFromType(sfp.NODE_TYPE_EXT_PATTERN_HTML);
const headerName = parser.getNodeString(root);
const { raw, compiled } = parser.result;
if ( compiled === undefined ) {
const who = writer.properties.get('name') || '?';
@ -380,19 +376,13 @@ htmlFilteringEngine.retrieve = function(details) {
const plains = new Set();
const procedurals = new Set();
const exceptions = new Set();
const retrieveSets = [ plains, exceptions, procedurals, exceptions ];
filterDB.retrieve(
hostname,
[ plains, exceptions, procedurals, exceptions ]
);
filterDB.retrieve(hostname, retrieveSets);
const entity = details.entity !== ''
? `${hostname.slice(0, -details.domain.length)}${details.entity}`
: '*';
filterDB.retrieve(
entity,
[ plains, exceptions, procedurals, exceptions ],
1
);
filterDB.retrieve(entity, retrieveSets, 1);
if ( plains.size === 0 && procedurals.size === 0 ) { return; }

View File

@ -150,9 +150,14 @@ const fromExtendedFilter = function(details) {
}
const hostnameMatches = hn => {
return hn === '' ||
reHostname.test(hn) ||
reEntity !== undefined && reEntity.test(hn);
if ( hn === '' ) { return true; }
if ( hn.charCodeAt(0) === 0x2F /* / */ ) {
return (new RegExp(hn.slice(1,-1))).test(hostname);
}
if ( reHostname.test(hn) ) { return true; }
if ( reEntity === undefined ) { return false; }
if ( reEntity.test(hn) ) { return true; }
return false;
};
const response = Object.create(null);

View File

@ -29,6 +29,8 @@ const StaticExtFilteringHostnameDB = class {
this.timer = undefined;
this.strToIdMap = new Map();
this.hostnameToSlotIdMap = new Map();
this.regexToSlotIdMap = new Map();
this.regexMap = new Map();
// Array of integer pairs
this.hostnameSlots = [];
// Array of strings (selectors and pseudo-selectors)
@ -51,9 +53,16 @@ const StaticExtFilteringHostnameDB = class {
}
}
const strId = iStr << this.nBits | bits;
let iHn = this.hostnameToSlotIdMap.get(hn);
const hnIsNotRegex = hn.charCodeAt(0) !== 0x2F /* / */;
let iHn = hnIsNotRegex
? this.hostnameToSlotIdMap.get(hn)
: this.regexToSlotIdMap.get(hn);
if ( iHn === undefined ) {
if ( hnIsNotRegex ) {
this.hostnameToSlotIdMap.set(hn, this.hostnameSlots.length);
} else {
this.regexToSlotIdMap.set(hn, this.hostnameSlots.length);
}
this.hostnameSlots.push(strId, 0);
return;
}
@ -67,9 +76,11 @@ const StaticExtFilteringHostnameDB = class {
// Reset the database to its empty state: drops every hostname-keyed and
// regex-keyed slot entry, all stored strings, and the cached RegExp objects.
clear() {
this.hostnameToSlotIdMap.clear();
this.regexToSlotIdMap.clear();
// Truncate the slot arrays in place rather than reassigning them
this.hostnameSlots.length = 0;
this.strSlots.length = 0;
this.strToIdMap.clear();
// Compiled regexes are keyed by the same strings as regexToSlotIdMap,
// so they must be discarded together with it
this.regexMap.clear();
this.size = 0;
}
@ -92,39 +103,55 @@ const StaticExtFilteringHostnameDB = class {
);
}
// modifiers = 1: return only specific items
// modifiers = 2: return only generic items
// modifiers = 0: all items
// modifiers = 1: only specific items
// modifiers = 2: only generic items
// modifiers = 3: only regex-based items
//
retrieve(hostname, out, modifiers = 0) {
if ( modifiers === 2 ) {
hostname = '';
}
let hn = hostname;
if ( modifiers === 2 ) { hn = ''; }
const mask = out.length - 1; // out.length must be power of two
for (;;) {
let iHn = this.hostnameToSlotIdMap.get(hostname);
let iHn = this.hostnameToSlotIdMap.get(hn);
if ( iHn !== undefined ) {
do {
const strId = this.hostnameSlots[iHn+0];
out[strId & mask].add(
this.strSlots[strId >>> this.nBits]
);
out[strId & mask].add(this.strSlots[strId >>> this.nBits]);
iHn = this.hostnameSlots[iHn+1];
} while ( iHn !== 0 );
}
if ( hostname === '' ) { break; }
const pos = hostname.indexOf('.');
if ( hn === '' ) { break; }
const pos = hn.indexOf('.');
if ( pos === -1 ) {
if ( modifiers === 1 ) { break; }
hostname = '';
hn = '';
} else {
hostname = hostname.slice(pos + 1);
hn = hn.slice(pos + 1);
}
}
if ( modifiers !== 0 && modifiers !== 3 ) { return; }
// TODO: consider using a combined regex to test once for whether
// iterating is worth it.
for ( const restr of this.regexToSlotIdMap.keys() ) {
let re = this.regexMap.get(restr);
if ( re === undefined ) {
this.regexMap.set(restr, (re = new RegExp(restr.slice(1,-1))));
}
if ( re.test(hostname) === false ) { continue; }
let iHn = this.regexToSlotIdMap.get(restr);
do {
const strId = this.hostnameSlots[iHn+0];
out[strId & mask].add(this.strSlots[strId >>> this.nBits]);
iHn = this.hostnameSlots[iHn+1];
} while ( iHn !== 0 );
}
}
toSelfie() {
return {
hostnameToSlotIdMap: Array.from(this.hostnameToSlotIdMap),
regexToSlotIdMap: Array.from(this.regexToSlotIdMap),
hostnameSlots: this.hostnameSlots,
strSlots: this.strSlots,
size: this.size
@ -134,6 +161,10 @@ const StaticExtFilteringHostnameDB = class {
fromSelfie(selfie) {
if ( selfie === undefined ) { return; }
this.hostnameToSlotIdMap = new Map(selfie.hostnameToSlotIdMap);
// Regex-based lookup available in uBO 1.47.0 and above
if ( Array.isArray(selfie.regexToSlotIdMap) ) {
this.regexToSlotIdMap = new Map(selfie.regexToSlotIdMap);
}
this.hostnameSlots = selfie.hostnameSlots;
this.strSlots = selfie.strSlots;
this.size = selfie.size;

View File

@ -1066,8 +1066,7 @@ export class AstFilterParser {
realBad = true;
break;
case NODE_TYPE_NET_OPTION_NAME_WEBRTC:
bad = true;
realBad = isNegated || hasValue;
realBad = true;
break;
case NODE_TYPE_NET_PATTERN:
realBad = this.hasOptions() === false &&
@ -1784,7 +1783,7 @@ export class AstFilterParser {
);
this.addFlags(AST_FLAG_HAS_OPTIONS);
this.addNodeToRegister(NODE_TYPE_EXT_OPTIONS, next);
this.linkDown(next, this.parseDomainList(next, ',', 0b01110));
this.linkDown(next, this.parseDomainList(next, ',', 0b11110));
prev = this.linkRight(prev, next);
}
next = this.allocTypedNode(