Add support for regex-based values as target domain for static extended filters

Related discussion:
- https://github.com/uBlockOrigin/uBlock-issues/discussions/2234

Example of usage:

    /img[a-z]{3,5}\.buzz/##+js(nowoif)

Use sparingly, when no other solution is practical from a maintenance point
of view -- keeping in mind that uBO has to iterate through all the regex-based
values, unlike plain hostname or entity-based values which are mere lookups.

Related commit:
- b1de8d3fe4
This commit is contained in:
Raymond Hill 2023-01-31 14:15:13 -05:00
parent c455490cf1
commit 81498474d6
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
5 changed files with 73 additions and 46 deletions

View File

@ -811,31 +811,33 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
} }
// Retrieve filters with a non-empty hostname // Retrieve filters with a non-empty hostname
const retrieveSets = [ specificSet, exceptionSet, proceduralSet, exceptionSet ];
const discardSets = [ dummySet, exceptionSet ];
this.specificFilters.retrieve( this.specificFilters.retrieve(
hostname, hostname,
options.noSpecificCosmeticFiltering !== true options.noSpecificCosmeticFiltering ? discardSets : retrieveSets,
? [ specificSet, exceptionSet, proceduralSet, exceptionSet ]
: [ dummySet, exceptionSet ],
1 1
); );
// Retrieve filters with an empty hostname // Retrieve filters with a regex-based hostname value
this.specificFilters.retrieve( this.specificFilters.retrieve(
hostname, hostname,
options.noGenericCosmeticFiltering !== true options.noSpecificCosmeticFiltering ? discardSets : retrieveSets,
? [ specificSet, exceptionSet, proceduralSet, exceptionSet ] 3
: [ dummySet, exceptionSet ],
2
); );
// Retrieve filters with a non-empty entity // Retrieve filters with a entity-based hostname value
if ( request.entity !== '' ) { if ( request.entity !== '' ) {
this.specificFilters.retrieve( this.specificFilters.retrieve(
`${hostname.slice(0, -request.domain.length)}${request.entity}`, `${hostname.slice(0, -request.domain.length)}${request.entity}`,
options.noSpecificCosmeticFiltering !== true options.noSpecificCosmeticFiltering ? discardSets : retrieveSets,
? [ specificSet, exceptionSet, proceduralSet, exceptionSet ]
: [ dummySet, exceptionSet ],
1 1
); );
} }
// Retrieve filters with an empty hostname
this.specificFilters.retrieve(
hostname,
options.noGenericCosmeticFiltering ? discardSets : retrieveSets,
2
);
if ( exceptionSet.size !== 0 ) { if ( exceptionSet.size !== 0 ) {
out.exceptionFilters = Array.from(exceptionSet); out.exceptionFilters = Array.from(exceptionSet);

View File

@ -27,7 +27,6 @@ import logger from './logger.js';
import µb from './background.js'; import µb from './background.js';
import { sessionFirewall } from './filtering-engines.js'; import { sessionFirewall } from './filtering-engines.js';
import { StaticExtFilteringHostnameDB } from './static-ext-filtering-db.js'; import { StaticExtFilteringHostnameDB } from './static-ext-filtering-db.js';
import * as sfp from './static-filtering-parser.js';
/******************************************************************************/ /******************************************************************************/
@ -315,9 +314,6 @@ htmlFilteringEngine.freeze = function() {
htmlFilteringEngine.compile = function(parser, writer) { htmlFilteringEngine.compile = function(parser, writer) {
const isException = parser.isException(); const isException = parser.isException();
const root = parser.getBranchFromType(sfp.NODE_TYPE_EXT_PATTERN_HTML);
const headerName = parser.getNodeString(root);
const { raw, compiled } = parser.result; const { raw, compiled } = parser.result;
if ( compiled === undefined ) { if ( compiled === undefined ) {
const who = writer.properties.get('name') || '?'; const who = writer.properties.get('name') || '?';
@ -380,19 +376,13 @@ htmlFilteringEngine.retrieve = function(details) {
const plains = new Set(); const plains = new Set();
const procedurals = new Set(); const procedurals = new Set();
const exceptions = new Set(); const exceptions = new Set();
const retrieveSets = [ plains, exceptions, procedurals, exceptions ];
filterDB.retrieve( filterDB.retrieve(hostname, retrieveSets);
hostname,
[ plains, exceptions, procedurals, exceptions ]
);
const entity = details.entity !== '' const entity = details.entity !== ''
? `${hostname.slice(0, -details.domain.length)}${details.entity}` ? `${hostname.slice(0, -details.domain.length)}${details.entity}`
: '*'; : '*';
filterDB.retrieve( filterDB.retrieve(entity, retrieveSets, 1);
entity,
[ plains, exceptions, procedurals, exceptions ],
1
);
if ( plains.size === 0 && procedurals.size === 0 ) { return; } if ( plains.size === 0 && procedurals.size === 0 ) { return; }

View File

@ -150,9 +150,14 @@ const fromExtendedFilter = function(details) {
} }
const hostnameMatches = hn => { const hostnameMatches = hn => {
return hn === '' || if ( hn === '' ) { return true; }
reHostname.test(hn) || if ( hn.charCodeAt(0) === 0x2F /* / */ ) {
reEntity !== undefined && reEntity.test(hn); return (new RegExp(hn.slice(1,-1))).test(hostname);
}
if ( reHostname.test(hn) ) { return true; }
if ( reEntity === undefined ) { return false; }
if ( reEntity.test(hn) ) { return true; }
return false;
}; };
const response = Object.create(null); const response = Object.create(null);

View File

@ -29,6 +29,8 @@ const StaticExtFilteringHostnameDB = class {
this.timer = undefined; this.timer = undefined;
this.strToIdMap = new Map(); this.strToIdMap = new Map();
this.hostnameToSlotIdMap = new Map(); this.hostnameToSlotIdMap = new Map();
this.regexToSlotIdMap = new Map();
this.regexMap = new Map();
// Array of integer pairs // Array of integer pairs
this.hostnameSlots = []; this.hostnameSlots = [];
// Array of strings (selectors and pseudo-selectors) // Array of strings (selectors and pseudo-selectors)
@ -51,9 +53,16 @@ const StaticExtFilteringHostnameDB = class {
} }
} }
const strId = iStr << this.nBits | bits; const strId = iStr << this.nBits | bits;
let iHn = this.hostnameToSlotIdMap.get(hn); const hnIsNotRegex = hn.charCodeAt(0) !== 0x2F /* / */;
let iHn = hnIsNotRegex
? this.hostnameToSlotIdMap.get(hn)
: this.regexToSlotIdMap.get(hn);
if ( iHn === undefined ) { if ( iHn === undefined ) {
if ( hnIsNotRegex ) {
this.hostnameToSlotIdMap.set(hn, this.hostnameSlots.length); this.hostnameToSlotIdMap.set(hn, this.hostnameSlots.length);
} else {
this.regexToSlotIdMap.set(hn, this.hostnameSlots.length);
}
this.hostnameSlots.push(strId, 0); this.hostnameSlots.push(strId, 0);
return; return;
} }
@ -67,9 +76,11 @@ const StaticExtFilteringHostnameDB = class {
clear() { clear() {
this.hostnameToSlotIdMap.clear(); this.hostnameToSlotIdMap.clear();
this.regexToSlotIdMap.clear();
this.hostnameSlots.length = 0; this.hostnameSlots.length = 0;
this.strSlots.length = 0; this.strSlots.length = 0;
this.strToIdMap.clear(); this.strToIdMap.clear();
this.regexMap.clear();
this.size = 0; this.size = 0;
} }
@ -92,39 +103,55 @@ const StaticExtFilteringHostnameDB = class {
); );
} }
// modifiers = 1: return only specific items // modifiers = 0: all items
// modifiers = 2: return only generic items // modifiers = 1: only specific items
// modifiers = 2: only generic items
// modifiers = 3: only regex-based items
// //
retrieve(hostname, out, modifiers = 0) { retrieve(hostname, out, modifiers = 0) {
if ( modifiers === 2 ) { let hn = hostname;
hostname = ''; if ( modifiers === 2 ) { hn = ''; }
}
const mask = out.length - 1; // out.length must be power of two const mask = out.length - 1; // out.length must be power of two
for (;;) { for (;;) {
let iHn = this.hostnameToSlotIdMap.get(hostname); let iHn = this.hostnameToSlotIdMap.get(hn);
if ( iHn !== undefined ) { if ( iHn !== undefined ) {
do { do {
const strId = this.hostnameSlots[iHn+0]; const strId = this.hostnameSlots[iHn+0];
out[strId & mask].add( out[strId & mask].add(this.strSlots[strId >>> this.nBits]);
this.strSlots[strId >>> this.nBits]
);
iHn = this.hostnameSlots[iHn+1]; iHn = this.hostnameSlots[iHn+1];
} while ( iHn !== 0 ); } while ( iHn !== 0 );
} }
if ( hostname === '' ) { break; } if ( hn === '' ) { break; }
const pos = hostname.indexOf('.'); const pos = hn.indexOf('.');
if ( pos === -1 ) { if ( pos === -1 ) {
if ( modifiers === 1 ) { break; } if ( modifiers === 1 ) { break; }
hostname = ''; hn = '';
} else { } else {
hostname = hostname.slice(pos + 1); hn = hn.slice(pos + 1);
} }
} }
if ( modifiers !== 0 && modifiers !== 3 ) { return; }
// TODO: consider using a combined regex to test once for whether
// iterating is worth it.
for ( const restr of this.regexToSlotIdMap.keys() ) {
let re = this.regexMap.get(restr);
if ( re === undefined ) {
this.regexMap.set(restr, (re = new RegExp(restr.slice(1,-1))));
}
if ( re.test(hostname) === false ) { continue; }
let iHn = this.regexToSlotIdMap.get(restr);
do {
const strId = this.hostnameSlots[iHn+0];
out[strId & mask].add(this.strSlots[strId >>> this.nBits]);
iHn = this.hostnameSlots[iHn+1];
} while ( iHn !== 0 );
}
} }
toSelfie() { toSelfie() {
return { return {
hostnameToSlotIdMap: Array.from(this.hostnameToSlotIdMap), hostnameToSlotIdMap: Array.from(this.hostnameToSlotIdMap),
regexToSlotIdMap: Array.from(this.regexToSlotIdMap),
hostnameSlots: this.hostnameSlots, hostnameSlots: this.hostnameSlots,
strSlots: this.strSlots, strSlots: this.strSlots,
size: this.size size: this.size
@ -134,6 +161,10 @@ const StaticExtFilteringHostnameDB = class {
fromSelfie(selfie) { fromSelfie(selfie) {
if ( selfie === undefined ) { return; } if ( selfie === undefined ) { return; }
this.hostnameToSlotIdMap = new Map(selfie.hostnameToSlotIdMap); this.hostnameToSlotIdMap = new Map(selfie.hostnameToSlotIdMap);
// Regex-based lookup available in uBO 1.47.0 and above
if ( Array.isArray(selfie.regexToSlotIdMap) ) {
this.regexToSlotIdMap = new Map(selfie.regexToSlotIdMap);
}
this.hostnameSlots = selfie.hostnameSlots; this.hostnameSlots = selfie.hostnameSlots;
this.strSlots = selfie.strSlots; this.strSlots = selfie.strSlots;
this.size = selfie.size; this.size = selfie.size;

View File

@ -1066,8 +1066,7 @@ export class AstFilterParser {
realBad = true; realBad = true;
break; break;
case NODE_TYPE_NET_OPTION_NAME_WEBRTC: case NODE_TYPE_NET_OPTION_NAME_WEBRTC:
bad = true; realBad = true;
realBad = isNegated || hasValue;
break; break;
case NODE_TYPE_NET_PATTERN: case NODE_TYPE_NET_PATTERN:
realBad = this.hasOptions() === false && realBad = this.hasOptions() === false &&
@ -1784,7 +1783,7 @@ export class AstFilterParser {
); );
this.addFlags(AST_FLAG_HAS_OPTIONS); this.addFlags(AST_FLAG_HAS_OPTIONS);
this.addNodeToRegister(NODE_TYPE_EXT_OPTIONS, next); this.addNodeToRegister(NODE_TYPE_EXT_OPTIONS, next);
this.linkDown(next, this.parseDomainList(next, ',', 0b01110)); this.linkDown(next, this.parseDomainList(next, ',', 0b11110));
prev = this.linkRight(prev, next); prev = this.linkRight(prev, next);
} }
next = this.allocTypedNode( next = this.allocTypedNode(