Implement network filter option `replace=`

Reference documentation:
https://adguard.com/kb/general/ad-filtering/create-own-filters/#replace-modifier

This is a network filter option which can only be loaded from a
trusted source.

Since this filter option modifies the response body, it currently
works only in Firefox.

Implemented as discussed with filter list maintainers.
This commit is contained in:
Raymond Hill 2023-11-03 18:59:33 -04:00
parent aeff955667
commit 7c3e060c01
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
7 changed files with 543 additions and 450 deletions

View File

@ -184,8 +184,8 @@ const µBlock = { // jshint ignore:line
// Read-only
systemSettings: {
compiledMagic: 56, // Increase when compiled format changes
selfieMagic: 56, // Increase when selfie format changes
compiledMagic: 57, // Increase when compiled format changes
selfieMagic: 57, // Increase when selfie format changes
},
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501

View File

@ -174,6 +174,7 @@ const loadBenchmarkDataset = (( ) => {
let removeparamCount = 0;
let cspCount = 0;
let permissionsCount = 0;
let replaceCount = 0;
for ( let i = 0; i < requests.length; i++ ) {
const request = requests[i];
fctxt.setURL(request.url);
@ -202,6 +203,9 @@ const loadBenchmarkDataset = (( ) => {
}
}
staticNetFilteringEngine.matchHeaders(fctxt, []);
if ( staticNetFilteringEngine.matchAndFetchModifiers(fctxt, 'replace') ) {
replaceCount += 1;
}
} else if ( redirectEngine !== undefined ) {
if ( staticNetFilteringEngine.redirectRequest(redirectEngine, fctxt) ) {
redirectCount += 1;
@ -222,6 +226,7 @@ const loadBenchmarkDataset = (( ) => {
`\tremoveparam=: ${removeparamCount}`,
`\tcsp=: ${cspCount}`,
`\tpermissions=: ${permissionsCount}`,
`\treplace=: ${replaceCount}`,
];
const s = output.join('\n');
console.info(s);

View File

@ -175,6 +175,7 @@ const FilteringContext = class {
fromFilteringContext(other) {
this.realm = other.realm;
this.id = other.id;
this.type = other.type;
this.method = other.method;
this.url = other.url;

View File

@ -429,15 +429,15 @@ htmlFilteringEngine.retrieve = function(details) {
}
};
htmlFilteringEngine.apply = function(doc, details) {
htmlFilteringEngine.apply = function(doc, details, selectors) {
docRegister = doc;
let modified = false;
for ( const selector of details.selectors.plains ) {
for ( const selector of selectors.plains ) {
if ( applyCSSSelector(details, selector) ) {
modified = true;
}
}
for ( const selector of details.selectors.procedurals ) {
for ( const selector of selectors.procedurals ) {
if ( applyProceduralSelector(details, selector) ) {
modified = true;
}

View File

@ -187,6 +187,7 @@ export const NODE_TYPE_NET_OPTION_NAME_POPUP = iota++;
export const NODE_TYPE_NET_OPTION_NAME_REDIRECT = iota++;
export const NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE = iota++;
export const NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM = iota++;
export const NODE_TYPE_NET_OPTION_NAME_REPLACE = iota++;
export const NODE_TYPE_NET_OPTION_NAME_SCRIPT = iota++;
export const NODE_TYPE_NET_OPTION_NAME_SHIDE = iota++;
export const NODE_TYPE_NET_OPTION_NAME_TO = iota++;
@ -265,6 +266,7 @@ export const nodeTypeFromOptionName = new Map([
/* synonym */ [ 'rewrite', NODE_TYPE_NET_OPTION_NAME_REDIRECT ],
[ 'redirect-rule', NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE ],
[ 'removeparam', NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM ],
[ 'replace', NODE_TYPE_NET_OPTION_NAME_REPLACE ],
/* synonym */ [ 'queryprune', NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM ],
[ 'script', NODE_TYPE_NET_OPTION_NAME_SCRIPT ],
[ 'shide', NODE_TYPE_NET_OPTION_NAME_SHIDE ],
@ -597,9 +599,14 @@ const exCharCodeAt = (s, i) => {
return pos >= 0 ? s.charCodeAt(pos) : -1;
};
// Build a global regex matching a backslash-escaped occurrence of the
// character `c`, i.e. `c` preceded by an odd number of backslashes.
//
// Capture group 1 holds everything matched ahead of the escaping backslash:
// the start of the string or one non-backslash character, followed by any
// number of backslash pairs. The match consumes that text, so a replacement
// string must start with `$1` to put it back (e.g. '$1,' to unescape `\,`).
//
// NOTE(review): since the leading context character is consumed by a match,
// two escaped occurrences directly next to each other (`\,\,`) result in
// only the first one being matched in a single replace() pass.
const toEscapedCharRegex = c => {
    // Neutralize `c` in case it is itself a regex metacharacter.
    const literal = c.replace(/[.*+?^${}()|[\]\\]/g, m => `\\${m}`);
    const leadingContext = String.raw`((?:^|[^\\])(?:\\\\)*)`;
    const escapingBackslash = String.raw`\\`;
    return new RegExp(leadingContext + escapingBackslash + literal, 'g');
};
/******************************************************************************/
class argListParser {
class ArgListParser {
constructor(separatorChar = ',', mustQuote = false) {
this.separatorChar = this.actualSeparatorChar = separatorChar;
this.separatorCode = this.actualSeparatorCode = separatorChar.charCodeAt(0);
@ -612,10 +619,10 @@ class argListParser {
this.reWhitespaceStart = /^\s+/;
this.reWhitespaceEnd = /\s+$/;
this.reOddTrailingEscape = /(?:^|[^\\])(?:\\\\)*\\$/;
this.reEscapedDoubleQuote = /((?:^|[^\\])(?:\\\\)*)\\"/g;
this.reEscapedSingleQuote = /((?:^|[^\\])(?:\\\\)*)\\'/g;
this.reEscapedBacktick = /((?:^|[^\\])(?:\\\\)*)\\`/g;
this.reEscapedSeparator = new RegExp(`((?:^|[^\\\\])(?:\\\\\\\\)*)\\\\${this.separatorChar}`, 'g');
this.reEscapedDoubleQuote = toEscapedCharRegex('"');
this.reEscapedSingleQuote = toEscapedCharRegex("'");
this.reEscapedBacktick = toEscapedCharRegex('`');
this.reEscapedSeparator = toEscapedCharRegex(this.separatorChar);
this.unescapedSeparator = `$1${this.separatorChar}`;
}
nextArg(pattern, beg = 0) {
@ -871,7 +878,7 @@ export class AstFilterParser {
this.rePlainEntity = /^(?:[\da-z][\da-z_-]*\.)+\*$/;
this.reHostsSink = /^[\w%.:\[\]-]+\s+/;
this.reHostsRedirect = /(?:0\.0\.0\.0|broadcasthost|local|localhost(?:\.localdomain)?|ip6-\w+)(?:[^\w.-]|$)/;
this.reNetOptionComma = /,(?!\d*\})/g;
this.reNetOptionComma = /,(?:~?[13a-z-]+(?:=.*?)?|_+)(?:,|$)/;
this.rePointlessLeftAnchor = /^\|\|?\*+/;
this.reIsTokenChar = /^[%0-9A-Za-z]/;
this.rePointlessLeadingWildcards = /^(\*+)[^%0-9A-Za-z\u{a0}-\u{10FFFF}]/u;
@ -898,7 +905,7 @@ export class AstFilterParser {
this.reGoodRegexToken = /[^\x01%0-9A-Za-z][%0-9A-Za-z]{7,}|[^\x01%0-9A-Za-z][%0-9A-Za-z]{1,6}[^\x01%0-9A-Za-z]/;
this.reBadCSP = /(?:=|;)\s*report-(?:to|uri)\b/;
this.reNoopOption = /^_+$/;
this.scriptletArgListParser = new argListParser(',');
this.scriptletArgListParser = new ArgListParser(',');
}
parse(raw) {
@ -1414,6 +1421,7 @@ export class AstFilterParser {
break;
case NODE_TYPE_NET_OPTION_NAME_REDIRECT:
case NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE:
case NODE_TYPE_NET_OPTION_NAME_REPLACE:
case NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM:
realBad = isNegated || (isException || hasValue) === false ||
modifierType !== 0;
@ -1474,6 +1482,20 @@ export class AstFilterParser {
realBad = abstractTypeCount || behaviorTypeCount || unredirectableTypeCount;
break;
}
case NODE_TYPE_NET_OPTION_NAME_REPLACE: {
realBad = abstractTypeCount || behaviorTypeCount || unredirectableTypeCount;
if ( realBad ) { break; }
if ( this.options.trustedSource !== true ) {
this.astError = AST_ERROR_UNTRUSTED_SOURCE;
realBad = true;
break;
}
if ( this.interactive ) {
const value = this.getNetOptionValue(NODE_TYPE_NET_OPTION_NAME_REPLACE);
realBad = parseReplaceValue(value) === undefined;
}
break;
}
case NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM:
realBad = abstractTypeCount || behaviorTypeCount || unredirectableTypeCount;
if ( realBad ) { break; }
@ -1959,9 +1981,8 @@ export class AstFilterParser {
}
endOfNetOption(s, beg) {
this.reNetOptionComma.lastIndex = beg;
const match = this.reNetOptionComma.exec(s);
return match !== null ? match.index : s.length;
const match = this.reNetOptionComma.exec(s.slice(beg));
return match !== null ? beg + match.index : s.length;
}
parseNetOption(parent) {
@ -2975,6 +2996,39 @@ export function parseHeaderValue(arg) {
return out;
}
// https://adguard.com/kb/general/ad-filtering/create-own-filters/#replace-modifier
// Parse the value of a `replace=` network filter option, which is expected
// to be of the form `/pattern/replacement/flags`.
//
// Returns an object `{ re, replacement }` where `re` is the RegExp built from
// the pattern and flags, or `undefined` when:
// - the value does not start with `/`;
// - after extracting the second argument, the parser reports
//   `separatorEnd === separatorBeg` (presumably meaning the closing `/` is
//   missing -- confirm against ArgListParser);
// - the pattern/flags are rejected by the RegExp constructor.
export function parseReplaceValue(s) {
    if ( s.charCodeAt(0) !== 0x2F /* / */ ) { return; }
    const { reEscapedComma, reEscapedDollarSign } = parseReplaceValue;
    const parser = new ArgListParser('/');
    // Return the argument most recently delimited by `parser`, normalized
    // when the parser asks for it, with `\$` and `\,` unescaped.
    const currentArg = dollarSubstitute => {
        const raw = s.slice(parser.argBeg, parser.argEnd);
        const arg = parser.transform ? parser.normalizeArg(raw) : raw;
        return arg
            .replace(reEscapedDollarSign, dollarSubstitute)
            .replace(reEscapedComma, '$1,');
    };
    // First argument: the regex pattern, starting right after the leading `/`.
    // NOTE(review): as a replacement string, '$1$$$' expands to group 1
    // followed by two `$` characters, whereas the replacement part below uses
    // '$1$$' (a single `$`) -- confirm the difference is intended.
    parser.nextArg(s, 1);
    const pattern = currentArg('$1$$$');
    // Second argument: the replacement string.
    parser.nextArg(s, parser.separatorEnd);
    if ( parser.separatorEnd === parser.separatorBeg ) { return; }
    const replacement = currentArg('$1$$');
    // Whatever follows the last separator is used as the regex flags.
    const flags = s.slice(parser.separatorEnd);
    try {
        return { re: new RegExp(pattern, flags), replacement };
    } catch(_) {
        // Invalid pattern or flags: fall through, returning undefined.
    }
}
// Matchers for backslash-escaped `$` and `,`, stored as properties of
// parseReplaceValue so that they are created once here rather than on each
// call. parseReplaceValue() reads them back through destructuring.
parseReplaceValue.reEscapedDollarSign = toEscapedCharRegex('$');
parseReplaceValue.reEscapedComma = toEscapedCharRegex(',');
/******************************************************************************/
export const netOptionTokenDescriptors = new Map([
@ -3025,6 +3079,7 @@ export const netOptionTokenDescriptors = new Map([
/* synonym */ [ 'rewrite', { mustAssign: true } ],
[ 'redirect-rule', { mustAssign: true } ],
[ 'removeparam', { } ],
[ 'replace', { mustAssign: true } ],
/* synonym */ [ 'queryprune', { } ],
[ 'script', { canNegate: true } ],
[ 'shide', { } ],

View File

@ -69,23 +69,29 @@ const keyvalStore = typeof vAPI !== 'undefined'
// |+-------------- bit 10: headers-based filters
// +--------------- bit 11-15: unused
const CategoryCount = 1 << 0xb; // shift left to first unused bit
const RealmBitsMask = 0b00000000111;
const ActionBitsMask = 0b00000000011;
const TypeBitsMask = 0b01111100000;
const TypeBitsOffset = 5;
const BlockAction = 0b00000000000;
const AllowAction = 0b00000000001;
const Important = 0b00000000010;
const BlockImportant = BlockAction | Important;
const ModifyAction = 0b00000000100;
const AnyParty = 0b00000000000;
const FirstParty = 0b00000001000;
const ThirdParty = 0b00000010000;
const AllParties = 0b00000011000;
const HEADERS = 0b10000000000;
const BLOCK_REALM = 0b00000000000000000;
const ALLOW_REALM = 0b00000000000000001;
const IMPORTANT_REALM = 0b00000000000000010;
const BLOCKIMPORTANT_REALM = BLOCK_REALM | IMPORTANT_REALM;
const ANYPARTY_REALM = 0b00000000000000000;
const FIRSTPARTY_REALM = 0b00000000000001000;
const THIRDPARTY_REALM = 0b00000000000010000;
const ALLPARTIES_REALM = FIRSTPARTY_REALM | THIRDPARTY_REALM;
const HEADERS_REALM = 0b00000010000000000;
const REDIRECT_REALM = 0b00000100000000000;
const REMOVEPARAM_REALM = 0b00001000000000000;
const CSP_REALM = 0b00010000000000000;
const PERMISSIONS_REALM = 0b00100000000000000;
const URLTRANSFORM_REALM = 0b01000000000000000;
const REPLACE_REALM = 0b10000000000000000;
const MODIFY_REALMS = REDIRECT_REALM | CSP_REALM |
REMOVEPARAM_REALM | PERMISSIONS_REALM |
URLTRANSFORM_REALM | REPLACE_REALM;
const typeNameToTypeValue = {
'no_type': 0 << TypeBitsOffset,
@ -186,6 +192,17 @@ const MODIFIER_TYPE_REMOVEPARAM = 3;
const MODIFIER_TYPE_CSP = 4;
const MODIFIER_TYPE_PERMISSIONS = 5;
const MODIFIER_TYPE_URLTRANSFORM = 6;
const MODIFIER_TYPE_REPLACE = 7;
const modifierBitsFromType = new Map([
[ MODIFIER_TYPE_REDIRECT, REDIRECT_REALM ],
[ MODIFIER_TYPE_REDIRECTRULE, REDIRECT_REALM ],
[ MODIFIER_TYPE_REMOVEPARAM, REMOVEPARAM_REALM ],
[ MODIFIER_TYPE_CSP, CSP_REALM ],
[ MODIFIER_TYPE_PERMISSIONS, PERMISSIONS_REALM ],
[ MODIFIER_TYPE_URLTRANSFORM, URLTRANSFORM_REALM ],
[ MODIFIER_TYPE_REPLACE, REPLACE_REALM ],
]);
const modifierTypeFromName = new Map([
[ 'redirect', MODIFIER_TYPE_REDIRECT ],
@ -194,6 +211,7 @@ const modifierTypeFromName = new Map([
[ 'csp', MODIFIER_TYPE_CSP ],
[ 'permissions', MODIFIER_TYPE_PERMISSIONS ],
[ 'urltransform', MODIFIER_TYPE_URLTRANSFORM ],
[ 'replace', MODIFIER_TYPE_REPLACE ],
]);
const modifierNameFromType = new Map([
@ -203,6 +221,7 @@ const modifierNameFromType = new Map([
[ MODIFIER_TYPE_CSP, 'csp' ],
[ MODIFIER_TYPE_PERMISSIONS, 'permissions' ],
[ MODIFIER_TYPE_URLTRANSFORM, 'urltransform' ],
[ MODIFIER_TYPE_REPLACE, 'replace' ],
]);
//const typeValueFromCatBits = catBits => (catBits >>> TypeBitsOffset) & 0b11111;
@ -339,7 +358,7 @@ class LogData {
this.raw = this.regex = '';
return;
}
this.result = (categoryBits & AllowAction) === 0 ? 1 : 2;
this.result = (categoryBits & ALLOW_REALM) === 0 ? 1 : 2;
const pattern = [];
const regex = [];
const options = [];
@ -356,9 +375,9 @@ class LogData {
isRegex: false,
};
filterLogData(iunit, logData);
if ( (categoryBits & ThirdParty) !== 0 ) {
if ( (categoryBits & THIRDPARTY_REALM) !== 0 ) {
logData.options.unshift('3p');
} else if ( (categoryBits & FirstParty) !== 0 ) {
} else if ( (categoryBits & FIRSTPARTY_REALM) !== 0 ) {
logData.options.unshift('1p');
}
const type = categoryBits & TypeBitsMask;
@ -373,7 +392,7 @@ class LogData {
) {
raw += '*';
}
if ( (categoryBits & AllowAction) !== 0 ) {
if ( (categoryBits & ALLOW_REALM) !== 0 ) {
raw = '@@' + raw;
}
if ( denyallow.length !== 0 ) {
@ -2171,7 +2190,7 @@ class FilterModifierResult {
}
get result() {
return (this.bits & AllowAction) === 0 ? 1 : 2;
return (this.bits & ALLOW_REALM) === 0 ? 1 : 2;
}
get value() {
@ -3188,6 +3207,7 @@ class FilterCompiler {
[ sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE, MODIFIER_TYPE_REDIRECTRULE ],
[ sfp.NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM, MODIFIER_TYPE_REMOVEPARAM ],
[ sfp.NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM, MODIFIER_TYPE_URLTRANSFORM ],
[ sfp.NODE_TYPE_NET_OPTION_NAME_REPLACE, MODIFIER_TYPE_REPLACE ],
]);
// These top 100 "bad tokens" are collated using the "miss" histogram
// from tokenHistograms(). The "score" is their occurrence among the
@ -3299,7 +3319,7 @@ class FilterCompiler {
}
reset() {
this.action = BlockAction;
this.action = BLOCK_REALM;
// anchor: bit vector
// 0000 (0x0): no anchoring
// 0001 (0x1): anchored to the end of the URL.
@ -3314,7 +3334,7 @@ class FilterCompiler {
this.modifyValue = undefined;
this.pattern = '';
this.patternMatchCase = false;
this.party = AnyParty;
this.party = ANYPARTY_REALM;
this.optionUnitBits = 0;
this.fromDomainOpt = '';
this.toDomainOpt = '';
@ -3395,7 +3415,7 @@ class FilterCompiler {
if ( not ) {
firstParty = !firstParty;
}
this.party |= firstParty ? FirstParty : ThirdParty;
this.party |= firstParty ? FIRSTPARTY_REALM : THIRDPARTY_REALM;
}
processHostnameList(iter, out = []) {
@ -3421,7 +3441,7 @@ class FilterCompiler {
processCspOption(value) {
this.modifyType = MODIFIER_TYPE_CSP;
this.modifyValue = value || '';
this.optionUnitBits |= this.CSP_BIT;
this.optionUnitBits |= MODIFY_BIT;
return true;
}
@ -3435,7 +3455,7 @@ class FilterCompiler {
parser.getNetFilterDenyallowOptionIterator(),
);
if ( this.denyallowOpt === '' ) { return false; }
this.optionUnitBits |= this.DENYALLOW_BIT;
this.optionUnitBits |= DENYALLOW_BIT;
break;
case sfp.NODE_TYPE_NET_OPTION_NAME_FROM:
this.fromDomainOpt = this.processHostnameList(
@ -3443,58 +3463,44 @@ class FilterCompiler {
this.fromDomainOptList
);
if ( this.fromDomainOpt === '' ) { return false; }
this.optionUnitBits |= this.FROM_BIT;
this.optionUnitBits |= FROM_BIT;
break;
case sfp.NODE_TYPE_NET_OPTION_NAME_HEADER: {
this.headerOpt = parser.getNetOptionValue(id) || '';
this.optionUnitBits |= this.HEADER_BIT;
this.optionUnitBits |= HEADER_BIT;
break;
}
case sfp.NODE_TYPE_NET_OPTION_NAME_METHOD:
this.processMethodOption(parser.getNetOptionValue(id));
this.optionUnitBits |= this.METHOD_BIT;
this.optionUnitBits |= METHOD_BIT;
break;
case sfp.NODE_TYPE_NET_OPTION_NAME_PERMISSIONS:
case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE:
case sfp.NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM:
case sfp.NODE_TYPE_NET_OPTION_NAME_REPLACE:
case sfp.NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM:
if ( this.processModifierOption(id, parser.getNetOptionValue(id)) === false ) {
return false;
}
this.optionUnitBits |= this.PERMISSIONS_BIT;
this.optionUnitBits |= MODIFY_BIT;
break;
case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT: {
const actualId = this.action === AllowAction
const actualId = this.action === ALLOW_REALM
? sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE
: id;
if ( this.processModifierOption(actualId, parser.getNetOptionValue(id)) === false ) {
return false;
}
this.optionUnitBits |= this.REDIRECT_BIT;
this.optionUnitBits |= MODIFY_BIT;
break;
}
case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE:
if ( this.processModifierOption(id, parser.getNetOptionValue(id)) === false ) {
return false;
}
this.optionUnitBits |= this.REDIRECT_BIT;
break;
case sfp.NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM:
if ( this.processModifierOption(id, parser.getNetOptionValue(id)) === false ) {
return false;
}
this.optionUnitBits |= this.REMOVEPARAM_BIT;
break;
case sfp.NODE_TYPE_NET_OPTION_NAME_TO:
this.toDomainOpt = this.processHostnameList(
parser.getNetFilterToOptionIterator(),
this.toDomainOptList
);
if ( this.toDomainOpt === '' ) { return false; }
this.optionUnitBits |= this.TO_BIT;
break;
case sfp.NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM:
if ( this.processModifierOption(id, parser.getNetOptionValue(id)) === false ) {
return false;
}
this.optionUnitBits |= this.REDIRECT_BIT;
this.optionUnitBits |= TO_BIT;
break;
default:
break;
@ -3511,7 +3517,7 @@ class FilterCompiler {
}
if ( parser.isException() ) {
this.action = AllowAction;
this.action = ALLOW_REALM;
}
if ( parser.isLeftHnAnchored() ) {
@ -3539,14 +3545,14 @@ class FilterCompiler {
break;
case sfp.NODE_TYPE_NET_OPTION_NAME_STRICT1P:
this.strictParty = this.strictParty === -1 ? 0 : 1;
this.optionUnitBits |= this.STRICT_PARTY_BIT;
this.optionUnitBits |= STRICT_PARTY_BIT;
break;
case sfp.NODE_TYPE_NET_OPTION_NAME_3P:
this.processPartyOption(false, parser.isNegatedOption(type));
break;
case sfp.NODE_TYPE_NET_OPTION_NAME_STRICT3P:
this.strictParty = this.strictParty === 1 ? 0 : -1;
this.optionUnitBits |= this.STRICT_PARTY_BIT;
this.optionUnitBits |= STRICT_PARTY_BIT;
break;
case sfp.NODE_TYPE_NET_OPTION_NAME_ALL:
this.processTypeOption(-1);
@ -3586,6 +3592,7 @@ class FilterCompiler {
case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT:
case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE:
case sfp.NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM:
case sfp.NODE_TYPE_NET_OPTION_NAME_REPLACE:
case sfp.NODE_TYPE_NET_OPTION_NAME_TO:
case sfp.NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM:
if ( this.processOptionWithValue(parser, type) === false ) {
@ -3599,31 +3606,31 @@ class FilterCompiler {
break;
}
case sfp.NODE_TYPE_NET_OPTION_NAME_EMPTY: {
const id = this.action === AllowAction
const id = this.action === ALLOW_REALM
? sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE
: sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT;
if ( this.processModifierOption(id, 'empty') === false ) {
return this.FILTER_INVALID;
}
this.optionUnitBits |= this.REDIRECT_BIT;
this.optionUnitBits |= MODIFY_BIT;
break;
}
case sfp.NODE_TYPE_NET_OPTION_NAME_IMPORTANT:
this.optionUnitBits |= this.IMPORTANT_BIT;
this.action = BlockImportant;
this.optionUnitBits |= IMPORTANT_BIT;
this.action = BLOCKIMPORTANT_REALM;
break;
case sfp.NODE_TYPE_NET_OPTION_NAME_MATCHCASE:
this.patternMatchCase = true;
break;
case sfp.NODE_TYPE_NET_OPTION_NAME_MP4: {
const id = this.action === AllowAction
const id = this.action === ALLOW_REALM
? sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE
: sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT;
if ( this.processModifierOption(id, 'noopmp4-1s') === false ) {
return this.FILTER_INVALID;
}
this.processTypeOption(sfp.NODE_TYPE_NET_OPTION_NAME_MEDIA, false);
this.optionUnitBits |= this.REDIRECT_BIT;
this.optionUnitBits |= MODIFY_BIT;
break;
}
default:
@ -3631,8 +3638,8 @@ class FilterCompiler {
}
}
if ( this.party === AllParties ) {
this.party = AnyParty;
if ( this.party === ALLPARTIES_REALM ) {
this.party = ANYPARTY_REALM;
}
// Negated network types? Toggle on all network type bits.
@ -3649,7 +3656,7 @@ class FilterCompiler {
} else {
this.typeBits &= ~this.notTypeBits;
}
this.optionUnitBits |= this.NOT_TYPE_BIT;
this.optionUnitBits |= NOT_TYPE_BIT;
}
// CSP/permissions options implicitly apply only to
@ -3813,7 +3820,7 @@ class FilterCompiler {
}
isJustOrigin() {
if ( this.optionUnitBits !== this.FROM_BIT ) { return false; }
if ( this.optionUnitBits !== FROM_BIT ) { return false; }
if ( this.isRegex ) { return false; }
if ( /[\/~]/.test(this.fromDomainOpt) ) { return false; }
if ( this.pattern === '*' ) { return true; }
@ -3857,7 +3864,7 @@ class FilterCompiler {
if ( parser.options.toDNR !== true ) {
const parsedBlock = this.clone();
parsedBlock.modifyType = undefined;
parsedBlock.optionUnitBits &= ~this.REDIRECT_BIT;
parsedBlock.optionUnitBits &= ~MODIFY_BIT;
parsedBlock.compileToFilter(writer);
}
}
@ -3958,14 +3965,14 @@ class FilterCompiler {
// Header
if ( this.headerOpt !== undefined ) {
units.push(FilterOnHeaders.compile(this));
this.action |= HEADERS;
this.action |= HEADERS_REALM;
}
// Important
//
// IMPORTANT: must always appear at the end of the sequence, so as to
// ensure $isBlockImportant is set only for matching filters.
if ( (this.optionUnitBits & this.IMPORTANT_BIT) !== 0 ) {
if ( (this.optionUnitBits & IMPORTANT_BIT) !== 0 ) {
units.push(FilterImportant.compile());
}
@ -3974,7 +3981,8 @@ class FilterCompiler {
// IMPORTANT: the modifier unit MUST always appear first in a sequence
if ( this.modifyType !== undefined ) {
units.unshift(FilterModifier.compile(this));
this.action = (this.action & ~ActionBitsMask) | ModifyAction;
this.action = (this.action & ~ActionBitsMask) |
modifierBitsFromType.get(this.modifyType);
}
this.compileToAtomicFilter(
@ -4047,18 +4055,16 @@ class FilterCompiler {
}
}
FilterCompiler.prototype.FROM_BIT = 0b000000000001;
FilterCompiler.prototype.TO_BIT = 0b000000000010;
FilterCompiler.prototype.DENYALLOW_BIT = 0b000000000100;
FilterCompiler.prototype.HEADER_BIT = 0b000000001000;
FilterCompiler.prototype.STRICT_PARTY_BIT = 0b000000010000;
FilterCompiler.prototype.CSP_BIT = 0b000000100000;
FilterCompiler.prototype.REMOVEPARAM_BIT = 0b000001000000;
FilterCompiler.prototype.REDIRECT_BIT = 0b000010000000;
FilterCompiler.prototype.NOT_TYPE_BIT = 0b000100000000;
FilterCompiler.prototype.IMPORTANT_BIT = 0b001000000000;
FilterCompiler.prototype.METHOD_BIT = 0b010000000000;
FilterCompiler.prototype.PERMISSIONS_BIT = 0b100000000000;
// These are to quickly test whether a filter is composite
const FROM_BIT = 0b000000001;
const TO_BIT = 0b000000010;
const DENYALLOW_BIT = 0b000000100;
const HEADER_BIT = 0b000001000;
const STRICT_PARTY_BIT = 0b000010000;
const MODIFY_BIT = 0b000100000;
const NOT_TYPE_BIT = 0b001000000;
const IMPORTANT_BIT = 0b010000000;
const METHOD_BIT = 0b100000000;
FilterCompiler.prototype.FILTER_OK = 0;
FilterCompiler.prototype.FILTER_INVALID = 1;
@ -4068,16 +4074,15 @@ FilterCompiler.prototype.FILTER_UNSUPPORTED = 2;
/******************************************************************************/
const FilterContainer = function() {
this.compilerVersion = '8';
this.selfieVersion = '9';
this.compilerVersion = '10';
this.selfieVersion = '10';
this.MAX_TOKEN_LENGTH = MAX_TOKEN_LENGTH;
this.optimizeTaskId = undefined;
// As long as CategoryCount is reasonably low, we will use an array to
// store buckets using category bits as index. If ever CategoryCount
// becomes too large, we can just go back to using a Map.
this.bitsToBucketIndices = JSON.parse(`[${'0,'.repeat(CategoryCount-1)}0]`);
this.buckets = [ new Map() ];
this.bitsToBucket = new Map();
this.goodFilters = new Set();
this.badFilters = new Set();
this.unitsToOptimize = [];
@ -4105,8 +4110,7 @@ FilterContainer.prototype.reset = function() {
this.goodFilters.clear();
this.badFilters.clear();
this.unitsToOptimize.length = 0;
this.bitsToBucketIndices.fill(0);
this.buckets.length = 1;
this.bitsToBucket.clear();
urlTokenizer.resetKnownTokens();
@ -4145,16 +4149,14 @@ FilterContainer.prototype.freeze = function() {
const args = unserialize(line);
const bits = args[0];
let ibucket = this.bitsToBucketIndices[bits];
if ( ibucket === 0 ) {
ibucket = this.bitsToBucketIndices[bits] = this.buckets.length;
this.buckets.push(new Map());
const bucket = this.bitsToBucket.get(bits) || (new Map());
if ( bucket.size === 0 ) {
this.bitsToBucket.set(bits, bucket);
}
const tokenHash = args[1];
const fdata = args[2];
const bucket = this.buckets[ibucket];
let iunit = bucket.get(tokenHash) || 0;
if ( tokenHash === DOT_TOKEN_HASH ) {
@ -4204,9 +4206,9 @@ FilterContainer.prototype.freeze = function() {
// the block-important realm should be checked when and only when
// there is a matched exception filter, which important filters are
// meant to override.
if ( (bits & ActionBitsMask) === BlockImportant ) {
if ( (bits & ActionBitsMask) === BLOCKIMPORTANT_REALM ) {
this.addFilterUnit(
bits & ~Important,
bits & ~IMPORTANT_REALM,
tokenHash,
filterFromCompiled(fdata)
);
@ -4348,14 +4350,14 @@ FilterContainer.prototype.dnrFromCompiled = function(op, context, ...args) {
}
const realms = new Map([
[ BlockAction, 'block' ],
[ AllowAction, 'allow' ],
[ ModifyAction, 'modify' ],
[ BLOCK_REALM, 'block' ],
[ ALLOW_REALM, 'allow' ],
[ MODIFY_REALMS, 'modify' ],
]);
const partyness = new Map([
[ AnyParty, '' ],
[ FirstParty, 'firstParty' ],
[ ThirdParty, 'thirdParty' ],
[ ANYPARTY_REALM, '' ],
[ FIRSTPARTY_REALM, 'firstParty' ],
[ THIRDPARTY_REALM, 'thirdParty' ],
]);
const types = new Set([
'no_type',
@ -4403,7 +4405,7 @@ FilterContainer.prototype.dnrFromCompiled = function(op, context, ...args) {
// Collect generichide filters
const generichideExclusions = [];
{
const bucket = buckets.get(AllowAction | typeNameToTypeValue['generichide']);
const bucket = buckets.get(ALLOW_REALM | typeNameToTypeValue['generichide']);
if ( bucket ) {
for ( const rules of bucket.values() ) {
for ( const rule of rules ) {
@ -4460,7 +4462,7 @@ FilterContainer.prototype.dnrFromCompiled = function(op, context, ...args) {
operation: 'append',
value: rule.__modifierValue,
}];
if ( rule.__modifierAction === AllowAction ) {
if ( rule.__modifierAction === ALLOW_REALM ) {
dnrAddRuleError(rule, 'Unsupported modifier exception');
}
break;
@ -4471,7 +4473,7 @@ FilterContainer.prototype.dnrFromCompiled = function(op, context, ...args) {
operation: 'append',
value: rule.__modifierValue.split('|').join(', '),
}];
if ( rule.__modifierAction === AllowAction ) {
if ( rule.__modifierAction === ALLOW_REALM ) {
dnrAddRuleError(rule, 'Unsupported modifier exception');
}
break;
@ -4489,7 +4491,7 @@ FilterContainer.prototype.dnrFromCompiled = function(op, context, ...args) {
if ( rule.__modifierValue !== '' && resource === undefined ) {
dnrAddRuleError(rule, `Unpatchable redirect filter: ${rule.__modifierValue}`);
}
if ( rule.__modifierAction !== AllowAction ) {
if ( rule.__modifierAction !== ALLOW_REALM ) {
const extensionPath = resource || token;
rule.action.type = 'redirect';
rule.action.redirect = { extensionPath };
@ -4534,14 +4536,14 @@ FilterContainer.prototype.dnrFromCompiled = function(op, context, ...args) {
'xmlhttprequest',
];
}
if ( rule.__modifierAction === AllowAction ) {
if ( rule.__modifierAction === ALLOW_REALM ) {
dnrAddRuleError(rule, 'Unsupported modifier exception');
}
break;
case 'urltransform': {
const path = rule.__modifierValue;
let priority = rule.priority || 1;
if ( rule.__modifierAction !== AllowAction ) {
if ( rule.__modifierAction !== ALLOW_REALM ) {
const transform = { path };
rule.action.type = 'redirect';
rule.action.redirect = { transform };
@ -4574,12 +4576,10 @@ FilterContainer.prototype.addFilterUnit = function(
tokenHash,
inewunit
) {
let ibucket = this.bitsToBucketIndices[bits];
if ( ibucket === 0 ) {
ibucket = this.bitsToBucketIndices[bits] = this.buckets.length;
this.buckets.push(new Map());
const bucket = this.bitsToBucket.get(bits) || (new Map());
if ( bucket.size === 0 ) {
this.bitsToBucket.set(bits, bucket);
}
const bucket = this.buckets[ibucket];
const istoredunit = bucket.get(tokenHash) || 0;
if ( istoredunit === 0 ) {
bucket.set(tokenHash, inewunit);
@ -4614,7 +4614,7 @@ FilterContainer.prototype.optimize = function(throttle = 0) {
const t0 = Date.now();
while ( this.unitsToOptimize.length !== 0 ) {
const { bits, tokenHash } = this.unitsToOptimize.pop();
const bucket = this.buckets[this.bitsToBucketIndices[bits]];
const bucket = this.bitsToBucket.get(bits);
const iunit = bucket.get(tokenHash);
const fc = filterGetClass(iunit);
switch ( fc ) {
@ -4623,7 +4623,7 @@ FilterContainer.prototype.optimize = function(throttle = 0) {
break;
case FilterBucket: {
const optimizeBits =
(tokenHash === NO_TOKEN_HASH) || (bits & ModifyAction) !== 0
(tokenHash === NO_TOKEN_HASH) || (bits & MODIFY_REALMS) !== 0
? 0b10
: 0b01;
const inewunit = FilterBucket.optimize(iunit, optimizeBits);
@ -4658,14 +4658,6 @@ FilterContainer.prototype.toSelfie = async function(storage, path) {
if ( typeof storage !== 'object' || storage === null ) { return; }
if ( typeof storage.put !== 'function' ) { return; }
const bucketsToSelfie = ( ) => {
const selfie = [];
for ( const bucket of this.buckets ) {
selfie.push(Array.from(bucket));
}
return selfie;
};
bidiTrieOptimize(true);
keyvalStore.setItem(
'SNFE.origHNTrieContainer.trieDetails',
@ -4700,8 +4692,10 @@ FilterContainer.prototype.toSelfie = async function(storage, path) {
processedFilterCount: this.processedFilterCount,
acceptedCount: this.acceptedCount,
discardedCount: this.discardedCount,
bitsToBucketIndices: this.bitsToBucketIndices,
buckets: bucketsToSelfie(),
bitsToBucket: Array.from(this.bitsToBucket).map(kv => {
kv[1] = Array.from(kv[1]);
return kv;
}),
urlTokenizer: urlTokenizer.toSelfie(),
})
)
@ -4750,12 +4744,6 @@ FilterContainer.prototype.fromSelfie = async function(storage, path) {
if ( results.slice(1).every(v => v === true) === false ) { return false; }
const bucketsFromSelfie = selfie => {
for ( let i = 0; i < selfie.length; i++ ) {
this.buckets[i] = new Map(selfie[i]);
}
};
const details = results[0];
if ( typeof details !== 'object' || details === null ) { return false; }
if ( typeof details.content !== 'string' ) { return false; }
@ -4770,8 +4758,10 @@ FilterContainer.prototype.fromSelfie = async function(storage, path) {
this.processedFilterCount = selfie.processedFilterCount;
this.acceptedCount = selfie.acceptedCount;
this.discardedCount = selfie.discardedCount;
this.bitsToBucketIndices = selfie.bitsToBucketIndices;
bucketsFromSelfie(selfie.buckets);
this.bitsToBucket = new Map(selfie.bitsToBucket.map(kv => {
kv[1] = new Map(kv[1]);
return kv;
}));
urlTokenizer.fromSelfie(selfie.urlTokenizer);
// If this point is never reached, it means the internal state is
@ -4837,38 +4827,37 @@ FilterContainer.prototype.matchAndFetchModifiers = function(
$requestMethodBit = fctxt.method || 0;
$requestTypeValue = (typeBits & TypeBitsMask) >>> TypeBitsOffset;
const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty;
const modifierType = modifierTypeFromName.get(modifierName);
const modifierBits = modifierBitsFromType.get(modifierType);
const catBits00 = ModifyAction;
const catBits01 = ModifyAction | typeBits;
const catBits10 = ModifyAction | partyBits;
const catBits11 = ModifyAction | typeBits | partyBits;
const partyBits = fctxt.is3rdPartyToDoc() ? THIRDPARTY_REALM : FIRSTPARTY_REALM;
const ibucket00 = this.bitsToBucketIndices[catBits00];
const ibucket01 = typeBits !== 0 ? this.bitsToBucketIndices[catBits01]
: 0;
const ibucket10 = partyBits !== 0
? this.bitsToBucketIndices[catBits10]
: 0;
const ibucket11 = typeBits !== 0 && partyBits !== 0
? this.bitsToBucketIndices[catBits11]
: 0;
const catBits00 = modifierBits;
const catBits01 = modifierBits | typeBits;
const catBits10 = modifierBits | partyBits;
const catBits11 = modifierBits | typeBits | partyBits;
const bucket00 = this.bitsToBucket.get(catBits00);
const bucket01 = typeBits !== 0
? this.bitsToBucket.get(catBits01)
: undefined;
const bucket10 = partyBits !== 0
? this.bitsToBucket.get(catBits10)
: undefined;
const bucket11 = typeBits !== 0 && partyBits !== 0
? this.bitsToBucket.get(catBits11)
: undefined;
if (
ibucket00 === 0 && ibucket01 === 0 &&
ibucket10 === 0 && ibucket11 === 0
bucket00 === undefined && bucket01 === undefined &&
bucket10 === undefined && bucket11 === undefined
) {
return;
}
const bucket00 = this.buckets[ibucket00];
const bucket01 = this.buckets[ibucket01];
const bucket10 = this.buckets[ibucket10];
const bucket11 = this.buckets[ibucket11];
const results = [];
const env = {
type: modifierTypeFromName.get(modifierName) || 0,
type: modifierType || 0,
bits: 0,
th: 0,
iunit: 0,
@ -4884,28 +4873,28 @@ FilterContainer.prototype.matchAndFetchModifiers = function(
env.th = th;
$tokenBeg = tokenHashes[i+1];
if (
(ibucket00 !== 0) &&
(bucket00 !== undefined) &&
(iunit = bucket00.get(th) || 0) !== 0
) {
env.bits = catBits00; env.iunit = iunit;
filterMatchAndFetchModifiers(iunit, env);
}
if (
(ibucket01 !== 0) &&
(bucket01 !== undefined) &&
(iunit = bucket01.get(th) || 0) !== 0
) {
env.bits = catBits01; env.iunit = iunit;
filterMatchAndFetchModifiers(iunit, env);
}
if (
(ibucket10 !== 0) &&
(bucket10 !== undefined) &&
(iunit = bucket10.get(th) || 0) !== 0
) {
env.bits = catBits10; env.iunit = iunit;
filterMatchAndFetchModifiers(iunit, env);
}
if (
(ibucket11 !== 0) &&
(bucket11 !== undefined) &&
(iunit = bucket11.get(th) || 0) !== 0
) {
env.bits = catBits11; env.iunit = iunit;
@ -4921,7 +4910,7 @@ FilterContainer.prototype.matchAndFetchModifiers = function(
// occurrences.
if ( results.length === 1 ) {
const result = results[0];
if ( (result.bits & AllowAction) !== 0 ) { return; }
if ( (result.bits & ALLOW_REALM) !== 0 ) { return; }
return [ result ];
}
@ -4932,9 +4921,9 @@ FilterContainer.prototype.matchAndFetchModifiers = function(
for ( const result of results ) {
const actionBits = result.bits & ActionBitsMask;
const modifyValue = result.value;
if ( actionBits === BlockImportant ) {
if ( actionBits === BLOCKIMPORTANT_REALM ) {
toAddImportant.set(modifyValue, result);
} else if ( actionBits === BlockAction ) {
} else if ( actionBits === BLOCK_REALM ) {
toAdd.set(modifyValue, result);
} else {
toRemove.set(modifyValue, result);
@ -5014,55 +5003,50 @@ FilterContainer.prototype.realmMatchString = function(
const catBits10 = realmBits | partyBits;
const catBits11 = realmBits | typeBits | partyBits;
const ibucket00 = exactType === 0
? this.bitsToBucketIndices[catBits00]
: 0;
const ibucket01 = exactType !== 0 || typeBits !== 0
? this.bitsToBucketIndices[catBits01]
: 0;
const ibucket10 = exactType === 0 && partyBits !== 0
? this.bitsToBucketIndices[catBits10]
: 0;
const ibucket11 = (exactType !== 0 || typeBits !== 0) && partyBits !== 0
? this.bitsToBucketIndices[catBits11]
: 0;
const bucket00 = exactType === 0
? this.bitsToBucket.get(catBits00)
: undefined;
const bucket01 = exactType !== 0 || typeBits !== 0
? this.bitsToBucket.get(catBits01)
: undefined;
const bucket10 = exactType === 0 && partyBits !== 0
? this.bitsToBucket.get(catBits10)
: undefined;
const bucket11 = (exactType !== 0 || typeBits !== 0) && partyBits !== 0
? this.bitsToBucket.get(catBits11)
: undefined;
if (
ibucket00 === 0 && ibucket01 === 0 &&
ibucket10 === 0 && ibucket11 === 0
bucket00 === undefined && bucket01 === undefined &&
bucket10 === undefined && bucket11 === undefined
) {
return false;
}
const bucket00 = this.buckets[ibucket00];
const bucket01 = this.buckets[ibucket01];
const bucket10 = this.buckets[ibucket10];
const bucket11 = this.buckets[ibucket11];
let catBits = 0, iunit = 0;
// Pure hostname-based filters
let tokenHash = DOT_TOKEN_HASH;
if (
(ibucket00 !== 0) &&
(bucket00 !== undefined) &&
(iunit = bucket00.get(DOT_TOKEN_HASH) || 0) !== 0 &&
(filterMatch(iunit) === true)
) {
catBits = catBits00;
} else if (
(ibucket01 !== 0) &&
(bucket01 !== undefined) &&
(iunit = bucket01.get(DOT_TOKEN_HASH) || 0) !== 0 &&
(filterMatch(iunit) === true)
) {
catBits = catBits01;
} else if (
(ibucket10 !== 0) &&
(bucket10 !== undefined) &&
(iunit = bucket10.get(DOT_TOKEN_HASH) || 0) !== 0 &&
(filterMatch(iunit) === true)
) {
catBits = catBits10;
} else if (
(ibucket11 !== 0) &&
(bucket11 !== undefined) &&
(iunit = bucket11.get(DOT_TOKEN_HASH) || 0) !== 0 &&
(filterMatch(iunit) === true)
) {
@ -5077,7 +5061,7 @@ FilterContainer.prototype.realmMatchString = function(
if ( tokenHash === INVALID_TOKEN_HASH ) { return false; }
$tokenBeg = tokenHashes[i+1];
if (
(ibucket00 !== 0) &&
(bucket00 !== undefined) &&
(iunit = bucket00.get(tokenHash) || 0) !== 0 &&
(filterMatch(iunit) === true)
) {
@ -5085,7 +5069,7 @@ FilterContainer.prototype.realmMatchString = function(
break;
}
if (
(ibucket01 !== 0) &&
(bucket01 !== undefined) &&
(iunit = bucket01.get(tokenHash) || 0) !== 0 &&
(filterMatch(iunit) === true)
) {
@ -5093,7 +5077,7 @@ FilterContainer.prototype.realmMatchString = function(
break;
}
if (
(ibucket10 !== 0) &&
(bucket10 !== undefined) &&
(iunit = bucket10.get(tokenHash) || 0) !== 0 &&
(filterMatch(iunit) === true)
) {
@ -5101,7 +5085,7 @@ FilterContainer.prototype.realmMatchString = function(
break;
}
if (
(ibucket11 !== 0) &&
(bucket11 !== undefined) &&
(iunit = bucket11.get(tokenHash) || 0) !== 0 &&
(filterMatch(iunit) === true)
) {
@ -5147,9 +5131,9 @@ FilterContainer.prototype.matchRequestReverse = function(type, url) {
$docDomain = domainFromHostname($docHostname);
// Exception filters
if ( this.realmMatchString(AllowAction, typeBits, FirstParty) ) {
if ( this.realmMatchString(ALLOW_REALM, typeBits, FIRSTPARTY_REALM) ) {
// Important block filters.
if ( this.realmMatchString(BlockImportant, typeBits, FirstParty) ) {
if ( this.realmMatchString(BLOCKIMPORTANT_REALM, typeBits, FIRSTPARTY_REALM) ) {
return 1;
}
return 2;
@ -5194,7 +5178,7 @@ FilterContainer.prototype.matchRequest = function(fctxt, modifiers = 0) {
typeBits |= 0x80000000;
}
const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty;
const partyBits = fctxt.is3rdPartyToDoc() ? THIRDPARTY_REALM : FIRSTPARTY_REALM;
// Prime tokenizer: we get a normalized URL in return.
$requestURL = urlTokenizer.setURL(fctxt.url);
@ -5211,11 +5195,11 @@ FilterContainer.prototype.matchRequest = function(fctxt, modifiers = 0) {
// Evaluate block realm before allow realm, and allow realm before
// block-important realm, i.e. by order of likelihood of a match.
const r = this.realmMatchString(BlockAction, typeBits, partyBits);
const r = this.realmMatchString(BLOCK_REALM, typeBits, partyBits);
if ( r || (modifiers & 0b0010) !== 0 ) {
if ( $isBlockImportant ) { return 1; }
if ( this.realmMatchString(AllowAction, typeBits, partyBits) ) {
if ( this.realmMatchString(BlockImportant, typeBits, partyBits) ) {
if ( this.realmMatchString(ALLOW_REALM, typeBits, partyBits) ) {
if ( this.realmMatchString(BLOCKIMPORTANT_REALM, typeBits, partyBits) ) {
return 1;
}
return 2;
@ -5229,7 +5213,7 @@ FilterContainer.prototype.matchRequest = function(fctxt, modifiers = 0) {
FilterContainer.prototype.matchHeaders = function(fctxt, headers) {
const typeBits = typeNameToTypeValue[fctxt.type] || otherTypeBitValue;
const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty;
const partyBits = fctxt.is3rdPartyToDoc() ? THIRDPARTY_REALM : FIRSTPARTY_REALM;
// Prime tokenizer: we get a normalized URL in return.
$requestURL = urlTokenizer.setURL(fctxt.url);
@ -5245,13 +5229,13 @@ FilterContainer.prototype.matchHeaders = function(fctxt, headers) {
$httpHeaders.init(headers);
let r = 0;
if ( this.realmMatchString(HEADERS | BlockAction, typeBits, partyBits) ) {
if ( this.realmMatchString(HEADERS_REALM | BLOCK_REALM, typeBits, partyBits) ) {
r = 1;
}
if ( r !== 0 && $isBlockImportant !== true ) {
if ( this.realmMatchString(HEADERS | AllowAction, typeBits, partyBits) ) {
if ( this.realmMatchString(HEADERS_REALM | ALLOW_REALM, typeBits, partyBits) ) {
r = 2;
if ( this.realmMatchString(HEADERS | BlockImportant, typeBits, partyBits) ) {
if ( this.realmMatchString(HEADERS_REALM | BLOCKIMPORTANT_REALM, typeBits, partyBits) ) {
r = 1;
}
}
@ -5275,7 +5259,7 @@ FilterContainer.prototype.redirectRequest = function(redirectEngine, fctxt) {
}
// Redirect to highest-ranked directive
const directive = directives[highest];
if ( (directive.bits & AllowAction) !== 0 ) { return directives; }
if ( (directive.bits & ALLOW_REALM) !== 0 ) { return directives; }
const { token } = parseRedirectRequestValue(directive);
fctxt.redirectURL = redirectEngine.tokenToURL(fctxt, token);
if ( fctxt.redirectURL === undefined ) { return; }
@ -5286,7 +5270,7 @@ FilterContainer.prototype.transformRequest = function(fctxt) {
const directives = this.matchAndFetchModifiers(fctxt, 'urltransform');
if ( directives === undefined ) { return; }
const directive = directives[directives.length-1];
if ( (directive.bits & AllowAction) !== 0 ) { return directives; }
if ( (directive.bits & ALLOW_REALM) !== 0 ) { return directives; }
const redirectURL = new URL(fctxt.url);
if ( directive.value === redirectURL.pathname ) { return; }
redirectURL.pathname = directive.value;
@ -5309,10 +5293,10 @@ function compareRedirectRequests(redirectEngine, a, b) {
parseRedirectRequestValue(b);
if ( redirectEngine.hasToken(btok) === false ) { return 1; }
if ( abits !== bbits ) {
if ( (abits & Important) !== 0 ) { return 1; }
if ( (bbits & Important) !== 0 ) { return -1; }
if ( (abits & AllowAction) !== 0 ) { return -1; }
if ( (bbits & AllowAction) !== 0 ) { return 1; }
if ( (abits & IMPORTANT_REALM) !== 0 ) { return 1; }
if ( (bbits & IMPORTANT_REALM) !== 0 ) { return -1; }
if ( (abits & ALLOW_REALM) !== 0 ) { return -1; }
if ( (bbits & ALLOW_REALM) !== 0 ) { return 1; }
}
return aint - bint;
}
@ -5348,7 +5332,7 @@ FilterContainer.prototype.filterQuery = function(fctxt) {
const out = [];
for ( const directive of directives ) {
if ( params.size === 0 ) { break; }
const isException = (directive.bits & AllowAction) !== 0;
const isException = (directive.bits & ALLOW_REALM) !== 0;
if ( isException && directive.value === '' ) {
out.push(directive);
break;
@ -5471,10 +5455,8 @@ FilterContainer.prototype.test = async function(docURL, type, url) {
FilterContainer.prototype.bucketHistogram = function() {
const results = [];
for ( let bits = 0; bits < this.bitsToBucketIndices.length; bits++ ) {
const ibucket = this.bitsToBucketIndices[bits];
if ( ibucket === 0 ) { continue; }
for ( const [ th, iunit ] of this.buckets[ibucket] ) {
for ( const [ bits, bucket ] of this.bitsToBucket ) {
for ( const [ th, iunit ] of bucket ) {
const token = urlTokenizer.stringFromTokenHash(th);
const fc = filterGetClass(iunit);
const count = fc.getCount !== undefined ? fc.getCount(iunit) : 1;
@ -5553,15 +5535,20 @@ FilterContainer.prototype.dump = function() {
const thCounts = new Set();
const realms = new Map([
[ BlockAction, 'block' ],
[ BlockImportant, 'block-important' ],
[ AllowAction, 'unblock' ],
[ ModifyAction, 'modify' ],
[ BLOCK_REALM, 'block' ],
[ BLOCKIMPORTANT_REALM, 'block-important' ],
[ ALLOW_REALM, 'unblock' ],
[ REDIRECT_REALM, 'redirect' ],
[ REMOVEPARAM_REALM, 'removeparam' ],
[ CSP_REALM, 'csp' ],
[ PERMISSIONS_REALM, 'permissions' ],
[ URLTRANSFORM_REALM, 'urltransform' ],
[ REPLACE_REALM, 'replace' ],
]);
const partyness = new Map([
[ AnyParty, 'any-party' ],
[ FirstParty, '1st-party' ],
[ ThirdParty, '3rd-party' ],
[ ANYPARTY_REALM, 'any-party' ],
[ FIRSTPARTY_REALM, '1st-party' ],
[ THIRDPARTY_REALM, '3rd-party' ],
]);
for ( const [ realmBits, realmName ] of realms ) {
toOutput(1, `+ realm: ${realmName}`);
@ -5573,11 +5560,11 @@ FilterContainer.prototype.dump = function() {
if ( processedTypeBits.has(typeBits) ) { continue; }
processedTypeBits.add(typeBits);
const bits = realmBits | partyBits | typeBits;
const ibucket = this.bitsToBucketIndices[bits];
if ( ibucket === 0 ) { continue; }
const thCount = this.buckets[ibucket].size;
const bucket = this.bitsToBucket.get(bits);
if ( bucket === undefined ) { continue; }
const thCount = bucket.size;
toOutput(3, `+ type: ${typeName} (${thCount})`);
for ( const [ th, iunit ] of this.buckets[ibucket] ) {
for ( const [ th, iunit ] of bucket) {
thCounts.add(th);
const ths = thConstants.has(th)
? thConstants.get(th)

View File

@ -32,6 +32,7 @@ import scriptletFilteringEngine from './scriptlet-filtering.js';
import staticNetFilteringEngine from './static-net-filtering.js';
import textEncode from './text-encode.js';
import µb from './background.js';
import * as sfp from './static-filtering-parser.js';
import {
sessionFirewall,
@ -483,11 +484,10 @@ const onBeforeBehindTheSceneRequest = function(fctxt) {
const onHeadersReceived = function(details) {
// https://github.com/uBlockOrigin/uBlock-issues/issues/610
// Process behind-the-scene requests in a special way.
if (
details.tabId < 0 &&
normalizeBehindTheSceneResponseHeaders(details) === false
) {
return;
if ( details.tabId < 0 ) {
if ( normalizeBehindTheSceneResponseHeaders(details) === false ) {
return;
}
}
const fctxt = µb.filteringContext.fromWebrequestDetails(details);
@ -524,8 +524,6 @@ const onHeadersReceived = function(details) {
}
}
if ( isRootDoc === false && fctxt.itype !== fctxt.SUB_FRAME ) { return; }
// https://github.com/gorhill/uBlock/issues/2813
// Disable the blocking of large media elements if the document is itself
// a media element: the resource was not prevented from loading so no
@ -539,10 +537,29 @@ const onHeadersReceived = function(details) {
}
}
// At this point we have a HTML document.
const filteredHTML =
µb.canFilterResponseData && filterDocument(fctxt, details) === true;
const bodyFilterSession = bodyFilterer.canFilter(fctxt, details);
if ( bodyFilterSession !== undefined ) {
// `replace=` filter option
const replaceDirectives =
staticNetFilteringEngine.matchAndFetchModifiers(fctxt, 'replace');
if ( replaceDirectives ) {
bodyFilterSession.addJob({
fn: textResponseFilterer,
args: [ replaceDirectives ],
});
}
// html filtering
if ( isRootDoc || fctxt.itype === fctxt.SUB_FRAME ) {
const selectors = htmlFilteringEngine.retrieve(bodyFilterSession);
if ( selectors ) {
bodyFilterSession.addJob({
fn: htmlResponseFilterer,
args: [ selectors ],
});
}
}
bodyFilterSession.launch();
}
let modifiedHeaders = false;
if ( httpheaderFilteringEngine.apply(fctxt, responseHeaders) === true ) {
@ -551,7 +568,6 @@ const onHeadersReceived = function(details) {
if ( injectCSP(fctxt, pageStore, responseHeaders) === true ) {
modifiedHeaders = true;
}
if ( injectPP(fctxt, pageStore, responseHeaders) === true ) {
modifiedHeaders = true;
}
@ -562,7 +578,7 @@ const onHeadersReceived = function(details) {
// https://github.com/uBlockOrigin/uBlock-issues/issues/229
// Use `no-cache` instead of `no-cache, no-store, must-revalidate`, this
// allows Firefox's offline mode to work as expected.
if ( (filteredHTML || modifiedHeaders) && dontCacheResponseHeaders ) {
if ( modifiedHeaders && dontCacheResponseHeaders ) {
const cacheControl = µb.hiddenSettings.cacheControlForFirefox1376932;
if ( cacheControl !== 'unset' ) {
let i = headerIndexFromName('cache-control', responseHeaders);
@ -601,272 +617,301 @@ const normalizeBehindTheSceneResponseHeaders = function(details) {
return true;
};
/******************************************************************************/
/**
 * Body-filtering job which applies `replace=` directives to the text of a
 * response.
 *
 * @param {object} session - Body filtering session. Must provide
 *   `getString()` / `setString()` to read and write the response text, and
 *   `setRealm()` / `pushFilters()` / `toLogger()` for logging.
 * @param {Iterable<object>} directives - Matched `replace=` directives. Each
 *   usable directive carries a `refs` object holding the raw option `value`
 *   and a `$cache` slot for its parsed form.
 * @returns {boolean} `true` if at least one directive modified the text.
 */
function textResponseFilterer(session, directives) {
    const applied = [];
    for ( const directive of directives ) {
        if ( directive.refs instanceof Object === false ) { continue; }
        const { refs } = directive;
        // Parse the option value only once, the result is cached in `refs`.
        if ( refs.$cache === null ) {
            refs.$cache = sfp.parseReplaceValue(refs.value);
        }
        const cache = refs.$cache;
        // No usable parsed form (presumably the value could not be parsed).
        if ( cache === undefined ) { continue; }
        const { re, replacement } = cache;
        const before = session.getString();
        // The regex is cached and reused across responses: always start
        // from a clean state, since `test()` and `replace()` both read and
        // write `lastIndex` for global/sticky regexes.
        re.lastIndex = 0;
        if ( re.test(before) !== true ) { continue; }
        re.lastIndex = 0;
        session.setString(before.replace(re, replacement));
        applied.push(directive);
    }
    // Nothing applied: nothing to report to the logger.
    if ( applied.length === 0 ) { return false; }
    if ( logger.enabled ) {
        session.setRealm('network')
            .pushFilters(applied.map(a => a.logData()))
            .toLogger();
    }
    return true;
}
/******************************************************************************/
/**
 * Body-filtering job which parses the response text as a document, applies
 * HTML filters to it, and writes the serialized document back.
 *
 * @param {object} session - Body filtering session, read through
 *   `getString()` and `mime`, written through `setString()`.
 * @param {*} selectors - Selectors handed to `htmlFilteringEngine.apply()`.
 *   When `undefined`, no filtering takes place but the document is still
 *   re-serialized.
 * @returns {boolean} `false` when selectors were given and
 *   `htmlFilteringEngine.apply()` did not return `true`; `true` otherwise.
 */
function htmlResponseFilterer(session, selectors) {
    // The parser/serializer pair is created on first use and then shared by
    // all invocations through properties of the function itself.
    if ( htmlResponseFilterer.domParser === null ) {
        htmlResponseFilterer.domParser = new DOMParser();
        htmlResponseFilterer.xmlSerializer = new XMLSerializer();
    }
    const { domParser, xmlSerializer } = htmlResponseFilterer;
    const doc = domParser.parseFromString(session.getString(), session.mime);
    const mustFilter = selectors !== undefined;
    if ( mustFilter && htmlFilteringEngine.apply(doc, session, selectors) !== true ) {
        return false;
    }
    // https://stackoverflow.com/questions/6088972/get-doctype-of-an-html-as-string-with-javascript/10162353#10162353
    let prologue = '';
    if ( doc.doctype instanceof Object ) {
        prologue = xmlSerializer.serializeToString(doc.doctype) + '\n';
    }
    const serialized = [ prologue, doc.documentElement.outerHTML ].join('\n');
    session.setString(serialized);
    return true;
}
htmlResponseFilterer.domParser = null;
htmlResponseFilterer.xmlSerializer = null;
/*******************************************************************************
The response body filterer is responsible for:
- Realize static network filter option `replace=`
- HTML filtering
In the spirit of efficiency, the response body filterer works this way:
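If:
- `replace=` filtering: no.
- HTML filtering: no.
Then:
No response body filtering is initiated.
If:
- `replace=` filtering: yes, and/or
- HTML filtering: yes.
Then:
Assemble all response body data into a single buffer. Once all the
response data has been received, decode it to text and run each
filtering job in the order it was added:
- Apply the matching `replace=` directives to the text.
- Create a document from the text, remove all DOM elements matching
HTML filters, then serialize the resulting modified document.
If at least one job modified the text, encode it as the new response
body. Otherwise the original response data is passed through unchanged.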
**/
const filterDocument = (( ) => {
const filterers = new Map();
let domParser, xmlSerializer,
utf8TextDecoder, textDecoder, textEncoder;
const textDecode = function(encoding, buffer) {
if (
textDecoder !== undefined &&
textDecoder.encoding !== encoding
) {
textDecoder = undefined;
}
if ( textDecoder === undefined ) {
textDecoder = new TextDecoder(encoding);
}
return textDecoder.decode(buffer);
};
const bodyFilterer = (( ) => {
const sessions = new Map();
const reContentTypeDocument = /^(?:text\/html|application\/xhtml\+xml)/i;
const reContentTypeCharset = /charset=['"]?([^'" ]+)/i;
const otherValidMimes = new Set([
'application/javascript',
'application/json',
'application/xml',
'application/xhtml+xml',
]);
let textDecoder, textEncoder;
const mimeFromContentType = function(contentType) {
const mimeFromContentType = contentType => {
const match = reContentTypeDocument.exec(contentType);
if ( match !== null ) {
return match[0].toLowerCase();
}
if ( match === null ) { return; }
return match[0].toLowerCase();
};
const charsetFromContentType = function(contentType) {
const charsetFromContentType = contentType => {
const match = reContentTypeCharset.exec(contentType);
if ( match !== null ) {
return match[1].toLowerCase();
}
if ( match === null ) { return; }
return match[1].toLowerCase();
};
const charsetFromDoc = function(doc) {
let meta = doc.querySelector('meta[charset]');
if ( meta !== null ) {
return meta.getAttribute('charset').toLowerCase();
/**
 * Try to extract the name of the character encoding from raw response
 * bytes, without decoding them.
 *
 * A UTF-8 byte order mark yields 'utf-8'. Otherwise the first occurrence of
 * the lowercase ASCII sequence `charset` within the first 64 KiB is located,
 * and the run of `[-0-9A-Za-z]` characters which starts at the first ASCII
 * letter found shortly after it is returned.
 *
 * @param {Uint8Array} bytes - Raw response body data.
 * @returns {string|undefined} Lowercased charset name, or `undefined` if
 *   none could be extracted.
 */
const charsetFromStream = bytes => {
    const SCAN_LIMIT = 65536;   // highest offset at which `charset` may start
    const MAX_GAP = 8;          // max distance between `charset=` and the name
    const MAX_NAME_LENGTH = 24; // names this long or longer are rejected

    if ( bytes.length < 3 ) { return; }
    // UTF-8 byte order mark
    if ( bytes[0] === 0xEF && bytes[1] === 0xBB && bytes[2] === 0xBF ) {
        return 'utf-8';
    }
    // Locate `charset`, never reading past the end of the data.
    const lastStart = Math.min(SCAN_LIMIT, bytes.length - 7);
    let i = 0;
    let found = false;
    while ( i <= lastStart ) {
        if (
            bytes[i+0] === 0x63 /* c */ &&
            bytes[i+1] === 0x68 /* h */ &&
            bytes[i+2] === 0x61 /* a */ &&
            bytes[i+3] === 0x72 /* r */ &&
            bytes[i+4] === 0x73 /* s */ &&
            bytes[i+5] === 0x65 /* e */ &&
            bytes[i+6] === 0x74 /* t */
        ) {
            found = true;
            break;
        }
        i += 1;
    }
    if ( found === false ) { return; }
    // Skip `charset` plus the character which follows it (typically `=`).
    i += 8;
    // Find first alpha character: every offset is probed, starting with the
    // very first one, so that an unquoted `charset=utf-8` is handled.
    let j = 0;
    while ( j < MAX_GAP ) {
        const c = bytes[i+j];
        if ( c >= 0x41 && c <= 0x5A ) { break; }
        if ( c >= 0x61 && c <= 0x7A ) { break; }
        j += 1;
    }
    if ( j === MAX_GAP ) { return; }
    i += j;
    // Collect characters until first non charset-name-character, or until
    // the end of the data.
    const chars = [];
    j = 0;
    while ( j < MAX_NAME_LENGTH && (i + j) < bytes.length ) {
        const c = bytes[i+j];
        if ( c < 0x2D ) { break; }
        if ( c > 0x2D && c < 0x30 ) { break; }
        if ( c > 0x39 && c < 0x41 ) { break; }
        if ( c > 0x5A && c < 0x61 ) { break; }
        if ( c > 0x7A ) { break; }
        chars.push(c);
        j += 1;
    }
    // No terminator seen within the allowed length: not a plausible name.
    if ( j === MAX_NAME_LENGTH ) { return; }
    return String.fromCharCode(...chars).toLowerCase();
};
const streamClose = function(filterer, buffer) {
const streamClose = (session, buffer) => {
if ( buffer !== undefined ) {
filterer.stream.write(buffer);
} else if ( filterer.buffer !== undefined ) {
filterer.stream.write(filterer.buffer);
session.stream.write(buffer);
} else if ( session.buffer !== undefined ) {
session.stream.write(session.buffer);
}
filterer.stream.close();
session.stream.close();
};
const onStreamData = function(ev) {
const filterer = filterers.get(this);
if ( filterer === undefined ) {
const session = sessions.get(this);
if ( session === undefined ) {
this.write(ev.data);
this.disconnect();
return;
}
if (
this.status !== 'transferringdata' &&
this.status !== 'finishedtransferringdata'
) {
filterers.delete(this);
this.disconnect();
return;
if ( this.status !== 'transferringdata' ) {
if ( this.status !== 'finishedtransferringdata' ) {
sessions.delete(this);
this.disconnect();
return;
}
}
// TODO:
// - Possibly improve buffer growth, if benchmarking shows it's worth
// it.
// - Also evaluate whether keeping a list of buffers and then decoding
// them in sequence using TextDecoder's "stream" option is more
// efficient. Can the data buffers be safely kept around for later
// use?
// - Informal, quick benchmarks seem to show most of the overhead is
// from calling TextDecoder.decode() and TextEncoder.encode(), and if
// confirmed, there is nothing which can be done uBO-side to reduce
// overhead.
if ( filterer.buffer === null ) {
filterer.buffer = new Uint8Array(ev.data);
if ( session.buffer === null ) {
session.buffer = new Uint8Array(ev.data);
return;
}
const buffer = new Uint8Array(
filterer.buffer.byteLength +
ev.data.byteLength
session.buffer.byteLength + ev.data.byteLength
);
buffer.set(filterer.buffer);
buffer.set(new Uint8Array(ev.data), filterer.buffer.byteLength);
filterer.buffer = buffer;
buffer.set(session.buffer);
buffer.set(new Uint8Array(ev.data), session.buffer.byteLength);
session.buffer = buffer;
};
const onStreamStop = function() {
const filterer = filterers.get(this);
filterers.delete(this);
if ( filterer === undefined || filterer.buffer === null ) {
const session = sessions.get(this);
sessions.delete(this);
if ( session === undefined || session.buffer === null ) {
this.close();
return;
}
if ( this.status !== 'finishedtransferringdata' ) { return; }
if ( domParser === undefined ) {
domParser = new DOMParser();
xmlSerializer = new XMLSerializer();
}
if ( textEncoder === undefined ) {
textEncoder = new TextEncoder();
}
let doc;
// If stream encoding is still unknnown, try to extract from document.
let charsetFound = filterer.charset,
charsetUsed = charsetFound;
if ( charsetFound === undefined ) {
if ( utf8TextDecoder === undefined ) {
utf8TextDecoder = new TextDecoder();
}
doc = domParser.parseFromString(
utf8TextDecoder.decode(filterer.buffer.slice(0, 1024)),
filterer.mime
);
charsetFound = charsetFromDoc(doc);
charsetUsed = textEncode.normalizeCharset(charsetFound);
if ( charsetUsed === undefined ) {
return streamClose(filterer);
}
}
doc = domParser.parseFromString(
textDecode(charsetUsed, filterer.buffer),
filterer.mime
);
// https://github.com/gorhill/uBlock/issues/3507
// In case of no explicit charset found, try to find one again, but
// this time with the whole document parsed.
if ( charsetFound === undefined ) {
charsetFound = textEncode.normalizeCharset(charsetFromDoc(doc));
if ( charsetFound !== charsetUsed ) {
if ( charsetFound === undefined ) {
return streamClose(filterer);
}
charsetUsed = charsetFound;
doc = domParser.parseFromString(
textDecode(charsetFound, filterer.buffer),
filterer.mime
);
}
// If encoding is still unknown, try to extract from stream data
if ( session.charset === undefined ) {
const charsetFound = charsetFromStream(session.buffer);
if ( charsetFound === undefined ) { return streamClose(session); }
const charsetUsed = textEncode.normalizeCharset(charsetFound);
if ( charsetUsed === undefined ) { return streamClose(session); }
session.charset = charsetUsed;
}
let modified = false;
if ( filterer.selectors !== undefined ) {
if ( htmlFilteringEngine.apply(doc, filterer) ) {
modified = true;
}
while ( session.jobs.length !== 0 ) {
const job = session.jobs.shift();
modified = job.fn(session, ...job.args) || modified;
}
if ( modified !== true ) { return streamClose(session); }
if ( textEncoder === undefined ) {
textEncoder = new TextEncoder();
}
let encodedStream = textEncoder.encode(session.str);
if ( session.charset !== 'utf-8' ) {
encodedStream = textEncode.encode(session.charset, encodedStream);
}
if ( modified === false ) {
return streamClose(filterer);
}
// https://stackoverflow.com/questions/6088972/get-doctype-of-an-html-as-string-with-javascript/10162353#10162353
const doctypeStr = doc.doctype instanceof Object ?
xmlSerializer.serializeToString(doc.doctype) + '\n' :
'';
// https://github.com/gorhill/uBlock/issues/3391
let encodedStream = textEncoder.encode(
doctypeStr +
doc.documentElement.outerHTML
);
if ( charsetUsed !== 'utf-8' ) {
encodedStream = textEncode.encode(
charsetUsed,
encodedStream
);
}
streamClose(filterer, encodedStream);
streamClose(session, encodedStream);
};
const onStreamError = function() {
filterers.delete(this);
sessions.delete(this);
};
return function(fctxt, extras) {
// https://github.com/gorhill/uBlock/issues/3478
const statusCode = extras.statusCode || 0;
if ( statusCode !== 0 && (statusCode < 200 || statusCode >= 300) ) {
return;
return class Session extends µb.FilteringContext {
constructor(fctxt, details, mime, charset) {
super(fctxt);
this.entity = entityFromDomain(this.getDomain());
this.stream = null;
this.buffer = null;
this.mime = mime;
this.charset = charset;
this.str = null;
this.jobs = [];
}
const hostname = fctxt.getHostname();
if ( hostname === '' ) { return; }
const domain = fctxt.getDomain();
const request = {
stream: undefined,
tabId: fctxt.tabId,
url: fctxt.url,
hostname: hostname,
domain: domain,
entity: entityFromDomain(domain),
selectors: undefined,
buffer: null,
mime: 'text/html',
charset: undefined
};
request.selectors = htmlFilteringEngine.retrieve(request);
if ( request.selectors === undefined ) { return; }
const headers = extras.responseHeaders;
const contentType = headerValueFromName('content-type', headers);
if ( contentType !== '' ) {
request.mime = mimeFromContentType(contentType);
if ( request.mime === undefined ) { return; }
let charset = charsetFromContentType(contentType);
if ( charset !== undefined ) {
charset = textEncode.normalizeCharset(charset);
if ( charset === undefined ) { return; }
request.charset = charset;
getString() {
if ( this.str !== null ) { return this.str; }
if ( textDecoder !== undefined ) {
if ( textDecoder.encoding !== this.charset ) {
textDecoder = undefined;
}
}
if ( textDecoder === undefined ) {
textDecoder = new TextDecoder(this.charset);
}
this.str = textDecoder.decode(this.buffer);
return this.str;
}
// https://bugzilla.mozilla.org/show_bug.cgi?id=1426789
const disposition = headerValueFromName('content-disposition', headers);
if ( disposition !== '' && disposition.startsWith('inline') === false ) { return; }
setString(s) {
this.str = s;
}
addJob(job) {
this.jobs.push(job);
}
launch() {
if ( this.jobs.length === 0 ) { return; }
this.stream = browser.webRequest.filterResponseData(this.id);
this.stream.ondata = onStreamData;
this.stream.onstop = onStreamStop;
this.stream.onerror = onStreamError;
sessions.set(this.stream, this);
return true;
}
static canFilter(fctxt, details) {
if ( µb.canFilterResponseData !== true ) { return; }
const stream = request.stream =
browser.webRequest.filterResponseData(extras.requestId);
stream.ondata = onStreamData;
stream.onstop = onStreamStop;
stream.onerror = onStreamError;
filterers.set(stream, request);
// https://github.com/gorhill/uBlock/issues/3478
const statusCode = details.statusCode || 0;
if ( statusCode !== 0 && (statusCode < 200 || statusCode >= 300) ) {
return;
}
return true;
const hostname = fctxt.getHostname();
if ( hostname === '' ) { return; }
// https://bugzilla.mozilla.org/show_bug.cgi?id=1426789
const headers = details.responseHeaders;
const disposition = headerValueFromName('content-disposition', headers);
if ( disposition !== '' && disposition.startsWith('inline') === false ) {
return;
}
const contentType = headerValueFromName('content-type', headers);
let mime, charset;
if ( contentType !== '' ) {
mime = mimeFromContentType(contentType);
if ( mime === undefined ) { return; }
charset = charsetFromContentType(contentType);
if ( charset !== undefined ) {
charset = textEncode.normalizeCharset(charset);
if ( charset === undefined ) { return; }
}
}
if ( mime.startsWith('text/') === false ) {
if ( otherValidMimes.has(mime) === false ) { return; }
}
return new Session(fctxt, details, mime, charset);
}
};
})();