Fix #3069, and consequently #3374, #3378.

A new filtering class has been created: "static extended filtering".
This new class is an umbrella class for more specialized filtering
engines:
- Cosmetic filtering
- Scriptlet filtering
- HTML filtering

HTML filtering is available only on platforms which support modifying
the response body on the fly, so only Firefox 57+ at the moment.

With the ability to modify the response body, HTML filtering has
been introduced: removing elements from the DOM before the source
data has been parsed by the browser.

A consequence of the HTML filtering ability is that it brings back the
script tag filtering feature.
This commit is contained in:
Raymond Hill 2017-12-28 13:49:02 -05:00
parent d2df01dc08
commit a9f68fe02f
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
18 changed files with 1842 additions and 1049 deletions

View File

@ -29,7 +29,11 @@ vAPI.net = {
onBeforeRequest: {},
onBeforeMaybeSpuriousCSPReport: {},
onHeadersReceived: {},
nativeCSPReportFiltering: true
nativeCSPReportFiltering: true,
webRequest: browser.webRequest,
canFilterResponseBody:
typeof browser.webRequest === 'object' &&
typeof browser.webRequest.filterResponseData === 'function'
};
/******************************************************************************/

View File

@ -22,7 +22,10 @@
<script src="js/dynamic-net-filtering.js"></script>
<script src="js/static-net-filtering.js"></script>
<script src="js/url-net-filtering.js"></script>
<script src="js/static-ext-filtering.js"></script>
<script src="js/cosmetic-filtering.js"></script>
<script src="js/scriptlet-filtering.js"></script>
<script src="js/html-filtering.js"></script>
<script src="js/hnswitches.js"></script>
<script src="js/ublock.js"></script>
<script src="js/messaging.js"></script>

View File

@ -53,11 +53,10 @@ api.removeObserver = function(observer) {
};
var fireNotification = function(topic, details) {
var result;
var result, r;
for ( var i = 0; i < observers.length; i++ ) {
if ( observers[i](topic, details) === false ) {
result = false;
}
r = observers[i](topic, details);
if ( r !== undefined ) { result = r; }
}
return result;
};
@ -955,7 +954,7 @@ var updateNext = function() {
fireNotification(
'before-asset-updated',
{ assetKey: assetKey, type: assetEntry.content }
) !== false
) === true
) {
return assetKey;
}

View File

@ -95,6 +95,7 @@ var µBlock = (function() { // jshint ignore:line
// Features detection.
privacySettingsSupported: vAPI.browserSettings instanceof Object,
cloudStorageSupported: vAPI.cloud instanceof Object,
canFilterResponseBody: vAPI.net.canFilterResponseBody === true,
// https://github.com/chrisaljoudi/uBlock/issues/180
// Whitelist directives need to be loaded once the PSL is available
@ -120,8 +121,8 @@ var µBlock = (function() { // jshint ignore:line
// read-only
systemSettings: {
compiledMagic: 'vrgorlgelgws',
selfieMagic: 'pxpclstriajk'
compiledMagic: 'puuijtkfpspv',
selfieMagic: 'puuijtkfpspv'
},
restoreBackupSettings: {

View File

@ -1379,20 +1379,9 @@ vAPI.domSurveyor = (function() {
// Library of resources is located at:
// https://github.com/gorhill/uBlock/blob/master/assets/ublock/resources.txt
if ( cfeDetails.scripts ) {
// Have the injected script tag remove itself when execution completes:
// to keep DOM as clean as possible.
var text = cfeDetails.scripts +
"\n" +
"(function() {\n" +
" var c = document.currentScript,\n" +
" p = c && c.parentNode;\n" +
" if ( p ) {\n" +
" p.removeChild(c);\n" +
" }\n" +
"})();";
vAPI.injectScriptlet(document, text);
vAPI.injectedScripts = text;
if ( response.scriptlets ) {
vAPI.injectScriptlet(document, response.scriptlets);
vAPI.injectedScripts = response.scriptlets;
}
if ( vAPI.domSurveyor instanceof Object ) {
@ -1414,13 +1403,11 @@ vAPI.domSurveyor = (function() {
};
// This starts bootstrap process.
var url = window.location.href;
vAPI.messaging.send(
'contentscript',
{
what: 'retrieveContentScriptParameters',
pageURL: url,
locationURL: url,
url: window.location.href,
isRootFrame: window === window.top
},
bootstrapPhase1

File diff suppressed because it is too large Load Diff

357
src/js/html-filtering.js Normal file
View File

@ -0,0 +1,357 @@
/*******************************************************************************
uBlock Origin - a browser extension to block requests.
Copyright (C) 2017 Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see {http://www.gnu.org/licenses/}.
Home: https://github.com/gorhill/uBlock
*/
'use strict';
/******************************************************************************/
µBlock.htmlFilteringEngine = (function() {
var api = {};
var µb = µBlock,
filterDB = new µb.staticExtFilteringEngine.HostnameBasedDB(),
pselectors = new Map(),
duplicates = new Set(),
docRegister, loggerRegister;
// Task implementing the `:has(...)` operator.
// `task` is an [ operator, argument ] pair; the argument is a plain CSS
// selector.
var PSelectorHasTask = function(task) {
    this.selector = task[1];
};
// Returns, as a new array, the nodes of `input` which have at least one
// descendant matching the selector. Input order is preserved.
PSelectorHasTask.prototype.exec = function(input) {
    var selector = this.selector;
    return Array.from(input).filter(function(candidate) {
        return candidate.querySelector(selector) !== null;
    });
};
// Task implementing the `:has-text(...)` operator.
// `task` is an [ operator, argument ] pair; the argument is the source of
// the regular expression to test against.
var PSelectorHasTextTask = function(task) {
    this.needle = new RegExp(task[1]);
};
// Returns, as a new array, the nodes of `input` whose `textContent` matches
// the regular expression. Input order is preserved.
PSelectorHasTextTask.prototype.exec = function(input) {
    var matched = [];
    var candidates = Array.from(input);
    for ( var i = 0; i < candidates.length; i++ ) {
        if ( this.needle.test(candidates[i].textContent) === false ) {
            continue;
        }
        matched.push(candidates[i]);
    }
    return matched;
};
// Task implementing the `:if(...)` operator.
// `task` is an [ operator, argument ] pair; the argument is a compiled
// procedural selector which is wrapped in its own PSelector instance.
// Requires `PSelector.prototype.test` to be available.
var PSelectorIfTask = function(task) {
    this.pselector = new PSelector(task[1]);
};
// Outcome of the nested selector test for which a node is retained.
PSelectorIfTask.prototype.target = true;
// Returns, as a new array, the nodes of `input` for which the nested
// selector test yields `this.target`. Input order is preserved.
PSelectorIfTask.prototype.exec = function(input) {
    var self = this;
    return Array.from(input).filter(function(candidate) {
        return self.pselector.test(candidate) === self.target;
    });
};
// Task implementing the `:if-not(...)` operator: same as `:if(...)` except
// that a node is retained when the nested selector test fails.
var PSelectorIfNotTask = function(task) {
    PSelectorIfTask.call(this, task);
    this.target = false;
};
PSelectorIfNotTask.prototype = Object.create(PSelectorIfTask.prototype);
PSelectorIfNotTask.prototype.constructor = PSelectorIfNotTask;
// Task implementing the `:xpath(...)` operator.
// `task` is an [ operator, argument ] pair; the argument is the XPath
// expression, kept as a string.
var PSelectorXpathTask = function(task) {
    this.xpe = task[1];
};
// Evaluates the XPath expression against each node of `input`, using the
// document currently held in `docRegister` to create the expression.
// Returns a new array with all resulting nodes of type 1 (element). For each
// input node, the snapshot is walked from its last item to its first.
PSelectorXpathTask.prototype.exec = function(input) {
    var elements = [],
        expression = docRegister.createExpression(this.xpe, null),
        snapshot = null;
    for ( var context of input ) {
        // The previous result object is handed back for reuse.
        snapshot = expression.evaluate(
            context,
            XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
            snapshot
        );
        for ( var k = snapshot.snapshotLength - 1; k >= 0; k-- ) {
            var candidate = snapshot.snapshotItem(k);
            if ( candidate.nodeType !== 1 ) { continue; }
            elements.push(candidate);
        }
    }
    return elements;
};
// Executable form of a compiled procedural selector.
//
// `o` is the compiled selector object:
// - `o.raw`: the raw selector text (used for logging by callers)
// - `o.selector`: plain CSS selector used to get the initial set of nodes,
//   or the empty string to start from the root node itself
// - `o.tasks`: optional array of [ operator, argument ] pairs, applied in
//   order to narrow down the set of nodes
//
// If an operator is not found in `operatorToTaskMap`, the instance is
// flagged as `invalid` and will never match anything.
var PSelector = function(o) {
    // The operator-to-constructor map is shared through the prototype and
    // built only once, at first instantiation.
    if ( PSelector.prototype.operatorToTaskMap === undefined ) {
        PSelector.prototype.operatorToTaskMap = new Map([
            [ ':has', PSelectorHasTask ],
            [ ':has-text', PSelectorHasTextTask ],
            [ ':if', PSelectorIfTask ],
            [ ':if-not', PSelectorIfNotTask ],
            [ ':xpath', PSelectorXpathTask ]
        ]);
    }
    this.invalid = false;
    this.raw = o.raw;
    this.selector = o.selector;
    this.tasks = [];
    var tasks = o.tasks;
    if ( !tasks ) { return; }
    for ( var task of tasks ) {
        var ctor = this.operatorToTaskMap.get(task[0]);
        if ( ctor === undefined ) {
            this.invalid = true;
            break;
        }
        this.tasks.push(new ctor(task));
    }
};
PSelector.prototype.operatorToTaskMap = undefined;
// Returns the initial set of nodes: the nodes matching the plain CSS
// selector under `input` (or under the document in `docRegister` when no
// input is given), or the root itself when the plain CSS selector is empty.
PSelector.prototype.prime = function(input) {
    var root = input || docRegister;
    if ( this.selector !== '' ) {
        return root.querySelectorAll(this.selector);
    }
    return [ root ];
};
// Returns the nodes which satisfy the whole procedural selector. An invalid
// selector yields an empty array, such that callers can always safely read
// the `length` property of the result.
PSelector.prototype.exec = function(input) {
    if ( this.invalid ) { return []; }
    var nodes = this.prime(input);
    for ( var task of this.tasks ) {
        if ( nodes.length === 0 ) { break; }
        nodes = task.exec(nodes);
    }
    return nodes;
};
// Returns true if at least one node satisfies the whole procedural selector.
// Each primed node is run individually through the tasks, which allows
// bailing out as soon as one node is found to go through all of them.
// This is the method used by the `:if`/`:if-not` tasks.
PSelector.prototype.test = function(input) {
    if ( this.invalid ) { return false; }
    var nodes = this.prime(input);
    for ( var node of nodes ) {
        var survivors = [ node ];
        for ( var task of this.tasks ) {
            survivors = task.exec(survivors);
            if ( survivors.length === 0 ) { break; }
        }
        if ( survivors.length !== 0 ) { return true; }
    }
    return false;
};
// Reports to the logger held in `loggerRegister` that the HTML filter
// `selector` was applied. The filter is reported in its raw form, prefixed
// with `##^`. `details` supplies the `tabId`, `url` and `hostname` fields
// of the log entry.
var logOne = function(details, selector) {
    var filter = {
        source: 'cosmetic',
        raw: '##^' + selector
    };
    loggerRegister.writeOne(
        details.tabId,
        'cosmetic',
        filter,
        'dom',
        details.url,
        null,
        details.hostname
    );
};
// Removes from the current document all nodes matching a procedural
// selector. `selector` is the JSON-stringified compiled selector; the
// corresponding PSelector instance is created on first use and kept in
// `pselectors` for subsequent calls.
// Returns true if at least one node was removed. When that is the case and
// the logger is enabled, the raw form of the selector is logged.
var applyProceduralSelector = function(details, selector) {
    var compiled = pselectors.get(selector);
    if ( compiled === undefined ) {
        compiled = new PSelector(JSON.parse(selector));
        pselectors.set(selector, compiled);
    }
    var matches = compiled.exec();
    var removedCount = 0;
    // Walk the matches from last to first.
    for ( var k = matches.length - 1; k >= 0; k-- ) {
        var parent = matches[k].parentNode;
        if ( parent === null ) { continue; }
        parent.removeChild(matches[k]);
        removedCount += 1;
    }
    if ( removedCount === 0 ) { return false; }
    if ( loggerRegister.isEnabled() ) {
        logOne(details, compiled.raw);
    }
    return true;
};
// Removes from the current document (held in `docRegister`) all nodes
// matching the plain CSS selector `selector`.
// Returns true if at least one node was removed. When that is the case and
// the logger is enabled, the selector is logged.
var applyCSSSelector = function(details, selector) {
    var matches = docRegister.querySelectorAll(selector);
    var removedCount = 0;
    // Walk the matches from last to first.
    for ( var k = matches.length - 1; k >= 0; k-- ) {
        var parent = matches[k].parentNode;
        if ( parent === null ) { continue; }
        parent.removeChild(matches[k]);
        removedCount += 1;
    }
    if ( removedCount === 0 ) { return false; }
    if ( loggerRegister.isEnabled() ) {
        logOne(details, selector);
    }
    return true;
};
// Empties the filter database, the cache of PSelector instances, and the
// set of fingerprints used to detect duplicate filters.
api.reset = function() {
    [ filterDB, pselectors, duplicates ].forEach(function(store) {
        store.clear();
    });
};
// Empties the set of fingerprints used to detect duplicate filters while
// loading compiled content.
api.freeze = function() {
    duplicates.clear();
};
// Compiles one HTML filter into section 1002 of `writer`.
// `parsed.suffix` is the filter suffix: its first character is dropped, and
// what remains is the selector, which is compiled/validated by the static
// extended filtering engine. Nothing is written if the selector does not
// compile.
// One entry is written per non-negated hostname:
//   [ type, key, hostname, compiled selector ]
// where `type` is 65 when the compiled selector starts with `{`, 64
// otherwise, and `key` is the domain of the hostname, prefixed with `!` for
// exception filters.
api.compile = function(parsed, writer) {
    var rawSelector = parsed.suffix.slice(1).trim();
    var compiled = µb.staticExtFilteringEngine.compileSelector(rawSelector);
    if ( compiled === undefined ) { return; }
    // 1002 = html filtering
    writer.select(1002);
    var filterType = compiled.charCodeAt(0) === 0x7B /* '{' */ ? 65 : 64;
    var keyPrefix = parsed.exception ? '!' : '';
    var hostnames = parsed.hostnames;
    // TODO: Mind negated hostnames, they are currently discarded.
    for ( var i = 0; i < hostnames.length; i++ ) {
        var hostname = hostnames[i];
        if ( hostname.charCodeAt(0) === 0x7E /* '~' */ ) { continue; }
        writer.push([
            filterType,
            keyPrefix + µb.URI.domainFromHostname(hostname),
            hostname,
            compiled
        ]);
    }
};
// Loads into the filter database all HTML filters found in section 1002 of
// `reader`. Entries whose fingerprint was already seen are skipped.
api.fromCompiledContent = function(reader) {
    // Don't bother loading filters if stream filtering is not supported.
    //if ( µb.canFilterResponseBody === false ) { return; }
    // 1002 = html filtering
    reader.select(1002);
    while ( reader.next() ) {
        var fingerprint = reader.fingerprint();
        if ( duplicates.has(fingerprint) === false ) {
            duplicates.add(fingerprint);
            // Fields: [ type, key, hostname, compiled selector ]
            var fields = reader.args();
            filterDB.add(fields[1], {
                type: fields[0],
                hostname: fields[2],
                selector: fields[3]
            });
        }
    }
};
// Returns an array of all HTML filter entries which apply to the request
// described by `request.hostname`, `request.domain` and `request.entity`,
// or undefined if there is none.
api.retrieve = function(request) {
    var hostname = request.hostname;
    // https://github.com/gorhill/uBlock/issues/2835
    // Do not filter if the site is under an `allow` rule.
    var underAllowRule =
        µb.userSettings.advancedUserEnabled &&
        µb.sessionFirewall.evaluateCellZY(hostname, hostname, '*') === 2;
    if ( underAllowRule ) { return; }
    var matches = [];
    if ( request.domain !== '' ) {
        filterDB.retrieve(request.domain, hostname, matches);
        filterDB.retrieve(request.entity, request.entity, matches);
    }
    filterDB.retrieve('', hostname, matches);
    // TODO: handle exceptions.
    return matches.length === 0 ? undefined : matches;
};
// Applies to the document `doc` all HTML filters listed in
// `details.selectors`. Entries of type 64 are plain CSS selectors, all
// others are handled as procedural selectors.
// Returns true if at least one node was removed from the document.
//
// The document and the logger are made available to the helpers through
// `docRegister` and `loggerRegister` for the duration of the call. These
// are always cleared on the way out, including when a selector throws, so
// that the document is not kept referenced after the call. Exceptions are
// not swallowed, they still propagate to the caller.
api.apply = function(doc, details) {
    docRegister = doc;
    loggerRegister = µb.logger;
    var modified = false;
    try {
        for ( var entry of details.selectors ) {
            var applied = entry.type === 64 ?
                applyCSSSelector(details, entry.selector) :
                applyProceduralSelector(details, entry.selector);
            if ( applied ) {
                modified = true;
            }
        }
    } finally {
        docRegister = loggerRegister = undefined;
    }
    return modified;
};
// Returns the selfie of the filter database, as produced by the database
// itself.
api.toSelfie = function() {
    var selfie = filterDB.toSelfie();
    return selfie;
};
// Replaces the filter database with one created from `selfie`, and discards
// all cached PSelector instances.
api.fromSelfie = function(selfie) {
    var HostnameBasedDB = µb.staticExtFilteringEngine.HostnameBasedDB;
    filterDB = new HostnameBasedDB(selfie);
    pselectors.clear();
};
// TODO: The following methods are useful only to legacy Firefox. They can
// be removed once support for legacy Firefox is dropped. The only
// concern at this point is for the code to work, not to be efficient.
// Only procedural selectors made of a single `:has-text` task are
// considered.

// Returns an array of the distinct hostnames of all filters of type 65
// whose compiled selector consists of exactly one `:has-text` task, or
// undefined if there is none.
api.retrieveScriptTagHostnames = function() {
    var hostnames = new Set();
    for ( var entry of filterDB ) {
        if ( entry.type !== 65 ) { continue; }
        var tasks = JSON.parse(entry.selector).tasks;
        if ( tasks.length !== 1 ) { continue; }
        var task = tasks[0];
        if ( task.length !== 2 || task[0] !== ':has-text' ) { continue; }
        hostnames.add(entry.hostname);
    }
    return hostnames.size === 0 ? undefined : Array.from(hostnames);
};
// Returns a regular expression source made of the `|`-joined, distinct
// arguments of all applicable filters of type 65 whose compiled selector
// consists of exactly one `:has-text` task, or undefined if there is none.
// Applicable filters are those returned by `api.retrieve` for the given
// domain/hostname.
api.retrieveScriptTagRegex = function(domain, hostname) {
    var request = {
        hostname: hostname,
        domain: domain,
        entity: µb.URI.entityFromDomain(domain)
    };
    var entries = api.retrieve(request);
    if ( entries === undefined ) { return; }
    var needles = new Set();
    for ( var entry of entries ) {
        if ( entry.type !== 65 ) { continue; }
        var tasks = JSON.parse(entry.selector).tasks;
        if ( tasks.length !== 1 ) { continue; }
        var task = tasks[0];
        if ( task.length !== 2 || task[0] !== ':has-text' ) { continue; }
        needles.add(task[1]);
    }
    if ( needles.size === 0 ) { return; }
    return Array.from(needles).join('|');
};
return api;
})();
/******************************************************************************/

View File

@ -102,7 +102,7 @@ var onMessage = function(request, sender, callback) {
break;
case 'compileCosmeticFilterSelector':
response = µb.cosmeticFilteringEngine.compileSelector(request.selector);
response = µb.staticExtFilteringEngine.compileSelector(request.selector);
break;
case 'cosmeticFiltersInjected':
@ -465,7 +465,7 @@ var onMessage = function(request, sender, callback) {
var µb = µBlock,
response,
tabId, frameId,
pageStore;
pageStore = null;
if ( sender && sender.tab ) {
tabId = sender.tab.id;
@ -491,21 +491,33 @@ var onMessage = function(request, sender, callback) {
break;
case 'retrieveContentScriptParameters':
if ( pageStore && pageStore.getNetFilteringSwitch() ) {
response = {
collapseBlocked: µb.userSettings.collapseBlocked,
noCosmeticFiltering: pageStore.noCosmeticFiltering === true,
noGenericCosmeticFiltering:
pageStore.noGenericCosmeticFiltering === true
};
request.tabId = tabId;
request.frameId = frameId;
response.specificCosmeticFilters =
µb.cosmeticFilteringEngine
.retrieveDomainSelectors(request, response);
if ( request.isRootFrame && µb.logger.isEnabled() ) {
µb.logCosmeticFilters(tabId);
}
if (
pageStore === null ||
pageStore.getNetFilteringSwitch() === false ||
!request.url
) {
break;
}
response = {
collapseBlocked: µb.userSettings.collapseBlocked,
noCosmeticFiltering: pageStore.noCosmeticFiltering === true,
noGenericCosmeticFiltering:
pageStore.noGenericCosmeticFiltering === true
};
request.tabId = tabId;
request.frameId = frameId;
request.hostname = µb.URI.hostnameFromURI(request.url);
request.domain = µb.URI.domainFromHostname(request.hostname);
request.entity = µb.URI.entityFromDomain(request.domain);
response.specificCosmeticFilters =
µb.cosmeticFilteringEngine.retrieveDomainSelectors(request, response);
// If response body filtering is supported, then the scriptlets have
// already been injected.
if ( µb.canFilterResponseBody === false ) {
response.scriptlets = µb.scriptletFilteringEngine.retrieve(request);
}
if ( request.isRootFrame && µb.logger.isEnabled() ) {
µb.logCosmeticFilters(tabId);
}
break;

View File

@ -26,7 +26,26 @@
/******************************************************************************/
var listEntries = Object.create(null),
filterClassSeparator = '\n/* end of network - start of cosmetic */\n';
reBlockStart = /^#block-start-(\d+)\n/gm;
/******************************************************************************/
// Returns the concatenation (joined with a newline) of the bodies of all
// blocks in `content` whose numeric id is within [begId, endId).
//
// A block starts right after a `#block-start-<id>` line, and ends right
// before the next occurrence of `#block-end-<id>`. Blocks whose id is out of
// range are skipped.
//
// If the end marker of a block is missing, the block is considered to
// extend to the end of `content` and the scan stops there. Without this,
// `indexOf` returning -1 would be used as the position from which to resume
// the scan, which is the same as restarting from the beginning of
// `content`, and thus the loop would never end.
var extractBlocks = function(content, begId, endId) {
    reBlockStart.lastIndex = 0;
    var out = [];
    var match = reBlockStart.exec(content);
    while ( match !== null ) {
        var beg = match.index + match[0].length;
        var blockId = parseInt(match[1], 10);
        if ( blockId >= begId && blockId < endId ) {
            var end = content.indexOf('#block-end-' + match[1], beg);
            if ( end === -1 ) {
                out.push(content.slice(beg));
                break;
            }
            out.push(content.slice(beg, end));
            // Resume the scan after the block just extracted.
            reBlockStart.lastIndex = end;
        }
        match = reBlockStart.exec(content);
    }
    return out.join('\n');
};
/******************************************************************************/
@ -34,13 +53,11 @@ var fromNetFilter = function(details) {
var lists = [],
compiledFilter = details.compiledFilter,
entry, content, pos, notFound;
for ( var assetKey in listEntries ) {
entry = listEntries[assetKey];
if ( entry === undefined ) { continue; }
content = entry.content.slice(
0,
entry.content.indexOf(filterClassSeparator)
);
content = extractBlocks(entry.content, 0, 1000);
pos = 0;
for (;;) {
pos = content.indexOf(compiledFilter, pos);
@ -96,7 +113,7 @@ var fromNetFilter = function(details) {
// the various compiled versions.
var fromCosmeticFilter = function(details) {
var match = /^#@?#/.exec(details.rawFilter),
var match = /^#@?#\^?/.exec(details.rawFilter),
prefix = match[0],
selector = details.rawFilter.slice(prefix.length);
@ -138,15 +155,14 @@ var fromCosmeticFilter = function(details) {
}
var response = Object.create(null),
assetKey, entry, content, found, beg, end, fargs;
assetKey, entry, content,
found, beg, end,
fargs, isProcedural;
for ( assetKey in listEntries ) {
entry = listEntries[assetKey];
if ( entry === undefined ) { continue; }
content = entry.content.slice(
entry.content.indexOf(filterClassSeparator) +
filterClassSeparator.length
);
content = extractBlocks(entry.content, 1000, 2000);
found = undefined;
while ( (match = reNeedle.exec(content)) !== null ) {
beg = content.lastIndexOf('\n', match.index);
@ -194,12 +210,15 @@ var fromCosmeticFilter = function(details) {
found = prefix + selector;
}
break;
case 6:
case 8:
case 9:
case 32:
case 64:
case 65:
isProcedural = fargs[3].charCodeAt(0) === 0x7B;
if (
fargs[0] !== 9 && fargs[3] !== selector ||
fargs[0] === 9 && JSON.parse(fargs[3]).raw !== selector
isProcedural === false && fargs[3] !== selector ||
isProcedural && JSON.parse(fargs[3]).raw !== selector
) {
break;
}

View File

@ -35,22 +35,21 @@ if ( typeof vAPI.rpcReceiver !== 'object' ) {
vAPI.rpcReceiver.getScriptTagHostnames = function() {
var µb = µBlock;
var cfe = µb.cosmeticFilteringEngine;
if ( !cfe ) { return; }
return cfe.retrieveScriptTagHostnames();
if ( µb.htmlFilteringEngine ) {
return µb.htmlFilteringEngine.retrieveScriptTagHostnames();
}
};
/******************************************************************************/
vAPI.rpcReceiver.getScriptTagFilters = function(details) {
var µb = µBlock;
var cfe = µb.cosmeticFilteringEngine;
if ( !cfe ) { return; }
if ( !µb.htmlFilteringEngine ) { return; }
// Fetching the script tag filters first: assuming it is faster than
// checking whether the site is whitelisted.
var hostname = details.frameHostname;
var r = cfe.retrieveScriptTagRegex(
µb.URI.domainFromHostname(hostname) || hostname,
var r = µb.htmlFilteringEngine.retrieveScriptTagRegex(
µb.URI.domainFromHostname(hostname),
hostname
);
// https://github.com/gorhill/uBlock/issues/838

View File

@ -0,0 +1,270 @@
/*******************************************************************************
uBlock Origin - a browser extension to block requests.
Copyright (C) 2017 Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see {http://www.gnu.org/licenses/}.
Home: https://github.com/gorhill/uBlock
*/
'use strict';
/******************************************************************************/
µBlock.scriptletFilteringEngine = (function() {
var api = {};
var µb = µBlock,
scriptletDB = new µb.staticExtFilteringEngine.HostnameBasedDB(),
duplicates = new Set(),
scriptletCache = new µb.MRUCache(32),
exceptionsRegister = new Set(),
scriptletsRegister = new Map(),
reEscapeScriptArg = /[\\'"]/g;
// Snippet appended to the injected scriptlets: when executed from a script
// tag, it removes that script tag from its parent node.
var scriptletRemover =
    '(function() {\n' +
    ' var c = document.currentScript, p = c && c.parentNode;\n' +
    ' if ( p ) { p.removeChild(c); }\n' +
    '})();';
// Resolves the scriptlet reference `raw` (`token[, arg[, ...]]`) to its
// JavaScript content, and stores the result in the `toInject` map, keyed on
// `raw`. Nothing is stored if the content can't be resolved.
// Resolved content is cached in `scriptletCache`; the cache is reset when
// it is older than the last modification of the redirect engine `reng`.
var lookupScriptlet = function(raw, reng, toInject) {
    if ( toInject.has(raw) ) { return; }
    if ( scriptletCache.resetTime < reng.modifyTime ) {
        scriptletCache.reset();
    }
    var cached = scriptletCache.lookup(raw);
    if ( cached !== undefined ) {
        toInject.set(raw, cached);
        return;
    }
    // Anything before the first comma is the resource token, anything after
    // is the list of arguments.
    var comma = raw.indexOf(',');
    var token = comma === -1 ? raw : raw.slice(0, comma).trim();
    var args = comma === -1 ? undefined : raw.slice(comma + 1).trim();
    var content = reng.resourceContentFromName(token, 'application/javascript');
    if ( !content ) { return; }
    if ( args ) {
        content = patchScriptlet(content, args);
        if ( !content ) { return; }
    }
    scriptletCache.add(raw, content);
    toInject.set(raw, content);
};
// Fill template placeholders.
// `args` is a comma-separated list of arguments: the first occurrence of
// the placeholder `{{n}}` in `content` is replaced with the n-th argument
// (1-based). Each argument is trimmed, and its backslash, single quote and
// double quote characters are escaped with a backslash.
// Returns the patched content.
var patchScriptlet = function(content, args) {
    var i = 1,
        pos, arg;
    // A function is used as replacement value such that the argument is
    // inserted verbatim: when the replacement value is a string, sequences
    // such as `$&` or `$$` found in it are interpreted as special
    // replacement patterns.
    var verbatim = function() { return arg; };
    while ( args !== '' ) {
        pos = args.indexOf(',');
        if ( pos === -1 ) { pos = args.length; }
        arg = args.slice(0, pos).trim().replace(reEscapeScriptArg, '\\$&');
        content = content.replace('{{' + i + '}}', verbatim);
        args = args.slice(pos + 1).trim();
        i++;
    }
    return content;
};
// Reports to the logger the scriptlet injection filter for `token`, in its
// raw form: `##script:inject(token)`, or `#@#script:inject(token)` when
// `isException` is truthy. `details` supplies the `tabId`, `url` and
// `hostname` fields of the log entry.
var logOne = function(isException, token, details) {
    var anchor = isException ? '#@#' : '##';
    var filter = {
        source: 'cosmetic',
        raw: anchor + 'script:inject(' + token + ')'
    };
    µb.logger.writeOne(
        details.tabId,
        'cosmetic',
        filter,
        'dom',
        details.url,
        null,
        details.hostname
    );
};
// Empties the scriptlet database and the set of fingerprints used to detect
// duplicate filters.
api.reset = function() {
    [ scriptletDB, duplicates ].forEach(function(store) {
        store.clear();
    });
};
// Empties the set of fingerprints used to detect duplicate filters while
// loading compiled content.
api.freeze = function() {
    duplicates.clear();
};
// Compiles one scriptlet injection filter into section 1001 of `writer`.
// One entry is written per hostname:
//   [ 32, key, hostname, suffix ]
// where `key` is the domain of the hostname, prefixed with `!` when the
// entry acts as an exception, i.e. for exception filters and for negated
// hostnames of non-exception filters.
api.compile = function(parsed, writer) {
    // 1001 = scriptlet injection
    writer.select(1001);
    var hostnames = parsed.hostnames;
    // Only exception filters are allowed to be global.
    if ( hostnames.length === 0 ) {
        if ( parsed.exception ) {
            writer.push([ 32, '!', '', parsed.suffix ]);
        }
        return;
    }
    var µburi = µb.URI;
    for ( var i = 0; i < hostnames.length; i++ ) {
        var hostname = hostnames[i];
        var negated = hostname.charCodeAt(0) === 0x7E /* '~' */;
        if ( negated ) {
            hostname = hostname.slice(1);
        }
        var hash = µburi.domainFromHostname(hostname);
        // https://github.com/gorhill/uBlock/issues/3375
        // Ignore instances of exception filter with negated hostnames,
        // because there is no way to create an exception to an exception.
        if ( parsed.exception && negated ) { continue; }
        if ( parsed.exception || negated ) {
            hash = '!' + hash;
        }
        writer.push([ 32, hash, hostname, parsed.suffix ]);
    }
};
// Loads into the scriptlet database all scriptlet injection filters found
// in section 1001 of `reader`. Entries whose fingerprint was already seen,
// or which have fewer than four fields, are skipped.
//
// The token stored in the database is what is found between the
// parentheses of the suffix:
//
// 01234567890123456789
// script:inject(token[, arg[, ...]])
//               ^                 ^
//              14                 -1
api.fromCompiledContent = function(reader) {
    // 1001 = scriptlet injection
    reader.select(1001);
    while ( reader.next() ) {
        var fingerprint = reader.fingerprint();
        if ( duplicates.has(fingerprint) === false ) {
            duplicates.add(fingerprint);
            // Fields: [ type, key, hostname, suffix ]
            var fields = reader.args();
            if ( fields.length >= 4 ) {
                scriptletDB.add(fields[1], {
                    hostname: fields[2],
                    token: fields[3].slice(14, -1)
                });
            }
        }
    }
};
// Returns the JavaScript code to inject for the request described by
// `request.hostname`, `request.domain` and `request.entity`, or undefined
// if there is nothing to inject.
// The code is made of all applicable scriptlets which are not disabled by
// an exception filter, followed by `scriptletRemover`, all joined with
// newlines. When the logger is enabled, each applicable scriptlet is
// logged, whether or not it was disabled by an exception filter.
api.retrieve = function(request) {
    if ( scriptletDB.size === 0 ) { return; }
    if ( µb.hiddenSettings.ignoreScriptInjectFilters ) { return; }
    var reng = µb.redirectEngine;
    if ( !reng ) { return; }
    var hostname = request.hostname;
    // https://github.com/gorhill/uBlock/issues/2835
    // Do not inject scriptlets if the site is under an `allow` rule.
    var underAllowRule =
        µb.userSettings.advancedUserEnabled &&
        µb.sessionFirewall.evaluateCellZY(hostname, hostname, '*') === 2;
    if ( underAllowRule ) { return; }
    var domain = request.domain,
        entity = request.entity,
        i;
    // https://github.com/gorhill/uBlock/issues/1954
    // Implicit: look up a resource named `<hostname>.js` for the hostname
    // and each of its parent hostnames, up to the domain.
    var hn = hostname,
        dot;
    lookupScriptlet(hn + '.js', reng, scriptletsRegister);
    while ( hn !== domain && (dot = hn.indexOf('.')) !== -1 ) {
        hn = hn.slice(dot + 1);
        lookupScriptlet(hn + '.js', reng, scriptletsRegister);
    }
    if ( entity !== '' ) {
        lookupScriptlet(entity + '.js', reng, scriptletsRegister);
    }
    // Explicit: scriptlets referenced by filters.
    var wanted = [];
    if ( domain !== '' ) {
        scriptletDB.retrieve(domain, hostname, wanted);
        scriptletDB.retrieve(entity, entity, wanted);
    }
    scriptletDB.retrieve('', hostname, wanted);
    for ( i = 0; i < wanted.length; i++ ) {
        lookupScriptlet(wanted[i].token, reng, scriptletsRegister);
    }
    if ( scriptletsRegister.size === 0 ) { return; }
    // Collect exception filters.
    var excepted = [];
    if ( domain !== '' ) {
        scriptletDB.retrieve('!' + domain, hostname, excepted);
        scriptletDB.retrieve('!' + entity, entity, excepted);
    }
    scriptletDB.retrieve('!', hostname, excepted);
    for ( i = 0; i < excepted.length; i++ ) {
        exceptionsRegister.add(excepted[i].token);
    }
    // Gather the scriptlets to inject, and log results if needed.
    var out = [],
        mustLog = µb.logger.isEnabled();
    scriptletsRegister.forEach(function(content, raw) {
        var isException = exceptionsRegister.has(raw);
        if ( isException === false ) {
            out.push(content);
        }
        if ( mustLog ) {
            logOne(isException, raw, request);
        }
    });
    // The registers are shared across calls, leave them empty.
    scriptletsRegister.clear();
    exceptionsRegister.clear();
    if ( out.length === 0 ) { return; }
    out.push(scriptletRemover);
    return out.join('\n');
};
// Inserts a script tag containing `details.scriptlets` as the first child
// of the head element of the document `doc`. Always returns true.
api.apply = function(doc, details) {
    var head = doc.head;
    var injected = doc.createElement('script');
    injected.textContent = details.scriptlets;
    head.insertBefore(injected, head.firstChild);
    return true;
};
// Returns the selfie of the scriptlet database, as produced by the database
// itself.
api.toSelfie = function() {
    var selfie = scriptletDB.toSelfie();
    return selfie;
};
// Replaces the scriptlet database with one created from `selfie`.
api.fromSelfie = function(selfie) {
    var HostnameBasedDB = µb.staticExtFilteringEngine.HostnameBasedDB;
    scriptletDB = new HostnameBasedDB(selfie);
};
return api;
})();
/******************************************************************************/

View File

@ -39,7 +39,7 @@ vAPI.app.onShutdown = function() {
µb.staticFilteringReverseLookup.shutdown();
µb.assets.updateStop();
µb.staticNetFilteringEngine.reset();
µb.cosmeticFilteringEngine.reset();
µb.staticExtFilteringEngine.reset();
µb.sessionFirewall.reset();
µb.permanentFirewall.reset();
µb.permanentFirewall.reset();
@ -139,7 +139,7 @@ var onSelfieReady = function(selfie) {
µb.availableFilterLists = selfie.availableFilterLists;
µb.staticNetFilteringEngine.fromSelfie(selfie.staticNetFilteringEngine);
µb.redirectEngine.fromSelfie(selfie.redirectEngine);
µb.cosmeticFilteringEngine.fromSelfie(selfie.cosmeticFilteringEngine);
µb.staticExtFilteringEngine.fromSelfie(selfie.staticExtFilteringEngine);
return true;
};

View File

@ -0,0 +1,680 @@
/*******************************************************************************
uBlock Origin - a browser extension to block requests.
Copyright (C) 2017 Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see {http://www.gnu.org/licenses/}.
Home: https://github.com/gorhill/uBlock
*/
/* global punycode */
'use strict';
/*******************************************************************************
All static extended filters are of the form:
field 1: one hostname, or a list of comma-separated hostnames
field 2: `##` or `#@#`
field 3: selector
The purpose of the static extended filtering engine is to coarse-parse and
dispatch to appropriate specialized filtering engines. There are currently
three specialized filtering engines:
- cosmetic filtering (aka "element hiding" in Adblock Plus)
- scriptlet injection: selector starts with `script:inject`
- html filtering: selector starts with `^`
Depending on the specialized filtering engine, field 1 may or may not be
optional.
The static extended filtering engine also offers parsing capabilities which
are available to all other specialized filtering engines. For example,
cosmetic and html filtering can ask the extended filtering engine to
compile/validate selectors.
**/
µBlock.staticExtFilteringEngine = (function() {
var µb = µBlock,
reHostnameSeparator = /\s*,\s*/,
reHasUnicode = /[^\x00-\x7F]/,
reIsRegexLiteral = /^\/.+\/$/,
emptyArray = [],
parsed = {
hostnames: [],
exception: false,
suffix: ''
};
// Returns true if `s` is accepted as a valid CSS selector by the browser,
// false otherwise. Validity is probed by handing the selector to a native
// selector-matching method of a detached `div` element, and checking
// whether this throws.
var isValidCSSSelector = (function() {
    var div = document.createElement('div'),
        matchesFn = div.querySelector.bind(div);
    // Keep in mind:
    // https://github.com/gorhill/uBlock/issues/693
    // https://github.com/gorhill/uBlock/issues/1955
    // Use the first available `matches` flavor, `querySelector` being the
    // fallback when none is available.
    var candidates = [
        'matches',
        'mozMatchesSelector',
        'webkitMatchesSelector',
        'msMatchesSelector'
    ];
    for ( var i = 0; i < candidates.length; i++ ) {
        if ( div[candidates[i]] instanceof Function ) {
            matchesFn = div[candidates[i]].bind(div);
            break;
        }
    }
    // https://github.com/gorhill/uBlock/issues/3111
    // Workaround until https://bugzilla.mozilla.org/show_bug.cgi?id=1406817
    // is fixed.
    try {
        matchesFn(':scope');
    } catch (ex) {
        matchesFn = div.querySelector.bind(div);
    }
    return function(s) {
        var valid = true;
        try {
            matchesFn(s + ', ' + s + ':not(#foo)');
        } catch (ex) {
            valid = false;
        }
        return valid;
    };
})();
// Returns true if `s` can't be compiled into a regular expression, in which
// case the stringified error is made available as `isBadRegex.message`.
// Returns false otherwise.
var isBadRegex = function(s) {
    var bad = false;
    try {
        void new RegExp(s);
    } catch (ex) {
        isBadRegex.message = ex.toString();
        bad = true;
    }
    return bad;
};
// Translates a filter suffix of the form `selector { declarations }` into
// the form `selector:style(declarations)`. Returns the empty string if the
// suffix is not of the expected form.
var translateAdguardCSSInjectionFilter = function(suffix) {
    var parts = /^([^{]+)\{([^}]+)\}$/.exec(suffix);
    return parts === null ?
        '' :
        parts[1].trim() + ':style(' + parts[2].trim() + ')';
};
// Returns the punycode-encoded (ASCII) version of `hostname`. A leading `~`
// is left out of the conversion and carried over to the result.
var toASCIIHostname = function(hostname) {
    var negated = hostname.charCodeAt(0) === 0x7E /* '~' */;
    return negated ?
        '~' + punycode.toASCII(hostname.slice(1)) :
        punycode.toASCII(hostname);
};
var compileProceduralSelector = (function() {
// Matches a procedural operator followed by its parenthesized argument,
// with the argument extending to the end of the string. The operator,
// including its leading colon, is captured in group 1.
var reOperatorParser = (function() {
    var operators = [
        '-abp-contains',
        '-abp-has',
        'contains',
        'has',
        'has-text',
        'if',
        'if-not',
        'matches-css',
        'matches-css-after',
        'matches-css-before',
        'xpath'
    ];
    return new RegExp('(:(?:' + operators.join('|') + '))\\(.+\\)$');
})();
// Helper regular expressions used while compiling procedural selectors.
// - reFirstParentheses: the run (possibly empty) of opening parentheses at
//   the start of a string
// - reLastParentheses: the run (possibly empty) of closing parentheses at
//   the end of a string
// - reEscapeRegex: all characters which need to be escaped for a plain
//   string to be usable as the source of a regular expression
// - reNeedScope: whether a selector starts with one of the `+`, `>`, `~`
//   combinators, in which case it needs to be prefixed with `:scope`
var reFirstParentheses = /^\(*/,
reLastParentheses = /\)*$/,
reEscapeRegex = /[.*+?^${}()|[\]\\]/g,
reNeedScope = /^\s*[+>~]/;
// - lastProceduralSelector, lastProceduralSelectorCompiled: presumably the
//   last selector compiled along with its result -- NOTE(review): their
//   usage is not visible from here, to be confirmed
// - regexToRawValue: maps the regex source generated from a plain string
//   back to that plain string, such that `decompile` can report the value
//   as originally written
var lastProceduralSelector = '',
lastProceduralSelectorCompiled,
regexToRawValue = new Map();
// Compiles the argument of the `:has` operator.
// Returns the selector, prefixed with `:scope ` when it starts with a
// combinator, or undefined if the resulting selector is not a valid CSS
// selector.
var compileCSSSelector = function(s) {
    // https://github.com/AdguardTeam/ExtendedCss/issues/31#issuecomment-302391277
    // Prepend `:scope ` if needed.
    var selector = reNeedScope.test(s) ? ':scope ' + s : s;
    if ( isValidCSSSelector(selector) === false ) { return; }
    return selector;
};
// Compiles the argument of the `:has-text` operator into the source of a
// regular expression.
// - If the argument is a regex literal (`/.../`), the result is what is
//   found between the slashes, or undefined if this is not a valid regular
//   expression.
// - Otherwise the result is the argument with all special regex characters
//   escaped. The original argument is remembered in `regexToRawValue`.
var compileText = function(s) {
    if ( reIsRegexLiteral.test(s) === false ) {
        var escaped = s.replace(reEscapeRegex, '\\$&');
        regexToRawValue.set(escaped, s);
        return escaped;
    }
    var source = s.slice(1, -1);
    if ( isBadRegex(source) ) { return; }
    return source;
};
// Compiles the argument of the `:matches-css*` operators, which is expected
// to be of the form `name: value`.
// Returns an object { name, value } where `value` is the source of a
// regular expression, or undefined if the argument has no colon or if the
// value is an invalid regex literal.
// - If the value is a regex literal (`/.../`), the regex source is what is
//   found between the slashes.
// - Otherwise the regex source is the value with all special regex
//   characters escaped, anchored at both ends. The original value is
//   remembered in `regexToRawValue`.
var compileCSSDeclaration = function(s) {
    var colon = s.indexOf(':');
    if ( colon === -1 ) { return; }
    var name = s.slice(0, colon).trim(),
        value = s.slice(colon + 1).trim(),
        source;
    if ( reIsRegexLiteral.test(value) === false ) {
        source = '^' + value.replace(reEscapeRegex, '\\$&') + '$';
        regexToRawValue.set(source, value);
    } else {
        source = value.slice(1, -1);
        if ( isBadRegex(source) ) { return; }
    }
    return { name: name, value: source };
};
// Compiles the argument of the `:if`/`:if-not` operators, which is itself a
// selector, possibly procedural. The argument is prefixed with `:scope `
// when it starts with a combinator, then handed to `compile`, the result of
// which is returned as is.
var compileConditionalSelector = function(s) {
    // https://github.com/AdguardTeam/ExtendedCss/issues/31#issuecomment-302391277
    // Prepend `:scope ` if needed.
    var selector = reNeedScope.test(s) ? ':scope ' + s : s;
    return compile(selector);
};
// Compiles the argument of the `:xpath` operator.
// Returns the expression unchanged if `document.createExpression` accepts
// it, undefined if it throws.
var compileXpathExpression = function(s) {
    var valid = true;
    try {
        document.createExpression(s, null);
    } catch (e) {
        valid = false;
    }
    if ( valid ) { return s; }
};
// https://github.com/gorhill/uBlock/issues/2793
// Maps operator aliases to the operator they stand for. Operators not
// found in this map are used as is.
var normalizedOperators = new Map([
[ ':-abp-contains', ':has-text' ],
[ ':-abp-has', ':if' ],
[ ':contains', ':has-text' ]
]);
// Maps each (normalized) operator to the function used to compile its
// argument. Each of these functions returns undefined when the argument
// can't be compiled.
var compileArgument = new Map([
[ ':has', compileCSSSelector ],
[ ':has-text', compileText ],
[ ':if', compileConditionalSelector ],
[ ':if-not', compileConditionalSelector ],
[ ':matches-css', compileCSSDeclaration ],
[ ':matches-css-after', compileCSSDeclaration ],
[ ':matches-css-before', compileCSSDeclaration ],
[ ':xpath', compileXpathExpression ]
]);
// https://github.com/gorhill/uBlock/issues/2793#issuecomment-333269387
// Normalize (somewhat) the stringified version of procedural
// cosmetic filters -- this increases the likelihood of detecting
// duplicates given that uBO is able to understand syntax specific
// to other blockers.
// The normalized string version is what is reported in the logger,
// by design.
//
// Returns the string version of the compiled selector `compiled`: its
// plain CSS selector followed by each task as `operator(argument)`.
// Arguments which were compiled from a plain string are reported as that
// plain string, others are reported as regex literals. Tasks with an
// unknown operator are left out.
var decompile = function(compiled) {
    var parts = [ compiled.selector ];
    if ( Array.isArray(compiled.tasks) === false ) {
        return parts.join('');
    }
    compiled.tasks.forEach(function(task) {
        var operator = task[0],
            arg = task[1],
            text;
        if ( operator === ':has' || operator === ':xpath' ) {
            parts.push(operator, '(', arg, ')');
            return;
        }
        if ( operator === ':has-text' ) {
            text = regexToRawValue.get(arg);
            if ( text === undefined ) {
                text = '/' + arg + '/';
            }
            parts.push(operator, '(', text, ')');
            return;
        }
        if (
            operator === ':matches-css' ||
            operator === ':matches-css-after' ||
            operator === ':matches-css-before'
        ) {
            text = regexToRawValue.get(arg.value);
            if ( text === undefined ) {
                text = '/' + arg.value + '/';
            }
            parts.push(operator, '(', arg.name, ': ', text, ')');
            return;
        }
        if ( operator === ':if' || operator === ':if-not' ) {
            // The argument is itself a compiled selector.
            parts.push(operator, '(', decompile(arg), ')');
        }
    });
    return parts.join('');
};
// Compile a raw procedural selector string.
//   raw: the selector as written in the filter.
// Returns:
//   - `{ selector: raw }` when no procedural operator is found and `raw` is
//     a valid plain CSS selector;
//   - `{ selector, tasks }` when at least one operator was compiled, where
//     `selector` is the text before the first operator and `tasks` is a list
//     of `[ operator, compiledArgument ]` pairs;
//   - undefined when anything fails to validate or compile.
// NOTE(review): `reOperatorParser`, `reFirstParentheses` and
// `reLastParentheses` are defined outside this view; the comments below
// describe how their results are used here, not what they match.
var compile = function(raw) {
var matches = reOperatorParser.exec(raw);
// No procedural operator at all: accept only a valid plain CSS selector.
if ( matches === null ) {
if ( isValidCSSSelector(raw) ) { return { selector: raw }; }
return;
}
// `firstOperand` is whatever precedes the first operator; `selector` is the
// remaining text after the operator name, consumed piece by piece below.
var tasks = [],
firstOperand = raw.slice(0, matches.index),
currentOperator = matches[1],
selector = raw.slice(matches.index + currentOperator.length),
currentArgument = '', nextOperand, nextOperator,
depth = 0, opening, closing;
// An empty leading selector is allowed; a non-empty one must be valid CSS.
if (
firstOperand !== '' &&
isValidCSSSelector(firstOperand) === false
) {
return;
}
for (;;) {
// Split the remaining text at the next operator, if any.
matches = reOperatorParser.exec(selector);
if ( matches !== null ) {
nextOperand = selector.slice(0, matches.index);
nextOperator = matches[1];
} else {
nextOperand = selector;
nextOperator = '';
}
// Both regexes are expected to always match (the result is dereferenced
// without a null check); presumably they capture the run of parentheses at
// the start and at the end of the operand -- TODO confirm.
opening = reFirstParentheses.exec(nextOperand)[0].length;
closing = reLastParentheses.exec(nextOperand)[0].length;
// `depth` tracks whether we are inside the argument of an operator which
// itself contains operators.
if ( opening > closing ) {
if ( depth === 0 ) { currentArgument = ''; }
depth += 1;
} else if ( closing > opening && depth > 0 ) {
depth -= 1;
// Back to top level: the operand is everything accumulated so far plus
// this last piece.
if ( depth === 0 ) {
nextOperand = currentArgument + nextOperand;
}
}
if ( depth !== 0 ) {
// Still nested: keep accumulating the text, operator names included.
currentArgument += nextOperand + nextOperator;
} else {
// Top level: normalize the operator alias, then compile the argument with
// its enclosing parentheses stripped. The lookup result is called directly,
// so every operator reaching this point must have an entry in
// `compileArgument`.
currentOperator =
normalizedOperators.get(currentOperator) ||
currentOperator;
currentArgument =
compileArgument.get(currentOperator)(
nextOperand.slice(1, -1)
);
if ( currentArgument === undefined ) { return; }
tasks.push([ currentOperator, currentArgument ]);
currentOperator = nextOperator;
}
if ( nextOperator === '' ) { break; }
selector = selector.slice(matches.index + nextOperator.length);
}
// Reject when nothing was compiled or when nesting never closed.
if ( tasks.length === 0 || depth !== 0 ) { return; }
return { selector: firstOperand, tasks: tasks };
};
// Compile a raw procedural selector into its JSON-stringified compiled
// form, or undefined when `compile` rejects it.
// The last input/output pair is cached, so compiling the same raw selector
// twice in a row does the work only once.
var entryPoint = function(raw) {
    if ( raw !== lastProceduralSelector ) {
        lastProceduralSelector = raw;
        var result = compile(raw);
        if ( result !== undefined ) {
            // Attach the normalized string form before serializing.
            result.raw = decompile(result);
            result = JSON.stringify(result);
        }
        lastProceduralSelectorCompiled = result;
    }
    return lastProceduralSelectorCompiled;
};

// Forget the raw-value lookup table and the one-entry cache.
entryPoint.reset = function() {
    regexToRawValue = new Map();
    lastProceduralSelector = '';
    lastProceduralSelectorCompiled = undefined;
};
return entryPoint;
})();
//--------------------------------------------------------------------------
// Public API
//--------------------------------------------------------------------------
var api = {};

//--------------------------------------------------------------------------
// Public classes
//--------------------------------------------------------------------------

// Collection of entries grouped under a caller-supplied hash.
//   selfie: optional snapshot as produced by `toSelfie()`; when given, the
//           content and size are restored from it.
// A hash maps either to a single entry or, once there is more than one, to
// an array of entries.
api.HostnameBasedDB = function(selfie) {
    var restoring = selfie !== undefined;
    this.db = restoring ? new Map(selfie.map) : new Map();
    this.size = restoring ? selfie.size : 0;
};

api.HostnameBasedDB.prototype = {
    // Store `entry` under `hash`.
    add: function(hash, entry) {
        var existing = this.db.get(hash);
        if ( Array.isArray(existing) ) {
            existing.push(entry);
        } else {
            // First entry is stored bare; the second one promotes the slot
            // to an array.
            this.db.set(
                hash,
                existing === undefined ? entry : [ existing, entry ]
            );
        }
        this.size += 1;
    },
    // Remove everything.
    clear: function() {
        this.db.clear();
        this.size = 0;
    },
    // Append to `out` every entry stored under `hash` whose `hostname`
    // property is a suffix of `hostname`. Array buckets are walked from
    // last to first.
    retrieve: function(hash, hostname, out) {
        var found = this.db.get(hash);
        if ( found === undefined ) { return; }
        if ( Array.isArray(found) ) {
            for ( var i = found.length - 1; i >= 0; i-- ) {
                var candidate = found[i];
                if ( hostname.endsWith(candidate.hostname) ) {
                    out.push(candidate);
                }
            }
        } else if ( hostname.endsWith(found.hostname) ) {
            out.push(found);
        }
    },
    // Snapshot suitable for passing back to the constructor.
    toSelfie: function() {
        var snapshot = {};
        snapshot.map = Array.from(this.db);
        snapshot.size = this.size;
        return snapshot;
    }
};

// Iterating an instance yields every stored entry, flattening array
// buckets, in the order of the underlying map.
api.HostnameBasedDB.prototype[Symbol.iterator] = (function() {
    var Iter = function(db) {
        this.mapIter = db.values();
        this.arrayIter = undefined;
    };
    Iter.prototype.next = function() {
        var step;
        // Drain the bucket currently being walked, if any.
        if ( this.arrayIter !== undefined ) {
            step = this.arrayIter.next();
            if ( step.done === false ) { return step; }
            this.arrayIter = undefined;
        }
        step = this.mapIter.next();
        if ( !step.done && Array.isArray(step.value) ) {
            // Array bucket: start walking it. It is expected to never be
            // empty, so its first step is returned as-is.
            this.arrayIter = step.value[Symbol.iterator]();
            step = this.arrayIter.next();
        }
        return step;
    };
    return function() {
        return new Iter(this.db);
    };
})();
//--------------------------------------------------------------------------
// Public methods
//--------------------------------------------------------------------------
// Reset the procedural selector compiler, then each of the specialized
// engines (cosmetic, scriptlet, HTML), in that order.
api.reset = function() {
    compileProceduralSelector.reset();
    [
        'cosmeticFilteringEngine',
        'scriptletFilteringEngine',
        'htmlFilteringEngine'
    ].forEach(function(engineName) {
        µb[engineName].reset();
    });
};
// Reset the procedural selector compiler (its lookup table and cache),
// then freeze each of the specialized engines (cosmetic, scriptlet, HTML),
// in that order.
api.freeze = function() {
    compileProceduralSelector.reset();
    [
        'cosmeticFilteringEngine',
        'scriptletFilteringEngine',
        'htmlFilteringEngine'
    ].forEach(function(engineName) {
        µb[engineName].freeze();
    });
};
// https://github.com/chrisaljoudi/uBlock/issues/1004
// Detect and report invalid CSS selectors.
// Discard new ABP's `-abp-properties` directive until it is
// implemented (if ever). Unlikely, see:
// https://github.com/gorhill/uBlock/issues/1752
// https://github.com/gorhill/uBlock/issues/2624
// Convert Adguard's `-ext-has='...'` into uBO's `:has(...)`.
//
// api.compileSelector(raw) returns one of:
// - `raw` itself, when it is a valid plain CSS selector without
//   extended-syntax attributes;
// - a JSON string `{ raw, style: [ selector, style ] }` for `:style(...)`
//   selectors;
// - a JSON string `{ raw, pseudoclass: true }` for selectors ending with
//   `:after`/`:before` (one or two colons);
// - whatever `compileProceduralSelector` returns, when that is truthy;
// - undefined otherwise. Only the final fall-through is reported to the
//   logger; the earlier rejections return silently.
api.compileSelector = (function() {
// reExtendedSyntax detects `[-abp-xxx="..."]` / `[-ext-xxx='...']`
// attributes; reExtendedSyntaxParser additionally captures the operator
// name and the quoted argument.
// `div` is a scratch element used only to validate style declarations.
var reAfterBeforeSelector = /^(.+?)(::?after|::?before)$/,
reStyleSelector = /^(.+?):style\((.+?)\)$/,
reStyleBad = /url\([^)]+\)/,
reExtendedSyntax = /\[-(?:abp|ext)-[a-z-]+=(['"])(?:.+?)(?:\1)\]/,
reExtendedSyntaxParser = /\[-(?:abp|ext)-([a-z-]+)=(['"])(.+?)\2\]/,
div = document.createElement('div');
// Extended-syntax operator name => procedural operator. Names missing
// from this table cause the whole selector to be rejected.
var normalizedExtendedSyntaxOperators = new Map([
[ 'contains', ':has-text' ],
[ 'has', ':if' ],
[ 'matches-css', ':matches-css' ],
[ 'matches-css-after', ':matches-css-after' ],
[ 'matches-css-before', ':matches-css-before' ],
]);
// A style declaration is accepted when it contains no `url(...)` and the
// scratch element's `cssText` is non-empty after assigning it.
var isValidStyleProperty = function(cssText) {
if ( reStyleBad.test(cssText) ) { return false; }
div.style.cssText = cssText;
if ( div.style.cssText === '' ) { return false; }
div.style.cssText = '';
return true;
};
var entryPoint = function(raw) {
var extendedSyntax = reExtendedSyntax.test(raw);
// Fast path: plain, valid CSS selector.
if ( isValidCSSSelector(raw) && extendedSyntax === false ) {
return raw;
}
// We rarely reach this point -- majority of selectors are plain
// CSS selectors.
var matches, operator;
// Supported Adguard/ABP advanced selector syntax: will translate into
// uBO's syntax before further processing.
// Mind unsupported advanced selector syntax, such as ABP's
// `-abp-properties`.
// Note: extended selector syntax has been deprecated in ABP, in favor
// of the procedural one (i.e. `:operator(...)`). See
// https://issues.adblockplus.org/ticket/5287
if ( extendedSyntax ) {
// Rewrite one attribute per iteration until none is left, then start over
// with the translated string.
while ( (matches = reExtendedSyntaxParser.exec(raw)) !== null ) {
operator = normalizedExtendedSyntaxOperators.get(matches[1]);
if ( operator === undefined ) { return; }
raw = raw.slice(0, matches.index) +
operator + '(' + matches[3] + ')' +
raw.slice(matches.index + matches[0].length);
}
return entryPoint(raw);
}
var selector = raw,
pseudoclass, style;
// `:style` selector?
if ( (matches = reStyleSelector.exec(selector)) !== null ) {
selector = matches[1];
style = matches[2];
}
// https://github.com/gorhill/uBlock/issues/2448
// :after- or :before-based selector?
if ( (matches = reAfterBeforeSelector.exec(selector)) ) {
selector = matches[1];
pseudoclass = matches[2];
}
if ( style !== undefined || pseudoclass !== undefined ) {
// What is left once `:style(...)` and the pseudo-element are peeled off
// must be a valid CSS selector.
if ( isValidCSSSelector(selector) === false ) {
return;
}
// The pseudo-element is put back so that it is part of the selector
// stored alongside the style.
if ( pseudoclass !== undefined ) {
selector += pseudoclass;
}
if ( style !== undefined ) {
if ( isValidStyleProperty(style) === false ) { return; }
return JSON.stringify({
raw: raw,
style: [ selector, style ]
});
}
return JSON.stringify({
raw: raw,
pseudoclass: true
});
}
// Procedural selector?
var compiled;
if ( (compiled = compileProceduralSelector(raw)) ) {
return compiled;
}
// Nothing could make sense of the selector: report it.
µb.logger.writeOne(
'',
'error',
'Cosmetic filtering invalid filter: ' + raw
);
};
return entryPoint;
})();
// Try to compile one raw filter line as a static extended filter, and
// dispatch it to the scriptlet, HTML or cosmetic filtering engine.
//   raw:    the filter line.
//   writer: passed through to the engine which compiles the filter.
// Returns true when the line was recognized as an extended filter
// (including recognized-but-discarded ones), false when it is not an
// extended filter and should be handled elsewhere.
// The parsing result is stored in the shared `parsed` object (`suffix`,
// `exception`, `hostnames`), which is what the engines receive.
api.compile = function(raw, writer) {
    var lpos = raw.indexOf('#');
    if ( lpos === -1 ) { return false; }
    var rpos = lpos + 1;
    if ( raw.charCodeAt(rpos) !== 0x23 /* '#' */ ) {
        rpos = raw.indexOf('#', rpos + 1);
        if ( rpos === -1 ) { return false; }
    }

    // Coarse-check that the anchor is valid.
    // `##`: l = 1
    // `#@#`, `#$#`, `#%#`, `#?#`: l = 2
    // `#@$#`, `#@%#`, `#@?#`: l = 3
    if ( (rpos - lpos) > 3 ) { return false; }

    // Extract the selector.
    var suffix = parsed.suffix = raw.slice(rpos + 1).trim();
    if ( suffix.length === 0 ) { return false; }

    // https://github.com/gorhill/uBlock/issues/952
    //   Find out whether we are dealing with an Adguard-specific cosmetic
    //   filter, and if so, translate it if supported, or discard it if not
    //   supported.
    //   We have an Adguard/ABP cosmetic filter if and only if the
    //   character is `$`, `%` or `?`, otherwise it's not a cosmetic
    //   filter.
    var cCode = raw.charCodeAt(rpos - 1);
    if ( cCode !== 0x23 /* '#' */ && cCode !== 0x40 /* '@' */ ) {
        // Adguard's scriptlet injection: not supported.
        if ( cCode === 0x25 /* '%' */ ) { return true; }
        // Not a known extended filter.
        if ( cCode !== 0x24 /* '$' */ && cCode !== 0x3F /* '?' */ ) {
            return false;
        }
        // Adguard's style injection: translate to uBO's format.
        if ( cCode === 0x24 /* '$' */ ) {
            suffix = translateAdguardCSSInjectionFilter(suffix);
            if ( suffix === '' ) { return true; }
            // The engines only see `parsed`: without this the translation
            // would be lost and the untranslated text compiled instead.
            parsed.suffix = suffix;
        }
    }

    // Exception filter?
    parsed.exception = raw.charCodeAt(lpos + 1) === 0x40 /* '@' */;

    // Extract the hostname(s), punycode if required.
    if ( lpos === 0 ) {
        parsed.hostnames = emptyArray;
    } else {
        var prefix = raw.slice(0, lpos),
            hostnames = prefix.split(reHostnameSeparator);
        if ( reHasUnicode.test(prefix) ) {
            // Convert each hostname in place: the list must remain an
            // array, with one converted entry per original hostname.
            // NOTE(review): `toASCIIHostname` is defined outside this view
            // and is assumed to convert a single hostname -- confirm.
            for ( var i = 0; i < hostnames.length; i++ ) {
                hostnames[i] = toASCIIHostname(hostnames[i]);
            }
        }
        parsed.hostnames = hostnames;
    }

    if ( suffix.startsWith('script:') ) {
        // Scriptlet injection engine.
        if ( suffix.startsWith('script:inject') ) {
            µb.scriptletFilteringEngine.compile(parsed, writer);
            return true;
        }
        // Script tag filtering: courtesy-conversion to HTML filtering.
        if ( suffix.startsWith('script:contains') ) {
            console.info(
                'uBO: ##script:contains(...) is deprecated, ' +
                'converting to ##^script:has-text(...)'
            );
            suffix = parsed.suffix = suffix.replace(
                /^script:contains/,
                '^script:has-text'
            );
        }
    }

    // HTML filtering engine.
    // TODO: evaluate converting Adguard's `$$` syntax into uBO's HTML
    //       filtering syntax.
    if ( suffix.charCodeAt(0) === 0x5E /* '^' */ ) {
        µb.htmlFilteringEngine.compile(parsed, writer);
        return true;
    }

    // Cosmetic filtering engine.
    µb.cosmeticFilteringEngine.compile(parsed, writer);
    return true;
};
// Feed compiled content to each specialized engine (cosmetic, scriptlet,
// HTML), in that order, handing each the same reader and options.
api.fromCompiledContent = function(reader, options) {
    [
        'cosmeticFilteringEngine',
        'scriptletFilteringEngine',
        'htmlFilteringEngine'
    ].forEach(function(engineName) {
        µb[engineName].fromCompiledContent(reader, options);
    });
};
// Snapshot of all specialized engines, keyed as `cosmetic`, `scriptlets`
// and `html`; the counterpart of `api.fromSelfie`.
api.toSelfie = function() {
    var selfie = {};
    selfie.cosmetic = µb.cosmeticFilteringEngine.toSelfie();
    selfie.scriptlets = µb.scriptletFilteringEngine.toSelfie();
    selfie.html = µb.htmlFilteringEngine.toSelfie();
    return selfie;
};
// Restore each specialized engine from its part of a snapshot produced by
// `api.toSelfie` (`cosmetic`, `scriptlets`, `html`), in that order.
api.fromSelfie = function(selfie) {
    [
        [ 'cosmeticFilteringEngine', 'cosmetic' ],
        [ 'scriptletFilteringEngine', 'scriptlets' ],
        [ 'htmlFilteringEngine', 'html' ]
    ].forEach(function(pair) {
        µb[pair[0]].fromSelfie(selfie[pair[1]]);
    });
};
return api;
})();
/******************************************************************************/

View File

@ -2116,6 +2116,9 @@ FilterContainer.prototype.compile = function(raw, writer) {
return false;
}
// 0 = network filters
writer.select(0);
// Pure hostnames, use more efficient dictionary lookup
// https://github.com/chrisaljoudi/uBlock/issues/665
// Create a dict keyed on request type etc.
@ -2268,6 +2271,9 @@ FilterContainer.prototype.fromCompiledContent = function(reader) {
args, bits, bucket, entry,
tokenHash, fdata, fingerprint;
// 0 = network filters
reader.select(0);
while ( reader.next() === true ) {
args = reader.args();
bits = args[0];

View File

@ -347,7 +347,7 @@
vAPI.storage.set({ 'availableFilterLists': µb.availableFilterLists });
µb.staticNetFilteringEngine.freeze();
µb.redirectEngine.freeze();
µb.cosmeticFilteringEngine.freeze();
µb.staticExtFilteringEngine.freeze();
µb.selfieManager.destroy();
};
@ -543,7 +543,7 @@
var onDone = function() {
µb.staticNetFilteringEngine.freeze();
µb.cosmeticFilteringEngine.freeze();
µb.staticExtFilteringEngine.freeze();
µb.redirectEngine.freeze();
vAPI.storage.set({ 'availableFilterLists': µb.availableFilterLists });
@ -586,7 +586,7 @@
µb.availableFilterLists = lists;
µb.redirectEngine.reset();
µb.cosmeticFilteringEngine.reset();
µb.staticExtFilteringEngine.reset();
µb.staticNetFilteringEngine.reset();
µb.selfieManager.destroy();
µb.staticFilteringReverseLookup.resetLists();
@ -703,23 +703,22 @@
/******************************************************************************/
µBlock.compileFilters = function(rawText) {
var networkFilters = new this.CompiledLineWriter(),
cosmeticFilters = new this.CompiledLineWriter();
var writer = new this.CompiledLineWriter();
// Useful references:
// https://adblockplus.org/en/filter-cheatsheet
// https://adblockplus.org/en/filters
var staticNetFilteringEngine = this.staticNetFilteringEngine,
cosmeticFilteringEngine = this.cosmeticFilteringEngine,
staticExtFilteringEngine = this.staticExtFilteringEngine,
reIsWhitespaceChar = /\s/,
reMaybeLocalIp = /^[\d:f]/,
reIsLocalhostRedirect = /\s+(?:broadcasthost|local|localhost|localhost\.localdomain)(?=\s|$)/,
reIsLocalhostRedirect = /\s+(?:broadcasthost|local|localhost|localhost\.localdomain)\b/,
reLocalIp = /^(?:0\.0\.0\.0|127\.0\.0\.1|::1|fe80::1%lo0)/,
line, lineRaw, c, pos,
line, c, pos,
lineIter = new this.LineIterator(rawText);
while ( lineIter.eot() === false ) {
line = lineRaw = lineIter.next().trim();
line = lineIter.next().trim();
// rhill 2014-04-18: The trim is important here, as without it there
// could be a lingering `\r` which would cause problems in the
@ -733,9 +732,7 @@
// Parse or skip cosmetic filters
// All cosmetic filters are caught here
if ( cosmeticFilteringEngine.compile(line, cosmeticFilters) ) {
continue;
}
if ( staticExtFilteringEngine.compile(line, writer) ) { continue; }
// Whatever else is next can be assumed to not be a cosmetic filter
@ -767,12 +764,10 @@
if ( line.length === 0 ) { continue; }
staticNetFilteringEngine.compile(line, networkFilters);
staticNetFilteringEngine.compile(line, writer);
}
return networkFilters.toString() +
'\n/* end of network - start of cosmetic */\n' +
cosmeticFilters.toString();
return writer.toString();
};
/******************************************************************************/
@ -783,15 +778,12 @@
µBlock.applyCompiledFilters = function(rawText, firstparty) {
if ( rawText === '' ) { return; }
var separator = '\n/* end of network - start of cosmetic */\n',
pos = rawText.indexOf(separator),
reader = new this.CompiledLineReader(rawText.slice(0, pos));
var reader = new this.CompiledLineReader(rawText);
this.staticNetFilteringEngine.fromCompiledContent(reader);
this.cosmeticFilteringEngine.fromCompiledContent(
reader.reset(rawText.slice(pos + separator.length)),
this.userSettings.ignoreGenericCosmeticFilters,
!firstparty && !this.userSettings.parseAllABPHideFilters
);
this.staticExtFilteringEngine.fromCompiledContent(reader, {
skipGenericCosmetic: this.userSettings.ignoreGenericCosmeticFilters,
skipCosmetic: !firstparty && !this.userSettings.parseAllABPHideFilters
});
};
/******************************************************************************/
@ -885,7 +877,7 @@
availableFilterLists: this.availableFilterLists,
staticNetFilteringEngine: this.staticNetFilteringEngine.toSelfie(),
redirectEngine: this.redirectEngine.toSelfie(),
cosmeticFilteringEngine: this.cosmeticFilteringEngine.toSelfie()
staticExtFilteringEngine: this.staticExtFilteringEngine.toSelfie()
};
vAPI.cacheStorage.set({ selfie: selfie });
}.bind(µBlock);
@ -1068,7 +1060,7 @@
this.availableFilterLists.hasOwnProperty(details.assetKey) === false ||
this.selectedFilterLists.indexOf(details.assetKey) === -1
) {
return false;
return;
}
}
// https://github.com/gorhill/uBlock/issues/2594
@ -1077,10 +1069,10 @@
this.hiddenSettings.ignoreRedirectFilters === true &&
this.hiddenSettings.ignoreScriptInjectFilters === true
) {
return false;
return;
}
}
return;
return true;
}
// Compile the list while we have the raw version in memory

View File

@ -480,9 +480,10 @@ onBeforeMaybeSpuriousCSPReport.textDecoder = undefined;
/******************************************************************************/
// To handle:
// - inline script tags
// - websockets
// - media elements larger than n kB
// - Media elements larger than n kB
// - Scriptlet injection (requires ability to modify response body)
// - HTML filtering (requires ability to modify response body)
// - CSP injection
var onHeadersReceived = function(details) {
// Do not interfere with behind-the-scene requests.
@ -490,15 +491,17 @@ var onHeadersReceived = function(details) {
if ( vAPI.isBehindTheSceneTabId(tabId) ) { return; }
var µb = µBlock,
requestType = details.type;
requestType = details.type,
isRootDoc = requestType === 'main_frame',
isDoc = isRootDoc || requestType === 'sub_frame';
if ( requestType === 'main_frame' ) {
if ( isRootDoc ) {
µb.tabContextManager.push(tabId, details.url);
}
var pageStore = µb.pageStoreFromTabId(tabId);
if ( pageStore === null ) {
if ( requestType !== 'main_frame' ) { return; }
if ( isRootDoc === false ) { return; }
pageStore = µb.bindTabToPageStats(tabId, 'beforeRequest');
}
if ( pageStore.getNetFilteringSwitch() === false ) { return; }
@ -507,24 +510,283 @@ var onHeadersReceived = function(details) {
return foilLargeMediaElement(pageStore, details);
}
if ( isDoc && µb.canFilterResponseBody ) {
filterDocument(details);
}
// https://github.com/gorhill/uBlock/issues/2813
// Disable the blocking of large media elements if the document is itself
// a media element: the resource was not prevented from loading so no
// point to further block large media elements for the current document.
if ( requestType === 'main_frame' ) {
if ( isRootDoc ) {
if ( reMediaContentTypes.test(headerValueFromName('content-type', details.responseHeaders)) ) {
pageStore.allowLargeMediaElementsUntil = Date.now() + 86400000;
}
return injectCSP(pageStore, details);
}
if ( requestType === 'sub_frame' ) {
if ( isDoc ) {
return injectCSP(pageStore, details);
}
};
var reMediaContentTypes = /^(?:audio|image|video)\//;
/*******************************************************************************
The response body filterer is responsible for:
- Scriptlet filtering
- HTML filtering
In the spirit of efficiency, the response body filterer works this way:
If:
- HTML filtering: no.
- Scriptlet filtering: no.
Then:
No response body filtering is initiated.
If:
- HTML filtering: no.
- Scriptlet filtering: yes.
Then:
Inject scriptlets before first chunk of response body data reported
then immediately disconnect response body data listener.
If:
- HTML filtering: yes.
- Scriptlet filtering: no/yes.
Then:
Assemble all response body data into a single buffer. Once all the
response data has been received, create a document from it. Then:
- Inject scriptlets in the resulting DOM.
- Remove all DOM elements matching HTML filters.
Then serialize the resulting modified document as the new response
body.
This way, the overhead is minimal for when only scriptlets need to be
injected.
If the platform does not support response body filtering, the scriptlets
will be injected the old way, through the content script.
**/
// Response body filterer for document requests.
// The returned function takes the `details` object of a webRequest
// `onHeadersReceived` event. When HTML filters and/or scriptlets apply to
// the document, it attaches a response-data stream filter to the request;
// otherwise it does nothing. It returns nothing.
var filterDocument = (function() {
    // `filterers` maps each active stream filter to its request state.
    // The parser/serializer/encoder/decoder instances are created lazily
    // and shared across requests. `textDecoderCharset` is the non-utf-8
    // charset `textDecoder` is currently set up for, or undefined when it
    // is a default (utf-8) decoder.
    var µb = µBlock,
        filterers = new Map(),
        reDoctype = /^\s*<!DOCTYPE\b[^>]+?>/,
        reJustASCII = /^[\x00-\x7E]*$/,
        domParser, xmlSerializer,
        textDecoderCharset, textDecoder, textEncoder;

    // Fast path, tried on the first chunk only: when there are scriptlets
    // to inject, no HTML filter, and no non-utf-8 charset, inject the
    // scriptlets right after the DOCTYPE and disconnect from the stream.
    // Returns true when the job was completed this way, false when the
    // caller must fall back to buffering the whole response.
    var streamJobDone = function(filterer, responseBytes) {
        if (
            filterer.scriptlets === undefined ||
            filterer.selectors !== undefined ||
            filterer.charset !== undefined
        ) {
            return false;
        }
        // This path is only taken for utf-8 content, but the shared decoder
        // may have been left set up for another charset by a previous
        // buffered response: discard it in that case.
        if ( textDecoderCharset !== undefined ) {
            textDecoder = undefined;
            textDecoderCharset = undefined;
        }
        if ( textDecoder === undefined ) {
            textDecoder = new TextDecoder();
        }
        // We need to insert after DOCTYPE, or else the browser may fall
        // into quirks mode.
        var responseStr = textDecoder.decode(responseBytes);
        var match = reDoctype.exec(responseStr);
        if ( match === null ) { return false; }
        filterers.delete(filterer.stream);
        if ( textEncoder === undefined ) {
            textEncoder = new TextEncoder();
        }
        // When the DOCTYPE is pure ASCII, character and byte offsets are
        // the same and the original bytes can be reused; otherwise the
        // leading text is re-encoded to find out its byte length.
        var beforeByteLength = match.index + match[0].length;
        var beforeBytes = reJustASCII.test(match[0]) ?
            new Uint8Array(responseBytes, 0, beforeByteLength) :
            textEncoder.encode(responseStr.slice(0, beforeByteLength));
        filterer.stream.write(beforeBytes);
        filterer.stream.write(
            textEncoder.encode('<script>' + filterer.scriptlets + '</script>')
        );
        filterer.stream.write(
            new Uint8Array(responseBytes, beforeBytes.byteLength)
        );
        filterer.stream.disconnect();
        return true;
    };

    // Write `buffer` if provided, else whatever was accumulated for this
    // filterer, then close the stream.
    var streamClose = function(filterer, buffer) {
        if ( buffer !== undefined ) {
            filterer.stream.write(buffer);
        } else if ( filterer.buffer !== undefined ) {
            filterer.stream.write(filterer.buffer);
        }
        filterer.stream.close();
    };

    // `ondata` handler; `this` is the stream filter.
    var onStreamData = function(ev) {
        var filterer = filterers.get(this);
        // Unknown stream: pass the data through and stop listening.
        if ( filterer === undefined ) {
            this.write(ev.data);
            this.disconnect();
            return;
        }
        if (
            this.status !== 'transferringdata' &&
            this.status !== 'finishedtransferringdata'
        ) {
            filterers.delete(this);
            this.disconnect();
            return;
        }
        // TODO: possibly improve buffer growth, if benchmarking shows it's
        // worth it.
        // First chunk: try the fast path, else start buffering.
        if ( filterer.buffer === null ) {
            if ( streamJobDone(filterer, ev.data) ) { return; }
            filterer.buffer = new Uint8Array(ev.data);
            return;
        }
        // Subsequent chunks: append to what was received so far.
        var buffer = new Uint8Array(
            filterer.buffer.byteLength +
            ev.data.byteLength
        );
        buffer.set(filterer.buffer);
        buffer.set(new Uint8Array(ev.data), filterer.buffer.byteLength);
        filterer.buffer = buffer;
    };

    // `onstop` handler; `this` is the stream filter. Parses the buffered
    // response into a document, applies HTML filters and scriptlets, and
    // writes back either the untouched bytes or the re-serialized document.
    var onStreamStop = function() {
        var filterer = filterers.get(this);
        filterers.delete(this);
        if ( filterer === undefined || filterer.buffer === null ) {
            this.close();
            return;
        }
        if ( this.status !== 'finishedtransferringdata' ) { return; }

        if ( domParser === undefined ) {
            domParser = new DOMParser();
            xmlSerializer = new XMLSerializer();
        }
        if ( textEncoder === undefined ) {
            textEncoder = new TextEncoder();
        }

        // In case of unknown charset, assume utf-8.
        if ( filterer.charset !== textDecoderCharset ) {
            textDecoder = undefined;
        }
        if ( textDecoder === undefined ) {
            try {
                textDecoder = new TextDecoder(filterer.charset);
                textDecoderCharset = filterer.charset;
            } catch(ex) {
                textDecoder = new TextDecoder();
                textDecoderCharset = undefined;
            }
        }

        var doc = domParser.parseFromString(
            textDecoder.decode(filterer.buffer),
            'text/html'
        );

        var modified = false;
        if ( filterer.selectors !== undefined ) {
            if ( µb.htmlFilteringEngine.apply(doc, filterer) ) {
                modified = true;
            }
        }
        if ( filterer.scriptlets !== undefined ) {
            if ( µb.scriptletFilteringEngine.apply(doc, filterer) ) {
                modified = true;
            }
        }

        // Nothing changed: send the original bytes as received.
        if ( modified === false ) {
            streamClose(filterer);
            return;
        }

        // If the charset of the document was not utf-8, we need to change it
        // to utf-8.
        if ( textDecoderCharset !== undefined ) {
            var meta = doc.createElement('meta');
            meta.setAttribute('charset', 'utf-8');
            doc.head.insertBefore(meta, doc.head.firstChild);
        }

        // https://stackoverflow.com/questions/6088972/get-doctype-of-an-html-as-string-with-javascript/10162353#10162353
        var doctypeStr = doc.doctype instanceof Object ?
                xmlSerializer.serializeToString(doc.doctype) + '\n' :
                '';

        streamClose(
            filterer,
            textEncoder.encode(doctypeStr + doc.documentElement.outerHTML)
        );
    };

    // `onerror` handler; `this` is the stream filter.
    var onStreamError = function() {
        filterers.delete(this);
    };

    return function(details) {
        var hostname = µb.URI.hostnameFromURI(details.url);
        if ( hostname === '' ) { return; }

        var domain = µb.URI.domainFromHostname(hostname);

        // Per-request state; `buffer` stays null until the first chunk of
        // data is received.
        var request = {
            stream: undefined,
            tabId: details.tabId,
            url: details.url,
            hostname: hostname,
            domain: domain,
            entity: µb.URI.entityFromDomain(domain),
            selectors: undefined,
            scriptlets: undefined,
            buffer: null,
            charset: undefined
        };
        request.selectors = µb.htmlFilteringEngine.retrieve(request);
        request.scriptlets = µb.scriptletFilteringEngine.retrieve(request);

        // Nothing to do for this document: do not touch the response.
        if (
            request.selectors === undefined &&
            request.scriptlets === undefined
        ) {
            return;
        }

        // Only documents are filtered. A missing content type is let
        // through; a charset other than utf-8 is remembered so that the
        // response can be decoded properly.
        var headers = details.responseHeaders,
            contentType = headerValueFromName('content-type', headers);
        if ( contentType !== '' ) {
            if ( reContentTypeDocument.test(contentType) === false ) { return; }
            var match = reContentTypeCharset.exec(contentType);
            if ( match !== null ) {
                var charset = match[1].toLowerCase();
                if ( charset !== 'utf-8' ) {
                    request.charset = charset;
                }
            }
        }
        // https://bugzilla.mozilla.org/show_bug.cgi?id=1426789
        if ( headerValueFromName('content-disposition', headers) ) { return; }

        var stream = request.stream =
            vAPI.net.webRequest.filterResponseData(details.requestId);
        stream.ondata = onStreamData;
        stream.onstop = onStreamStop;
        stream.onerror = onStreamError;
        filterers.set(stream, request);
    };
})();
// Content types considered to be documents. The `+` of `xhtml+xml` must be
// escaped, otherwise it acts as a quantifier and the real media type
// `application/xhtml+xml` never matches.
var reContentTypeDocument = /^(?:text\/html|application\/xhtml\+xml)/i;
// Extract the charset parameter of a content-type header value; the value
// ends at a quote, a space, or the `;` starting the next parameter.
var reContentTypeCharset = /charset=['"]?([^'" ;]+)/i;
/******************************************************************************/
var injectCSP = function(pageStore, details) {

View File

@ -1,7 +1,7 @@
/*******************************************************************************
uBlock Origin - a browser extension to block requests.
Copyright (C) 2014-2016 Raymond Hill
Copyright (C) 2014-2017 Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -309,6 +309,13 @@ var psl = publicSuffixList;
/******************************************************************************/
// Derive the "entity" form of a domain: everything before the first dot,
// followed by `.*` (e.g. `example.com` => `example.*`).
// Returns an empty string when the domain contains no dot.
URI.entityFromDomain = function(domain) {
    var firstDot = domain.indexOf('.');
    if ( firstDot === -1 ) { return ''; }
    return domain.slice(0, firstDot) + '.*';
};
/******************************************************************************/
URI.pathFromURI = function(uri) {
var matches = rePathFromURI.exec(uri);
return matches !== null ? matches[1] : '';

View File

@ -225,7 +225,9 @@
/******************************************************************************/
µBlock.CompiledLineWriter = function() {
this.output = [];
this.blockId = undefined;
this.block = undefined;
this.blocks = new Map();
this.stringifier = JSON.stringify;
};
@ -235,46 +237,81 @@
µBlock.CompiledLineWriter.prototype = {
push: function(args) {
this.output[this.output.length] = this.stringifier(args);
this.block[this.block.length] = this.stringifier(args);
},
select: function(blockId) {
if ( blockId === this.blockId ) { return; }
this.blockId = blockId;
this.block = this.blocks.get(blockId);
if ( this.block === undefined ) {
this.blocks.set(blockId, (this.block = []));
}
},
toString: function() {
return this.output.join('\n');
var result = [];
for ( var entry of this.blocks ) {
if ( entry[1].length === 0 ) { continue; }
result.push(
'#block-start-' + entry[0],
entry[1].join('\n'),
'#block-end-' + entry[0]
);
}
return result.join('\n');
}
};
µBlock.CompiledLineReader = function(raw) {
this.reset(raw);
/******************************************************************************/
µBlock.CompiledLineReader = function(raw, blockId) {
this.block = '';
this.len = 0;
this.offset = 0;
this.line = '';
this.parser = JSON.parse;
this.blocks = new Map();
var reBlockStart = /^#block-start-(\d+)\n/gm,
match = reBlockStart.exec(raw),
beg, end;
while ( match !== null ) {
beg = match.index + match[0].length;
end = raw.indexOf('#block-end-' + match[1], beg);
this.blocks.set(parseInt(match[1], 10), raw.slice(beg, end));
reBlockStart.lastIndex = end;
match = reBlockStart.exec(raw);
}
if ( blockId !== undefined ) {
this.select(blockId);
}
};
µBlock.CompiledLineReader.prototype = {
reset: function(raw) {
this.input = raw;
this.len = raw.length;
this.offset = 0;
this.s = '';
return this;
},
next: function() {
if ( this.offset === this.len ) {
this.s = '';
this.line = '';
return false;
}
var pos = this.input.indexOf('\n', this.offset);
var pos = this.block.indexOf('\n', this.offset);
if ( pos !== -1 ) {
this.s = this.input.slice(this.offset, pos);
this.line = this.block.slice(this.offset, pos);
this.offset = pos + 1;
} else {
this.s = this.input.slice(this.offset);
this.line = this.block.slice(this.offset);
this.offset = this.len;
}
return true;
},
select: function(blockId) {
this.block = this.blocks.get(blockId) || '';
this.len = this.block.length;
this.offset = 0;
return this;
},
fingerprint: function() {
return this.s;
return this.line;
},
args: function() {
return this.parser(this.s);
return this.parser(this.line);
}
};