[mv3] Inject specific cosmetic filters through scriptlet injection

This solves the following remaining issues regarding specific cosmetic
filtering:
- High rate of false positives in last build
- High number of generated content css files in the package
This commit is contained in:
Raymond Hill 2022-09-19 08:55:45 -04:00
parent 3f8f6d14ab
commit 4bd02c0fb6
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
7 changed files with 264 additions and 172 deletions

View File

@ -109,9 +109,10 @@ const toRegisterable = (fname, entry) => {
directive.excludeMatches = matchesFromHostnames(entry.excludeMatches);
}
if ( fname.at(-1) === CSS_TYPE ) {
directive.css = [
`/rulesets/css/${fname.slice(0,1)}/${fname.slice(1,2)}/${fname.slice(2)}.css`
directive.js = [
`/rulesets/css/${fname.slice(0,1)}/${fname.slice(1)}.js`,
];
directive.runAt = 'document_start';
} else if ( fname.at(-1) === JS_TYPE ) {
directive.js = [
`/rulesets/js/${fname}.js`
@ -123,17 +124,20 @@ const toRegisterable = (fname, entry) => {
return directive;
};
const toMaybeUpdatable = (registered, candidate) => {
/******************************************************************************/
const shouldUpdate = (registered, candidate) => {
const matches = candidate.matches &&
matchesFromHostnames(candidate.matches);
if ( arrayEq(registered.matches, matches) === false ) {
return toRegisterable(candidate);
return true;
}
const excludeMatches = candidate.excludeMatches &&
matchesFromHostnames(candidate.excludeMatches);
if ( arrayEq(registered.excludeMatches, excludeMatches) === false ) {
return toRegisterable(candidate);
return true;
}
return false;
};
/******************************************************************************/
@ -228,9 +232,8 @@ async function registerInjectable() {
toAdd.push(toRegisterable(fname, entry));
continue;
}
const updated = toMaybeUpdatable(before.get(fname), entry);
if ( updated !== undefined ) {
toUpdate.push(updated);
if ( shouldUpdate(before.get(fname), entry) ) {
toUpdate.push(toRegisterable(fname, entry));
}
}

View File

@ -294,161 +294,8 @@ async function processNetworkFilters(assetDetails, network) {
/******************************************************************************/
// Record that the resource file `fname` is associated with each hostname
// listed in `entry[prop]`, for the ruleset identified by `id`.
//
// `prop` selects which per-ruleset map is filled: 'matches' or
// 'excludeMatches'. Nothing is recorded when `entry[prop]` is undefined.
// The per-ruleset record and the per-hostname set are created on demand.
function addScriptingAPIResources(id, entry, prop, fname) {
    const hostnames = entry[prop];
    if ( hostnames === undefined ) { return; }
    // Fetch the value stored at `key`, creating and storing it when missing.
    const lookupOrCreate = (map, key, create) => {
        let value = map.get(key);
        if ( value === undefined ) {
            value = create();
            map.set(key, value);
        }
        return value;
    };
    for ( const hostname of hostnames ) {
        const perRuleset = lookupOrCreate(scriptingDetails, id, () => ({
            matches: new Map(),
            excludeMatches: new Map(),
        }));
        lookupOrCreate(perRuleset[prop], hostname, () => new Set()).add(fname);
    }
}
/******************************************************************************/
const globalCSSFileSet = new Set();
// Using an at-rule layer declaration allows raising uBOL's styles above
// those of the page.
const cssDeclaration =
`@layer {
$selector$ {
display:none!important;
}
}`;
// Compile the specific cosmetic filters of one ruleset into static CSS files.
//
// assetDetails: ruleset descriptor; only its `id` property is read here.
// mapin: Map of selector => details, where details may carry `matches`
//        and/or `excludeMatches` lists of hostnames. Overly generic
//        `[style...]` selectors are deleted from this map as a side effect.
//
// Returns the number of CSS files newly written by this call, or 0 when
// `mapin` is undefined.
function processCosmeticFilters(assetDetails, mapin) {
    if ( mapin === undefined ) { return 0; }

    // Drop worryingly generic-looking selectors, they are too likely to
    // cause false positives on unrelated sites. It's the price for a Lite
    // version. Examples:
    //   div[style*="z-index:"]
    //   [style*="opacity: 0"]
    for ( const s of mapin.keys() ) {
        if ( /^[a-z]*\[style[^\]]*\](:|$)/.test(s) === false ) { continue; }
        // `[style]` attributes with `/` characters are probably ok since they
        // likely refer to specific `url()` property.
        if ( s.includes('/') ) { continue; }
        // `[style]` attributes with dimension properties might be specific
        // enough after all.
        if ( /\b(height|width)\s*:\s*\d+px\b/.test(s) ) { continue; }
        mapin.delete(s);
    }

    // This groups together selectors which are used by the same hostnames:
    // the JSON form of the details is used as the grouping key.
    const optimizeExtendedFilters = filters => {
        if ( filters === undefined ) { return []; }
        const merge = new Map();
        for ( const [ selector, details ] of filters ) {
            const json = JSON.stringify(details);
            let entries = merge.get(json);
            if ( entries === undefined ) {
                entries = new Set();
                merge.set(json, entries);
            }
            entries.add(selector);
        }
        const out = [];
        for ( const [ json, entries ] of merge ) {
            const details = JSON.parse(json);
            details.payload = Array.from(entries);
            out.push(details);
        }
        return out;
    };
    const optimized = optimizeExtendedFilters(mapin);

    // This creates a map of unique selectorset => all hostnames
    // including/excluding the selectorset. This allows to avoid duplication
    // of css content.
    const cssContentMap = new Map();
    for ( const entry of optimized ) {
        const selectors = entry.payload.map(s => ` ${s}`).join(',\n');
        // ends-with 0 = css resource
        const fname = uid(selectors) + '0';
        let contentDetails = cssContentMap.get(fname);
        if ( contentDetails === undefined ) {
            contentDetails = { selectors };
            cssContentMap.set(fname, contentDetails);
        }
        if ( entry.matches !== undefined ) {
            if ( contentDetails.matches === undefined ) {
                contentDetails.matches = new Set();
            }
            for ( const hn of entry.matches ) {
                contentDetails.matches.add(hn);
            }
        }
        if ( entry.excludeMatches !== undefined ) {
            if ( contentDetails.excludeMatches === undefined ) {
                contentDetails.excludeMatches = new Set();
            }
            for ( const hn of entry.excludeMatches ) {
                contentDetails.excludeMatches.add(hn);
            }
        }
    }

    // We do not want more than 128 CSS files per subscription, so we will
    // group multiple unrelated selectors in the same file and hope this does
    // not cause false positives.
    const contentPerFile = Math.ceil(cssContentMap.size / 128);
    const cssContentArray = Array.from(cssContentMap).map(entry => entry[1]);

    let distinctResourceCount = 0;

    for ( let i = 0; i < cssContentArray.length; i += contentPerFile ) {
        const slice = cssContentArray.slice(i, i + contentPerFile);
        const matches = slice.map(entry =>
            Array.from(entry.matches || [])
        ).flat();
        const excludeMatches = slice.map(entry =>
            Array.from(entry.excludeMatches || [])
        ).flat();
        const selectors = slice.map(entry =>
            entry.selectors
        ).join(',\n');
        const fname = uid(selectors) + '0';
        // Identical content is written only once across all rulesets.
        if ( globalCSSFileSet.has(fname) === false ) {
            globalCSSFileSet.add(fname);
            const fpath = `${fname.slice(0,1)}/${fname.slice(1,2)}/${fname.slice(2)}`;
            // A replacer function is used so that `$` sequences found in
            // selectors (`$&`, `$'`, `$1`, etc.) are inserted verbatim rather
            // than being interpreted as replacement patterns.
            writeFile(
                `${cssDir}/${fpath}.css`,
                cssDeclaration.replace('$selector$', () => selectors)
            );
            distinctResourceCount += 1;
        }
        addScriptingAPIResources(
            assetDetails.id,
            { matches },
            'matches',
            fname
        );
        addScriptingAPIResources(
            assetDetails.id,
            { excludeMatches },
            'excludeMatches',
            fname
        );
    }

    log(`CSS entries: ${distinctResourceCount}`);

    return distinctResourceCount;
}
/******************************************************************************/
// TODO: unify css/scriptlet processing code since now css styles are
// injected using scriptlet injection.
// Load all available scriptlets into a key-val map, where the key is the
// scriptlet token, and val is the whole content of the file.
@ -492,8 +339,173 @@ function loadAllSourceScriptlets() {
return scriptletsMapPromise;
}
/******************************************************************************/
const globalPatchedScriptletsSet = new Set();
// Record that the resource file `fname` is associated with each hostname
// listed in `entry[prop]`, for the ruleset identified by `id`.
//
// `prop` selects which per-ruleset map is filled: 'matches' or
// 'excludeMatches'. Nothing is recorded when `entry[prop]` is undefined.
// The per-ruleset record and the per-hostname set are created on demand.
function addScriptingAPIResources(id, entry, prop, fname) {
    const hostnames = entry[prop];
    if ( hostnames === undefined ) { return; }
    // Fetch the value stored at `key`, creating and storing it when missing.
    const lookupOrCreate = (map, key, create) => {
        let value = map.get(key);
        if ( value === undefined ) {
            value = create();
            map.set(key, value);
        }
        return value;
    };
    for ( const hostname of hostnames ) {
        const perRuleset = lookupOrCreate(scriptingDetails, id, () => ({
            matches: new Map(),
            excludeMatches: new Map(),
        }));
        lookupOrCreate(perRuleset[prop], hostname, () => new Set()).add(fname);
    }
}
/******************************************************************************/
// Compile the specific cosmetic filters of one ruleset into patched copies
// of the `css-specific` scriptlet, at most 16 files per ruleset.
//
// assetDetails: ruleset descriptor; only its `id` property is read here.
// mapin: Map of selector => details, where details may carry `matches`
//        and/or `excludeMatches` lists of hostnames.
//
// Resolves to the number of scriptlet files newly written by this call, or
// to 0 when `mapin` is undefined. Throws when selectors need to be written
// but no `css-specific` scriptlet template is available.
async function processCosmeticFilters(assetDetails, mapin) {
    if ( mapin === undefined ) { return 0; }

    // This groups together selectors which are used by the same hostnames:
    // the JSON form of the details is used as the grouping key.
    const selectorsByDetails = new Map();
    for ( const [ selector, details ] of mapin ) {
        const json = JSON.stringify(details);
        let selectors = selectorsByDetails.get(json);
        if ( selectors === undefined ) {
            selectors = new Set();
            selectorsByDetails.set(json, selectors);
        }
        selectors.add(selector);
    }
    const optimized = [];
    for ( const [ json, selectors ] of selectorsByDetails ) {
        const details = JSON.parse(json);
        details.selector = Array.from(selectors).join(',');
        optimized.push(details);
    }

    // This creates a map of unique selectorset => all hostnames
    // including/excluding the selectorset. This allows to avoid duplication
    // of css content.
    //   a = selector list, y = included hostnames, n = excluded hostnames
    const cssContentMap = new Map();
    for ( const entry of optimized ) {
        // The selector list is identified by the numeric value of its uid
        const id = parseInt(uid(entry.selector), 16);
        let details = cssContentMap.get(id);
        if ( details === undefined ) {
            details = { a: entry.selector };
            cssContentMap.set(id, details);
        }
        if ( entry.matches !== undefined ) {
            if ( details.y === undefined ) {
                details.y = new Set();
            }
            for ( const hn of entry.matches ) {
                details.y.add(hn);
            }
        }
        if ( entry.excludeMatches !== undefined ) {
            if ( details.n === undefined ) {
                details.n = new Set();
            }
            for ( const hn of entry.excludeMatches ) {
                details.n.add(hn);
            }
        }
    }

    // We do not want more than 16 CSS files per subscription, so we will
    // group multiple unrelated selectors in the same file, and distinct
    // css declarations will be injected programmatically according to the
    // hostname of the current document.
    //
    // The cosmetic filters will be injected programmatically as content
    // script and the decisions to activate the cosmetic filters will be
    // done at injection time according to the document's hostname.
    const originalScriptletMap = await loadAllSourceScriptlets();
    const contentPerFile = Math.ceil(cssContentMap.size / 16);
    const cssContentArray = Array.from(cssContentMap);

    // Serialize Set/Map instances as arrays, and leave out empty ones.
    const jsonReplacer = (k, v) => {
        if ( k === 'n' ) {
            if ( v === undefined || v.size === 0 ) { return; }
            return Array.from(v);
        }
        if ( v instanceof Set || v instanceof Map ) {
            if ( v.size === 0 ) { return; }
            return Array.from(v);
        }
        return v;
    };

    // Map each hostname to a single id, or to an array of ids when more
    // than one selector list applies to the hostname.
    const toHostnamesMap = (hostnames, id, out) => {
        for ( const hn of hostnames ) {
            const existing = out.get(hn);
            if ( existing === undefined ) {
                out.set(hn, id);
            } else if ( Array.isArray(existing) ) {
                existing.push(id);
            } else {
                out.set(hn, [ existing, id ]);
            }
        }
    };

    let distinctResourceCount = 0;

    for ( let i = 0; i < cssContentArray.length; i += contentPerFile ) {
        const slice = cssContentArray.slice(i, i + contentPerFile);
        const argsMap = slice.map(entry => [
            entry[0], { a: entry[1].a, n: entry[1].n }
        ]);
        const hostnamesMap = new Map();
        for ( const [ id, details ] of slice ) {
            if ( details.y === undefined ) { continue; }
            toHostnamesMap(details.y, id, hostnamesMap);
        }
        const template = originalScriptletMap.get('css-specific');
        if ( template === undefined ) {
            throw new Error('Scriptlet template "css-specific" not found');
        }
        // Replacer functions are used so that `$` sequences found in
        // selectors (`$&`, `$'`, `$1`, etc.) are inserted verbatim rather
        // than being interpreted as replacement patterns.
        const argsMapJSON = JSON.stringify(argsMap, jsonReplacer);
        const hostnamesMapJSON = JSON.stringify(hostnamesMap, jsonReplacer);
        const patchedScriptlet = template
            .replace(
                /\bself\.\$argsMap\$/m,
                () => `${argsMapJSON}`
            ).replace(
                /\bself\.\$hostnamesMap\$/m,
                () => `${hostnamesMapJSON}`
            );
        // ends-with 0 = css resource
        const fname = uid(patchedScriptlet) + '0';
        // Identical content is written only once across all rulesets.
        if ( globalPatchedScriptletsSet.has(fname) === false ) {
            globalPatchedScriptletsSet.add(fname);
            writeFile(`${cssDir}/${fname.slice(0,1)}/${fname.slice(1)}.js`, patchedScriptlet, {});
            distinctResourceCount += 1;
        }
        for ( const entry of slice ) {
            addScriptingAPIResources(
                assetDetails.id,
                { matches: entry[1].y },
                'matches',
                fname
            );
            addScriptingAPIResources(
                assetDetails.id,
                { excludeMatches: entry[1].n },
                'excludeMatches',
                fname
            );
        }
    }

    log(`CSS entries: ${distinctResourceCount}`);

    return distinctResourceCount;
}
/******************************************************************************/
async function processScriptletFilters(assetDetails, mapin) {
if ( mapin === undefined ) { return 0; }
@ -605,12 +617,10 @@ async function processScriptletFilters(assetDetails, mapin) {
};
for ( const [ token, argsDetails ] of scriptletDetails ) {
const argsMap = Array.from(argsDetails).map(entry => {
return [
const argsMap = Array.from(argsDetails).map(entry => [
parseInt(uid(entry[0]),16),
{ a: entry[1].a, n: entry[1].n }
];
});
]);
const hostnamesMap = new Map();
for ( const [ argsHash, details ] of argsDetails ) {
toHostnamesMap(details.y, parseInt(uid(argsHash),16), hostnamesMap);

View File

@ -146,6 +146,7 @@ const scriptlet = (
/******************************************************************************/
const argsMap = new Map(self.$argsMap$);
const hostnamesMap = new Map(self.$hostnamesMap$);
let hn;

View File

@ -0,0 +1,79 @@
/*******************************************************************************
uBlock Origin - a browser extension to block requests.
Copyright (C) 2019-present Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see {http://www.gnu.org/licenses/}.
Home: https://github.com/gorhill/uBlock
The scriptlets below are meant to be injected only into a
web page context.
*/
/* jshint esversion:11 */
'use strict';
/******************************************************************************/
/// name css-specific
/******************************************************************************/
// Important!
// Isolate from global scope
(function() {

/******************************************************************************/

// The two placeholders below are substituted at build time with the
// serialized selector and hostname lookup tables.
const argsMap = new Map(self.$argsMap$);
const hostnamesMap = new Map(self.$hostnamesMap$);

let hn;
try { hn = document.location.hostname; } catch(ex) { }

// Collect the document's hostname along with all of its ancestors, from
// most to least specific, e.g. `a.b.example.com`, `b.example.com`,
// `example.com`, `com`.
const hostnames = [];
while ( hn ) {
    hostnames.push(hn);
    const pos = hn.indexOf('.');
    if ( pos === -1 ) { break; }
    hn = hn.slice(pos + 1);
}

const styles = [];
for ( const hostname of hostnames ) {
    if ( hostnamesMap.has(hostname) === false ) { continue; }
    // A hostname maps to either a single id or an array of ids
    let argsHashes = hostnamesMap.get(hostname);
    if ( typeof argsHashes === 'number' ) { argsHashes = [ argsHashes ]; }
    for ( const argsHash of argsHashes ) {
        const details = argsMap.get(argsHash);
        if ( details === undefined ) { continue; }
        // An exclusion applies when it targets the document's hostname or
        // any of its ancestors, regardless of which hostname level caused
        // the selectors to be looked up.
        if ( details.n && details.n.some(h => hostnames.includes(h)) ) {
            continue;
        }
        styles.push(details.a);
    }
}

if ( styles.length === 0 ) { return; }

// Best effort: failure to create or adopt the stylesheet is ignored.
try {
    const sheet = new CSSStyleSheet();
    sheet.replace(`@layer{${styles.join(',')}{display:none!important;}}`);
    document.adoptedStyleSheets = [
        ...document.adoptedStyleSheets,
        sheet
    ];
} catch(ex) {
}

/******************************************************************************/

})();
/******************************************************************************/

View File

@ -122,6 +122,7 @@ const scriptlet = (
/******************************************************************************/
const argsMap = new Map(self.$argsMap$);
const hostnamesMap = new Map(self.$hostnamesMap$);
let hn;

View File

@ -164,6 +164,7 @@ const scriptlet = (
/******************************************************************************/
const argsMap = new Map(self.$argsMap$);
const hostnamesMap = new Map(self.$hostnamesMap$);
let hn;

View File

@ -93,15 +93,12 @@ function addExtendedToDNR(context, parser) {
// https://github.com/chrisaljoudi/uBlock/issues/151
// Negated hostname means the filter applies to all non-negated hostnames
// of same filter OR globally if there is no non-negated hostnames.
// Drop selectors which can potentially lead to the hiding of
// html/body elements.
for ( const { hn, not, bad } of parser.extOptions() ) {
if ( bad ) { continue; }
if ( hn.endsWith('.*') ) { continue; }
const { compiled, exception } = parser.result;
if ( compiled.startsWith('{') ) { continue; }
if ( exception ) { continue; }
if ( /(^|[^\w#.\-\[])(html|body)(,|$)/i.test(compiled) ) { continue; }
let details = context.cosmeticFilters.get(compiled);
if ( details === undefined ) {
details = {};