From 26594fb902b1042843ebda1ab3b3aa6304ed3ae9 Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Wed, 7 Dec 2022 10:30:09 -0500 Subject: [PATCH] Rework generic cosmetic filtering code Related issue: - https://github.com/uBlockOrigin/uBlock-issues/issues/2248 --- src/js/background.js | 4 +- src/js/contentscript.js | 358 +++++++++++------------ src/js/cosmetic-filtering.js | 535 ++++++++++++----------------------- src/js/messaging.js | 4 + 4 files changed, 351 insertions(+), 550 deletions(-) diff --git a/src/js/background.js b/src/js/background.js index cbc12529a..3ac267482 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -176,8 +176,8 @@ const µBlock = { // jshint ignore:line // Read-only systemSettings: { - compiledMagic: 47, // Increase when compiled format changes - selfieMagic: 47, // Increase when selfie format changes + compiledMagic: 48, // Increase when compiled format changes + selfieMagic: 48, // Increase when selfie format changes }, // https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501 diff --git a/src/js/contentscript.js b/src/js/contentscript.js index 74ec10bfb..e6c7b8bf8 100644 --- a/src/js/contentscript.js +++ b/src/js/contentscript.js @@ -945,72 +945,65 @@ vAPI.DOMFilterer = class { // vAPI.domSurveyor { - const messaging = vAPI.messaging; - const queriedIds = new Set(); - const queriedClasses = new Set(); + // https://werxltd.com/wp/2010/05/13/javascript-implementation-of-javas-string-hashcode-method/ + // Must mirror cosmetic filtering compiler's version + const hashFromStr = (type, s) => { + const len = s.length; + const step = len + 7 >>> 3; + let hash = (type << 5) - type + (len & 0xFF) | 0; + for ( let i = 0; i < len; i += step ) { + hash = (hash << 5) - hash + s.charCodeAt(i) | 0; + } + return hash & 0xFFFFFF; + }; + + const addHashes = hashes => { + for ( const hash of hashes ) { + queriedHashes.add(hash); + } + }; + + const queriedHashes = new Set(); const maxSurveyNodes = 65536; - const maxSurveyTimeSlice = 4; - const maxSurveyBuffer = 64; + const pendingLists = []; + const pendingNodes = []; + const processedSet = new Set(); + let domFilterer; + let hostname = ''; + let domChanged = false; + let scannedCount = 0; + let stopped = false; - let domFilterer, - hostname = '', - surveyCost = 0; + const addPendingList = list => { + if ( list.length === 0 ) { return; } + pendingLists.push(Array.from(list)); + }; - const pendingNodes = { - nodeLists: [], - buffer: [ - null, null, null, null, null, null, null, null, - null, null, null, null, null, null, null, null, - null, null, null, null, null, null, null, null, - null, null, null, null, null, null, null, null, - null, null, null, null, null, null, null, null, - null, null, null, null, null, null, null, null, - null, null, null, null, null, null, null, null, - null, null, null, null, null, null, null, null, - ], - j: 0, - accepted: 0, - iterated: 0, - stopped: false, - add(nodes) { - if ( nodes.length === 0 || this.accepted >= maxSurveyNodes ) { - return; + const nextPendingNodes = ( ) => { + if ( pendingLists.length === 0 ) { return 0; } + const bufferSize = 256; + let j = 0; + do { + const nodeList = pendingLists[0]; + let n = bufferSize - j; + if ( n > nodeList.length ) { + n = nodeList.length; } - this.nodeLists.push(nodes); - this.accepted += nodes.length; - }, - next() { - if ( this.nodeLists.length === 0 || this.stopped ) { return 0; } - const nodeLists = this.nodeLists; - let ib = 0; - do { - const nodeList = nodeLists[0]; - let j = this.j; - let n = j + maxSurveyBuffer - ib; - if ( n > nodeList.length ) { - n = nodeList.length; - } - for ( let i = j; i < n; i++ ) { - this.buffer[ib++] = nodeList[j++]; - } - if ( j !== nodeList.length ) { - this.j = j; - break; - } - this.j = 0; - this.nodeLists.shift(); - } while ( ib < maxSurveyBuffer && nodeLists.length !== 0 ); - this.iterated += ib; - if ( this.iterated >= maxSurveyNodes ) { - this.nodeLists = []; - this.stopped = true; - //console.info(`domSurveyor> Surveyed a total of ${this.iterated} nodes. Enough.`); + for ( let i = 0; i < n; i++ ) { + pendingNodes[j+i] = nodeList[i]; } - return ib; - }, - hasNodes() { - return this.nodeLists.length !== 0; - }, + j += n; + if ( n !== nodeList.length ) { + pendingLists[0] = nodeList.slice(n); + break; + } + pendingLists.shift(); + } while ( j < bufferSize && pendingLists.length !== 0 ); + return j; + }; + + const hasPendingNodes = ( ) => { + return pendingLists.length !== 0; }; // Extract all classes/ids: these will be passed to the cosmetic @@ -1024,10 +1017,10 @@ vAPI.DOMFilterer = class { const idFromNode = (node, out) => { const raw = node.id; if ( typeof raw !== 'string' || raw.length === 0 ) { return; } - const s = raw.trim(); - if ( queriedIds.has(s) || s.length === 0 ) { return; } - out.push(s); - queriedIds.add(s); + const hash = hashFromStr(0x23 /* '#' */, raw.trim()); + if ( queriedHashes.has(hash) ) { return; } + queriedHashes.add(hash); + out.push(hash); }; // https://github.com/uBlockOrigin/uBlock-issues/discussions/2076 @@ -1036,73 +1029,83 @@ vAPI.DOMFilterer = class { const s = node.getAttribute('class'); if ( typeof s !== 'string' ) { return; } const len = s.length; - for ( let beg = 0, end = 0, token = ''; beg < len; beg += 1 ) { + for ( let beg = 0, end = 0; beg < len; beg += 1 ) { end = s.indexOf(' ', beg); if ( end === beg ) { continue; } if ( end === -1 ) { end = len; } - token = s.slice(beg, end); + const hash = hashFromStr(0x2E /* '.' */, s.slice(beg, end)); beg = end; - if ( queriedClasses.has(token) ) { continue; } - out.push(token); - queriedClasses.add(token); + if ( queriedHashes.has(hash) ) { continue; } + queriedHashes.add(hash); + out.push(hash); } }; - const surveyPhase1 = function() { - //console.time('dom surveyor/surveying'); + const getSurveyResults = hashes => { + if ( self.vAPI.messaging instanceof Object === false ) { + stop(); return; + } + const promise = hashes.length === 0 + ? Promise.resolve(null) + : self.vAPI.messaging.send('contentscript', { + what: 'retrieveGenericCosmeticSelectors', + hostname, + hashes, + exceptions: domFilterer.exceptions, + }); + promise.then(response => { + processSurveyResults(response); + }); + }; + + const doSurvey = ( ) => { const t0 = performance.now(); - const ids = []; - const classes = []; - const nodes = pendingNodes.buffer; - const deadline = t0 + maxSurveyTimeSlice; + const hashes = []; + const nodes = pendingNodes; + const deadline = t0 + 4; let processed = 0; + let scanned = 0; for (;;) { - const n = pendingNodes.next(); + const n = nextPendingNodes(); if ( n === 0 ) { break; } for ( let i = 0; i < n; i++ ) { const node = nodes[i]; nodes[i] = null; - idFromNode(node, ids); - classesFromNode(node, classes); + if ( domChanged ) { + if ( processedSet.has(node) ) { continue; } + processedSet.add(node); + } + idFromNode(node, hashes); + classesFromNode(node, hashes); + scanned += 1; } processed += n; if ( performance.now() >= deadline ) { break; } } - const t1 = performance.now(); - surveyCost += t1 - t0; - //console.info(`domSurveyor> Surveyed ${processed} nodes in ${(t1-t0).toFixed(2)} ms`); - // Phase 2: Ask main process to lookup relevant cosmetic filters. - if ( ids.length !== 0 || classes.length !== 0 ) { - messaging.send('contentscript', { - what: 'retrieveGenericCosmeticSelectors', - hostname, - ids, classes, - exceptions: domFilterer.exceptions, - cost: surveyCost, - }).then(response => { - surveyPhase3(response); - }); - } else { - surveyPhase3(null); + //console.info(`[domSurveyor][${hostname}] Surveyed ${scanned}/${processed} nodes in ${(performance.now()-t0).toFixed(2)} ms: ${hashes.length} hashes`); + scannedCount += scanned; + if ( scannedCount >= maxSurveyNodes ) { + stop(); } - //console.timeEnd('dom surveyor/surveying'); + processedSet.clear(); + getSurveyResults(hashes); }; - const surveyTimer = new vAPI.SafeAnimationFrame(surveyPhase1); + const surveyTimer = new vAPI.SafeAnimationFrame(doSurvey); // This is to shutdown the surveyor if result of surveying keeps being // fruitless. This is useful on long-lived web page. I arbitrarily // picked 5 minutes before the surveyor is allowed to shutdown. I also // arbitrarily picked 256 misses before the surveyor is allowed to // shutdown. - let canShutdownAfter = Date.now() + 300000, - surveyingMissCount = 0; + let canShutdownAfter = Date.now() + 300000; + let surveyResultMissCount = 0; // Handle main process' response. - const surveyPhase3 = function(response) { + const processSurveyResults = response => { + if ( stopped ) { return; } const result = response && response.result; let mustCommit = false; - if ( result ) { const css = result.injectedCSS; if ( typeof css === 'string' && css.length !== 0 ) { @@ -1114,99 +1117,86 @@ vAPI.DOMFilterer = class { domFilterer.exceptCSSRules(selectors); } } - - if ( pendingNodes.stopped === false ) { - if ( pendingNodes.hasNodes() ) { - surveyTimer.start(1); - } - if ( mustCommit ) { - surveyingMissCount = 0; - canShutdownAfter = Date.now() + 300000; - return; - } - surveyingMissCount += 1; - if ( surveyingMissCount < 256 || Date.now() < canShutdownAfter ) { - return; - } + if ( hasPendingNodes() ) { + surveyTimer.start(1); } - - //console.info('dom surveyor shutting down: too many misses'); - - surveyTimer.clear(); - vAPI.domWatcher.removeListener(domWatcherInterface); - vAPI.domSurveyor = null; + if ( mustCommit ) { + surveyResultMissCount = 0; + canShutdownAfter = Date.now() + 300000; + return; + } + surveyResultMissCount += 1; + if ( surveyResultMissCount < 256 || Date.now() < canShutdownAfter ) { + return; + } + //console.info(`[domSurveyor][${hostname}] Shutting down, too many misses`); + stop(); + self.vAPI.messaging.send('contentscript', { + what: 'disableGenericCosmeticFilteringSurveyor', + hostname, + }); }; const domWatcherInterface = { onDOMCreated: function() { - if ( - self.vAPI instanceof Object === false || - vAPI.domSurveyor instanceof Object === false || - vAPI.domFilterer instanceof Object === false - ) { - if ( self.vAPI instanceof Object ) { - if ( vAPI.domWatcher instanceof Object ) { - vAPI.domWatcher.removeListener(domWatcherInterface); - } - vAPI.domSurveyor = null; - } - return; - } - //console.time('dom surveyor/dom layout created'); domFilterer = vAPI.domFilterer; - pendingNodes.add(document.querySelectorAll( - '[id]:not(html):not(body),[class]:not(html):not(body)' - )); - surveyTimer.start(); // https://github.com/uBlockOrigin/uBlock-issues/issues/1692 // Look-up safe-only selectors to mitigate probability of // html/body elements of erroneously being targeted. - const ids = [], classes = []; + const hashes = []; if ( document.documentElement !== null ) { - idFromNode(document.documentElement, ids); - classesFromNode(document.documentElement, classes); + idFromNode(document.documentElement, hashes); + classesFromNode(document.documentElement, hashes); } if ( document.body !== null ) { - idFromNode(document.body, ids); - classesFromNode(document.body, classes); + idFromNode(document.body, hashes); + classesFromNode(document.body, hashes); } - if ( ids.length !== 0 || classes.length !== 0 ) { - messaging.send('contentscript', { - what: 'retrieveGenericCosmeticSelectors', - hostname, - ids, classes, - exceptions: domFilterer.exceptions, - safeOnly: true, - }).then(response => { - surveyPhase3(response); - }); + addPendingList(document.querySelectorAll( + '[id]:not(html):not(body),[class]:not(html):not(body)' + )); + if ( hasPendingNodes() ) { + surveyTimer.start(); } - //console.timeEnd('dom surveyor/dom layout created'); }, onDOMChanged: function(addedNodes) { if ( addedNodes.length === 0 ) { return; } - //console.time('dom surveyor/dom layout changed'); + domChanged = true; for ( const node of addedNodes ) { - pendingNodes.add([ node ]); + addPendingList([ node ]); if ( node.firstElementChild === null ) { continue; } - pendingNodes.add(node.querySelectorAll( - '[id]:not(html):not(body),[class]:not(html):not(body)' - )); + addPendingList( + node.querySelectorAll( + '[id]:not(html):not(body),[class]:not(html):not(body)' + ) + ); } - if ( pendingNodes.hasNodes() ) { + if ( hasPendingNodes() ) { surveyTimer.start(1); } - //console.timeEnd('dom surveyor/dom layout changed'); } }; - const start = function(details) { - if ( vAPI.domWatcher instanceof Object === false ) { return; } + const start = details => { + if ( self.vAPI instanceof Object === false ) { return; } + if ( self.vAPI.domFilterer instanceof Object === false ) { return; } + if ( self.vAPI.domWatcher instanceof Object === false ) { return; } hostname = details.hostname; - vAPI.domWatcher.addListener(domWatcherInterface); + self.vAPI.domWatcher.addListener(domWatcherInterface); }; - vAPI.domSurveyor = { start }; + const stop = ( ) => { + stopped = true; + pendingLists.length = 0; + surveyTimer.clear(); + if ( self.vAPI instanceof Object === false ) { return; } + if ( self.vAPI.domWatcher instanceof Object ) { + self.vAPI.domWatcher.removeListener(domWatcherInterface); + } + self.vAPI.domSurveyor = null; + }; + + self.vAPI.domSurveyor = { start, addHashes }; } /******************************************************************************/ @@ -1218,7 +1208,7 @@ vAPI.DOMFilterer = class { // to be launched if/when needed. { - const bootstrapPhase2 = function() { + const onDomReady = ( ) => { // This can happen on Firefox. For instance: // https://github.com/gorhill/uBlock/issues/1893 if ( window.location === null ) { return; } @@ -1279,9 +1269,8 @@ vAPI.DOMFilterer = class { // an object -- let's stay around, we may be given the opportunity // to try bootstrapping again later. - const bootstrapPhase1 = function(response) { + const onResponseReady = response => { if ( response instanceof Object === false ) { return; } - vAPI.bootstrap = undefined; // cosmetic filtering engine aka 'cfe' @@ -1308,7 +1297,7 @@ vAPI.DOMFilterer = class { vAPI.domSurveyor = null; } else { const domFilterer = vAPI.domFilterer = new vAPI.DOMFilterer(); - if ( noGenericCosmeticFiltering || cfeDetails.noDOMSurveying ) { + if ( noGenericCosmeticFiltering || cfeDetails.disableSurveyor ) { vAPI.domSurveyor = null; } domFilterer.exceptions = cfeDetails.exceptionFilters; @@ -1316,10 +1305,9 @@ vAPI.DOMFilterer = class { domFilterer.addProceduralSelectors(cfeDetails.proceduralFilters); domFilterer.exceptCSSRules(cfeDetails.exceptedFilters); domFilterer.convertedProceduralFilters = cfeDetails.convertedProceduralFilters; + vAPI.userStylesheet.apply(); } - vAPI.userStylesheet.apply(); - // Library of resources is located at: // https://github.com/gorhill/uBlock/blob/master/assets/ublock/resources.txt if ( scriptlets && typeof self.uBO_scriptletsInjected !== 'boolean' ) { @@ -1328,26 +1316,18 @@ vAPI.DOMFilterer = class { vAPI.injectedScripts = scriptlets; } - if ( vAPI.domSurveyor instanceof Object ) { + if ( vAPI.domSurveyor ) { + if ( Array.isArray(cfeDetails.genericCosmeticHashes) ) { + vAPI.domSurveyor.addHashes(cfeDetails.genericCosmeticHashes); + } vAPI.domSurveyor.start(cfeDetails); } - // https://github.com/chrisaljoudi/uBlock/issues/587 - // If no filters were found, maybe the script was injected before - // uBlock's process was fully initialized. When this happens, pages - // won't be cleaned right after browser launch. - if ( - typeof document.readyState === 'string' && - document.readyState !== 'loading' - ) { - bootstrapPhase2(); - } else { - document.addEventListener( - 'DOMContentLoaded', - bootstrapPhase2, - { once: true } - ); + const readyState = document.readyState; + if ( readyState === 'interactive' || readyState === 'complete' ) { + return onDomReady(); } + document.addEventListener('DOMContentLoaded', onDomReady, { once: true }); }; vAPI.bootstrap = function() { @@ -1356,7 +1336,7 @@ vAPI.DOMFilterer = class { url: vAPI.effectiveSelf.location.href, needScriptlets: typeof self.uBO_scriptletsInjected !== 'boolean', }).then(response => { - bootstrapPhase1(response); + onResponseReady(response); }); }; } diff --git a/src/js/cosmetic-filtering.js b/src/js/cosmetic-filtering.js index 466947fc0..9461a703a 100644 --- a/src/js/cosmetic-filtering.js +++ b/src/js/cosmetic-filtering.js @@ -32,12 +32,6 @@ import { StaticExtFilteringSessionDB, } from './static-ext-filtering-db.js'; -/******************************************************************************/ - -const cosmeticSurveyingMissCountMax = - parseInt(vAPI.localStorage.getItem('cosmeticSurveyingMissCountMax'), 10) || - 15; - /******************************************************************************/ /******************************************************************************/ @@ -48,71 +42,55 @@ const SelectorCacheEntry = class { reset() { this.cosmetic = new Set(); - this.cosmeticSurveyingMissCount = 0; + this.cosmeticHashes = new Set(); + this.disableSurveyor = false; this.net = new Map(); - this.lastAccessTime = Date.now(); + this.accessId = SelectorCacheEntry.accessId++; return this; } dispose() { - this.cosmetic = this.net = null; + this.cosmetic = this.cosmeticHashes = this.net = null; if ( SelectorCacheEntry.junkyard.length < 25 ) { SelectorCacheEntry.junkyard.push(this); } } addCosmetic(details) { - const selectors = details.selectors; - let i = selectors.length || 0; - // https://github.com/gorhill/uBlock/issues/2011 - // Avoiding seemingly pointless surveys only if they appear costly. - if ( details.first && i === 0 ) { - if ( (details.cost || 0) >= 80 ) { - this.cosmeticSurveyingMissCount += 1; - } - return; + const selectors = details.selectors.join(',\n'); + if ( selectors.length !== 0 ) { + this.cosmetic.add(selectors); } - this.cosmeticSurveyingMissCount = 0; - while ( i-- ) { - this.cosmetic.add(selectors[i]); + for ( const hash of details.hashes ) { + this.cosmeticHashes.add(hash); } } addNet(selectors) { if ( typeof selectors === 'string' ) { - this.addNetOne(selectors, Date.now()); + this.net.set(selectors, this.accessId); } else { - this.addNetMany(selectors, Date.now()); + this.net.set(selectors.join(',\n'), this.accessId); } // Net request-derived selectors: I limit the number of cached - // selectors, as I expect cases where the blocked net-requests + // selectors, as I expect cases where the blocked network requests // are never the exact same URL. - if ( this.net.size < SelectorCacheEntry.netHighWaterMark ) { - return; - } - const dict = this.net; - const keys = Array.from(dict.keys()).sort(function(a, b) { - return dict.get(b) - dict.get(a); - }).slice(SelectorCacheEntry.netLowWaterMark); - let i = keys.length; - while ( i-- ) { - dict.delete(keys[i]); + if ( this.net.size < SelectorCacheEntry.netHighWaterMark ) { return; } + const keys = Array.from(this.net) + .sort((a, b) => b[1] - a[1]) + .slice(SelectorCacheEntry.netLowWaterMark) + .map(a => a[0]); + for ( const key of keys ) { + this.net.delete(key); } } - addNetOne(selector, now) { - this.net.set(selector, now); - } - - addNetMany(selectors, now) { - let i = selectors.length || 0; - while ( i-- ) { - this.net.set(selectors[i], now); - } + addNetOne(selector, token) { + this.net.set(selector, token); } add(details) { - this.lastAccessTime = Date.now(); + this.accessId = SelectorCacheEntry.accessId++; if ( details.type === 'cosmetic' ) { this.addCosmetic(details); } else { @@ -122,10 +100,9 @@ const SelectorCacheEntry = class { // https://github.com/chrisaljoudi/uBlock/issues/420 remove(type) { - this.lastAccessTime = Date.now(); + this.accessId = SelectorCacheEntry.accessId++; if ( type === undefined || type === 'cosmetic' ) { this.cosmetic.clear(); - this.cosmeticSurveyingMissCount = 0; } if ( type === undefined || type === 'net' ) { this.net.clear(); @@ -133,36 +110,41 @@ const SelectorCacheEntry = class { } retrieveToArray(iterator, out) { - for ( let selector of iterator ) { + for ( const selector of iterator ) { out.push(selector); } } retrieveToSet(iterator, out) { - for ( let selector of iterator ) { + for ( const selector of iterator ) { out.add(selector); } } - retrieve(type, out) { - this.lastAccessTime = Date.now(); - const iterator = type === 'cosmetic' ? this.cosmetic : this.net.keys(); - if ( Array.isArray(out) ) { - this.retrieveToArray(iterator, out); - } else { - this.retrieveToSet(iterator, out); - } + retrieveNet(out) { + this.accessId = SelectorCacheEntry.accessId++; + if ( this.net.size === 0 ) { return false; } + this.retrieveToArray(this.net.keys(), out); + return true; + } + + retrieveCosmetic(selectors, hashes) { + this.accessId = SelectorCacheEntry.accessId++; + if ( this.cosmetic.size === 0 ) { return false; } + this.retrieveToSet(this.cosmetic, selectors); + this.retrieveToArray(this.cosmeticHashes, hashes); + return true; } static factory() { const entry = SelectorCacheEntry.junkyard.pop(); - if ( entry ) { - return entry.reset(); - } - return new SelectorCacheEntry(); + return entry + ? entry.reset() + : new SelectorCacheEntry(); } }; +SelectorCacheEntry.accessId = 1; SelectorCacheEntry.netLowWaterMark = 20; SelectorCacheEntry.netHighWaterMark = 30; SelectorCacheEntry.junkyard = []; @@ -170,6 +152,61 @@ SelectorCacheEntry.junkyard = []; /******************************************************************************/ /******************************************************************************/ +// https://werxltd.com/wp/2010/05/13/javascript-implementation-of-javas-string-hashcode-method/ +// Must mirror content script surveyor's version + +const hashFromStr = (type, s) => { + const len = s.length; + const step = len + 7 >>> 3; + let hash = (type << 5) - type + (len & 0xFF) | 0; + for ( let i = 0; i < len; i += step ) { + hash = (hash << 5) - hash + s.charCodeAt(i) | 0; + } + return hash & 0xFFFFFF; +}; + +// https://github.com/gorhill/uBlock/issues/1668 +// The key must be literal: unescape escaped CSS before extracting key. +// It's an uncommon case, so it's best to unescape only when needed. + +const keyFromSelector = selector => { + let matches = rePlainSelector.exec(selector); + if ( matches === null ) { + matches = rePlainSelectorEx.exec(selector); + if ( matches !== null ) { return matches[1] || matches[2]; } + return; + } + let key = matches[0]; + if ( key.includes('\\') === false ) { return key; } + matches = rePlainSelectorEscaped.exec(selector); + if ( matches === null ) { return; } + key = ''; + const escaped = matches[0]; + let beg = 0; + reEscapeSequence.lastIndex = 0; + for (;;) { + matches = reEscapeSequence.exec(escaped); + if ( matches === null ) { + return key + escaped.slice(beg); + } + key += escaped.slice(beg, matches.index); + beg = reEscapeSequence.lastIndex; + if ( matches[1].length === 1 ) { + key += matches[1]; + } else { + key += String.fromCharCode(parseInt(matches[1], 16)); + } + } +}; + +const rePlainSelector = /^[#.][\w\\-]+/; +const rePlainSelectorEx = /^[^#.\[(]+([#.][\w-]+)|([#.][\w-]+)$/; +const rePlainSelectorEscaped = /^[#.](?:\\[0-9A-Fa-f]+ |\\.|\w|-)+/; +const reEscapeSequence = /\\([0-9A-Fa-f]+ |.)/g; + +/******************************************************************************/ +/******************************************************************************/ + // Cosmetic filter family tree: // // Generic @@ -186,18 +223,12 @@ SelectorCacheEntry.junkyard = []; // Specific filers can be enforced before the main document is loaded. const FilterContainer = function() { - this.rePlainSelector = /^[#.][\w\\-]+/; - this.rePlainSelectorEscaped = /^[#.](?:\\[0-9A-Fa-f]+ |\\.|\w|-)+/; - this.rePlainSelectorEx = /^[^#.\[(]+([#.][\w-]+)|([#.][\w-]+)$/; - this.reEscapeSequence = /\\([0-9A-Fa-f]+ |.)/g; this.reSimpleHighGeneric = /^(?:[a-z]*\[[^\]]+\]|\S+)$/; - this.reHighMedium = /^\[href\^="https?:\/\/([^"]{8})[^"]*"\]$/; this.selectorCache = new Map(); this.selectorCachePruneDelay = 10 * 60 * 1000; // 10 minutes - this.selectorCacheAgeMax = 120 * 60 * 1000; // 120 minutes - this.selectorCacheCountMin = 25; - this.netSelectorCacheCountMax = SelectorCacheEntry.netHighWaterMark; + this.selectorCacheCountMin = 40; + this.selectorCacheCountMax = 50; this.selectorCacheTimer = null; // specific filters @@ -206,20 +237,8 @@ const FilterContainer = function() { // temporary filters this.sessionFilterDB = new StaticExtFilteringSessionDB(); - // low generic cosmetic filters, organized by id/class then simple/complex. - this.lowlyGeneric = Object.create(null); - this.lowlyGeneric.id = { - canonical: 'ids', - prefix: '#', - simple: new Set(), - complex: new Map() - }; - this.lowlyGeneric.cl = { - canonical: 'classes', - prefix: '.', - simple: new Set(), - complex: new Map() - }; + // low generic cosmetic filters: map of hash => array of selectors + this.lowlyGeneric = new Map(); // highly generic selectors sets this.highlyGeneric = Object.create(null); @@ -240,8 +259,6 @@ const FilterContainer = function() { // is to prevent repeated allocation/deallocation overheads -- the // constructors/destructors of javascript Set/Map is assumed to be costlier // than just calling clear() on these. - this.$simpleSet = new Set(); - this.$complexSet = new Set(); this.$specificSet = new Set(); this.$exceptionSet = new Set(); this.$proceduralSet = new Set(); @@ -266,17 +283,11 @@ FilterContainer.prototype.reset = function() { this.selectorCacheTimer = null; } - // whether there is at least one surveyor-based filter - this.needDOMSurveyor = false; - // hostname, entity-based filters this.specificFilters.clear(); - // low generic cosmetic filters, organized by id/class then simple/complex. - this.lowlyGeneric.id.simple.clear(); - this.lowlyGeneric.id.complex.clear(); - this.lowlyGeneric.cl.simple.clear(); - this.lowlyGeneric.cl.complex.clear(); + // low generic cosmetic filters + this.lowlyGeneric.clear(); // highly generic selectors sets this.highlyGeneric.simple.dict.clear(); @@ -285,6 +296,8 @@ FilterContainer.prototype.reset = function() { this.highlyGeneric.complex.dict.clear(); this.highlyGeneric.complex.str = ''; this.highlyGeneric.complex.mru.reset(); + + this.selfieVersion = 1; }; /******************************************************************************/ @@ -293,12 +306,6 @@ FilterContainer.prototype.freeze = function() { this.duplicateBuster.clear(); this.specificFilters.collectGarbage(); - this.needDOMSurveyor = - this.lowlyGeneric.id.simple.size !== 0 || - this.lowlyGeneric.id.complex.size !== 0 || - this.lowlyGeneric.cl.simple.size !== 0 || - this.lowlyGeneric.cl.complex.size !== 0; - this.highlyGeneric.simple.str = Array.from(this.highlyGeneric.simple.dict).join(',\n'); this.highlyGeneric.simple.mru.reset(); this.highlyGeneric.complex.str = Array.from(this.highlyGeneric.complex.dict).join(',\n'); @@ -309,40 +316,6 @@ FilterContainer.prototype.freeze = function() { /******************************************************************************/ -// https://github.com/gorhill/uBlock/issues/1668 -// The key must be literal: unescape escaped CSS before extracting key. -// It's an uncommon case, so it's best to unescape only when needed. - -FilterContainer.prototype.keyFromSelector = function(selector) { - let matches = this.rePlainSelector.exec(selector); - if ( matches === null ) { return; } - let key = matches[0]; - if ( key.indexOf('\\') === -1 ) { - return key; - } - matches = this.rePlainSelectorEscaped.exec(selector); - if ( matches === null ) { return; } - key = ''; - const escaped = matches[0]; - let beg = 0; - this.reEscapeSequence.lastIndex = 0; - for (;;) { - matches = this.reEscapeSequence.exec(escaped); - if ( matches === null ) { - return key + escaped.slice(beg); - } - key += escaped.slice(beg, matches.index); - beg = this.reEscapeSequence.lastIndex; - if ( matches[1].length === 1 ) { - key += matches[1]; - } else { - key += String.fromCharCode(parseInt(matches[1], 16)); - } - } -}; - -/******************************************************************************/ - FilterContainer.prototype.compile = function(parser, writer) { if ( parser.hasOptions() === false ) { this.compileGenericSelector(parser, writer); @@ -396,38 +369,8 @@ FilterContainer.prototype.compileGenericHideSelector = function( writer.select('COSMETIC_FILTERS:GENERIC'); - const type = compiled.charCodeAt(0); - let key; - - // Simple selector-based CSS rule: no need to test for whether the - // selector is valid, the regex took care of this. Most generic selector - // falls into that category: - // - ###ad-bigbox - // - ##.ads-bigbox - if ( type === 0x23 /* '#' */ ) { - key = this.keyFromSelector(compiled); - if ( key === compiled ) { - writer.push([ 0, key.slice(1) ]); - return; - } - } else if ( type === 0x2E /* '.' */ ) { - key = this.keyFromSelector(compiled); - if ( key === compiled ) { - writer.push([ 2, key.slice(1) ]); - return; - } - } - - // Invalid cosmetic filter, possible reasons: - // - Bad syntax - // - Procedural filters (can't be generic): the compiled version of - // a procedural selector is NEVER equal to its raw version. - // https://github.com/uBlockOrigin/uBlock-issues/issues/464 - // Pseudoclass-based selectors can be compiled, but are also valid - // plain selectors. // https://github.com/uBlockOrigin/uBlock-issues/issues/131 // Support generic procedural filters as per advanced settings. - // TODO: prevent double compilation. if ( compiled.charCodeAt(0) === 0x7B /* '{' */ ) { if ( µb.hiddenSettings.allowGenericProceduralFilters === true ) { return this.compileSpecificSelector(parser, '', false, writer); @@ -441,28 +384,12 @@ FilterContainer.prototype.compileGenericHideSelector = function( return; } - // Complex selector-based CSS rule: - // - ###tads + div + .c - // - ##.rscontainer > .ellip + const key = keyFromSelector(compiled); if ( key !== undefined ) { writer.push([ - type === 0x23 /* '#' */ ? 1 : 3, - key.slice(1), - compiled - ]); - return; - } - - // https://github.com/gorhill/uBlock/issues/909 - // Anything which contains a plain id/class selector can be classified - // as a low generic cosmetic filter. - const matches = this.rePlainSelectorEx.exec(compiled); - if ( matches !== null ) { - const key = matches[1] || matches[2]; - writer.push([ - key.charCodeAt(0) === 0x23 /* '#' */ ? 1 : 3, - key.slice(1), - compiled + 0, + hashFromStr(key.charCodeAt(0), key.slice(1)), + compiled, ]); return; } @@ -618,36 +545,13 @@ FilterContainer.prototype.fromCompiledContent = function(reader, options) { this.duplicateBuster.add(fingerprint); const args = reader.args(); switch ( args[0] ) { - // low generic, simple - case 0: // #AdBanner - case 2: { // .largeAd - const db = args[0] === 0 ? this.lowlyGeneric.id : this.lowlyGeneric.cl; - const bucket = db.complex.get(args[1]); - if ( bucket === undefined ) { - db.simple.add(args[1]); - } else if ( Array.isArray(bucket) ) { - bucket.push(db.prefix + args[1]); + // low generic + case 0: { + if ( this.lowlyGeneric.has(args[1]) ) { + const selector = this.lowlyGeneric.get(args[1]); + this.lowlyGeneric.set(args[1], `${selector},\n${args[2]}`); } else { - db.complex.set(args[1], [ bucket, db.prefix + args[1] ]); - } - break; - } - // low generic, complex - case 1: // #tads + div + .c - case 3: { // .Mpopup + #Mad > #MadZone - const db = args[0] === 1 ? this.lowlyGeneric.id : this.lowlyGeneric.cl; - const bucket = db.complex.get(args[1]); - if ( bucket === undefined ) { - if ( db.simple.has(args[1]) ) { - db.complex.set(args[1], [ db.prefix + args[1], args[2] ]); - } else { - db.complex.set(args[1], args[2]); - db.simple.add(args[1]); - } - } else if ( Array.isArray(bucket) ) { - bucket.push(args[2]); - } else { - db.complex.set(args[1], [ bucket, args[2] ]); + this.lowlyGeneric.set(args[1], args[2]); } break; } @@ -682,13 +586,11 @@ FilterContainer.prototype.skipCompiledContent = function(reader, sectionId) { FilterContainer.prototype.toSelfie = function() { return { + version: this.selfieVersion, acceptedCount: this.acceptedCount, discardedCount: this.discardedCount, specificFilters: this.specificFilters.toSelfie(), - lowlyGenericSID: Array.from(this.lowlyGeneric.id.simple), - lowlyGenericCID: Array.from(this.lowlyGeneric.id.complex), - lowlyGenericSCL: Array.from(this.lowlyGeneric.cl.simple), - lowlyGenericCCL: Array.from(this.lowlyGeneric.cl.complex), + lowlyGeneric: Array.from(this.lowlyGeneric), highSimpleGenericHideArray: Array.from(this.highlyGeneric.simple.dict), highComplexGenericHideArray: Array.from(this.highlyGeneric.complex.dict), }; @@ -697,22 +599,19 @@ FilterContainer.prototype.toSelfie = function() { /******************************************************************************/ FilterContainer.prototype.fromSelfie = function(selfie) { + if ( selfie.version !== this.selfieVersion ) { + throw new Error( + `cosmeticFilteringEngine: mismatched selfie version, ${selfie.version}, expected ${this.selfieVersion}` + ); + } this.acceptedCount = selfie.acceptedCount; this.discardedCount = selfie.discardedCount; this.specificFilters.fromSelfie(selfie.specificFilters); - this.lowlyGeneric.id.simple = new Set(selfie.lowlyGenericSID); - this.lowlyGeneric.id.complex = new Map(selfie.lowlyGenericCID); - this.lowlyGeneric.cl.simple = new Set(selfie.lowlyGenericSCL); - this.lowlyGeneric.cl.complex = new Map(selfie.lowlyGenericCCL); + this.lowlyGeneric = new Map(selfie.lowlyGeneric); this.highlyGeneric.simple.dict = new Set(selfie.highSimpleGenericHideArray); this.highlyGeneric.simple.str = selfie.highSimpleGenericHideArray.join(',\n'); this.highlyGeneric.complex.dict = new Set(selfie.highComplexGenericHideArray); this.highlyGeneric.complex.str = selfie.highComplexGenericHideArray.join(',\n'); - this.needDOMSurveyor = - selfie.lowlyGenericSID.length !== 0 || - selfie.lowlyGenericCID.length !== 0 || - selfie.lowlyGenericSCL.length !== 0 || - selfie.lowlyGenericCCL.length !== 0; this.frozen = true; }; @@ -721,12 +620,11 @@ FilterContainer.prototype.fromSelfie = function(selfie) { FilterContainer.prototype.triggerSelectorCachePruner = function() { // Of interest: http://fitzgeraldnick.com/weblog/40/ // http://googlecode.blogspot.ca/2009/07/gmail-for-mobile-html5-series-using.html - if ( this.selectorCacheTimer === null ) { - this.selectorCacheTimer = vAPI.setTimeout( - this.pruneSelectorCacheAsync.bind(this), - this.selectorCachePruneDelay - ); - } + if ( this.selectorCacheTimer !== null ) { return; } + this.selectorCacheTimer = vAPI.setTimeout( + ( ) => { this.pruneSelectorCacheAsync(); }, + this.selectorCachePruneDelay + ); }; /******************************************************************************/ @@ -740,7 +638,7 @@ FilterContainer.prototype.addToSelectorCache = function(details) { if ( entry === undefined ) { entry = SelectorCacheEntry.factory(); this.selectorCache.set(hostname, entry); - if ( this.selectorCache.size > this.selectorCacheCountMin ) { + if ( this.selectorCache.size > this.selectorCacheCountMax ) { this.triggerSelectorCachePruner(); } } @@ -753,7 +651,7 @@ FilterContainer.prototype.removeFromSelectorCache = function( targetHostname = '*', type = undefined ) { - let targetHostnameLength = targetHostname.length; + const targetHostnameLength = targetHostname.length; for ( let entry of this.selectorCache ) { let hostname = entry[0]; let item = entry[1]; @@ -772,46 +670,27 @@ FilterContainer.prototype.removeFromSelectorCache = function( /******************************************************************************/ -FilterContainer.prototype.retrieveFromSelectorCache = function( - hostname, - type, - out -) { - let entry = this.selectorCache.get(hostname); - if ( entry !== undefined ) { - entry.retrieve(type, out); +FilterContainer.prototype.pruneSelectorCacheAsync = function() { + this.selectorCacheTimer = null; + if ( this.selectorCache.size <= this.selectorCacheCountMax ) { return; } + const cache = this.selectorCache; + const hostnames = Array.from(cache.keys()) + .sort((a, b) => cache.get(b).accessId - cache.get(a).accessId) + .slice(this.selectorCacheCountMin); + for ( const hn of hostnames ) { + cache.get(hn).dispose(); + cache.delete(hn); } }; /******************************************************************************/ -FilterContainer.prototype.pruneSelectorCacheAsync = function() { - this.selectorCacheTimer = null; - if ( this.selectorCache.size <= this.selectorCacheCountMin ) { return; } - let cache = this.selectorCache; - // Sorted from most-recently-used to least-recently-used, because - // we loop beginning at the end below. - // We can't avoid sorting because we have to keep a minimum number of - // entries, and these entries should always be the most-recently-used. - let hostnames = Array.from(cache.keys()) - .sort(function(a, b) { - return cache.get(b).lastAccessTime - - cache.get(a).lastAccessTime; - }) - .slice(this.selectorCacheCountMin); - let obsolete = Date.now() - this.selectorCacheAgeMax, - i = hostnames.length; - while ( i-- ) { - let hostname = hostnames[i]; - let entry = cache.get(hostname); - if ( entry.lastAccessTime > obsolete ) { break; } - // console.debug('pruneSelectorCacheAsync: flushing "%s"', hostname); - entry.dispose(); - cache.delete(hostname); - } - if ( cache.size > this.selectorCacheCountMin ) { - this.triggerSelectorCachePruner(); - } +FilterContainer.prototype.disableSurveyor = function(details) { + const hostname = details.hostname; + if ( typeof hostname !== 'string' || hostname === '' ) { return; } + const cacheEntry = this.selectorCache.get(hostname); + if ( cacheEntry === undefined ) { return; } + cacheEntry.disableSurveyor = true; }; /******************************************************************************/ @@ -850,43 +729,19 @@ FilterContainer.prototype.cssRuleFromProcedural = function(json) { /******************************************************************************/ FilterContainer.prototype.retrieveGenericSelectors = function(request) { - if ( this.acceptedCount === 0 ) { return; } - if ( !request.ids && !request.classes ) { return; } + if ( this.lowlyGeneric.size === 0 ) { return; } + if ( Array.isArray(request.hashes) === false ) { return; } + if ( request.hashes.length === 0 ) { return; } - const { safeOnly = false } = request; - //console.time('cosmeticFilteringEngine.retrieveGenericSelectors'); - - const simpleSelectors = this.$simpleSet; - const complexSelectors = this.$complexSet; - - const cacheEntry = this.selectorCache.get(request.hostname); - const previousHits = cacheEntry && cacheEntry.cosmetic || this.$dummySet; - - for ( const type in this.lowlyGeneric ) { - const entry = this.lowlyGeneric[type]; - const selectors = request[entry.canonical]; - if ( Array.isArray(selectors) === false ) { continue; } - for ( const identifier of selectors ) { - if ( entry.simple.has(identifier) === false ) { continue; } - const bucket = entry.complex.get(identifier); - if ( typeof bucket === 'string' ) { - if ( previousHits.has(bucket) ) { continue; } - complexSelectors.add(bucket); - continue; - } - const simpleSelector = entry.prefix + identifier; - if ( Array.isArray(bucket) ) { - for ( const complexSelector of bucket ) { - if ( previousHits.has(complexSelector) ) { continue; } - if ( safeOnly && complexSelector === simpleSelector ) { continue; } - complexSelectors.add(complexSelector); - } - continue; - } - if ( previousHits.has(simpleSelector) ) { continue; } - if ( safeOnly ) { continue; } - simpleSelectors.add(simpleSelector); + const selectorsSet = new Set(); + const hashes = []; + for ( const hash of request.hashes ) { + const bucket = this.lowlyGeneric.get(hash); + if ( bucket === undefined ) { continue; } + for ( const selector of bucket.split(',\n') ) { + selectorsSet.add(selector); } + hashes.push(hash); } // Apply exceptions: it is the responsibility of the caller to provide @@ -894,48 +749,29 @@ FilterContainer.prototype.retrieveGenericSelectors = function(request) { const excepted = []; if ( Array.isArray(request.exceptions) ) { for ( const exception of request.exceptions ) { - if ( - simpleSelectors.delete(exception) || - complexSelectors.delete(exception) - ) { + if ( selectorsSet.delete(exception) ) { excepted.push(exception); } } } - if ( - simpleSelectors.size === 0 && - complexSelectors.size === 0 && - excepted.length === 0 - ) { - return; - } + if ( selectorsSet.size === 0 && excepted.length === 0 ) { return; } const out = { injectedCSS: '', excepted, }; - - const injected = []; - if ( simpleSelectors.size !== 0 ) { - injected.push(...simpleSelectors); - simpleSelectors.clear(); - } - if ( complexSelectors.size !== 0 ) { - injected.push(...complexSelectors); - complexSelectors.clear(); - } - - // Cache and inject looked-up low generic cosmetic filters. - if ( injected.length === 0 ) { return out; } + const selectors = Array.from(selectorsSet); if ( typeof request.hostname === 'string' && request.hostname !== '' ) { this.addToSelectorCache({ - cost: request.surveyCost || 0, hostname: request.hostname, - selectors: injected, + selectors, + hashes, type: 'cosmetic', }); } - out.injectedCSS = `${injected.join(',\n')}\n{display:none!important;}`; + if ( selectors.length === 0 ) { return out; } + + out.injectedCSS = `${selectors.join(',\n')}\n{display:none!important;}`; vAPI.tabs.insertCSS(request.tabId, { code: out.injectedCSS, frameId: request.frameId, @@ -943,8 +779,6 @@ FilterContainer.prototype.retrieveGenericSelectors = function(request) { runAt: 'document_start', }); - //console.timeEnd('cosmeticFilteringEngine.retrieveGenericSelectors'); - return out; }; @@ -972,7 +806,7 @@ FilterContainer.prototype.retrieveSpecificSelectors = function( exceptedFilters: [], proceduralFilters: [], convertedProceduralFilters: [], - noDOMSurveying: this.needDOMSurveyor === false, + disableSurveyor: this.lowlyGeneric.size === 0, }; const injectedCSS = []; @@ -987,10 +821,9 @@ FilterContainer.prototype.retrieveSpecificSelectors = function( // Cached cosmetic filters: these are always declarative. if ( cacheEntry !== undefined ) { - cacheEntry.retrieve('cosmetic', specificSet); - if ( out.noDOMSurveying === false ) { - out.noDOMSurveying = cacheEntry.cosmeticSurveyingMissCount > - cosmeticSurveyingMissCountMax; + cacheEntry.retrieveCosmetic(specificSet, out.genericCosmeticHashes = []); + if ( cacheEntry.disableSurveyor ) { + out.disableSurveyor = true; } } @@ -1123,8 +956,7 @@ FilterContainer.prototype.retrieveSpecificSelectors = function( // CSS selectors for collapsible blocked elements if ( cacheEntry ) { const networkFilters = []; - cacheEntry.retrieve('net', networkFilters); - if ( networkFilters.length !== 0 ) { + if ( cacheEntry.retrieveNet(networkFilters) ) { details.code = `${networkFilters.join('\n')}\n{display:none!important;}`; if ( request.tabId !== undefined ) { vAPI.tabs.insertCSS(request.tabId, details); @@ -1144,31 +976,16 @@ FilterContainer.prototype.getFilterCount = function() { /******************************************************************************/ FilterContainer.prototype.dump = function() { - let genericCount = 0; - for ( const i of [ 'simple', 'complex' ] ) { - for ( const j of [ 'id', 'cl' ] ) { - genericCount += this.lowlyGeneric[j][i].size; - } + const generics = []; + for ( const selectors of this.lowlyGeneric.values() ) { + generics.push(...selectors.split(',\n')); } return [ 'Cosmetic Filtering Engine internals:', `specific: ${this.specificFilters.size}`, - `generic: ${genericCount}`, - `+ lowly.id: ${this.lowlyGeneric.id.simple.size + this.lowlyGeneric.id.complex.size}`, - ` + simple: ${this.lowlyGeneric.id.simple.size}`, - ...Array.from(this.lowlyGeneric.id.simple).map(a => ` ###${a}`), - ` + complex: ${this.lowlyGeneric.id.complex.size}`, - ...Array.from(this.lowlyGeneric.id.complex.values()).map(a => ` ##${a}`), - `+ lowly.class: ${this.lowlyGeneric.cl.simple.size + this.lowlyGeneric.cl.complex.size}`, - ` + simple: ${this.lowlyGeneric.cl.simple.size}`, - ...Array.from(this.lowlyGeneric.cl.simple).map(a => ` ##.${a}`), - ` + complex: ${this.lowlyGeneric.cl.complex.size}`, - ...Array.from(this.lowlyGeneric.cl.complex.values()).map(a => ` ##${a}`), - `+ highly: ${this.highlyGeneric.simple.dict.size + this.highlyGeneric.complex.dict.size}`, - ` + highly.simple: ${this.highlyGeneric.simple.dict.size}`, - ...Array.from(this.highlyGeneric.simple.dict).map(a => ` ##${a}`), - ` + highly.complex: ${this.highlyGeneric.complex.dict.size}`, - ...Array.from(this.highlyGeneric.complex.dict).map(a => ` ##${a}`), + `generic: ${generics.length}`, + `+ selectors: ${this.lowlyGeneric.size}`, + ...generics.map(a => ` ${a}`), ].join('\n'); }; diff --git a/src/js/messaging.js b/src/js/messaging.js index 668f7f61a..1fd1c511e 100644 --- a/src/js/messaging.js +++ b/src/js/messaging.js @@ -823,6 +823,10 @@ const onMessage = function(request, sender, callback) { cosmeticFilteringEngine.addToSelectorCache(request); break; + case 'disableGenericCosmeticFilteringSurveyor': + cosmeticFilteringEngine.disableSurveyor(request); + break; + case 'getCollapsibleBlockedRequests': response = { id: request.id,