From a7fe367eecf2af16d07e8ec69743638cd2b05adf Mon Sep 17 00:00:00 2001 From: gorhill Date: Mon, 12 Sep 2016 10:22:25 -0400 Subject: [PATCH] refactor where appropriate to make use of ES6 Set/Map (#1070) At the same time, the following issues were fixed: - #1954: automatically lookup site-specific scriptlets - https://github.com/uBlockOrigin/uAssets/issues/23 --- platform/chromium/polyfill.js | 214 +++++++++ platform/firefox/polyfill.js | 74 ++++ src/background.html | 2 +- src/js/background.js | 4 +- src/js/cosmetic-filtering.js | 779 +++++++++++++++++---------------- src/js/static-net-filtering.js | 319 ++++---------- src/js/utils.js | 70 ++- tools/make-firefox.sh | 1 + 8 files changed, 843 insertions(+), 620 deletions(-) create mode 100644 platform/chromium/polyfill.js create mode 100644 platform/firefox/polyfill.js diff --git a/platform/chromium/polyfill.js b/platform/chromium/polyfill.js new file mode 100644 index 000000000..ca8d82cd2 --- /dev/null +++ b/platform/chromium/polyfill.js @@ -0,0 +1,214 @@ +/******************************************************************************* + + uBlock Origin - a browser extension to block requests. + Copyright (C) 2014-2016 The uBlock Origin authors + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see {http://www.gnu.org/licenses/}. 
+
+    Home: https://github.com/gorhill/uBlock
+*/
+
+// For background page or non-background pages
+
+'use strict';
+
+/******************************************************************************/
+/******************************************************************************/
+
+// https://github.com/gorhill/uBlock/issues/1067
+// https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Global_Objects/String/startsWith
+// Firefox 17/Chromium 41 supports `startsWith`.
+
+if ( String.prototype.startsWith instanceof Function === false ) {
+    String.prototype.startsWith = function(needle, pos) {
+        if ( typeof pos !== 'number' || pos < 0 ) { // negative start clamps to 0, as natively
+            pos = 0;
+        }
+        return this.lastIndexOf(needle, pos) === pos;
+    };
+}
+
+/******************************************************************************/
+
+// https://github.com/gorhill/uBlock/issues/1067
+// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/endsWith
+// Firefox 17/Chromium 41 supports `endsWith`.
+
+if ( String.prototype.endsWith instanceof Function === false ) {
+    String.prototype.endsWith = function(needle, pos) {
+        if ( typeof pos !== 'number' ) {
+            pos = this.length;
+        }
+        pos -= needle.length;
+        return pos >= 0 && this.indexOf(needle, pos) === pos; // pos < 0: needle too long; -1 must not match indexOf()'s "not found"
+    };
+}
+
+/******************************************************************************/
+
+// https://github.com/gorhill/uBlock/issues/1070
+// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Set#Browser_compatibility
+// This polyfill is designed to fulfill *only* what uBlock Origin needs -- this
+// is not an accurate API of the real Set() type.
+ +if ( self.Set instanceof Function === false ) { + self.Set = function(iter) { + this.clear(); + if ( Array.isArray(iter) ) { + for ( var i = 0, n = iter.length; i < n; i++ ) { + this.add(iter[i]); + } + return; + } + }; + + self.Set.polyfill = true; + + self.Set.prototype.clear = function() { + this._set = Object.create(null); + this.size = 0; + // Iterator stuff + this._values = undefined; + this._i = undefined; + this.value = undefined; + this.done = true; + }; + + self.Set.prototype.add = function(k) { + if ( this._set[k] === undefined ) { + this._set[k] = true; + this.size += 1; + } + return this; + }; + + self.Set.prototype.delete = function(k) { + if ( this._set[k] !== undefined ) { + delete this._set[k]; + this.size -= 1; + return true; + } + return false; + }; + + self.Set.prototype.has = function(k) { + return this._set[k] !== undefined; + }; + + self.Set.prototype.next = function() { + if ( this._i < this.size ) { + this.value = this._values[this._i++]; + } else { + this._values = undefined; + this.value = undefined; + this.done = true; + } + return this; + }; + + self.Set.prototype.values = function() { + this._values = Object.keys(this._set); + this._i = 0; + this.value = undefined; + this.done = false; + return this; + }; +} + +/******************************************************************************/ + +// https://github.com/gorhill/uBlock/issues/1070 +// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Set#Browser_compatibility +// This polyfill is designed to fulfill *only* what uBlock Origin needs -- this +// is not an accurate API of the real Map() type. 
+ +if ( self.Map instanceof Function === false ) { + self.Map = function(iter) { + this.clear(); + if ( Array.isArray(iter) ) { + for ( var i = 0, n = iter.length, entry; i < n; i++ ) { + entry = iter[i]; + this.set(entry[0], entry[1]); + } + return; + } + }; + + self.Map.polyfill = true; + + self.Map.prototype.clear = function() { + this._map = Object.create(null); + this.size = 0; + // Iterator stuff + this._keys = undefined; + this._i = undefined; + this.value = undefined; + this.done = true; + }; + + self.Map.prototype.delete = function(k) { + if ( this._map[k] !== undefined ) { + delete this._map[k]; + this.size -= 1; + return true; + } + return false; + }; + + self.Map.prototype.entries = function() { + this._keys = Object.keys(this._map); + this._i = 0; + this.value = [ undefined, undefined ]; + this.done = false; + return this; + }; + + self.Map.prototype.get = function(k) { + return this._map[k]; + }; + + self.Map.prototype.has = function(k) { + return this._map[k] !== undefined; + }; + + self.Map.prototype.next = function() { + if ( this._i < this.size ) { + var key = this._keys[this._i++]; + this.value[0] = key; + this.value[1] = this._map[key]; + } else { + this._keys = undefined; + this.value = undefined; + this.done = true; + } + return this; + }; + + self.Map.prototype.set = function(k, v) { + if ( v !== undefined ) { + if ( this._map[k] === undefined ) { + this.size += 1; + } + this._map[k] = v; + } else { + if ( this._map[k] !== undefined ) { + this.size -= 1; + } + delete this._map[k]; + } + return this; + }; +} + +/******************************************************************************/ diff --git a/platform/firefox/polyfill.js b/platform/firefox/polyfill.js new file mode 100644 index 000000000..3a8c2b6d8 --- /dev/null +++ b/platform/firefox/polyfill.js @@ -0,0 +1,74 @@ +/******************************************************************************* + + uBlock Origin - a browser extension to block requests. 
+ Copyright (C) 2016 The uBlock Origin authors + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see {http://www.gnu.org/licenses/}. + + Home: https://github.com/gorhill/uBlock +*/ + +// For background page or non-background pages + +'use strict'; + +/******************************************************************************/ + +// Patching for Pale Moon which does not implement ES6 Set/Map. +// Test for non-ES6 Set/Map: check if property `iterator` is present. +// The code is strictly to satisfy uBO's core, not to be an accurate +// implementation of ES6. 
+ +if ( self.Set.prototype.iterator instanceof Function ) { + //console.log('Patching non-ES6 Set() to be more ES6-like.'); + self.Set.prototype._values = self.Set.prototype.values; + self.Set.prototype.values = function() { + this._valueIter = this._values(); + this.value = undefined; + this.done = false; + return this; + }; + self.Set.prototype.next = function() { + try { + this.value = this._valueIter.next(); + } catch (ex) { + this._valueIter = undefined; + this.value = undefined; + this.done = true; + } + return this; + }; +} + +if ( self.Map.prototype.iterator instanceof Function ) { + //console.log('Patching non-ES6 Map() to be more ES6-like.'); + self.Map.prototype._entries = self.Map.prototype.entries; + self.Map.prototype.entries = function() { + this._entryIter = this._entries(); + this.value = undefined; + this.done = false; + return this; + }; + self.Map.prototype.next = function() { + try { + this.value = this._entryIter.next(); + } catch (ex) { + this._entryIter = undefined; + this.value = undefined; + this.done = true; + } + return this; + }; +} + diff --git a/src/background.html b/src/background.html index da29ee695..a8463068e 100644 --- a/src/background.html +++ b/src/background.html @@ -5,10 +5,10 @@ uBlock Origin + - diff --git a/src/js/background.js b/src/js/background.js index 63b420cc4..1a384b588 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -93,8 +93,8 @@ return { // read-only systemSettings: { - compiledMagic: 'splsmclwnvoj', - selfieMagic: 'rkzqonintytj' + compiledMagic: 'ryegxvatkfxe', + selfieMagic: 'ryegxvatkfxe' }, restoreBackupSettings: { diff --git a/src/js/cosmetic-filtering.js b/src/js/cosmetic-filtering.js index dad78ecb2..58cc59850 100644 --- a/src/js/cosmetic-filtering.js +++ b/src/js/cosmetic-filtering.js @@ -177,34 +177,6 @@ FilterHostname.fromSelfie = function(s) { /******************************************************************************/ -// Any selector specific to an entity -// Examples: -// 
google.*###cnt #center_col > #res > #topstuff > .ts - -var FilterEntity = function(s, entity) { - this.s = s; - this.entity = entity; -}; - -FilterEntity.prototype.retrieve = function(entity, out) { - if ( entity.endsWith(this.entity) ) { - out.push(this.s); - } -}; - -FilterEntity.prototype.fid = 'e'; - -FilterEntity.prototype.toSelfie = function() { - return encode(this.s) + '\t' + this.entity; -}; - -FilterEntity.fromSelfie = function(s) { - var pos = s.indexOf('\t'); - return new FilterEntity(decode(s.slice(0, pos)), s.slice(pos + 1)); -}; - -/******************************************************************************/ - var FilterBucket = function(a, b) { this.f = null; this.filters = []; @@ -246,8 +218,7 @@ var FilterParser = function() { this.hostnames = []; this.invalid = false; this.cosmetic = true; - this.reScriptTagFilter = /^script:(contains|inject)\((.+?)\)$/; - this.reNeedHostname = /^(?:.+?:has|.+?:matches-css|:xpath)\(.+?\)$/; + this.reNeedHostname = /^(?:script:contains|script:inject|.+?:has|.+?:matches-css|:xpath)\(.+?\)$/; }; /******************************************************************************/ @@ -356,7 +327,12 @@ FilterParser.prototype.parse = function(raw) { this.hostnames = this.prefix.split(/\s*,\s*/); } - // For some selectors, it is mandatory to have a hostname or entity. + // For some selectors, it is mandatory to have a hostname or entity: + // ##script:contains(...) + // ##script:inject(...) + // ##.foo:has(...) + // ##.foo:matches-css(...) + // ##:xpath(...) if ( this.hostnames.length === 0 && this.unhide === 0 && @@ -366,18 +342,6 @@ FilterParser.prototype.parse = function(raw) { return this; } - // Script tag filters: pre-process them so that can be used with minimal - // overhead in the content script. 
- // Examples: - // focus.de##script:contains(/uabInject/) - // focus.de##script:contains(uabInject) - // focus.de##script:inject(uabinject-defuser.js) - - var matches = this.reScriptTagFilter.exec(this.suffix); - if ( matches !== null ) { - return this.parseScriptTagFilter(matches); - } - return this; }; @@ -400,43 +364,6 @@ FilterParser.prototype.translateAdguardCSSInjectionFilter = function(raw) { ':style(' + matches[4].trim() + ')'; }; -/******************************************************************************/ - -FilterParser.prototype.parseScriptTagFilter = function(matches) { - // Currently supported only as non-generic selector. Also, exception - // script tag filter makes no sense, ignore. - if ( this.hostnames.length === 0 || this.unhide === 1 ) { - this.invalid = true; - return this; - } - - var token = matches[2]; - - switch ( matches[1] ) { - case 'contains': - // Plain string- or regex-based? - if ( token.startsWith('/') === false || token.endsWith('/') === false ) { - token = token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); - } else { - token = token.slice(1, -1); - if ( isBadRegex(token) ) { - µb.logger.writeOne('', 'error', 'Cosmetic filtering – bad regular expression: ' + this.raw + ' (' + isBadRegex.message + ')'); - this.invalid = true; - } - } - this.suffix = 'script?' 
+ token; - break; - case 'inject': - this.suffix = 'script+' + token; - break; - default: - this.invalid = true; - break; - } - - return this; -}; - /******************************************************************************/ /******************************************************************************/ @@ -598,11 +525,11 @@ SelectorCacheEntry.prototype.retrieve = function(type, out) { // +-- filter type (0=hide 1=unhide) // -var makeHash = function(unhide, token, mask) { +var makeHash = function(token) { // Ref: Given a URL, returns a unique 4-character long hash string // Based on: FNV32a // http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-reference-source - // The rest is custom, suited for µBlock. + // The rest is custom, suited for uBlock. var i1 = token.length; var i2 = i1 >> 1; var i4 = i1 >> 2; @@ -631,10 +558,7 @@ var makeHash = function(unhide, token, mask) { hval ^= token.charCodeAt(i1-1); hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24); hval >>>= 0; - hval &= mask; - if ( unhide !== 0 ) { - hval |= 0x20000; - } + hval &= 0x0FFF; // 12 bits return hval.toString(36); }; @@ -657,9 +581,7 @@ var makeHash = function(unhide, token, mask) { // Specific filers can be enforced before the main document is loaded. 
var FilterContainer = function() { - this.domainHashMask = (1 << 10) - 1; // 10 bits - this.type0NoDomainHash = 'type0NoDomain'; - this.type1NoDomainHash = 'type1NoDomain'; + this.noDomainHash = '-'; this.parser = new FilterParser(); this.selectorCachePruneDelay = 10 * 60 * 1000; // 15 minutes this.selectorCacheAgeMax = 120 * 60 * 1000; // 120 minutes @@ -667,6 +589,12 @@ var FilterContainer = function() { this.netSelectorCacheCountMax = netSelectorCacheHighWaterMark; this.selectorCacheTimer = null; this.reHasUnicode = /[^\x00-\x7F]/; + this.reClassOrIdSelector = /^[#.][\w-]+$/; + this.rePlainSelector = /^[#.][\w-]+/; + this.rePlainSelectorEx = /^[^#.\[(]+([#.][\w-]+)/; + this.reHighLow = /^[a-z]*\[(?:alt|title)="[^"]+"\]$/; + this.reHighMedium = /^\[href\^="https?:\/\/([^"]{8})[^"]*"\]$/; + this.reScriptSelector = /^script:(contains|inject)\((.+)\)$/; this.punycode = punycode; this.reset(); }; @@ -681,7 +609,7 @@ FilterContainer.prototype.reset = function() { this.frozen = false; this.acceptedCount = 0; this.discardedCount = 0; - this.duplicateBuster = {}; + this.duplicateBuster = new Set(); this.selectorCache = {}; this.selectorCacheCount = 0; @@ -694,7 +622,8 @@ FilterContainer.prototype.reset = function() { this.hasGenericHide = false; // [class], [id] - this.lowGenericHide = {}; + this.lowGenericHide = new Set(); + this.lowGenericHideEx = new Map(); this.lowGenericHideCount = 0; // [alt="..."], [title="..."] @@ -719,18 +648,17 @@ FilterContainer.prototype.reset = function() { this.genericDonthide = []; // hostname, entity-based filters - this.hostnameFilters = {}; - this.entityFilters = {}; + this.specificFilters = new Map(); this.scriptTagFilters = {}; this.scriptTagFilterCount = 0; - this.scriptTags = {}; - this.scriptTagCount = 0; + this.userScripts = new Map(); + this.userScriptCount = 0; }; /******************************************************************************/ FilterContainer.prototype.freeze = function() { - this.duplicateBuster = {}; + 
this.duplicateBuster = new Set(); if ( this.highHighSimpleGenericHide !== '' ) { this.highHighSimpleGenericHideArray.unshift(this.highHighSimpleGenericHide); @@ -785,11 +713,12 @@ FilterContainer.prototype.isValidSelector = (function() { }; } - var reHasSelector = /^(.+?):has\((.+?)\)$/; - var reMatchesCSSSelector = /^(.+?):matches-css\((.+?)\)$/; - var reXpathSelector = /^:xpath\((.+?)\)$/; - var reStyleSelector = /^(.+?):style\((.+?)\)$/; - var reStyleBad = /url\([^)]+\)/; + var reHasSelector = /^(.+?):has\((.+?)\)$/, + reMatchesCSSSelector = /^(.+?):matches-css\((.+?)\)$/, + reXpathSelector = /^:xpath\((.+?)\)$/, + reStyleSelector = /^(.+?):style\((.+?)\)$/, + reStyleBad = /url\([^)]+\)/, + reScriptSelector = /^script:(contains|inject)\((.+)\)$/; // Keep in mind: // https://github.com/gorhill/uBlock/issues/693 @@ -836,10 +765,14 @@ FilterContainer.prototype.isValidSelector = (function() { return isValidCSSSelector(matches[1]) && reStyleBad.test(matches[2]) === false; } // Special `script:` filter? 
- if ( s.startsWith('script') ) { - if ( s.startsWith('?', 6) || s.startsWith('+', 6) ) { + matches = reScriptSelector.exec(s); + if ( matches !== null ) { + if ( matches[1] === 'inject' ) { return true; } + return matches[2].startsWith('/') === false || + matches[2].endsWith('/') === false || + isBadRegex(matches[2].slice(1, -1)) === false; } µb.logger.writeOne('', 'error', 'Cosmetic filtering – invalid filter: ' + s); return false; @@ -884,11 +817,7 @@ FilterContainer.prototype.compile = function(s, out) { if ( hostname.startsWith('~') === false ) { applyGlobally = false; } - if ( hostname.endsWith('.*') ) { - this.compileEntitySelector(hostname, parsed, out); - } else { - this.compileHostnameSelector(hostname, parsed, out); - } + this.compileHostnameSelector(hostname, parsed, out); } if ( applyGlobally ) { this.compileGenericSelector(parsed, out); @@ -900,20 +829,19 @@ FilterContainer.prototype.compile = function(s, out) { /******************************************************************************/ FilterContainer.prototype.compileGenericSelector = function(parsed, out) { - var selector = parsed.suffix; - - // https://github.com/chrisaljoudi/uBlock/issues/497 - // All generic exception filters are put in the same bucket: they are - // expected to be very rare. - if ( parsed.unhide ) { - if ( this.isValidSelector(selector) ) { - out.push('c\vg1\v' + selector); - } - return; + if ( parsed.unhide === 0 ) { + this.compileGenericHideSelector(parsed, out); + } else { + this.compileGenericUnhideSelector(parsed, out); } +}; - var type = selector.charAt(0); - var matches; +/******************************************************************************/ + +FilterContainer.prototype.compileGenericHideSelector = function(parsed, out) { + var selector = parsed.suffix, + type = selector.charAt(0), + matches; if ( type === '#' || type === '.' 
) { matches = this.rePlainSelector.exec(selector); @@ -924,19 +852,12 @@ FilterContainer.prototype.compileGenericSelector = function(parsed, out) { // is valid, the regex took care of this. Most generic selector falls // into that category. if ( matches[0] === selector ) { - out.push( - 'c\vlg\v' + - matches[0] - ); + out.push('c\vlg\v' + matches[0]); return; } // Many-CSS rules if ( this.isValidSelector(selector) ) { - out.push( - 'c\vlg+\v' + - matches[0] + '\v' + - selector - ); + out.push('c\vlg+\v' + matches[0] + '\v' + selector); } return; } @@ -954,11 +875,14 @@ FilterContainer.prototype.compileGenericSelector = function(parsed, out) { // [href^="..."] will go in high-medium generic bin. matches = this.reHighMedium.exec(selector); if ( matches && matches.length === 2 ) { - out.push( - 'c\vhmg0\v' + - matches[1] + '\v' + - selector - ); + out.push('c\vhmg0\v' + matches[1] + '\v' + selector); + return; + } + + // script:contains(...) + // script:inject(...) + if ( this.reScriptSelector.test(selector) ) { + out.push('c\vjs\v0\v\v' + selector); return; } @@ -967,11 +891,7 @@ FilterContainer.prototype.compileGenericSelector = function(parsed, out) { // as a low generic cosmetic filter. 
matches = this.rePlainSelectorEx.exec(selector); if ( matches && matches.length === 2 ) { - out.push( - 'c\vlg+\v' + - matches[1] + '\v' + - selector - ); + out.push('c\vlg+\v' + matches[1] + '\v' + selector); return; } @@ -984,11 +904,27 @@ FilterContainer.prototype.compileGenericSelector = function(parsed, out) { } }; -FilterContainer.prototype.reClassOrIdSelector = /^[#.][\w-]+$/; -FilterContainer.prototype.rePlainSelector = /^[#.][\w-]+/; -FilterContainer.prototype.rePlainSelectorEx = /^[^#.\[(]+([#.][\w-]+)/; -FilterContainer.prototype.reHighLow = /^[a-z]*\[(?:alt|title)="[^"]+"\]$/; -FilterContainer.prototype.reHighMedium = /^\[href\^="https?:\/\/([^"]{8})[^"]*"\]$/; +/******************************************************************************/ + +FilterContainer.prototype.compileGenericUnhideSelector = function(parsed, out) { + var selector = parsed.suffix; + + if ( this.isValidSelector(selector) !== true ) { + return; + } + + // script:contains(...) + // script:inject(...) + if ( this.reScriptSelector.test(selector) ) { + out.push('c\vjs\v1\v\v' + selector); + return; + } + + // https://github.com/chrisaljoudi/uBlock/issues/497 + // All generic exception filters are put in the same bucket: they are + // expected to be very rare. + out.push('c\vg1\v' + selector); +}; /******************************************************************************/ @@ -1002,38 +938,35 @@ FilterContainer.prototype.compileHostnameSelector = function(hostname, parsed, o // punycode if needed if ( this.reHasUnicode.test(hostname) ) { - //console.debug('µBlock.cosmeticFilteringEngine/FilterContainer.compileHostnameSelector> punycoding:', hostname); hostname = this.punycode.toASCII(hostname); } + var domain = this.µburi.domainFromHostname(hostname), + hash; + + // script:contains(...) + // script:inject(...) + if ( this.reScriptSelector.test(parsed.suffix) ) { + hash = domain !== '' ? domain : this.noDomainHash; + if ( unhide ) { + hash = '!' 
+ hash; + } + out.push('c\vjs\v' + hash + '\v' + hostname + '\v' + parsed.suffix); + return; + } + // https://github.com/chrisaljoudi/uBlock/issues/188 // If not a real domain as per PSL, assign a synthetic one - var hash; - var domain = this.µburi.domainFromHostname(hostname); - if ( domain === '' ) { - hash = unhide === 0 ? this.type0NoDomainHash : this.type1NoDomainHash; + if ( hostname.endsWith('.*') === false ) { + hash = domain !== '' ? makeHash(domain) : this.noDomainHash; } else { - hash = makeHash(unhide, domain, this.domainHashMask); + hash = makeHash(hostname); + } + if ( unhide ) { + hash = '!' + hash; } - out.push( - 'c\v' + - 'h\v' + - hash + '\v' + - hostname + '\v' + - parsed.suffix - ); -}; -/******************************************************************************/ - -FilterContainer.prototype.compileEntitySelector = function(hostname, parsed, out) { - var entity = hostname.slice(0, -2); - out.push( - 'c\v' + - 'e\v' + - entity + '\v' + - parsed.suffix - ); + out.push('c\vh\v' + hash + '\v' + hostname + '\v' + parsed.suffix); }; /******************************************************************************/ @@ -1048,112 +981,122 @@ FilterContainer.prototype.fromCompiledContent = function(lineIter, skipGenericCo return; } - var line, fields, filter, key, bucket; + var line, field0, field1, field2, field3, filter, bucket, + fieldIter = new µb.FieldIterator('\v'); while ( lineIter.eot() === false ) { if ( lineIter.text.charCodeAt(lineIter.offset) !== 0x63 /* 'c' */ ) { return; } - line = lineIter.next().slice(2); + line = lineIter.next(); this.acceptedCount += 1; - if ( this.duplicateBuster.hasOwnProperty(line) ) { + if ( this.duplicateBuster.has(line) ) { this.discardedCount += 1; continue; } - this.duplicateBuster[line] = true; + this.duplicateBuster.add(line); - fields = line.split('\v'); + fieldIter.first(line); + field0 = fieldIter.next(); + field1 = fieldIter.next(); - // h [\t] ir [\t] twitter.com [\t] .promoted-tweet - if ( fields[0] 
=== 'h' ) { - // Special filter: script tags. Not a real CSS selector. - if ( fields[3].startsWith('script') ) { - this.createScriptFilter(fields[2], fields[3].slice(6)); - continue; - } - filter = new FilterHostname(fields[3], fields[2]); - bucket = this.hostnameFilters[fields[1]]; + // h [\v] hash [\v] example.com [\v] .promoted-tweet + // h [\v] hash [\v] example.* [\v] .promoted-tweet + if ( field0 === 'h' ) { + field2 = fieldIter.next(); + field3 = fieldIter.next(); + filter = new FilterHostname(field3, field2); + bucket = this.specificFilters.get(field1); if ( bucket === undefined ) { - this.hostnameFilters[fields[1]] = filter; + this.specificFilters.set(field1, filter); } else if ( bucket instanceof FilterBucket ) { bucket.add(filter); } else { - this.hostnameFilters[fields[1]] = new FilterBucket(bucket, filter); + this.specificFilters.set(field1, new FilterBucket(bucket, filter)); } continue; } - // lg [\t] 105 [\t] .largeAd - // lg+ [\t] 2jx [\t] .Mpopup + #Mad > #MadZone - if ( fields[0] === 'lg' || fields[0] === 'lg+' ) { - filter = fields[0] === 'lg' ? - filterPlain : - new FilterPlainMore(fields[2]); - bucket = this.lowGenericHide[fields[1]]; + // lg [\v] .largeAd + if ( field0 === 'lg' ) { + bucket = this.lowGenericHideEx.get(field1); if ( bucket === undefined ) { - this.lowGenericHide[fields[1]] = filter; - } else if ( bucket instanceof FilterBucket ) { - bucket.add(filter); + this.lowGenericHide.add(field1); + } else if ( Array.isArray(bucket) ) { + bucket.push(field1); } else { - this.lowGenericHide[fields[1]] = new FilterBucket(bucket, filter); + this.lowGenericHideEx.set(field1, [ bucket, field1 ]); } this.lowGenericHideCount += 1; continue; } - // entity [\t] selector - if ( fields[0] === 'e' ) { - // Special filter: script tags. Not a real CSS selector. 
- if ( fields[2].startsWith('script') ) { - this.createScriptFilter(fields[1], fields[2].slice(6)); - continue; - } - bucket = this.entityFilters[fields[1]]; + // lg+ [\v] .Mpopup [\v] .Mpopup + #Mad > #MadZone + if ( field0 === 'lg+' ) { + field2 = fieldIter.next(); + bucket = this.lowGenericHideEx.get(field1); if ( bucket === undefined ) { - this.entityFilters[fields[1]] = [fields[2]]; + if ( this.lowGenericHide.has(field1) ) { + this.lowGenericHideEx.set(field1, [ field1, field2 ]); + } else { + this.lowGenericHideEx.set(field1, field2); + this.lowGenericHide.add(field1); + } + } else if ( Array.isArray(bucket) ) { + bucket.push(field2); } else { - bucket.push(fields[2]); + this.lowGenericHideEx.set(field1, [ bucket, field2 ]); } + this.lowGenericHideCount += 1; continue; } - if ( fields[0] === 'hlg0' ) { - this.highLowGenericHide[fields[1]] = true; + if ( field0 === 'hlg0' ) { + this.highLowGenericHide[field1] = true; this.highLowGenericHideCount += 1; continue; } - if ( fields[0] === 'hmg0' ) { - key = fields[1]; - bucket = this.highMediumGenericHide[key]; + if ( field0 === 'hmg0' ) { + field2 = fieldIter.next(); + bucket = this.highMediumGenericHide[field1]; if ( bucket === undefined ) { - this.highMediumGenericHide[key] = fields[2]; + this.highMediumGenericHide[field1] = field2; } else if ( Array.isArray(bucket) ) { - bucket.push(fields[2]); + bucket.push(field2); } else { - this.highMediumGenericHide[key] = [bucket, fields[2]]; + this.highMediumGenericHide[field1] = [bucket, field2]; } this.highMediumGenericHideCount += 1; continue; } - if ( fields[0] === 'hhsg0' ) { - this.highHighSimpleGenericHideArray.push(fields[1]); + if ( field0 === 'hhsg0' ) { + this.highHighSimpleGenericHideArray.push(field1); this.highHighSimpleGenericHideCount += 1; continue; } - if ( fields[0] === 'hhcg0' ) { - this.highHighComplexGenericHideArray.push(fields[1]); + if ( field0 === 'hhcg0' ) { + this.highHighComplexGenericHideArray.push(field1); 
this.highHighComplexGenericHideCount += 1; continue; } + // js [\v] hash [\v] example.com [\v] script:contains(...) + // js [\v] hash [\v] example.com [\v] script:inject(...) + if ( field0 === 'js' ) { + field2 = fieldIter.next(); + field3 = fieldIter.next(); + this.createScriptFilter(field1, field2, field3); + continue; + } + // https://github.com/chrisaljoudi/uBlock/issues/497 // Generic exception filters: expected to be a rare occurrence. - if ( fields[0] === 'g1' ) { - this.genericDonthide.push(fields[1]); + if ( field0 === 'g1' ) { + this.genericDonthide.push(field1); continue; } @@ -1164,64 +1107,58 @@ FilterContainer.prototype.fromCompiledContent = function(lineIter, skipGenericCo /******************************************************************************/ FilterContainer.prototype.skipGenericCompiledContent = function(lineIter) { - var line, fields, filter, bucket; + var line, field0, field1, field2, field3, filter, bucket, + fieldIter = new µb.FieldIterator('\v'); while ( lineIter.eot() === false ) { if ( lineIter.text.charCodeAt(lineIter.offset) !== 0x63 /* 'c' */ ) { return; } - line = lineIter.next().slice(2); + line = lineIter.next(); this.acceptedCount += 1; - if ( this.duplicateBuster.hasOwnProperty(line) ) { + if ( this.duplicateBuster.has(line) ) { this.discardedCount += 1; continue; } - fields = line.split('\v'); + fieldIter.first(line); + field0 = fieldIter.next(); + field1 = fieldIter.next(); - // h [\t] ir [\t] twitter.com [\t] .promoted-tweet - if ( fields[0] === 'h' ) { - this.duplicateBuster[line] = true; - // Special filter: script tags. Not a real CSS selector. 
- if ( fields[3].startsWith('script') ) { - this.createScriptFilter(fields[2], fields[3].slice(6)); - continue; - } - filter = new FilterHostname(fields[3], fields[2]); - bucket = this.hostnameFilters[fields[1]]; + // h [\v] hash [\v] example.com [\v] .promoted-tweet + // h [\v] hash [\v] example.* [\v] .promoted-tweet + if ( field0 === 'h' ) { + field2 = fieldIter.next(); + field3 = fieldIter.next(); + this.duplicateBuster.add(line); + filter = new FilterHostname(field3, field2); + bucket = this.specificFilters.get(field1); if ( bucket === undefined ) { - this.hostnameFilters[fields[1]] = filter; + this.specificFilters.set(field1, filter); } else if ( bucket instanceof FilterBucket ) { bucket.add(filter); } else { - this.hostnameFilters[fields[1]] = new FilterBucket(bucket, filter); + this.specificFilters.set(field1, new FilterBucket(bucket, filter)); } continue; } - // entity [\t] selector - if ( fields[0] === 'e' ) { - this.duplicateBuster[line] = true; - // Special filter: script tags. Not a real CSS selector. - if ( fields[2].startsWith('script') ) { - this.createScriptFilter(fields[1], fields[2].slice(6)); - continue; - } - bucket = this.entityFilters[fields[1]]; - if ( bucket === undefined ) { - this.entityFilters[fields[1]] = [fields[2]]; - } else { - bucket.push(fields[2]); - } + // js [\v] hash [\v] example.com [\v] script:contains(...) + // js [\v] hash [\v] example.com [\v] script:inject(...) + if ( field0 === 'js' ) { + field2 = fieldIter.next(); + field3 = fieldIter.next(); + this.duplicateBuster.add(line); + this.createScriptFilter(field1, field2, field3); continue; } // https://github.com/chrisaljoudi/uBlock/issues/497 // Generic exception filters: expected to be a rare occurrence. 
- if ( fields[0] === 'g1' ) { - this.duplicateBuster[line] = true; - this.genericDonthide.push(fields[1]); + if ( field0 === 'g1' ) { + this.duplicateBuster.add(line); + this.genericDonthide.push(field1); continue; } @@ -1232,31 +1169,32 @@ FilterContainer.prototype.skipGenericCompiledContent = function(lineIter) { /******************************************************************************/ FilterContainer.prototype.skipCompiledContent = function(lineIter) { - var line, fields; + var line, field0, field1, field2, field3, + fieldIter = new µb.FieldIterator('\v'); while ( lineIter.eot() === false ) { if ( lineIter.text.charCodeAt(lineIter.offset) !== 0x63 /* 'c' */ ) { return; } - line = lineIter.next().slice(2); + line = lineIter.next(); this.acceptedCount += 1; - if ( this.duplicateBuster.hasOwnProperty(line) ) { + if ( this.duplicateBuster.has(line) ) { this.discardedCount += 1; continue; } - fields = line.split('\v'); + fieldIter.first(line); + field0 = fieldIter.next(); - if ( fields[0] === 'h' && fields[3].startsWith('script') ) { - this.duplicateBuster[line] = true; - this.createScriptFilter(fields[2], fields[3].slice(6)); - continue; - } - - if ( fields[0] === 'e' && fields[2].startsWith('script') ) { - this.duplicateBuster[line] = true; - this.createScriptFilter(fields[1], fields[2].slice(6)); + // js [\v] hash [\v] example.com [\v] script:contains(...) + // js [\v] hash [\v] example.com [\v] script:inject(...) + if ( field0 === 'js' ) { + this.duplicateBuster.add(line); + field1 = fieldIter.next(); + field2 = fieldIter.next(); + field3 = fieldIter.next(); + this.createScriptFilter(field1, field2, field3); continue; } @@ -1266,23 +1204,34 @@ FilterContainer.prototype.skipCompiledContent = function(lineIter) { /******************************************************************************/ -FilterContainer.prototype.createScriptFilter = function(hostname, s) { - if ( s.charAt(0) === '?' 
) { - return this.createScriptTagFilter(hostname, s.slice(1)); +FilterContainer.prototype.createScriptFilter = function(hash, hostname, selector) { + if ( selector.startsWith('script:contains') ) { + return this.createScriptTagFilter(hash, hostname, selector); } - if ( s.charAt(0) === '+' ) { - return this.createScriptTagInjector(hostname, s.slice(1)); + if ( selector.startsWith('script:inject') ) { + return this.createUserScriptRule(hash, hostname, selector); } }; /******************************************************************************/ -FilterContainer.prototype.createScriptTagFilter = function(hostname, s) { +// 0123456789012345678901 +// script:contains(token) +// ^ ^ +// 16 -1 + +FilterContainer.prototype.createScriptTagFilter = function(hash, hostname, selector) { + var token = selector.slice(16, -1); + token = token.startsWith('/') && token.endsWith('/') + ? token.slice(1, -1) + : token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + if ( this.scriptTagFilters.hasOwnProperty(hostname) ) { - this.scriptTagFilters[hostname] += '|' + s; + this.scriptTagFilters[hostname] += '|' + token; } else { - this.scriptTagFilters[hostname] = s; + this.scriptTagFilters[hostname] = token; } + this.scriptTagFilterCount += 1; }; @@ -1293,6 +1242,8 @@ FilterContainer.prototype.retrieveScriptTagRegex = function(domain, hostname) { return; } var out = [], hn = hostname, pos; + + // Hostname-based for (;;) { if ( this.scriptTagFilters.hasOwnProperty(hn) ) { out.push(this.scriptTagFilters[hn]); @@ -1306,9 +1257,11 @@ FilterContainer.prototype.retrieveScriptTagRegex = function(domain, hostname) { } hn = hn.slice(pos + 1); } + + // Entity-based pos = domain.indexOf('.'); if ( pos !== -1 ) { - hn = domain.slice(0, pos); + hn = domain.slice(0, pos) + '.*'; if ( this.scriptTagFilters.hasOwnProperty(hn) ) { out.push(this.scriptTagFilters[hn]); } @@ -1320,80 +1273,130 @@ FilterContainer.prototype.retrieveScriptTagRegex = function(domain, hostname) { 
/******************************************************************************/ -FilterContainer.prototype.createScriptTagInjector = function(hostname, s) { - if ( this.scriptTags.hasOwnProperty(hostname) ) { - this.scriptTags[hostname].push(s); - } else { - this.scriptTags[hostname] = [s]; - } - this.scriptTagCount += 1; -}; +// userScripts{hash} => FilterHostname | FilterBucket +FilterContainer.prototype.createUserScriptRule = function(hash, hostname, selector) { + var filter = new FilterHostname(selector, hostname); + var bucket = this.userScripts.get(hash); + if ( bucket === undefined ) { + this.userScripts.set(hash, filter); + } else if ( bucket instanceof FilterBucket ) { + bucket.add(filter); + } else { + this.userScripts.set(hash, new FilterBucket(bucket, filter)); + } + this.userScriptCount += 1; +}; /******************************************************************************/ -FilterContainer.prototype.retrieveScriptTags = function(domain, hostname) { - if ( this.scriptTagCount === 0 ) { +// https://github.com/gorhill/uBlock/issues/1954 + +// 01234567890123456789 +// script:inject(token) +// ^ ^ +// 14 -1 + +FilterContainer.prototype.retrieveUserScripts = function(domain, hostname) { + if ( this.userScriptCount === 0 ) { return; } + var reng = µb.redirectEngine; if ( !reng ) { return; } + var out = [], - hn = hostname, pos, rnames, i, content; + scripts = Object.create(null), + pos = domain.indexOf('.'), + entity = pos !== -1 ? 
domain.slice(0, pos) + '.*' : '', + token, content; + + // Implicit + var hn = hostname; for (;;) { - rnames = this.scriptTags[hn]; - i = rnames && rnames.length || 0; - while ( i-- ) { - if ( (content = reng.resourceContentFromName(rnames[i], 'application/javascript')) ) { - out.push(content); - } - } - if ( hn === domain ) { - break; + token = hn + '.js'; + if ( + (scripts[token] === undefined) && + (content = reng.resourceContentFromName(token, 'application/javascript')) + ) { + scripts[token] = out.length; + out.push(content); } + if ( hn === domain ) { break; } pos = hn.indexOf('.'); - if ( pos === -1 ) { - break; - } + if ( pos === -1 ) { break; } hn = hn.slice(pos + 1); } - pos = domain.indexOf('.'); - if ( pos !== -1 ) { - rnames = this.scriptTags[domain.slice(0, pos)]; - i = rnames && rnames.length || 0; - while ( i-- ) { - if ( (content = reng.resourceContentFromName(rnames[i], 'application/javascript')) ) { - out.push(content); - } + + // Explicit (hash is domain). + var selectors = [], + selector, bucket; + if ( (bucket = this.userScripts.get(domain)) ) { + bucket.retrieve(hostname, selectors); + } + if ( entity !== '' && (bucket = this.userScripts.get(entity)) ) { + bucket.retrieve(hostname, selectors); + } + var i = selectors.length; + while ( i-- ) { + selector = selectors[i]; + token = selector.slice(14, -1); + if ( + (scripts[token] === undefined) && + (content = reng.resourceContentFromName(token, 'application/javascript')) + ) { + scripts[token] = out.length; + out.push(content); } } + + if ( out.length === 0 ) { + return; + } + + // Exceptions should be rare, so we check for exception only if there are + // scriptlets returned. + var exceptions = [], j; + if ( (bucket = this.userScripts.get('!' + domain)) ) { + bucket.retrieve(hostname, exceptions); + } + if ( entity !== '' && (bucket = this.userScripts.get('!' 
+ entity)) ) { + bucket.retrieve(hostname, exceptions); + } + i = exceptions.length; + while ( i-- ) { + token = exceptions[i].slice(14, -1); + if ( (j = scripts[token]) !== undefined ) { + out[j] = '// User script "' + token + '" excepted.\n'; + } + } + return out.join('\n'); }; /******************************************************************************/ FilterContainer.prototype.toSelfie = function() { - var selfieFromDict = function(dict) { - var selfie = []; - var bucket, ff, n, i, f; - for ( var k in dict ) { - if ( dict.hasOwnProperty(k) === false ) { - continue; + var selfieFromMap = function(map) { + var selfie = [], + entry, bucket, ff, f, + iterator = map.entries(); + for (;;) { + entry = iterator.next(); + if ( entry.done ) { + break; } - // We need to encode the key because there could be a `\n` - // character in it, which would trip the code at parse time. - selfie.push('k\t' + encode(k)); - bucket = dict[k]; + selfie.push('k\t' + entry.value[0]); + bucket = entry.value[1]; selfie.push(bucket.fid + '\t' + bucket.toSelfie()); if ( bucket.fid !== '[]' ) { continue; } ff = bucket.filters; - n = ff.length; - for ( i = 0; i < n; i++ ) { - f = ff[i]; + for ( var j = 0, nj = ff.length; j < nj; j++ ) { + f = ff[j]; selfie.push(f.fid + '\t' + f.toSelfie()); } } @@ -1403,10 +1406,10 @@ FilterContainer.prototype.toSelfie = function() { return { acceptedCount: this.acceptedCount, discardedCount: this.discardedCount, - hostnameSpecificFilters: selfieFromDict(this.hostnameFilters), - entitySpecificFilters: this.entityFilters, + specificFilters: selfieFromMap(this.specificFilters), hasGenericHide: this.hasGenericHide, - lowGenericHide: selfieFromDict(this.lowGenericHide), + lowGenericHide: µb.setToArray(this.lowGenericHide), + lowGenericHideEx: µb.mapToArray(this.lowGenericHideEx), lowGenericHideCount: this.lowGenericHideCount, highLowGenericHide: this.highLowGenericHide, highLowGenericHideCount: this.highLowGenericHideCount, @@ -1419,8 +1422,8 @@ 
FilterContainer.prototype.toSelfie = function() { genericDonthide: this.genericDonthide, scriptTagFilters: this.scriptTagFilters, scriptTagFilterCount: this.scriptTagFilterCount, - scriptTags: this.scriptTags, - scriptTagCount: this.scriptTagCount + userScripts: selfieFromMap(this.userScripts), + userScriptCount: this.userScriptCount }; }; @@ -1431,18 +1434,17 @@ FilterContainer.prototype.fromSelfie = function(selfie) { '[]': FilterBucket, '#': FilterPlain, '#+': FilterPlainMore, - 'h': FilterHostname, - 'e': FilterEntity + 'h': FilterHostname }; - var dictFromSelfie = function(selfie) { - var dict = {}; - var dictKey; - var bucket = null; - var rawText = selfie; - var rawEnd = rawText.length; - var lineBeg = 0, lineEnd; - var line, pos, what, factory; + var mapFromSelfie = function(selfie) { + var map = new Map(), + key, + bucket = null, + rawText = selfie, + rawEnd = rawText.length, + lineBeg = 0, lineEnd, + line, pos, what, factory; while ( lineBeg < rawEnd ) { lineEnd = rawText.indexOf('\n', lineBeg); if ( lineEnd < 0 ) { @@ -1453,28 +1455,29 @@ FilterContainer.prototype.fromSelfie = function(selfie) { pos = line.indexOf('\t'); what = line.slice(0, pos); if ( what === 'k' ) { - dictKey = decode(line.slice(pos + 1)); + key = line.slice(pos + 1); bucket = null; continue; } factory = factories[what]; if ( bucket === null ) { - bucket = dict[dictKey] = factory.fromSelfie(line.slice(pos + 1)); + bucket = factory.fromSelfie(line.slice(pos + 1)); + map.set(key, bucket); continue; } // When token key is reused, it can't be anything // else than FilterBucket bucket.add(factory.fromSelfie(line.slice(pos + 1))); } - return dict; + return map; }; this.acceptedCount = selfie.acceptedCount; this.discardedCount = selfie.discardedCount; - this.hostnameFilters = dictFromSelfie(selfie.hostnameSpecificFilters); - this.entityFilters = selfie.entitySpecificFilters; + this.specificFilters = mapFromSelfie(selfie.specificFilters); this.hasGenericHide = selfie.hasGenericHide; - 
this.lowGenericHide = dictFromSelfie(selfie.lowGenericHide); + this.lowGenericHide = µb.setFromArray(selfie.lowGenericHide); + this.lowGenericHideEx = µb.mapFromArray(selfie.lowGenericHideEx); this.lowGenericHideCount = selfie.lowGenericHideCount; this.highLowGenericHide = selfie.highLowGenericHide; this.highLowGenericHideCount = selfie.highLowGenericHideCount; @@ -1487,8 +1490,8 @@ FilterContainer.prototype.fromSelfie = function(selfie) { this.genericDonthide = selfie.genericDonthide; this.scriptTagFilters = selfie.scriptTagFilters; this.scriptTagFilterCount = selfie.scriptTagFilterCount; - this.scriptTags = selfie.scriptTags; - this.scriptTagCount = selfie.scriptTagCount; + this.userScripts = mapFromSelfie(selfie.userScripts); + this.userScriptCount = selfie.userScriptCount; this.frozen = true; }; @@ -1621,27 +1624,29 @@ FilterContainer.prototype.retrieveGenericSelectors = function(request) { }; } - var hideSelectors = r.hide; - var selector, bucket; - var selectors = request.selectors; - var i = selectors.length; + var hideSelectors = r.hide, + selectors = request.selectors, + i = selectors.length, + selector, bucket; while ( i-- ) { - if ( - (selector = selectors[i]) && - (bucket = this.lowGenericHide[selector]) - ) { - bucket.retrieve(selector, hideSelectors); + selector = selectors[i]; + if ( this.lowGenericHide.has(selector) === false ) { + continue; + } + if ( (bucket = this.lowGenericHideEx.get(selector)) ) { + if ( Array.isArray(bucket) ) { + hideSelectors = hideSelectors.concat(bucket); + } else { + hideSelectors.push(bucket); + } + } else { + hideSelectors.push(selector); } } + r.hide = hideSelectors; //quickProfiler.stop(); - //console.log( - // 'µBlock> abp-hide-filters.js: %d selectors in => %d selectors out', - // request.selectors.length, - // r.hide.length + r.donthide.length - //); - return r; }; @@ -1657,6 +1662,7 @@ FilterContainer.prototype.retrieveDomainSelectors = function(request, noCosmetic var hostname = 
this.µburi.hostnameFromURI(request.locationURL), domain = this.µburi.domainFromHostname(hostname) || hostname, pos = domain.indexOf('.'), + entity = pos === -1 ? '' : domain.slice(0, pos - domain.length) + '.*', cacheEntry = this.selectorCache[hostname]; // https://github.com/chrisaljoudi/uBlock/issues/587 @@ -1669,29 +1675,50 @@ FilterContainer.prototype.retrieveDomainSelectors = function(request, noCosmetic var r = { ready: this.frozen, domain: domain, - entity: pos === -1 ? domain : domain.slice(0, pos - domain.length), + entity: entity, noDOMSurveying: this.hasGenericHide === false, cosmeticHide: [], cosmeticDonthide: [], netHide: [], - scripts: this.retrieveScriptTags(domain, hostname) + scripts: undefined }; if ( !noCosmeticFiltering ) { var hash, bucket; - hash = makeHash(0, domain, this.domainHashMask); - if ( (bucket = this.hostnameFilters[hash]) ) { + + // Generic exception cosmetic filters. + r.cosmeticDonthide = this.genericDonthide.slice(); + + // Specific cosmetic filters. + hash = makeHash(domain); + if ( (bucket = this.specificFilters.get(hash)) ) { bucket.retrieve(hostname, r.cosmeticHide); } - // https://github.com/chrisaljoudi/uBlock/issues/188 - // Special bucket for those filters without a valid domain name as per PSL - if ( (bucket = this.hostnameFilters[this.type0NoDomainHash]) ) { - bucket.retrieve(hostname, r.cosmeticHide); + // Specific exception cosmetic filters. + if ( (bucket = this.specificFilters.get('!' + hash)) ) { + bucket.retrieve(hostname, r.cosmeticDonthide); } - // entity filter buckets are always plain js array - if ( this.entityFilters.hasOwnProperty(r.entity) ) { - r.cosmeticHide = r.cosmeticHide.concat(this.entityFilters[r.entity]); + // Specific entity-based cosmetic filters. + if ( entity !== '' ) { + // Specific entity-based cosmetic filters. + hash = makeHash(entity); + if ( (bucket = this.specificFilters.get(hash)) ) { + bucket.retrieve(entity, r.cosmeticHide); + } + // Specific entity-based exception cosmetic filters. 
+ //if ( (bucket = this.specificFilters.get('!' + hash)) ) { + // bucket.retrieve(entity, r.cosmeticHide); + //} + } + + // https://github.com/chrisaljoudi/uBlock/issues/188 + // Special bucket for those filters without a valid domain name as per PSL + if ( (bucket = this.specificFilters.get(this.noDomainHash)) ) { + bucket.retrieve(hostname, r.cosmeticHide); + } + if ( (bucket = this.specificFilters.get('!' + this.noDomainHash)) ) { + bucket.retrieve(hostname, r.cosmeticDonthide); } // cached cosmetic filters. @@ -1701,23 +1728,11 @@ FilterContainer.prototype.retrieveDomainSelectors = function(request, noCosmetic r.noDOMSurveying = cacheEntry.cosmeticSurveyingMissCount > cosmeticSurveyingMissCountMax; } } - - // Exception cosmetic filters. - r.cosmeticDonthide = this.genericDonthide.slice(); - - hash = makeHash(1, domain, this.domainHashMask); - if ( (bucket = this.hostnameFilters[hash]) ) { - bucket.retrieve(hostname, r.cosmeticDonthide); - } - - // https://github.com/chrisaljoudi/uBlock/issues/188 - // Special bucket for those filters without a valid domain name as per PSL - if ( (bucket = this.hostnameFilters[this.type1NoDomainHash]) ) { - bucket.retrieve(hostname, r.cosmeticDonthide); - } - // No entity exceptions as of now } + // Scriptlet injection. + r.scripts = this.retrieveUserScripts(domain, hostname); + // Collapsible blocked resources. if ( cacheEntry ) { cacheEntry.retrieve('net', r.netHide); diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index 08eb434aa..a24ecd18b 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -987,166 +987,24 @@ FilterRegexHostname.fromSelfie = function(s) { // Dictionary of hostnames // -// FilterHostnameDict is the main reason why uBlock is not equipped to keep -// track of which filter comes from which list, and also why it's not equipped -// to be able to disable a specific filter -- other than through using a -// counter-filter. 
-// -// On the other hand it is also *one* of the reason uBlock's memory and CPU -// footprint is smaller. Compacting huge list of hostnames into single strings -// saves a lot of memory compared to having one dictionary entry per hostname. - var FilterHostnameDict = function() { this.h = ''; // short-lived register - this.dict = {}; - this.count = 0; -}; - -// Somewhat arbitrary: I need to come up with hard data to know at which -// point binary search is better than indexOf. -// -// http://jsperf.com/string-indexof-vs-binary-search -// Tuning above performance benchmark, it appears 250 is roughly a good value -// for both Chromium/Firefox. -// Example of benchmark values: '------30', '-----100', etc. -- the -// needle string must always be 8-character long. - -FilterHostnameDict.prototype.cutoff = 250; - -// Probably not needed under normal circumstances. - -FilterHostnameDict.prototype.meltBucket = function(len, bucket) { - var map = {}; - if ( bucket.startsWith(' ') ) { - bucket.trim().split(' ').map(function(k) { - map[k] = true; - }); - } else { - var offset = 0; - while ( offset < bucket.length ) { - map[bucket.substr(offset, len)] = true; - offset += len; - } - } - return map; -}; - -FilterHostnameDict.prototype.freezeBucket = function(bucket) { - var hostnames = Object.keys(bucket); - if ( hostnames[0].length * hostnames.length < this.cutoff ) { - return ' ' + hostnames.join(' ') + ' '; - } - return hostnames.sort().join(''); -}; - -// How the key is derived dictates the number and size of buckets: -// - more bits = more buckets = higher memory footprint -// - less bits = less buckets = lower memory footprint -// - binary search mitigates very well the fact that some buckets may grow -// large when fewer bits are used (or when a large number of items are -// stored). Binary search also mitigate to the point of non-issue the -// CPU footprint requirement with large buckets, as far as reference -// benchmark shows. 
-// -// A hash key capable of better spread while being as fast would be -// just great. - -FilterHostnameDict.prototype.makeKey = function(hn) { - var len = hn.length; - if ( len > 255 ) { - len = 255; - } - var i8 = len >>> 3; - var i4 = len >>> 2; - var i2 = len >>> 1; - - // http://jsperf.com/makekey-concat-vs-join/3 - - // Be sure the msb is not set, this will guarantee a valid unicode - // character (because 0xD800-0xDFFF). - return String.fromCharCode( - (hn.charCodeAt( i8) & 0x01) << 14 | -// (hn.charCodeAt( i4 ) & 0x01) << 13 | - (hn.charCodeAt( i4+i8) & 0x01) << 12 | - (hn.charCodeAt(i2 ) & 0x01) << 11 | - (hn.charCodeAt(i2 +i8) & 0x01) << 10 | -// (hn.charCodeAt(i2+i4 ) & 0x01) << 9 | - (hn.charCodeAt(i2+i4+i8) & 0x01) << 8 , - len - ); + this.dict = new Set(); }; FilterHostnameDict.prototype.add = function(hn) { - var key = this.makeKey(hn); - var bucket = this.dict[key]; - if ( bucket === undefined ) { - bucket = this.dict[key] = {}; - bucket[hn] = true; - this.count += 1; - return true; - } - if ( typeof bucket === 'string' ) { - bucket = this.dict[key] = this.meltBucket(hn.length, bucket); - } - if ( bucket.hasOwnProperty(hn) ) { + if ( this.dict.has(hn) ) { return false; } - bucket[hn] = true; - this.count += 1; + this.dict.add(hn); return true; }; -FilterHostnameDict.prototype.freeze = function() { - var buckets = this.dict; - var bucket; - for ( var key in buckets ) { - bucket = buckets[key]; - if ( typeof bucket === 'object' ) { - buckets[key] = this.freezeBucket(bucket); - } - } -}; - -FilterHostnameDict.prototype.matchesExactly = function(hn) { - // TODO: Handle IP address - - var key = this.makeKey(hn); - var bucket = this.dict[key]; - if ( bucket === undefined ) { - return false; - } - if ( typeof bucket === 'object' ) { - bucket = this.dict[key] = this.freezeBucket(bucket); - } - if ( bucket.startsWith(' ') ) { - return bucket.indexOf(' ' + hn + ' ') !== -1; - } - // binary search - var len = hn.length; - var left = 0; - // 
http://jsperf.com/or-vs-floor/17 - var right = (bucket.length / len + 0.5) | 0; - var i, needle; - while ( left < right ) { - i = left + right >> 1; - needle = bucket.substr( len * i, len ); - if ( hn < needle ) { - right = i; - } else if ( hn > needle ) { - left = i + 1; - } else { - return true; - } - } - return false; -}; - FilterHostnameDict.prototype.match = function() { // TODO: mind IP addresses - var pos, hostname = requestHostnameRegister; - while ( this.matchesExactly(hostname) === false ) { + while ( this.dict.has(hostname) === false ) { pos = hostname.indexOf('.'); if ( pos === -1 ) { this.h = ''; @@ -1167,17 +1025,12 @@ FilterHostnameDict.prototype.rtCompile = function() { }; FilterHostnameDict.prototype.toSelfie = function() { - return JSON.stringify({ - count: this.count, - dict: this.dict - }); + return JSON.stringify(µb.setToArray(this.dict)); }; FilterHostnameDict.fromSelfie = function(s) { var f = new FilterHostnameDict(); - var o = JSON.parse(s); - f.count = o.count; - f.dict = o.dict; + f.dict = µb.setFromArray(JSON.parse(s)); return f; }; @@ -1728,8 +1581,8 @@ FilterContainer.prototype.reset = function() { this.allowFilterCount = 0; this.blockFilterCount = 0; this.discardedCount = 0; - this.duplicateBuster = {}; - this.categories = Object.create(null); + this.duplicateBuster = new Set(); + this.categories = new Map(); this.filterParser.reset(); this.filterCounts = {}; @@ -1743,17 +1596,7 @@ FilterContainer.prototype.reset = function() { FilterContainer.prototype.freeze = function() { histogram('allFilters', this.categories); - this.duplicateBuster = {}; - - var categories = this.categories; - var bucket; - for ( var k in categories ) { - bucket = categories[k]['.']; - if ( bucket !== undefined ) { - bucket.freeze(); - } - } - + this.duplicateBuster = new Set(); this.filterParser.reset(); this.frozen = true; }; @@ -1786,20 +1629,23 @@ FilterContainer.prototype.factories = { 
/******************************************************************************/ FilterContainer.prototype.toSelfie = function() { - var categoryToSelfie = function(dict) { - var selfie = []; - var bucket, ff, n, i, f; - for ( var token in dict ) { - // No need for hasOwnProperty() here: there is no prototype chain. - selfie.push('k2\t' + token); - bucket = dict[token]; + var categoryToSelfie = function(map) { + var selfie = [], + iterator = map.entries(), + entry, bucket, ff, f; + for (;;) { + entry = iterator.next(); + if ( entry.done ) { + break; + } + selfie.push('k2\t' + entry.value[0]); + bucket = entry.value[1]; selfie.push(bucket.fid + '\t' + bucket.toSelfie()); if ( bucket.fid !== '[]' ) { continue; } ff = bucket.filters; - n = ff.length; - for ( i = 0; i < n; i++ ) { + for ( var i = 0, ni = ff.length; i < ni; i++ ) { f = ff[i]; selfie.push(f.fid + '\t' + f.toSelfie()); } @@ -1807,12 +1653,17 @@ FilterContainer.prototype.toSelfie = function() { return selfie.join('\n'); }; - var categoriesToSelfie = function(dict) { - var selfie = []; - for ( var key in dict ) { - // No need for hasOwnProperty() here: there is no prototype chain. 
- selfie.push('k1\t' + key); - selfie.push(categoryToSelfie(dict[key])); + var categoriesToSelfie = function(map) { + var selfie = [], + iterator = map.entries(), + entry; + for (;;) { + entry = iterator.next(); + if ( entry.done ) { + break; + } + selfie.push('k1\t' + entry.value[0]); + selfie.push(categoryToSelfie(entry.value[1])); } return selfie.join('\n'); }; @@ -1840,7 +1691,7 @@ FilterContainer.prototype.fromSelfie = function(selfie) { this.discardedCount = selfie.discardedCount; var catKey, tokenKey; - var dict = this.categories, subdict; + var map = this.categories, submap; var bucket = null; var rawText = selfie.categories; var rawEnd = rawText.length; @@ -1857,7 +1708,8 @@ FilterContainer.prototype.fromSelfie = function(selfie) { what = line.slice(0, pos); if ( what === 'k1' ) { catKey = line.slice(pos + 1); - subdict = dict[catKey] = Object.create(null); + submap = new Map(); + map.set(catKey, submap); bucket = null; continue; } @@ -1868,7 +1720,8 @@ FilterContainer.prototype.fromSelfie = function(selfie) { } factory = this.factories[what]; if ( bucket === null ) { - bucket = subdict[tokenKey] = factory.fromSelfie(line.slice(pos + 1)); + bucket = factory.fromSelfie(line.slice(pos + 1)); + submap.set(tokenKey, bucket); continue; } // When token key is reused, it can't be anything @@ -2088,66 +1941,69 @@ FilterContainer.prototype.compileToAtomicFilter = function(filterClass, parsed, /******************************************************************************/ FilterContainer.prototype.fromCompiledContent = function(lineIter) { - var line, fields, bucket, entry, factory, filter; + var line, hash, token, fclass, fdata, + bucket, entry, factory, filter, + fieldIter = new µb.FieldIterator('\v'); while ( lineIter.eot() === false ) { if ( lineIter.text.charCodeAt(lineIter.offset) !== 0x6E /* 'n' */ ) { return; } - line = lineIter.next().slice(2); - fields = line.split('\v'); + line = lineIter.next(); + + fieldIter.first(line); + hash = fieldIter.next(); + 
token = fieldIter.next(); + fclass = fieldIter.next(); + fdata = fieldIter.next(); // Special cases: delegate to more specialized engines. // Redirect engine. - if ( fields[2] === '=>' ) { - µb.redirectEngine.fromCompiledRule(fields[3]); + if ( fclass === '=>' ) { + µb.redirectEngine.fromCompiledRule(fdata); continue; } // Plain static filters. this.acceptedCount += 1; - bucket = this.categories[fields[0]]; + bucket = this.categories.get(hash); if ( bucket === undefined ) { - bucket = this.categories[fields[0]] = Object.create(null); + bucket = new Map(); + this.categories.set(hash, bucket); } - entry = bucket[fields[1]]; + entry = bucket.get(token); - if ( fields[1] === '.' ) { + if ( token === '.' ) { if ( entry === undefined ) { - entry = bucket['.'] = new FilterHostnameDict(); + entry = new FilterHostnameDict(); + bucket.set('.', new FilterHostnameDict()); } - if ( entry.add(fields[2]) === false ) { + // 'fclass' is hostname + if ( entry.add(fclass) === false ) { this.discardedCount += 1; } continue; } - if ( this.duplicateBuster.hasOwnProperty(line) ) { + if ( this.duplicateBuster.has(line) ) { this.discardedCount += 1; continue; } - this.duplicateBuster[line] = true; + this.duplicateBuster.add(line); - factory = this.factories[fields[2]]; + factory = this.factories[fclass]; - // For development purpose - //if ( this.filterCounts.hasOwnProperty(fields[2]) === false ) { - // this.filterCounts[fields[2]] = 1; - //} else { - // this.filterCounts[fields[2]]++; - //} - - filter = factory.fromSelfie(fields[3]); + filter = factory.fromSelfie(fdata); if ( entry === undefined ) { - bucket[fields[1]] = filter; + bucket.set(token, filter); continue; } if ( entry.fid === '[]' ) { entry.add(filter); continue; } - bucket[fields[1]] = new FilterBucket(entry, filter); + bucket.set(token, new FilterBucket(entry, filter)); } }; @@ -2289,9 +2145,12 @@ FilterContainer.prototype.filterRegexFromCompiled = function(compiled, flags) { 
/******************************************************************************/ +// bucket: Map +// url: string + FilterContainer.prototype.matchTokens = function(bucket, url) { // Hostname-only filters - var f = bucket['.']; + var f = bucket.get('.'); if ( f !== undefined && f.match() ) { this.tokenRegister = '.'; this.fRegister = f; @@ -2307,7 +2166,7 @@ FilterContainer.prototype.matchTokens = function(bucket, url) { if ( token === '' ) { break; } - f = bucket[token]; + f = bucket.get(token); if ( f !== undefined && f.match(url, tokenEntry.beg) ) { this.tokenRegister = token; this.fRegister = f; @@ -2316,7 +2175,7 @@ FilterContainer.prototype.matchTokens = function(bucket, url) { } // Regex-based filters - f = bucket['*']; + f = bucket.get('*'); if ( f !== undefined && f.match(url) ) { this.tokenRegister = '*'; this.fRegister = f; @@ -2361,7 +2220,7 @@ FilterContainer.prototype.matchStringExactType = function(context, requestURL, r if ( requestType === 'elemhide' ) { key = AllowAnyParty | type; if ( - (bucket = categories[toHex(key)]) && + (bucket = categories.get(toHex(key))) && this.matchTokens(bucket, url) ) { this.keyRegister = key; @@ -2373,14 +2232,14 @@ FilterContainer.prototype.matchStringExactType = function(context, requestURL, r // https://github.com/chrisaljoudi/uBlock/issues/139 // Test against important block filters key = BlockAnyParty | Important | type; - if ( (bucket = categories[toHex(key)]) ) { + if ( (bucket = categories.get(toHex(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; return true; } } key = BlockAction | Important | type | party; - if ( (bucket = categories[toHex(key)]) ) { + if ( (bucket = categories.get(toHex(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; return true; @@ -2389,14 +2248,14 @@ FilterContainer.prototype.matchStringExactType = function(context, requestURL, r // Test against block filters key = BlockAnyParty | type; - if ( (bucket = categories[toHex(key)]) ) { + if 
( (bucket = categories.get(toHex(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; } } if ( this.fRegister === null ) { key = BlockAction | type | party; - if ( (bucket = categories[toHex(key)]) ) { + if ( (bucket = categories.get(toHex(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; } @@ -2410,14 +2269,14 @@ FilterContainer.prototype.matchStringExactType = function(context, requestURL, r // Test against allow filters key = AllowAnyParty | type; - if ( (bucket = categories[toHex(key)]) ) { + if ( (bucket = categories.get(toHex(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; return false; } } key = AllowAction | type | party; - if ( (bucket = categories[toHex(key)]) ) { + if ( (bucket = categories.get(toHex(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; return false; @@ -2480,28 +2339,28 @@ FilterContainer.prototype.matchString = function(context) { // evaluation. Normally, it is "evaluate block then evaluate allow", with // the `important` property it is "evaluate allow then evaluate block". 
key = BlockAnyTypeAnyParty | Important; - if ( (bucket = categories[toHex(key)]) ) { + if ( (bucket = categories.get(toHex(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; return true; } } key = BlockAnyType | Important | party; - if ( (bucket = categories[toHex(key)]) ) { + if ( (bucket = categories.get(toHex(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; return true; } } key = BlockAnyParty | Important | type; - if ( (bucket = categories[toHex(key)]) ) { + if ( (bucket = categories.get(toHex(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; return true; } } key = BlockAction | Important | type | party; - if ( (bucket = categories[toHex(key)]) ) { + if ( (bucket = categories.get(toHex(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; return true; @@ -2510,28 +2369,28 @@ FilterContainer.prototype.matchString = function(context) { // Test against block filters key = BlockAnyTypeAnyParty; - if ( (bucket = categories[toHex(key)]) ) { + if ( (bucket = categories.get(toHex(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; } } if ( this.fRegister === null ) { key = BlockAnyType | party; - if ( (bucket = categories[toHex(key)]) ) { + if ( (bucket = categories.get(toHex(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; } } if ( this.fRegister === null ) { key = BlockAnyParty | type; - if ( (bucket = categories[toHex(key)]) ) { + if ( (bucket = categories.get(toHex(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; } } if ( this.fRegister === null ) { key = BlockAction | type | party; - if ( (bucket = categories[toHex(key)]) ) { + if ( (bucket = categories.get(toHex(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; } @@ -2547,28 +2406,28 @@ FilterContainer.prototype.matchString = function(context) { // Test against allow filters key = AllowAnyTypeAnyParty; - if ( (bucket = 
categories[toHex(key)]) ) { + if ( (bucket = categories.get(toHex(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; return false; } } key = AllowAnyType | party; - if ( (bucket = categories[toHex(key)]) ) { + if ( (bucket = categories.get(toHex(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; return false; } } key = AllowAnyParty | type; - if ( (bucket = categories[toHex(key)]) ) { + if ( (bucket = categories.get(toHex(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; return false; } } key = AllowAction | type | party; - if ( (bucket = categories[toHex(key)]) ) { + if ( (bucket = categories.get(toHex(key))) ) { if ( this.matchTokens(bucket, url) ) { this.keyRegister = key; return false; diff --git a/src/js/utils.js b/src/js/utils.js index de1f5380e..2dde6c95e 100644 --- a/src/js/utils.js +++ b/src/js/utils.js @@ -126,18 +126,16 @@ µBlock.LineIterator = function(text, offset) { this.text = text; + this.textLen = this.text.length; this.offset = offset || 0; }; µBlock.LineIterator.prototype.next = function() { - if ( this.offset >= this.text.length ) { - return undefined; - } var lineEnd = this.text.indexOf('\n', this.offset); if ( lineEnd === -1 ) { lineEnd = this.text.indexOf('\r', this.offset); if ( lineEnd === -1 ) { - lineEnd = this.text.length; + lineEnd = this.textLen; } } var line = this.text.slice(this.offset, lineEnd); @@ -146,7 +144,69 @@ }; µBlock.LineIterator.prototype.eot = function() { - return this.offset >= this.text.length; + return this.offset >= this.textLen; +}; + +/******************************************************************************/ + +// The field iterator is less CPU-intensive than when using native +// String.split(). 
+ +µBlock.FieldIterator = function(sep) { + this.text = ''; + this.sep = sep; + this.sepLen = sep.length; + this.offset = 0; +}; + +µBlock.FieldIterator.prototype.first = function(text) { + this.text = text; + this.offset = 0; + return this.next(); +}; + +µBlock.FieldIterator.prototype.next = function() { + var end = this.text.indexOf(this.sep, this.offset); + if ( end === -1 ) { + end = this.text.length; + } + var field = this.text.slice(this.offset, end); + this.offset = end + this.sepLen; + return field; +}; + +/******************************************************************************/ + +µBlock.mapToArray = function(map) { + var out = [], + entries = map.entries(), + entry; + for (;;) { + entry = entries.next(); + if ( entry.done ) { break; } + out.push([ entry.value[0], entry.value[1] ]); + } + return out; +}; + +µBlock.mapFromArray = function(arr) { + return new Map(arr); +}; + +µBlock.setToArray = function(dict) { + var out = [], + entries = dict.values(), + entry; + for (;;) { + entry = entries.next(); + if ( entry.done ) { break; } + out.push(entry.value); + } + return out; +}; + +µBlock.setFromArray = function(arr) { + return new Set(arr); }; /******************************************************************************/ diff --git a/tools/make-firefox.sh b/tools/make-firefox.sh index 7d4c3c5ed..b90814656 100755 --- a/tools/make-firefox.sh +++ b/tools/make-firefox.sh @@ -22,6 +22,7 @@ cat src/background.html | sed -e '/vapi-polyfill\.js/d' > $DES/background.html mv $DES/img/icon_128.png $DES/icon.png cp platform/firefox/css/* $DES/css/ +cp platform/firefox/polyfill.js $DES/js/ cp platform/firefox/vapi-*.js $DES/js/ cp platform/firefox/bootstrap.js $DES/ cp platform/firefox/frame*.js $DES/