From 3692bb4ada23be3a41bfcad5ca627d095e171070 Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Mon, 6 May 2019 11:12:39 -0400 Subject: [PATCH] Add HNTrieRef.dump() and STrieRef.dump() as dev tool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To be used at the console, as an investigation tool for development purposes. Using it to verify the content of the largest FilterHostnameDict instance, I spotted an all-uppercase hostname in the HNTrieRef instance: µBlock.staticNetFilteringEngine.categories.get(0).get(0x10000000).dict.dump(); Thus the changes to static-net-filtering.js are to fix the erroneous insertion of filters with uppercase characters. The single instance found was a hostname entry in Malware Domain List (TRIANGLESERVICESLTD dot COM). --- src/js/hntrie.js | 14 ++++++++++++++ src/js/static-net-filtering.js | 19 ++++++++----------- src/js/strie.js | 12 +++++++++--- 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/src/js/hntrie.js b/src/js/hntrie.js index 366451af3..7da7d7778 100644 --- a/src/js/hntrie.js +++ b/src/js/hntrie.js @@ -610,6 +610,20 @@ HNTrieContainer.prototype.HNTrieRef = class { return this.last; } + dump() { + let hostnames = Array.from(this); + if ( String.prototype.padStart instanceof Function ) { + const maxlen = Math.min( + hostnames.reduce((maxlen, hn) => Math.max(maxlen, hn.length), 0), + 64 + ); + hostnames = hostnames.map(hn => hn.padStart(maxlen)); + } + for ( const hn of hostnames ) { + console.log(hn); + } + } + [Symbol.iterator]() { return { value: undefined, diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index c821e7a1e..169e6251c 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -1737,10 +1737,8 @@ const FilterParser = function() { this.reBadDomainOptChars = /[*+?^${}()[\]\\]/; this.reHostnameRule1 = /^[0-9a-z][0-9a-z.-]*[0-9a-z]$/i; this.reHostnameRule2 = /^[0-9a-z][0-9a-z.-]*[0-9a-z]\^?$/i; - 
this.reCleanupHostnameRule2 = /\^$/g; this.reCanTrimCarets1 = /^[^*]*$/; this.reCanTrimCarets2 = /^\^?[^^]+[^^][^^]+\^?$/; - this.reHasUppercase = /[A-Z]/; this.reIsolateHostname = /^(\*?\.)?([^\x00-\x24\x26-\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+)(.*)/; this.reHasUnicode = /[^\x00-\x7F]/; this.reWebsocketAny = /^ws[s*]?(?::\/?\/?)?\*?$/; @@ -1990,18 +1988,18 @@ FilterParser.prototype.parse = function(raw) { // important! this.reset(); - var s = this.raw = raw; + let s = this.raw = raw; // plain hostname? (from HOSTS file) if ( this.reHostnameRule1.test(s) ) { - this.f = s; + this.f = s.toLowerCase(); this.hostnamePure = true; this.anchor |= 0x4; return this; } // element hiding filter? - var pos = s.indexOf('#'); + let pos = s.indexOf('#'); if ( pos !== -1 ) { var c = s.charAt(pos + 1); if ( c === '#' || c === '@' ) { @@ -2093,7 +2091,10 @@ FilterParser.prototype.parse = function(raw) { // A filter can't be a pure-hostname one if there is a domain or csp // option present. if ( this.reHostnameRule2.test(s) ) { - this.f = s.replace(this.reCleanupHostnameRule2, ''); + if ( s.charCodeAt(s.length - 1) === 0x5E /* '^' */ ) { + s = s.slice(0, -1); + } + this.f = s.toLowerCase(); this.hostnamePure = true; return this; } @@ -2138,11 +2139,7 @@ FilterParser.prototype.parse = function(raw) { } this.wildcarded = reIsWildcarded.test(s); - - // This might look weird but we gain memory footprint by not going through - // toLowerCase(), at least on Chromium. Because copy-on-write? - - this.f = this.reHasUppercase.test(s) ? 
s.toLowerCase() : s; + this.f = s.toLowerCase(); return this; }; diff --git a/src/js/strie.js b/src/js/strie.js index 963671e97..dc8fc6ac0 100644 --- a/src/js/strie.js +++ b/src/js/strie.js @@ -385,6 +385,12 @@ STrieContainer.prototype.STrieRef = class { return this.container.matches(this.iroot, a, al); } + dump() { + for ( const s of this ) { + console.log(s); + } + } + [Symbol.iterator]() { return { value: undefined, @@ -408,8 +414,8 @@ STrieContainer.prototype.STrieRef = class { let i0 = this.container.buf32[STRIE_CHAR0_SLOT] + (v & 0x00FFFFFF); const i1 = i0 + (v >>> 24); while ( i0 < i1 ) { - this.charPtr -= 1; this.charBuf[this.charPtr] = this.container.buf[i0]; + this.charPtr += 1; i0 += 1; } this.icell = this.container.buf32[this.icell+1]; @@ -424,14 +430,14 @@ STrieContainer.prototype.STrieRef = class { }, toPattern: function() { this.value = this.textDecoder.decode( - new Uint8Array(this.charBuf.buffer, this.charPtr) + new Uint8Array(this.charBuf.buffer, 0, this.charPtr) ); return this; }, container: this.container, icell: this.iroot, charBuf: new Uint8Array(256), - charPtr: 256, + charPtr: 0, forks: [], textDecoder: new TextDecoder() };