Add HNTrieRef.dump() and STrieRef.dump() as dev tools

To be used at the console, as investigation tools for
development purposes.

Using it to verify the content of the largest
FilterHostnameDict instance, I spotted an all-uppercase
hostname in the HNTrieRef instance:

µBlock.staticNetFilteringEngine.categories.get(0).get(0x10000000).dict.dump();

Thus the changes to static-net-filtering.js are to fix
the erroneous insertion of filters with uppercase
characters. The single instance found was a hostname entry
in Malware Domain List (TRIANGLESERVICESLTD dot COM).
This commit is contained in:
Raymond Hill 2019-05-06 11:12:39 -04:00
parent b654d883df
commit 3692bb4ada
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
3 changed files with 31 additions and 14 deletions

View File

@ -610,6 +610,20 @@ HNTrieContainer.prototype.HNTrieRef = class {
return this.last; return this.last;
} }
dump() {
let hostnames = Array.from(this);
if ( String.prototype.padStart instanceof Function ) {
const maxlen = Math.min(
hostnames.reduce((maxlen, hn) => Math.max(maxlen, hn.length), 0),
64
);
hostnames = hostnames.map(hn => hn.padStart(maxlen));
}
for ( const hn of hostnames ) {
console.log(hn);
}
}
[Symbol.iterator]() { [Symbol.iterator]() {
return { return {
value: undefined, value: undefined,

View File

@ -1737,10 +1737,8 @@ const FilterParser = function() {
this.reBadDomainOptChars = /[*+?^${}()[\]\\]/; this.reBadDomainOptChars = /[*+?^${}()[\]\\]/;
this.reHostnameRule1 = /^[0-9a-z][0-9a-z.-]*[0-9a-z]$/i; this.reHostnameRule1 = /^[0-9a-z][0-9a-z.-]*[0-9a-z]$/i;
this.reHostnameRule2 = /^[0-9a-z][0-9a-z.-]*[0-9a-z]\^?$/i; this.reHostnameRule2 = /^[0-9a-z][0-9a-z.-]*[0-9a-z]\^?$/i;
this.reCleanupHostnameRule2 = /\^$/g;
this.reCanTrimCarets1 = /^[^*]*$/; this.reCanTrimCarets1 = /^[^*]*$/;
this.reCanTrimCarets2 = /^\^?[^^]+[^^][^^]+\^?$/; this.reCanTrimCarets2 = /^\^?[^^]+[^^][^^]+\^?$/;
this.reHasUppercase = /[A-Z]/;
this.reIsolateHostname = /^(\*?\.)?([^\x00-\x24\x26-\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+)(.*)/; this.reIsolateHostname = /^(\*?\.)?([^\x00-\x24\x26-\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+)(.*)/;
this.reHasUnicode = /[^\x00-\x7F]/; this.reHasUnicode = /[^\x00-\x7F]/;
this.reWebsocketAny = /^ws[s*]?(?::\/?\/?)?\*?$/; this.reWebsocketAny = /^ws[s*]?(?::\/?\/?)?\*?$/;
@ -1990,18 +1988,18 @@ FilterParser.prototype.parse = function(raw) {
// important! // important!
this.reset(); this.reset();
var s = this.raw = raw; let s = this.raw = raw;
// plain hostname? (from HOSTS file) // plain hostname? (from HOSTS file)
if ( this.reHostnameRule1.test(s) ) { if ( this.reHostnameRule1.test(s) ) {
this.f = s; this.f = s.toLowerCase();
this.hostnamePure = true; this.hostnamePure = true;
this.anchor |= 0x4; this.anchor |= 0x4;
return this; return this;
} }
// element hiding filter? // element hiding filter?
var pos = s.indexOf('#'); let pos = s.indexOf('#');
if ( pos !== -1 ) { if ( pos !== -1 ) {
var c = s.charAt(pos + 1); var c = s.charAt(pos + 1);
if ( c === '#' || c === '@' ) { if ( c === '#' || c === '@' ) {
@ -2093,7 +2091,10 @@ FilterParser.prototype.parse = function(raw) {
// A filter can't be a pure-hostname one if there is a domain or csp // A filter can't be a pure-hostname one if there is a domain or csp
// option present. // option present.
if ( this.reHostnameRule2.test(s) ) { if ( this.reHostnameRule2.test(s) ) {
this.f = s.replace(this.reCleanupHostnameRule2, ''); if ( s.charCodeAt(s.length - 1) === 0x5E /* '^' */ ) {
s = s.slice(0, -1);
}
this.f = s.toLowerCase();
this.hostnamePure = true; this.hostnamePure = true;
return this; return this;
} }
@ -2138,11 +2139,7 @@ FilterParser.prototype.parse = function(raw) {
} }
this.wildcarded = reIsWildcarded.test(s); this.wildcarded = reIsWildcarded.test(s);
this.f = s.toLowerCase();
// This might look weird but we gain memory footprint by not going through
// toLowerCase(), at least on Chromium. Because copy-on-write?
this.f = this.reHasUppercase.test(s) ? s.toLowerCase() : s;
return this; return this;
}; };

View File

@ -385,6 +385,12 @@ STrieContainer.prototype.STrieRef = class {
return this.container.matches(this.iroot, a, al); return this.container.matches(this.iroot, a, al);
} }
dump() {
for ( const s of this ) {
console.log(s);
}
}
[Symbol.iterator]() { [Symbol.iterator]() {
return { return {
value: undefined, value: undefined,
@ -408,8 +414,8 @@ STrieContainer.prototype.STrieRef = class {
let i0 = this.container.buf32[STRIE_CHAR0_SLOT] + (v & 0x00FFFFFF); let i0 = this.container.buf32[STRIE_CHAR0_SLOT] + (v & 0x00FFFFFF);
const i1 = i0 + (v >>> 24); const i1 = i0 + (v >>> 24);
while ( i0 < i1 ) { while ( i0 < i1 ) {
this.charPtr -= 1;
this.charBuf[this.charPtr] = this.container.buf[i0]; this.charBuf[this.charPtr] = this.container.buf[i0];
this.charPtr += 1;
i0 += 1; i0 += 1;
} }
this.icell = this.container.buf32[this.icell+1]; this.icell = this.container.buf32[this.icell+1];
@ -424,14 +430,14 @@ STrieContainer.prototype.STrieRef = class {
}, },
toPattern: function() { toPattern: function() {
this.value = this.textDecoder.decode( this.value = this.textDecoder.decode(
new Uint8Array(this.charBuf.buffer, this.charPtr) new Uint8Array(this.charBuf.buffer, 0, this.charPtr)
); );
return this; return this;
}, },
container: this.container, container: this.container,
icell: this.iroot, icell: this.iroot,
charBuf: new Uint8Array(256), charBuf: new Uint8Array(256),
charPtr: 256, charPtr: 0,
forks: [], forks: [],
textDecoder: new TextDecoder() textDecoder: new TextDecoder()
}; };