mirror of https://github.com/gorhill/uBlock.git
Do not store impossible to match filters in HNTrie
Consider the following two filters: example.com and www.example.com. This commit makes it so that if the first filter is already present in a given HNTrie, the second filter will not be stored, since HNTrie will _always_ return the first filter as a match whenever the hostname to match is example.com or any subdomain of example.com. The detection of such pointless filters is virtually free when adding a hostname to an HNTrie instance (given how data is stored in the trie), so in practice no overhead is incurred to detect such pointless filters. The ability to ignore impossible-to-match filters in HNTrie instances will _especially_ benefit those using large hosts files. Examples of how this helps using real configurations: - Default lists: 444 filters out of 100,382 were ignored as a result of this commit. - Default lists + "Energized Ultimate Protection": 283,669 filters out of 903,235 were ignored as a result of this commit. Side note: There was no measurable difference between the two configurations above in the performance of the matching algorithm as reported by the built-in benchmark tool.
This commit is contained in:
parent
c4f9ae706a
commit
adabb56dc9
|
@ -138,7 +138,7 @@ const µBlock = (function() { // jshint ignore:line
|
||||||
// Read-only
|
// Read-only
|
||||||
systemSettings: {
|
systemSettings: {
|
||||||
compiledMagic: 13, // Increase when compiled format changes
|
compiledMagic: 13, // Increase when compiled format changes
|
||||||
selfieMagic: 13 // Increase when selfie format changes
|
selfieMagic: 14 // Increase when selfie format changes
|
||||||
},
|
},
|
||||||
|
|
||||||
restoreBackupSettings: {
|
restoreBackupSettings: {
|
||||||
|
|
|
@ -175,7 +175,8 @@ const HNTrieContainer = class {
|
||||||
matchesJS(iroot) {
|
matchesJS(iroot) {
|
||||||
const char0 = this.buf32[HNTRIE_CHAR0_SLOT];
|
const char0 = this.buf32[HNTRIE_CHAR0_SLOT];
|
||||||
let ineedle = this.buf[255];
|
let ineedle = this.buf[255];
|
||||||
let icell = iroot;
|
let icell = this.buf32[iroot+0];
|
||||||
|
if ( icell === 0 ) { return -1; }
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if ( ineedle === 0 ) { return -1; }
|
if ( ineedle === 0 ) { return -1; }
|
||||||
ineedle -= 1;
|
ineedle -= 1;
|
||||||
|
@ -238,12 +239,6 @@ const HNTrieContainer = class {
|
||||||
addJS(iroot) {
|
addJS(iroot) {
|
||||||
let lhnchar = this.buf[255];
|
let lhnchar = this.buf[255];
|
||||||
if ( lhnchar === 0 ) { return 0; }
|
if ( lhnchar === 0 ) { return 0; }
|
||||||
let icell = iroot;
|
|
||||||
// special case: first node in trie
|
|
||||||
if ( this.buf32[icell+2] === 0 ) {
|
|
||||||
this.buf32[icell+2] = this.addSegment(lhnchar);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
// grow buffer if needed
|
// grow buffer if needed
|
||||||
if (
|
if (
|
||||||
(this.buf32[HNTRIE_CHAR0_SLOT] - this.buf32[HNTRIE_TRIE1_SLOT]) < 24 ||
|
(this.buf32[HNTRIE_CHAR0_SLOT] - this.buf32[HNTRIE_TRIE1_SLOT]) < 24 ||
|
||||||
|
@ -251,6 +246,12 @@ const HNTrieContainer = class {
|
||||||
) {
|
) {
|
||||||
this.growBuf(24, 256);
|
this.growBuf(24, 256);
|
||||||
}
|
}
|
||||||
|
let icell = this.buf32[iroot+0];
|
||||||
|
// special case: first node in trie
|
||||||
|
if ( icell === 0 ) {
|
||||||
|
this.buf32[iroot+0] = this.addCell(0, 0, this.addSegment(lhnchar));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
//
|
//
|
||||||
const char0 = this.buf32[HNTRIE_CHAR0_SLOT];
|
const char0 = this.buf32[HNTRIE_CHAR0_SLOT];
|
||||||
let inext;
|
let inext;
|
||||||
|
@ -259,6 +260,9 @@ const HNTrieContainer = class {
|
||||||
const vseg = this.buf32[icell+2];
|
const vseg = this.buf32[icell+2];
|
||||||
// skip boundary cells
|
// skip boundary cells
|
||||||
if ( vseg === 0 ) {
|
if ( vseg === 0 ) {
|
||||||
|
// remainder is at label boundary? if yes, no need to add
|
||||||
|
// the rest since the shortest match is always reported
|
||||||
|
if ( this.buf[lhnchar-1] === 0x2E /* '.' */ ) { return -1; }
|
||||||
icell = this.buf32[icell+1];
|
icell = this.buf32[icell+1];
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -303,6 +307,9 @@ const HNTrieContainer = class {
|
||||||
icell = inext;
|
icell = inext;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
// remainder is at label boundary? if yes, no need to add
|
||||||
|
// the rest since the shortest match is always reported
|
||||||
|
if ( this.buf[lhnchar-1] === 0x2E /* '.' */ ) { return -1; }
|
||||||
// boundary cell + needle remainder
|
// boundary cell + needle remainder
|
||||||
inext = this.addCell(0, 0, 0);
|
inext = this.addCell(0, 0, 0);
|
||||||
this.buf32[icell+1] = inext;
|
this.buf32[icell+1] = inext;
|
||||||
|
@ -550,7 +557,7 @@ HNTrieContainer.prototype.HNTrieRef = class {
|
||||||
}
|
}
|
||||||
|
|
||||||
add(hn) {
|
add(hn) {
|
||||||
if ( this.container.setNeedle(hn).add(this.iroot) === 1 ) {
|
if ( this.container.setNeedle(hn).add(this.iroot) > 0 ) {
|
||||||
this.last = -1;
|
this.last = -1;
|
||||||
this.needle = '';
|
this.needle = '';
|
||||||
this.size += 1;
|
this.size += 1;
|
||||||
|
@ -560,7 +567,7 @@ HNTrieContainer.prototype.HNTrieRef = class {
|
||||||
}
|
}
|
||||||
|
|
||||||
addJS(hn) {
|
addJS(hn) {
|
||||||
if ( this.container.setNeedle(hn).addJS(this.iroot) === 1 ) {
|
if ( this.container.setNeedle(hn).addJS(this.iroot) > 0 ) {
|
||||||
this.last = -1;
|
this.last = -1;
|
||||||
this.needle = '';
|
this.needle = '';
|
||||||
this.size += 1;
|
this.size += 1;
|
||||||
|
@ -570,7 +577,7 @@ HNTrieContainer.prototype.HNTrieRef = class {
|
||||||
}
|
}
|
||||||
|
|
||||||
addWASM(hn) {
|
addWASM(hn) {
|
||||||
if ( this.container.setNeedle(hn).addWASM(this.iroot) === 1 ) {
|
if ( this.container.setNeedle(hn).addWASM(this.iroot) > 0 ) {
|
||||||
this.last = -1;
|
this.last = -1;
|
||||||
this.needle = '';
|
this.needle = '';
|
||||||
this.size += 1;
|
this.size += 1;
|
||||||
|
|
Binary file not shown.
|
@ -51,8 +51,9 @@
|
||||||
;; offset.
|
;; offset.
|
||||||
;;
|
;;
|
||||||
(func (export "matches")
|
(func (export "matches")
|
||||||
(param $icell i32) ;; offset to root cell of the trie
|
(param $iroot i32) ;; offset to root cell of the trie
|
||||||
(result i32) ;; result = match index, -1 = miss
|
(result i32) ;; result = match index, -1 = miss
|
||||||
|
(local $icell i32) ;; offset to the current cell
|
||||||
(local $char0 i32) ;; offset to first character data
|
(local $char0 i32) ;; offset to first character data
|
||||||
(local $ineedle i32) ;; current needle offset
|
(local $ineedle i32) ;; current needle offset
|
||||||
(local $c i32)
|
(local $c i32)
|
||||||
|
@ -64,15 +65,24 @@
|
||||||
i32.const 264 ;; start of char section is stored at addr 264
|
i32.const 264 ;; start of char section is stored at addr 264
|
||||||
i32.load
|
i32.load
|
||||||
set_local $char0
|
set_local $char0
|
||||||
;; $icell is an index into an array of 32-bit values
|
|
||||||
get_local $icell
|
|
||||||
i32.const 2
|
|
||||||
i32.shl
|
|
||||||
set_local $icell
|
|
||||||
;; let ineedle = this.buf[255];
|
;; let ineedle = this.buf[255];
|
||||||
i32.const 255 ;; addr of needle is stored at addr 255
|
i32.const 255 ;; addr of needle is stored at addr 255
|
||||||
i32.load8_u
|
i32.load8_u
|
||||||
set_local $ineedle
|
set_local $ineedle
|
||||||
|
;; let icell = this.buf32[iroot+0];
|
||||||
|
get_local $iroot
|
||||||
|
i32.const 2
|
||||||
|
i32.shl
|
||||||
|
i32.load
|
||||||
|
i32.const 2
|
||||||
|
i32.shl
|
||||||
|
tee_local $icell
|
||||||
|
;; if ( icell === 0 ) { return -1; }
|
||||||
|
i32.eqz
|
||||||
|
if
|
||||||
|
i32.const -1
|
||||||
|
return
|
||||||
|
end
|
||||||
;; for (;;) {
|
;; for (;;) {
|
||||||
block $noSegment loop $nextSegment
|
block $noSegment loop $nextSegment
|
||||||
;; if ( ineedle === 0 ) { return -1; }
|
;; if ( ineedle === 0 ) { return -1; }
|
||||||
|
@ -244,8 +254,9 @@
|
||||||
;; Add a new hostname to a trie which root cell is passed as argument.
|
;; Add a new hostname to a trie which root cell is passed as argument.
|
||||||
;;
|
;;
|
||||||
(func (export "add")
|
(func (export "add")
|
||||||
(param $icell i32) ;; index of root cell of the trie
|
(param $iroot i32) ;; index of root cell of the trie
|
||||||
(result i32) ;; result: 0 not added, 1 = added
|
(result i32) ;; result: 0 not added, 1 = added
|
||||||
|
(local $icell i32) ;; index of current cell in the trie
|
||||||
(local $lhnchar i32) ;; number of characters left to process in hostname
|
(local $lhnchar i32) ;; number of characters left to process in hostname
|
||||||
(local $char0 i32) ;; offset to start of character data section
|
(local $char0 i32) ;; offset to start of character data section
|
||||||
(local $vseg i32) ;; integer value describing a segment
|
(local $vseg i32) ;; integer value describing a segment
|
||||||
|
@ -264,24 +275,6 @@
|
||||||
i32.const 0
|
i32.const 0
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
;; let icell = iroot;
|
|
||||||
get_local $icell
|
|
||||||
i32.const 2
|
|
||||||
i32.shl
|
|
||||||
tee_local $icell
|
|
||||||
;; if ( this.buf32[icell+2] === 0 ) {
|
|
||||||
i32.load offset=8
|
|
||||||
i32.eqz
|
|
||||||
if
|
|
||||||
;;this.buf32[icell+2] = this.addSegment(lhnchar);
|
|
||||||
;; return 1;
|
|
||||||
get_local $icell
|
|
||||||
get_local $lhnchar
|
|
||||||
call $addSegment
|
|
||||||
i32.store offset=8
|
|
||||||
i32.const 1
|
|
||||||
return
|
|
||||||
end
|
|
||||||
;; if (
|
;; if (
|
||||||
;; (this.buf32[HNBIGTRIE_CHAR0_SLOT] - this.buf32[HNBIGTRIE_TRIE1_SLOT]) < 24 ||
|
;; (this.buf32[HNBIGTRIE_CHAR0_SLOT] - this.buf32[HNBIGTRIE_TRIE1_SLOT]) < 24 ||
|
||||||
;; (this.buf.length - this.buf32[HNBIGTRIE_CHAR1_SLOT]) < 256
|
;; (this.buf.length - this.buf32[HNBIGTRIE_CHAR1_SLOT]) < 256
|
||||||
|
@ -310,6 +303,30 @@
|
||||||
call $growBuf
|
call $growBuf
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
;; let icell = this.buf32[iroot+0];
|
||||||
|
get_local $iroot
|
||||||
|
i32.const 2
|
||||||
|
i32.shl
|
||||||
|
tee_local $iroot
|
||||||
|
i32.load
|
||||||
|
i32.const 2
|
||||||
|
i32.shl
|
||||||
|
tee_local $icell
|
||||||
|
;; if ( icell === 0 ) {
|
||||||
|
i32.eqz
|
||||||
|
if
|
||||||
|
;; this.buf32[iroot+0] = this.addCell(0, 0, this.addSegment(lhnchar));
|
||||||
|
;; return 1;
|
||||||
|
get_local $iroot
|
||||||
|
i32.const 0
|
||||||
|
i32.const 0
|
||||||
|
get_local $lhnchar
|
||||||
|
call $addSegment
|
||||||
|
call $addCell
|
||||||
|
i32.store
|
||||||
|
i32.const 1
|
||||||
|
return
|
||||||
|
end
|
||||||
;; const char0 = this.buf32[HNBIGTRIE_CHAR0_SLOT];
|
;; const char0 = this.buf32[HNBIGTRIE_CHAR0_SLOT];
|
||||||
i32.const 264
|
i32.const 264
|
||||||
i32.load
|
i32.load
|
||||||
|
@ -323,6 +340,19 @@
|
||||||
;; if ( vseg === 0 ) {
|
;; if ( vseg === 0 ) {
|
||||||
i32.eqz
|
i32.eqz
|
||||||
if
|
if
|
||||||
|
;; if ( this.buf[lhnchar-1] === 0x2E /* '.' */ ) { return -1; }
|
||||||
|
get_local $lhnchar
|
||||||
|
i32.const -1
|
||||||
|
i32.add
|
||||||
|
i32.load8_u
|
||||||
|
i32.const 0x2E
|
||||||
|
i32.eq
|
||||||
|
if
|
||||||
|
i32.const -1
|
||||||
|
return
|
||||||
|
end
|
||||||
|
;; icell = this.buf32[icell+1];
|
||||||
|
;; continue;
|
||||||
get_local $icell
|
get_local $icell
|
||||||
i32.load offset=4
|
i32.load offset=4
|
||||||
i32.const 2
|
i32.const 2
|
||||||
|
@ -463,13 +493,23 @@
|
||||||
else
|
else
|
||||||
;; if ( inext !== 0 ) {
|
;; if ( inext !== 0 ) {
|
||||||
get_local $inext
|
get_local $inext
|
||||||
i32.eqz
|
if
|
||||||
if else
|
|
||||||
;; icell = inext;
|
;; icell = inext;
|
||||||
get_local $inext
|
get_local $inext
|
||||||
set_local $icell
|
set_local $icell
|
||||||
br $nextSegment
|
br $nextSegment
|
||||||
end
|
end
|
||||||
|
;; if ( this.buf[lhnchar-1] === 0x2E /* '.' */ ) { return -1; }
|
||||||
|
get_local $lhnchar
|
||||||
|
i32.const -1
|
||||||
|
i32.add
|
||||||
|
i32.load8_u
|
||||||
|
i32.const 0x2E
|
||||||
|
i32.eq
|
||||||
|
if
|
||||||
|
i32.const -1
|
||||||
|
return
|
||||||
|
end
|
||||||
;; inext = this.addCell(0, 0, 0);
|
;; inext = this.addCell(0, 0, 0);
|
||||||
;; this.buf32[icell+1] = inext;
|
;; this.buf32[icell+1] = inext;
|
||||||
get_local $icell
|
get_local $icell
|
||||||
|
|
Loading…
Reference in New Issue