mirror of https://github.com/gorhill/uBlock.git
With the new `important` filter option, a tokenizer makes sense now: the URL is tokenized once per request, and the stored tokens are reused by `matchTokens`.
This commit is contained in:
parent
d6b501b264
commit
c48a99c4bf
|
@ -63,7 +63,7 @@ var typeNameToTypeValue = {
|
||||||
'other': 8 << 4,
|
'other': 8 << 4,
|
||||||
'popup': 9 << 4
|
'popup': 9 << 4
|
||||||
};
|
};
|
||||||
const AnyType = typeNameToTypeValue['any'];
|
const AnyType = typeNameToTypeValue.any;
|
||||||
|
|
||||||
const BlockAnyTypeAnyParty = BlockAction | AnyType | AnyParty;
|
const BlockAnyTypeAnyParty = BlockAction | AnyType | AnyParty;
|
||||||
const BlockAnyType1stParty = BlockAction | AnyType | FirstParty;
|
const BlockAnyType1stParty = BlockAction | AnyType | FirstParty;
|
||||||
|
@ -90,8 +90,10 @@ var reURLPostHostnameAnchors = /[\/?#]/;
|
||||||
// regex tester: http://regex101.com/
|
// regex tester: http://regex101.com/
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
|
var histogram = function() {};
|
||||||
/*
|
/*
|
||||||
var histogram = function(label, categories) {
|
histogram = function(label, categories) {
|
||||||
var h = [],
|
var h = [],
|
||||||
categoryBucket;
|
categoryBucket;
|
||||||
for ( var k in categories ) {
|
for ( var k in categories ) {
|
||||||
|
@ -101,7 +103,7 @@ var histogram = function(label, categories) {
|
||||||
// No need for hasOwnProperty() here: there is no prototype chain.
|
// No need for hasOwnProperty() here: there is no prototype chain.
|
||||||
filterBucket = categoryBucket[kk];
|
filterBucket = categoryBucket[kk];
|
||||||
h.push({
|
h.push({
|
||||||
k: k + ' ' + kk,
|
k: k.charCodeAt(0).toString(2) + ' ' + kk,
|
||||||
n: filterBucket instanceof FilterBucket ? filterBucket.filters.length : 1
|
n: filterBucket instanceof FilterBucket ? filterBucket.filters.length : 1
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -815,16 +817,30 @@ FilterManyWildcardsHostname.fromSelfie = function(s) {
|
||||||
// here because the special treatment would be only for a few specific tokens,
|
// here because the special treatment would be only for a few specific tokens,
|
||||||
// not systematically done for all tokens.
|
// not systematically done for all tokens.
|
||||||
|
|
||||||
// key=?? ad count=657
|
// key= 10000 ad count=660
|
||||||
// key=?? ads count=431
|
// key= 10000 ads count=433
|
||||||
// key=?? mdn count=267
|
// key= 10001 google count=277
|
||||||
// key=?? google count=181
|
// key=1000000 2mdn count=267
|
||||||
// key=?? pagead2 count=166
|
// key= 10000 social count=240
|
||||||
// key=?? doubleclick count=118
|
// key= 10001 pagead2 count=166
|
||||||
// key=?? g count=100
|
// key= 10000 twitter count=122
|
||||||
// key=?? doubleclick count=94
|
// key= 10000 doubleclick count=118
|
||||||
// key=?? js count=88
|
// key= 10000 facebook count=114
|
||||||
// key=?? adv count=88
|
// key= 10000 share count=113
|
||||||
|
// key= 10000 google count=106
|
||||||
|
// key= 10001 code count=103
|
||||||
|
// key= 11000 doubleclick count=100
|
||||||
|
// key=1010001 g count=100
|
||||||
|
// key= 10001 js count= 89
|
||||||
|
// key= 10000 adv count= 88
|
||||||
|
// key= 10000 youtube count= 61
|
||||||
|
// key= 10000 plugins count= 60
|
||||||
|
// key= 10001 partner count= 59
|
||||||
|
// key= 10000 ico count= 57
|
||||||
|
// key= 110001 ssl count= 57
|
||||||
|
// key= 10000 banner count= 53
|
||||||
|
// key= 10000 footer count= 51
|
||||||
|
// key= 10000 rss count= 51
|
||||||
|
|
||||||
var FilterBucket = function(a, b) {
|
var FilterBucket = function(a, b) {
|
||||||
this.f = null;
|
this.f = null;
|
||||||
|
@ -1218,8 +1234,17 @@ FilterParser.prototype.parse = function(s) {
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
|
// Position of one token inside the URL being matched: `beg` is the index of
// the token's first character and `end` the index just past its last one.
// `FilterContainer.prototype.tokenize()` keeps these entries in `this.tokens`
// and overwrites them on every call; an entry whose `end` is 0 is the sentinel
// that terminates the list.
var TokenEntry = function() {
|
||||||

this.beg = 0;
|
||||||

this.end = 0;
|
||||||

};
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
var FilterContainer = function() {
|
var FilterContainer = function() {
|
||||||
this.reAnyToken = /[%0-9a-z]+/g;
|
this.reAnyToken = /[%0-9a-z]+/g;
|
||||||
|
this.tokens = [];
|
||||||
this.buckets = new Array(4);
|
this.buckets = new Array(4);
|
||||||
this.blockedAnyPartyHostnames = new µb.LiquidDict();
|
this.blockedAnyPartyHostnames = new µb.LiquidDict();
|
||||||
this.blocked3rdPartyHostnames = new µb.LiquidDict();
|
this.blocked3rdPartyHostnames = new µb.LiquidDict();
|
||||||
|
@ -1249,7 +1274,7 @@ FilterContainer.prototype.reset = function() {
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
FilterContainer.prototype.freeze = function() {
|
FilterContainer.prototype.freeze = function() {
|
||||||
//histogram('allFilters', this.categories);
|
histogram('allFilters', this.categories);
|
||||||
this.blockedAnyPartyHostnames.freeze();
|
this.blockedAnyPartyHostnames.freeze();
|
||||||
this.blocked3rdPartyHostnames.freeze();
|
this.blocked3rdPartyHostnames.freeze();
|
||||||
this.duplicates = Object.create(null);
|
this.duplicates = Object.create(null);
|
||||||
|
@ -1575,19 +1600,49 @@ FilterContainer.prototype.addToCategory = function(category, tokenKey, filter) {
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
FilterContainer.prototype.matchTokens = function(url) {
|
// Split `url` into tokens (the runs matched by `this.reAnyToken`) and record
// the [beg, end) offsets of each one in `this.tokens`.
// `TokenEntry` objects already present in `this.tokens` are overwritten in
// place; a new one is created only when the array is too short.
// The list is closed by a sentinel entry whose `end` is 0, which is what
// `matchTokens()` tests for to stop iterating. Entries located past the
// sentinel are leftovers from earlier calls.
// Nothing is returned: the result is the updated `this.tokens`.
FilterContainer.prototype.tokenize = function(url) {
|
||||||

var tokens = this.tokens;
|
||||||
var re = this.reAnyToken;
|
var re = this.reAnyToken;
|
||||||
var matches, beg, token, f;
|
var matches, tokenEntry;
|
||||||

// `this.reAnyToken` is a shared global (`g`) regex: restart the scan at the
// beginning of `url`.
re.lastIndex = 0;
|
||||||

var i = 0;
|
||||||

// The assignment in the condition is intentional: loop until exec() yields
// null.
while ( matches = re.exec(url) ) {
|
||||||

tokenEntry = tokens[i];
|
||||||

if ( tokenEntry === undefined ) {
|
||||||

tokenEntry = tokens[i] = new TokenEntry();
|
||||||

}
|
||||||

tokenEntry.beg = matches.index;
|
||||||

tokenEntry.end = re.lastIndex;
|
||||||

i += 1;
|
||||||

}
|
||||||

// Sentinel: a real token always has `end` > 0 (the regex never matches an
// empty string), so `end` set to 0 marks the end of the list.
|
||||||

tokenEntry = tokens[i];
|
||||||

if ( tokenEntry === undefined ) {
|
||||||

tokenEntry = tokens[i] = new TokenEntry();
|
||||||

}
|
||||||

tokenEntry.end = 0;
|
||||||

};
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
FilterContainer.prototype.matchTokens = function(url) {
|
||||||
var buckets = this.buckets;
|
var buckets = this.buckets;
|
||||||
var bucket0 = buckets[0];
|
var bucket0 = buckets[0];
|
||||||
var bucket1 = buckets[1];
|
var bucket1 = buckets[1];
|
||||||
var bucket2 = buckets[2];
|
var bucket2 = buckets[2];
|
||||||
var bucket3 = buckets[3];
|
var bucket3 = buckets[3];
|
||||||
|
|
||||||
re.lastIndex = 0;
|
var tokens = this.tokens;
|
||||||
while ( matches = re.exec(url) ) {
|
var tokenEntry, beg, end, token, f;
|
||||||
beg = matches.index;
|
var i = 0;
|
||||||
token = url.slice(beg, re.lastIndex);
|
for (;;) {
|
||||||
|
tokenEntry = tokens[i++];
|
||||||
|
end = tokenEntry.end;
|
||||||
|
if ( end === 0 ) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
beg = tokenEntry.beg;
|
||||||
|
token = url.slice(beg, end);
|
||||||
if ( bucket0 !== undefined ) {
|
if ( bucket0 !== undefined ) {
|
||||||
f = bucket0[token];
|
f = bucket0[token];
|
||||||
if ( f !== undefined && f.match(url, beg) !== false ) {
|
if ( f !== undefined && f.match(url, beg) !== false ) {
|
||||||
|
@ -1682,6 +1737,10 @@ FilterContainer.prototype.matchStringExactType = function(pageDetails, requestUR
|
||||||
// This will be used by hostname-based filters
|
// This will be used by hostname-based filters
|
||||||
pageHostname = pageDetails.pageHostname || '';
|
pageHostname = pageDetails.pageHostname || '';
|
||||||
|
|
||||||
|
// Tokenize only once
|
||||||
|
this.tokenize(url);
|
||||||
|
|
||||||
|
// We are testing for a specific type, skip "any type" buckets
|
||||||
buckets[0] = buckets[1] = undefined;
|
buckets[0] = buckets[1] = undefined;
|
||||||
|
|
||||||
// https://github.com/gorhill/uBlock/issues/139
|
// https://github.com/gorhill/uBlock/issues/139
|
||||||
|
@ -1763,6 +1822,9 @@ FilterContainer.prototype.matchString = function(pageDetails, requestURL, reques
|
||||||
var categories = this.categories;
|
var categories = this.categories;
|
||||||
var buckets = this.buckets;
|
var buckets = this.buckets;
|
||||||
|
|
||||||
|
// Tokenize only once
|
||||||
|
this.tokenize(url);
|
||||||
|
|
||||||
// https://github.com/gorhill/uBlock/issues/139
|
// https://github.com/gorhill/uBlock/issues/139
|
||||||
// Test against important block filters.
|
// Test against important block filters.
|
||||||
// The purpose of the `important` option is to reverse the order of
|
// The purpose of the `important` option is to reverse the order of
|
||||||
|
|
Loading…
Reference in New Issue