mirror of https://github.com/gorhill/uBlock.git
code review: convert static filtering's tokenizer to a global utility
This commit is contained in:
parent
9d06ae1223
commit
a944873b83
29
.jshintrc
29
.jshintrc
|
@ -1,21 +1,24 @@
|
||||||
{
|
{
|
||||||
"browser": true,
|
"browser": true,
|
||||||
"devel": true,
|
"devel": true,
|
||||||
|
"eqeqeq": true,
|
||||||
"esnext": true,
|
"esnext": true,
|
||||||
"globalstrict": true,
|
|
||||||
"undef": true,
|
|
||||||
"unused": true,
|
|
||||||
"nonew": false,
|
|
||||||
"sub": true,
|
|
||||||
"laxbreak": true,
|
|
||||||
"validthis": true,
|
|
||||||
"newcap": false,
|
|
||||||
"-W058": true, // suppress "Missing '()' invoking a constructor" message
|
|
||||||
"globals": {
|
"globals": {
|
||||||
|
"chrome": false,
|
||||||
|
"Components": false, // global variable in Firefox
|
||||||
|
"safari": false,
|
||||||
"self": false,
|
"self": false,
|
||||||
"vAPI": false,
|
"vAPI": false,
|
||||||
"chrome": false,
|
"µBlock": false
|
||||||
"safari": false,
|
},
|
||||||
"Components": false // global variable in Firefox
|
"globalstrict": true,
|
||||||
}
|
"laxbreak": true,
|
||||||
|
"newcap": false,
|
||||||
|
"nonew": false,
|
||||||
|
"strict": "global",
|
||||||
|
"sub": true,
|
||||||
|
"undef": true,
|
||||||
|
"unused": true,
|
||||||
|
"validthis": true,
|
||||||
|
"-W058": true // suppress "Missing '()' invoking a constructor" message
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
Home: https://github.com/gorhill/uBlock
|
Home: https://github.com/gorhill/uBlock
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* jshint bitwise: false, esnext: true */
|
/* jshint bitwise: false */
|
||||||
/* global punycode, µBlock */
|
/* global punycode */
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
|
@ -1657,19 +1657,10 @@ FilterParser.prototype.makeToken = function() {
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var TokenEntry = function() {
|
|
||||||
this.beg = 0;
|
|
||||||
this.token = '';
|
|
||||||
};
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
var FilterContainer = function() {
|
var FilterContainer = function() {
|
||||||
this.reAnyToken = /[%0-9a-z]+/g;
|
|
||||||
this.reIsGeneric = /[\^\*]/;
|
this.reIsGeneric = /[\^\*]/;
|
||||||
this.tokens = [];
|
|
||||||
this.filterParser = new FilterParser();
|
this.filterParser = new FilterParser();
|
||||||
|
this.urlTokenizer = µb.urlTokenizer;
|
||||||
this.reset();
|
this.reset();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -2254,42 +2245,6 @@ FilterContainer.prototype.filterRegexFromCompiled = function(compiled, flags) {
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
// Since the addition of the `important` evaluation, this means it is now
|
|
||||||
// likely that the url will have to be scanned more than once. So this is
|
|
||||||
// to ensure we do it once only, and reuse results.
|
|
||||||
|
|
||||||
FilterContainer.prototype.tokenize = function(url) {
|
|
||||||
var tokens = this.tokens;
|
|
||||||
var re = this.reAnyToken;
|
|
||||||
var matches, tokenEntry;
|
|
||||||
re.lastIndex = 0;
|
|
||||||
var i = 0;
|
|
||||||
while ( (matches = re.exec(url)) ) {
|
|
||||||
tokenEntry = tokens[i];
|
|
||||||
if ( tokenEntry === undefined ) {
|
|
||||||
tokenEntry = tokens[i] = new TokenEntry();
|
|
||||||
}
|
|
||||||
tokenEntry.beg = matches.index;
|
|
||||||
tokenEntry.token = matches[0];
|
|
||||||
i += 1;
|
|
||||||
|
|
||||||
// https://github.com/chrisaljoudi/uBlock/issues/1118
|
|
||||||
// Crazy case... but I guess we have to expect the worst...
|
|
||||||
if ( i === 2048 ) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sentinel
|
|
||||||
tokenEntry = tokens[i];
|
|
||||||
if ( tokenEntry === undefined ) {
|
|
||||||
tokenEntry = tokens[i] = new TokenEntry();
|
|
||||||
}
|
|
||||||
tokenEntry.token = '';
|
|
||||||
};
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
FilterContainer.prototype.matchTokens = function(bucket, url) {
|
FilterContainer.prototype.matchTokens = function(bucket, url) {
|
||||||
// Hostname-only filters
|
// Hostname-only filters
|
||||||
var f = bucket['.'];
|
var f = bucket['.'];
|
||||||
|
@ -2299,7 +2254,7 @@ FilterContainer.prototype.matchTokens = function(bucket, url) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
var tokens = this.tokens;
|
var tokens = this.urlTokenizer.getTokens();
|
||||||
var tokenEntry, token;
|
var tokenEntry, token;
|
||||||
var i = 0;
|
var i = 0;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
@ -2336,22 +2291,20 @@ FilterContainer.prototype.matchTokens = function(bucket, url) {
|
||||||
// not the generic handling.
|
// not the generic handling.
|
||||||
|
|
||||||
FilterContainer.prototype.matchStringExactType = function(context, requestURL, requestType) {
|
FilterContainer.prototype.matchStringExactType = function(context, requestURL, requestType) {
|
||||||
var url = requestURL.toLowerCase();
|
|
||||||
|
|
||||||
// These registers will be used by various filters
|
|
||||||
pageHostnameRegister = context.pageHostname || '';
|
|
||||||
requestHostnameRegister = µb.URI.hostnameFromURI(requestURL);
|
|
||||||
|
|
||||||
var party = isFirstParty(context.pageDomain, requestHostnameRegister) ? FirstParty : ThirdParty;
|
|
||||||
|
|
||||||
// Be prepared to support unknown types
|
// Be prepared to support unknown types
|
||||||
var type = typeNameToTypeValue[requestType] || 0;
|
var type = typeNameToTypeValue[requestType] || 0;
|
||||||
if ( type === 0 ) {
|
if ( type === 0 ) {
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tokenize only once
|
// Prime tokenizer: we get a normalized URL in return.
|
||||||
this.tokenize(url);
|
var url = this.urlTokenizer.setURL(requestURL);
|
||||||
|
|
||||||
|
// These registers will be used by various filters
|
||||||
|
pageHostnameRegister = context.pageHostname || '';
|
||||||
|
requestHostnameRegister = µb.URI.hostnameFromURI(url);
|
||||||
|
|
||||||
|
var party = isFirstParty(context.pageDomain, requestHostnameRegister) ? FirstParty : ThirdParty;
|
||||||
|
|
||||||
this.fRegister = null;
|
this.fRegister = null;
|
||||||
|
|
||||||
|
@ -2426,12 +2379,6 @@ FilterContainer.prototype.matchString = function(context) {
|
||||||
return this.matchStringExactType(context, context.requestURL, context.requestType);
|
return this.matchStringExactType(context, context.requestURL, context.requestType);
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://github.com/gorhill/httpswitchboard/issues/239
|
|
||||||
// Convert url to lower case:
|
|
||||||
// `match-case` option not supported, but then, I saw only one
|
|
||||||
// occurrence of it in all the supported lists (bulgaria list).
|
|
||||||
var url = context.requestURL.toLowerCase();
|
|
||||||
|
|
||||||
// The logic here is simple:
|
// The logic here is simple:
|
||||||
//
|
//
|
||||||
// block = !whitelisted && blacklisted
|
// block = !whitelisted && blacklisted
|
||||||
|
@ -2453,14 +2400,13 @@ FilterContainer.prototype.matchString = function(context) {
|
||||||
// filters are tested *only* if there is a (unlikely) hit on a block
|
// filters are tested *only* if there is a (unlikely) hit on a block
|
||||||
// filter.
|
// filter.
|
||||||
|
|
||||||
|
// Prime tokenizer: we get a normalized URL in return.
|
||||||
|
var url = this.urlTokenizer.setURL(context.requestURL);
|
||||||
|
|
||||||
// These registers will be used by various filters
|
// These registers will be used by various filters
|
||||||
pageHostnameRegister = context.pageHostname || '';
|
pageHostnameRegister = context.pageHostname || '';
|
||||||
requestHostnameRegister = context.requestHostname;
|
requestHostnameRegister = context.requestHostname;
|
||||||
|
|
||||||
// Tokenize only once
|
|
||||||
this.tokenize(url);
|
|
||||||
|
|
||||||
this.fRegister = null;
|
this.fRegister = null;
|
||||||
|
|
||||||
var party = isFirstParty(context.pageDomain, context.requestHostname) ? FirstParty : ThirdParty;
|
var party = isFirstParty(context.pageDomain, context.requestHostname) ? FirstParty : ThirdParty;
|
||||||
|
|
|
@ -769,7 +769,7 @@ vAPI.tabs.registerListeners();
|
||||||
if ( pageStore !== null ) {
|
if ( pageStore !== null ) {
|
||||||
state = pageStore.getNetFilteringSwitch();
|
state = pageStore.getNetFilteringSwitch();
|
||||||
if ( state && this.userSettings.showIconBadge && pageStore.perLoadBlockedRequestCount ) {
|
if ( state && this.userSettings.showIconBadge && pageStore.perLoadBlockedRequestCount ) {
|
||||||
badge = this.utils.formatCount(pageStore.perLoadBlockedRequestCount);
|
badge = this.formatCount(pageStore.perLoadBlockedRequestCount);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
/*******************************************************************************
|
/*******************************************************************************
|
||||||
|
|
||||||
µBlock - a browser extension to block requests.
|
uBlock Origin - a browser extension to block requests.
|
||||||
Copyright (C) 2014 Raymond Hill
|
Copyright (C) 2014-2015 Raymond Hill
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -19,22 +19,87 @@
|
||||||
Home: https://github.com/gorhill/uBlock
|
Home: https://github.com/gorhill/uBlock
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* global µBlock */
|
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
// This will be inserted as a module in the µBlock object.
|
// A standalone URL tokenizer will allow us to use URL tokens in more than
|
||||||
|
// just the static filtering engine. This opens the door to optimizing other
|
||||||
|
// filtering engine parts aside from static filtering. This also allows:
|
||||||
|
// - Tokenize only on demand.
|
||||||
|
// - To potentially avoid tokenizing when the same URL is fed to the tokenizer.
|
||||||
|
// - Benchmarking shows this to be a common occurrence.
|
||||||
|
|
||||||
µBlock.utils = (function() {
|
µBlock.urlTokenizer = {
|
||||||
|
setURL: function(url) {
|
||||||
|
if ( url !== this._urlIn ) {
|
||||||
|
this._urlIn = url;
|
||||||
|
this._urlOut = url.toLowerCase();
|
||||||
|
this._tokenized = false;
|
||||||
|
}
|
||||||
|
return this._urlOut;
|
||||||
|
},
|
||||||
|
|
||||||
|
// Tokenize on demand.
|
||||||
|
getTokens: function() {
|
||||||
|
if ( this._tokenized === false ) {
|
||||||
|
this._tokenize();
|
||||||
|
this._tokenized = true;
|
||||||
|
}
|
||||||
|
return this._tokens;
|
||||||
|
},
|
||||||
|
|
||||||
|
isTokenized: function() {
|
||||||
|
return this._tokens !== null && this._tokens[0].token !== '';
|
||||||
|
},
|
||||||
|
|
||||||
|
_Entry: function() {
|
||||||
|
this.beg = 0;
|
||||||
|
this.token = '';
|
||||||
|
},
|
||||||
|
|
||||||
|
// https://github.com/chrisaljoudi/uBlock/issues/1118
|
||||||
|
// We limit to a maximum number of tokens.
|
||||||
|
_init: function() {
|
||||||
|
this._tokens = new Array(2048);
|
||||||
|
for ( var i = 0; i < 2048; i++ ) {
|
||||||
|
this._tokens[i] = new this._Entry();
|
||||||
|
}
|
||||||
|
|
||||||
|
this._init = null;
|
||||||
|
},
|
||||||
|
|
||||||
|
_tokenize: function() {
|
||||||
|
var tokens = this._tokens,
|
||||||
|
re = this._reAnyToken,
|
||||||
|
url = this._urlOut;
|
||||||
|
var matches, entry;
|
||||||
|
re.lastIndex = 0;
|
||||||
|
|
||||||
|
for ( var i = 0; i < 2047; i++ ) {
|
||||||
|
matches = re.exec(url);
|
||||||
|
if ( matches === null ) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
entry = tokens[i];
|
||||||
|
entry.beg = matches.index;
|
||||||
|
entry.token = matches[0];
|
||||||
|
}
|
||||||
|
tokens[i].token = ''; // Sentinel
|
||||||
|
},
|
||||||
|
|
||||||
|
_urlIn: '',
|
||||||
|
_urlOut: '',
|
||||||
|
_tokenized: false,
|
||||||
|
_tokens: null,
|
||||||
|
_reAnyToken: /[%0-9a-z]+/g
|
||||||
|
};
|
||||||
|
|
||||||
|
µBlock.urlTokenizer._init();
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
var exports = {};
|
µBlock.formatCount = function(count) {
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
exports.formatCount = function(count) {
|
|
||||||
if ( typeof count !== 'number' ) {
|
if ( typeof count !== 'number' ) {
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
@ -58,11 +123,3 @@ exports.formatCount = function(count) {
|
||||||
// https://www.youtube.com/watch?v=DyvzfyqYm_s
|
// https://www.youtube.com/watch?v=DyvzfyqYm_s
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
return exports;
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
})();
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
Loading…
Reference in New Issue