uBlock/js/net-filtering.js

1839 lines
57 KiB
JavaScript
Raw Normal View History

2014-06-23 16:42:43 -06:00
/*******************************************************************************
µBlock - a Chromium browser extension to block requests.
Copyright (C) 2014 Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see {http://www.gnu.org/licenses/}.
Home: https://github.com/gorhill/uBlock
*/
/* jshint esnext: true, bitwise: false */
/* global µBlock */
/******************************************************************************/
2014-09-08 15:46:58 -06:00
µBlock.netFilteringEngine = (function(){
2014-06-23 16:42:43 -06:00
/******************************************************************************/
2014-09-14 14:20:40 -06:00
// Short local alias for the µBlock global object.
var µb = µBlock;
2014-07-14 09:24:59 -06:00
// Bit layout of a filter category key:
//
// fedcba9876543210
// |      |   | |||
// |      |   | ||+---- bit 0: [BlockAction | AllowAction]
// |      |   | |+----- bit 1: `important`
// |      |   | +------ bit 2-3: party [0 - 3]
// |      |   +-------- bit 4-7: type [0 - 15]
// |      +------------ bit 8-14: unused
// +------------------- bit 15: never use! (to ensure valid unicode character)

// Action (bit 0).
const BlockAction = 0 << 0;
const AllowAction = 1 << 0;
// XOR-ing a key with this value flips it between block and allow.
const ToggleAction = BlockAction ^ AllowAction;

// `important` flag (bit 1).
const Important = 1 << 1;

// Party (bits 2-3).
const AnyParty = 0 << 2;
const FirstParty = 1 << 2;
const ThirdParty = 2 << 2;
const SpecificParty = 3 << 2;

// Type (bits 4-7). `AnyType` must be declared before the composite keys
// below: they fold it in at declaration time.
const AnyType = 1 << 4;
var typeNameToTypeValue = {
    'stylesheet': 2 << 4,
    'image': 3 << 4,
    'object': 4 << 4,
    'script': 5 << 4,
    'xmlhttprequest': 6 << 4,
    'sub_frame': 7 << 4,
    'other': 8 << 4,
    'popup': 9 << 4
};

// Pre-computed composite keys.
const BlockAnyTypeAnyParty = BlockAction | AnyType | AnyParty;
const BlockAnyType1stParty = BlockAction | AnyType | FirstParty;
const BlockAnyType3rdParty = BlockAction | AnyType | ThirdParty;
const BlockAnyTypeOneParty = BlockAction | AnyType | SpecificParty;
const BlockAnyType = BlockAction | AnyType;
const BlockAnyParty = BlockAction | AnyParty;
const BlockOneParty = BlockAction | SpecificParty;
const AllowAnyTypeAnyParty = AllowAction | AnyType | AnyParty;
const AllowAnyType1stParty = AllowAction | AnyType | FirstParty;
const AllowAnyType3rdParty = AllowAction | AnyType | ThirdParty;
const AllowAnyTypeOneParty = AllowAction | AnyType | SpecificParty;
const AllowAnyType = AllowAction | AnyType;
const AllowAnyParty = AllowAction | AnyParty;
const AllowOneParty = AllowAction | SpecificParty;
2014-06-23 16:42:43 -06:00
// Read by the hostname-restricted filter classes when matching; presumably the
// hostname of the page issuing the request (it is assigned outside this
// excerpt — confirm against the caller).
var pageHostname = '';
// Matches a string made only of whitespace (at least one character; the empty
// string itself does not match).
var reIgnoreEmpty = /^\s+$/;
// Matches a string starting with `[` or `!`.
var reIgnoreComment = /^\[|^!/;
// Matches a string of at least three characters from [0-9a-z.-] which starts
// and ends with an alphanumeric character.
var reHostnameRule = /^[0-9a-z][0-9a-z.-]+[0-9a-z]$/;
// Matches the leading run of [0-9a-z] characters. Global flag: `lastIndex`
// must be reset before each use.
var reHostnameToken = /^[0-9a-z]+/g;
// Matches a run of two or more characters from [%0-9a-z]. Global flag:
// `lastIndex` must be reset before each scan.
var reGoodToken = /[%0-9a-z]{2,}/g;
2014-09-19 08:59:44 -06:00
// Matches `/`, `?` or `#`. FilterPlainHnAnchored uses it to reject a match when
// one of these characters sits between `://` and the token.
var reURLPostHostnameAnchors = /[\/?#]/;
2014-06-23 16:42:43 -06:00
// ABP filters: https://adblockplus.org/en/filters
// regex tester: http://regex101.com/
/******************************************************************************/
/*
var histogram = function(label, categories) {
var h = [],
categoryBucket;
for ( var k in categories ) {
if ( categories.hasOwnProperty(k) === false ) {
continue;
}
categoryBucket = categories[k];
for ( var kk in categoryBucket ) {
if ( categoryBucket.hasOwnProperty(kk) === false ) {
continue;
}
filterBucket = categoryBucket[kk];
h.push({
k: k + ' ' + kk,
n: filterBucket instanceof FilterBucket ? filterBucket.filters.length : 1
});
}
}
console.log('Histogram %s', label);
var total = h.length;
h.sort(function(a, b) { return b.n - a.n; });
// Find indices of entries of interest
var target = 2;
for ( var i = 0; i < total; i++ ) {
if ( h[i].n === target ) {
console.log('\tEntries with only %d filter(s) start at index %s (key = "%s")', target, i, h[i].k);
target -= 1;
}
}
h = h.slice(0, 50);
h.forEach(function(v) {
console.log('\tkey=%s count=%d', v.k, v.n);
});
console.log('\tTotal buckets count: %d', total);
};
*/
2014-09-08 15:46:58 -06:00
/******************************************************************************/
// Could be replaced with encodeURIComponent/decodeURIComponent,
// which seems faster on Firefox.
// Local aliases of the built-in serializers and of parseInt.
var encode = JSON.stringify,
    decode = JSON.parse,
    cachedParseInt = parseInt;

// Convert a string to an integer, always using radix 10.
var atoi = function(text) {
    return cachedParseInt(text, 10);
};
2014-06-23 16:42:43 -06:00
/*******************************************************************************

Filters family tree:

- plain (no wildcard)
    - anywhere
        - no hostname
        - specific hostname
    - anchored at start
        - no hostname
        - specific hostname
    - anchored at end
        - no hostname
        - specific hostname
    - anchored within hostname
        - no hostname
        - specific hostname (not implemented)

- one wildcard
    - anywhere
        - no hostname
        - specific hostname
    - anchored at start
        - no hostname
        - specific hostname
    - anchored at end
        - no hostname
        - specific hostname
    - anchored within hostname
        - no hostname (not implemented)
        - specific hostname (not implemented)

- more than one wildcard
    - anywhere
        - no hostname
        - specific hostname
    - anchored at start
        - no hostname
        - specific hostname
    - anchored at end
        - no hostname
        - specific hostname
    - anchored within hostname
        - no hostname (not implemented)
        - specific hostname (not implemented)

*/
/******************************************************************************/
// Plain filter (no wildcard, no anchor).
//   s: the literal pattern
//   tokenBeg: offset of the filter's token within `s`
var FilterPlain = function(s, tokenBeg) {
    this.s = s;
    this.tokenBeg = tokenBeg;
};

// `tokenBeg` is the offset of the token within `url`. The pattern is compared
// against the URL at the position which lines both tokens up.
FilterPlain.prototype.match = function(url, tokenBeg) {
    return url.substr(tokenBeg - this.tokenBeg, this.s.length) === this.s;
};

// Short id string of this filter class.
FilterPlain.prototype.fid = 'a';

FilterPlain.prototype.toString = function() {
    return this.s;
};

// Serialized form: pattern, TAB, token offset.
FilterPlain.prototype.toSelfie = function() {
    return this.s + '\t' +
           this.tokenBeg;
};

// Rebuild an instance from the string produced by toSelfie().
FilterPlain.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterPlain(s.slice(0, pos), atoi(s.slice(pos + 1)));
};
2014-08-28 07:59:05 -06:00
/******************************************************************************/
2014-06-23 16:42:43 -06:00
// Plain filter (no wildcard, no anchor) restricted to a page hostname.
//   s: the literal pattern
//   tokenBeg: offset of the filter's token within `s`
//   hostname: value of the `domain=` option
var FilterPlainHostname = function(s, tokenBeg, hostname) {
    this.s = s;
    this.tokenBeg = tokenBeg;
    this.hostname = hostname;
};

// Matches when `pageHostname` ends with the filter's hostname and the pattern
// is found in `url` aligned on the token at `tokenBeg`.
// NOTE(review): the hostname test is a plain suffix comparison, there is no
// check that the suffix starts on a label boundary.
FilterPlainHostname.prototype.match = function(url, tokenBeg) {
    return pageHostname.slice(-this.hostname.length) === this.hostname &&
           url.substr(tokenBeg - this.tokenBeg, this.s.length) === this.s;
};

// Short id string of this filter class.
FilterPlainHostname.prototype.fid = 'ah';

FilterPlainHostname.prototype.toString = function() {
    return this.s + '$domain=' + this.hostname;
};

// Serialized form: pattern, token offset and hostname, TAB-separated.
FilterPlainHostname.prototype.toSelfie = function() {
    return this.s + '\t' +
           this.tokenBeg + '\t' +
           this.hostname;
};

// Rebuild an instance from the string produced by toSelfie().
FilterPlainHostname.fromSelfie = function(s) {
    var args = s.split('\t');
    return new FilterPlainHostname(args[0], atoi(args[1]), args[2]);
};
2014-06-23 16:42:43 -06:00
/******************************************************************************/
// Plain filter whose pattern starts exactly at its token (token offset 0),
// so no offset needs to be stored.
var FilterPlainPrefix0 = function(s) {
    this.s = s;
};

// `tokenBeg` is the offset of the token within `url`.
FilterPlainPrefix0.prototype.match = function(url, tokenBeg) {
    return url.substr(tokenBeg, this.s.length) === this.s;
};

// Short id string of this filter class.
FilterPlainPrefix0.prototype.fid = '0a';

FilterPlainPrefix0.prototype.toString = function() {
    return this.s;
};

// Serialized form: the pattern itself.
FilterPlainPrefix0.prototype.toSelfie = function() {
    return this.s;
};

// Rebuild an instance from the string produced by toSelfie().
FilterPlainPrefix0.fromSelfie = function(s) {
    return new FilterPlainPrefix0(s);
};
2014-08-28 07:59:05 -06:00
/******************************************************************************/
2014-06-23 16:42:43 -06:00
// Plain filter whose pattern starts exactly at its token (token offset 0),
// restricted to a page hostname.
var FilterPlainPrefix0Hostname = function(s, hostname) {
    this.s = s;
    this.hostname = hostname;
};

// Matches when `pageHostname` ends with the filter's hostname and the pattern
// is found in `url` at `tokenBeg`.
// NOTE(review): the hostname test is a plain suffix comparison, there is no
// check that the suffix starts on a label boundary.
FilterPlainPrefix0Hostname.prototype.match = function(url, tokenBeg) {
    return pageHostname.slice(-this.hostname.length) === this.hostname &&
           url.substr(tokenBeg, this.s.length) === this.s;
};

// Short id string of this filter class.
FilterPlainPrefix0Hostname.prototype.fid = '0ah';

FilterPlainPrefix0Hostname.prototype.toString = function() {
    return this.s + '$domain=' + this.hostname;
};

// Serialized form: pattern, TAB, hostname.
FilterPlainPrefix0Hostname.prototype.toSelfie = function() {
    return this.s + '\t' +
           this.hostname;
};

// Rebuild an instance from the string produced by toSelfie().
FilterPlainPrefix0Hostname.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterPlainPrefix0Hostname(s.slice(0, pos), s.slice(pos + 1));
};
2014-06-23 16:42:43 -06:00
/******************************************************************************/
// Plain filter whose pattern starts one character before its token (token
// offset 1), so no offset needs to be stored.
var FilterPlainPrefix1 = function(s) {
    this.s = s;
};

// `tokenBeg` is the offset of the token within `url`.
FilterPlainPrefix1.prototype.match = function(url, tokenBeg) {
    return url.substr(tokenBeg - 1, this.s.length) === this.s;
};

// Short id string of this filter class.
FilterPlainPrefix1.prototype.fid = '1a';

FilterPlainPrefix1.prototype.toString = function() {
    return this.s;
};

// Serialized form: the pattern itself.
FilterPlainPrefix1.prototype.toSelfie = function() {
    return this.s;
};

// Rebuild an instance from the string produced by toSelfie().
FilterPlainPrefix1.fromSelfie = function(s) {
    return new FilterPlainPrefix1(s);
};
2014-08-28 07:59:05 -06:00
/******************************************************************************/
2014-06-23 16:42:43 -06:00
// Plain filter whose pattern starts one character before its token (token
// offset 1), restricted to a page hostname.
var FilterPlainPrefix1Hostname = function(s, hostname) {
    this.s = s;
    this.hostname = hostname;
};

// Matches when `pageHostname` ends with the filter's hostname and the pattern
// is found in `url` one character before `tokenBeg`.
// NOTE(review): the hostname test is a plain suffix comparison, there is no
// check that the suffix starts on a label boundary.
FilterPlainPrefix1Hostname.prototype.match = function(url, tokenBeg) {
    return pageHostname.slice(-this.hostname.length) === this.hostname &&
           url.substr(tokenBeg - 1, this.s.length) === this.s;
};

// Short id string of this filter class.
FilterPlainPrefix1Hostname.prototype.fid = '1ah';

FilterPlainPrefix1Hostname.prototype.toString = function() {
    return this.s + '$domain=' + this.hostname;
};

// Serialized form: pattern, TAB, hostname.
FilterPlainPrefix1Hostname.prototype.toSelfie = function() {
    return this.s + '\t' +
           this.hostname;
};

// Rebuild an instance from the string produced by toSelfie().
FilterPlainPrefix1Hostname.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterPlainPrefix1Hostname(s.slice(0, pos), s.slice(pos + 1));
};
2014-06-23 16:42:43 -06:00
/******************************************************************************/
// Plain filter anchored at the start of the URL (`|pattern`).
var FilterPlainLeftAnchored = function(s) {
    this.s = s;
};

// Matches when `url` begins with the pattern.
FilterPlainLeftAnchored.prototype.match = function(url) {
    return url.slice(0, this.s.length) === this.s;
};

// Short id string of this filter class.
FilterPlainLeftAnchored.prototype.fid = '|a';

FilterPlainLeftAnchored.prototype.toString = function() {
    return '|' + this.s;
};

// Serialized form: the pattern itself.
FilterPlainLeftAnchored.prototype.toSelfie = function() {
    return this.s;
};

// Rebuild an instance from the string produced by toSelfie().
FilterPlainLeftAnchored.fromSelfie = function(s) {
    return new FilterPlainLeftAnchored(s);
};
2014-08-28 07:59:05 -06:00
/******************************************************************************/
2014-06-23 16:42:43 -06:00
// Plain filter anchored at the start of the URL (`|pattern`), restricted to a
// page hostname.
var FilterPlainLeftAnchoredHostname = function(s, hostname) {
    this.s = s;
    this.hostname = hostname;
};

// Matches when `pageHostname` ends with the filter's hostname and `url`
// begins with the pattern.
// NOTE(review): the hostname test is a plain suffix comparison, there is no
// check that the suffix starts on a label boundary.
FilterPlainLeftAnchoredHostname.prototype.match = function(url) {
    return pageHostname.slice(-this.hostname.length) === this.hostname &&
           url.slice(0, this.s.length) === this.s;
};

// Short id string of this filter class.
FilterPlainLeftAnchoredHostname.prototype.fid = '|ah';

FilterPlainLeftAnchoredHostname.prototype.toString = function() {
    return '|' + this.s + '$domain=' + this.hostname;
};

// Serialized form: pattern, TAB, hostname.
FilterPlainLeftAnchoredHostname.prototype.toSelfie = function() {
    return this.s + '\t' +
           this.hostname;
};

// Rebuild an instance from the string produced by toSelfie().
FilterPlainLeftAnchoredHostname.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterPlainLeftAnchoredHostname(s.slice(0, pos), s.slice(pos + 1));
};
2014-06-23 16:42:43 -06:00
/******************************************************************************/
// Plain filter anchored at the end of the URL (`pattern|`).
var FilterPlainRightAnchored = function(s) {
    this.s = s;
};

// Matches when `url` ends with the pattern.
FilterPlainRightAnchored.prototype.match = function(url) {
    return url.slice(-this.s.length) === this.s;
};

// Short id string of this filter class.
FilterPlainRightAnchored.prototype.fid = 'a|';

FilterPlainRightAnchored.prototype.toString = function() {
    return this.s + '|';
};

// Serialized form: the pattern itself.
FilterPlainRightAnchored.prototype.toSelfie = function() {
    return this.s;
};

// Rebuild an instance from the string produced by toSelfie().
FilterPlainRightAnchored.fromSelfie = function(s) {
    return new FilterPlainRightAnchored(s);
};
2014-08-28 07:59:05 -06:00
/******************************************************************************/
2014-06-23 16:42:43 -06:00
// Plain filter anchored at the end of the URL (`pattern|`), restricted to a
// page hostname.
var FilterPlainRightAnchoredHostname = function(s, hostname) {
    this.s = s;
    this.hostname = hostname;
};

// Matches when `pageHostname` ends with the filter's hostname and `url` ends
// with the pattern.
// NOTE(review): the hostname test is a plain suffix comparison, there is no
// check that the suffix starts on a label boundary.
FilterPlainRightAnchoredHostname.prototype.match = function(url) {
    return pageHostname.slice(-this.hostname.length) === this.hostname &&
           url.slice(-this.s.length) === this.s;
};

// Short id string of this filter class.
FilterPlainRightAnchoredHostname.prototype.fid = 'a|h';

FilterPlainRightAnchoredHostname.prototype.toString = function() {
    return this.s + '|$domain=' + this.hostname;
};

// Serialized form: pattern, TAB, hostname.
FilterPlainRightAnchoredHostname.prototype.toSelfie = function() {
    return this.s + '\t' +
           this.hostname;
};

// Rebuild an instance from the string produced by toSelfie().
FilterPlainRightAnchoredHostname.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterPlainRightAnchoredHostname(s.slice(0, pos), s.slice(pos + 1));
};
2014-06-23 16:42:43 -06:00
/******************************************************************************/
2014-09-19 08:59:44 -06:00
// https://github.com/gorhill/uBlock/issues/235
// Plain filter of the `||pattern` kind: the pattern must start at the token,
// and the token must sit inside the hostname part of the URL.
var FilterPlainHnAnchored = function(s) {
    this.s = s;
};

FilterPlainHnAnchored.prototype.match = function(url, tokenBeg) {
    var needle = this.s;
    if ( url.substr(tokenBeg, needle.length) === needle ) {
        var schemeEnd = url.indexOf('://');
        if ( schemeEnd === -1 ) {
            return false;
        }
        // Whatever lies between `://` and the token must be free of
        // characters which can only appear after the hostname.
        var leftOfToken = url.slice(schemeEnd + 3, tokenBeg);
        return reURLPostHostnameAnchors.test(leftOfToken) === false;
    }
    return false;
};

// Short id string of this filter class.
FilterPlainHnAnchored.prototype.fid = 'h|a';

FilterPlainHnAnchored.prototype.toString = function() {
    return '||' + this.s;
};

// Serialized form: the pattern itself.
FilterPlainHnAnchored.prototype.toSelfie = function() {
    return this.s;
};

// Rebuild an instance from the string produced by toSelfie().
FilterPlainHnAnchored.fromSelfie = function(selfie) {
    return new FilterPlainHnAnchored(selfie);
};
// https://www.youtube.com/watch?v=71YS6xDB-E4
/******************************************************************************/
2014-06-23 16:42:43 -06:00
// With a single wildcard, regex is not optimal.
// See:
// http://jsperf.com/regexp-vs-indexof-abp-miss/3
// http://jsperf.com/regexp-vs-indexof-abp-hit/3
2014-09-08 15:46:58 -06:00
// Filter with exactly one wildcard, not anchored.
//   lSegment: literal text to the left of the wildcard
//   rSegment: literal text to the right of the wildcard
//   tokenBeg: offset of the filter's token within the filter text
var FilterSingleWildcard = function(lSegment, rSegment, tokenBeg) {
    this.tokenBeg = tokenBeg;
    this.lSegment = lSegment;
    this.rSegment = rSegment;
};

// The left segment must be found at the position which lines up the tokens,
// and the right segment anywhere after the left segment.
FilterSingleWildcard.prototype.match = function(url, tokenBeg) {
    tokenBeg -= this.tokenBeg;
    return url.substr(tokenBeg, this.lSegment.length) === this.lSegment &&
           url.indexOf(this.rSegment, tokenBeg + this.lSegment.length) > 0;
};

// Short id string of this filter class.
FilterSingleWildcard.prototype.fid = '*';

FilterSingleWildcard.prototype.toString = function() {
    return this.lSegment + '*' + this.rSegment;
};

// Serialized form: left segment, right segment and token offset,
// TAB-separated.
FilterSingleWildcard.prototype.toSelfie = function() {
    return this.lSegment + '\t' +
           this.rSegment + '\t' +
           this.tokenBeg;
};

// Rebuild an instance from the string produced by toSelfie().
FilterSingleWildcard.fromSelfie = function(s) {
    var args = s.split('\t');
    return new FilterSingleWildcard(args[0], args[1], atoi(args[2]));
};
2014-08-28 07:59:05 -06:00
/******************************************************************************/
2014-09-08 15:46:58 -06:00
// Filter with exactly one wildcard, not anchored, restricted to a page
// hostname.
//   lSegment: literal text to the left of the wildcard
//   rSegment: literal text to the right of the wildcard
//   tokenBeg: offset of the filter's token within the filter text
//   hostname: value of the `domain=` option
var FilterSingleWildcardHostname = function(lSegment, rSegment, tokenBeg, hostname) {
    this.tokenBeg = tokenBeg;
    this.lSegment = lSegment;
    this.rSegment = rSegment;
    this.hostname = hostname;
};

// Matches when `pageHostname` ends with the filter's hostname, the left
// segment is found aligned on the token, and the right segment is found
// anywhere after the left segment.
// NOTE(review): the hostname test is a plain suffix comparison, there is no
// check that the suffix starts on a label boundary.
FilterSingleWildcardHostname.prototype.match = function(url, tokenBeg) {
    tokenBeg -= this.tokenBeg;
    return pageHostname.slice(-this.hostname.length) === this.hostname &&
           url.substr(tokenBeg, this.lSegment.length) === this.lSegment &&
           url.indexOf(this.rSegment, tokenBeg + this.lSegment.length) > 0;
};

// Short id string of this filter class.
FilterSingleWildcardHostname.prototype.fid = '*h';

FilterSingleWildcardHostname.prototype.toString = function() {
    return this.lSegment + '*' + this.rSegment + '$domain=' + this.hostname;
};

// Serialized form: left segment, right segment, token offset and hostname,
// TAB-separated.
FilterSingleWildcardHostname.prototype.toSelfie = function() {
    return this.lSegment + '\t' +
           this.rSegment + '\t' +
           this.tokenBeg + '\t' +
           this.hostname;
};

// Rebuild an instance from the string produced by toSelfie().
FilterSingleWildcardHostname.fromSelfie = function(s) {
    var args = s.split('\t');
    return new FilterSingleWildcardHostname(args[0], args[1], atoi(args[2]), args[3]);
};
2014-06-23 16:42:43 -06:00
/******************************************************************************/
2014-09-08 15:46:58 -06:00
// Filter with exactly one wildcard, not anchored, whose text starts exactly
// at its token (token offset 0).
//   lSegment: literal text to the left of the wildcard
//   rSegment: literal text to the right of the wildcard
var FilterSingleWildcardPrefix0 = function(lSegment, rSegment) {
    this.lSegment = lSegment;
    this.rSegment = rSegment;
};

// The left segment must start at `tokenBeg`, and the right segment must be
// found anywhere after the left segment.
FilterSingleWildcardPrefix0.prototype.match = function(url, tokenBeg) {
    return url.substr(tokenBeg, this.lSegment.length) === this.lSegment &&
           url.indexOf(this.rSegment, tokenBeg + this.lSegment.length) > 0;
};

// Short id string of this filter class.
FilterSingleWildcardPrefix0.prototype.fid = '0*';

FilterSingleWildcardPrefix0.prototype.toString = function() {
    return this.lSegment + '*' + this.rSegment;
};

// Serialized form: left segment, TAB, right segment.
FilterSingleWildcardPrefix0.prototype.toSelfie = function() {
    return this.lSegment + '\t' +
           this.rSegment;
};

// Rebuild an instance from the string produced by toSelfie().
FilterSingleWildcardPrefix0.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterSingleWildcardPrefix0(s.slice(0, pos), s.slice(pos + 1));
};
2014-08-28 07:59:05 -06:00
/******************************************************************************/
2014-09-08 15:46:58 -06:00
// Filter with exactly one wildcard, not anchored, whose text starts exactly
// at its token (token offset 0), restricted to a page hostname.
//   lSegment: literal text to the left of the wildcard
//   rSegment: literal text to the right of the wildcard
//   hostname: value of the `domain=` option
var FilterSingleWildcardPrefix0Hostname = function(lSegment, rSegment, hostname) {
    this.lSegment = lSegment;
    this.rSegment = rSegment;
    this.hostname = hostname;
};

// Matches when `pageHostname` ends with the filter's hostname, the left
// segment starts at `tokenBeg`, and the right segment is found anywhere after
// the left segment.
// NOTE(review): the hostname test is a plain suffix comparison, there is no
// check that the suffix starts on a label boundary.
FilterSingleWildcardPrefix0Hostname.prototype.match = function(url, tokenBeg) {
    return pageHostname.slice(-this.hostname.length) === this.hostname &&
           url.substr(tokenBeg, this.lSegment.length) === this.lSegment &&
           url.indexOf(this.rSegment, tokenBeg + this.lSegment.length) > 0;
};

// Short id string of this filter class.
FilterSingleWildcardPrefix0Hostname.prototype.fid = '0*h';

FilterSingleWildcardPrefix0Hostname.prototype.toString = function() {
    return this.lSegment + '*' + this.rSegment + '$domain=' + this.hostname;
};

// Serialized form: left segment, right segment and hostname, TAB-separated.
FilterSingleWildcardPrefix0Hostname.prototype.toSelfie = function() {
    return this.lSegment + '\t' +
           this.rSegment + '\t' +
           this.hostname;
};

// Rebuild an instance from the string produced by toSelfie().
FilterSingleWildcardPrefix0Hostname.fromSelfie = function(s) {
    var args = s.split('\t');
    return new FilterSingleWildcardPrefix0Hostname(args[0], args[1], args[2]);
};
2014-06-23 16:42:43 -06:00
2014-09-08 15:46:58 -06:00
/******************************************************************************/
// Filter with exactly one wildcard, anchored at the start of the URL
// (`|left*right`).
//   lSegment: literal text to the left of the wildcard
//   rSegment: literal text to the right of the wildcard
var FilterSingleWildcardLeftAnchored = function(lSegment, rSegment) {
    this.lSegment = lSegment;
    this.rSegment = rSegment;
};

// `url` must begin with the left segment, and the right segment must be found
// anywhere after it.
FilterSingleWildcardLeftAnchored.prototype.match = function(url) {
    return url.slice(0, this.lSegment.length) === this.lSegment &&
           url.indexOf(this.rSegment, this.lSegment.length) > 0;
};

// Short id string of this filter class.
FilterSingleWildcardLeftAnchored.prototype.fid = '|*';

FilterSingleWildcardLeftAnchored.prototype.toString = function() {
    return '|' + this.lSegment + '*' + this.rSegment;
};

// Serialized form: left segment, TAB, right segment.
FilterSingleWildcardLeftAnchored.prototype.toSelfie = function() {
    return this.lSegment + '\t' +
           this.rSegment;
};

// Rebuild an instance from the string produced by toSelfie().
FilterSingleWildcardLeftAnchored.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterSingleWildcardLeftAnchored(s.slice(0, pos), s.slice(pos + 1));
};
2014-08-28 07:59:05 -06:00
/******************************************************************************/
2014-09-08 15:46:58 -06:00
// Filter with exactly one wildcard, anchored at the start of the URL
// (`|left*right`), restricted to a page hostname.
//   lSegment: literal text to the left of the wildcard
//   rSegment: literal text to the right of the wildcard
//   hostname: value of the `domain=` option
var FilterSingleWildcardLeftAnchoredHostname = function(lSegment, rSegment, hostname) {
    this.lSegment = lSegment;
    this.rSegment = rSegment;
    this.hostname = hostname;
};

// Matches when `pageHostname` ends with the filter's hostname, `url` begins
// with the left segment, and the right segment is found anywhere after it.
// NOTE(review): the hostname test is a plain suffix comparison, there is no
// check that the suffix starts on a label boundary.
FilterSingleWildcardLeftAnchoredHostname.prototype.match = function(url) {
    return pageHostname.slice(-this.hostname.length) === this.hostname &&
           url.slice(0, this.lSegment.length) === this.lSegment &&
           url.indexOf(this.rSegment, this.lSegment.length) > 0;
};

// Short id string of this filter class.
FilterSingleWildcardLeftAnchoredHostname.prototype.fid = '|*h';

FilterSingleWildcardLeftAnchoredHostname.prototype.toString = function() {
    return '|' + this.lSegment + '*' + this.rSegment + '$domain=' + this.hostname;
};

// Serialized form: left segment, right segment and hostname, TAB-separated.
FilterSingleWildcardLeftAnchoredHostname.prototype.toSelfie = function() {
    return this.lSegment + '\t' +
           this.rSegment + '\t' +
           this.hostname;
};

// Rebuild an instance from the string produced by toSelfie().
FilterSingleWildcardLeftAnchoredHostname.fromSelfie = function(s) {
    var args = s.split('\t');
    return new FilterSingleWildcardLeftAnchoredHostname(args[0], args[1], args[2]);
};
2014-06-23 16:42:43 -06:00
2014-09-08 15:46:58 -06:00
/******************************************************************************/
// Filter with exactly one wildcard, anchored at the end of the URL
// (`left*right|`).
//   lSegment: literal text to the left of the wildcard
//   rSegment: literal text to the right of the wildcard
var FilterSingleWildcardRightAnchored = function(lSegment, rSegment) {
    this.lSegment = lSegment;
    this.rSegment = rSegment;
};

// `url` must end with the right segment, and the left segment must be found
// entirely before it.
FilterSingleWildcardRightAnchored.prototype.match = function(url) {
    if ( url.slice(-this.rSegment.length) !== this.rSegment ) {
        return false;
    }
    // Rightmost position at which the left segment can start without
    // overlapping the right segment. A negative value means the URL is too
    // short to hold both segments; lastIndexOf() would clamp it to 0 and
    // could report an overlapping occurrence, hence the explicit test.
    var lastStart = url.length - this.rSegment.length - this.lSegment.length;
    return lastStart >= 0 &&
           url.lastIndexOf(this.lSegment, lastStart) >= 0;
};

// Short id string of this filter class.
FilterSingleWildcardRightAnchored.prototype.fid = '*|';

FilterSingleWildcardRightAnchored.prototype.toString = function() {
    return this.lSegment + '*' + this.rSegment + '|';
};

// Serialized form: left segment, TAB, right segment.
FilterSingleWildcardRightAnchored.prototype.toSelfie = function() {
    return this.lSegment + '\t' +
           this.rSegment;
};

// Rebuild an instance from the string produced by toSelfie().
FilterSingleWildcardRightAnchored.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterSingleWildcardRightAnchored(s.slice(0, pos), s.slice(pos + 1));
};
2014-08-28 07:59:05 -06:00
/******************************************************************************/
2014-09-08 15:46:58 -06:00
// Filter with exactly one wildcard, anchored at the end of the URL
// (`left*right|`), restricted to a page hostname.
//   lSegment: literal text to the left of the wildcard
//   rSegment: literal text to the right of the wildcard
//   hostname: value of the `domain=` option
var FilterSingleWildcardRightAnchoredHostname = function(lSegment, rSegment, hostname) {
    this.lSegment = lSegment;
    this.rSegment = rSegment;
    this.hostname = hostname;
};

// Matches when `pageHostname` ends with the filter's hostname, `url` ends
// with the right segment, and the left segment is found entirely before it.
// NOTE(review): the hostname test is a plain suffix comparison, there is no
// check that the suffix starts on a label boundary.
FilterSingleWildcardRightAnchoredHostname.prototype.match = function(url) {
    if ( pageHostname.slice(-this.hostname.length) !== this.hostname ) {
        return false;
    }
    if ( url.slice(-this.rSegment.length) !== this.rSegment ) {
        return false;
    }
    // Rightmost position at which the left segment can start without
    // overlapping the right segment. A negative value means the URL is too
    // short to hold both segments; lastIndexOf() would clamp it to 0 and
    // could report an overlapping occurrence, hence the explicit test.
    var lastStart = url.length - this.rSegment.length - this.lSegment.length;
    return lastStart >= 0 &&
           url.lastIndexOf(this.lSegment, lastStart) >= 0;
};

// Short id string of this filter class.
FilterSingleWildcardRightAnchoredHostname.prototype.fid = '*|h';

FilterSingleWildcardRightAnchoredHostname.prototype.toString = function() {
    return this.lSegment + '*' + this.rSegment + '|$domain=' + this.hostname;
};

// Serialized form: left segment, right segment and hostname, TAB-separated.
FilterSingleWildcardRightAnchoredHostname.prototype.toSelfie = function() {
    return this.lSegment + '\t' +
           this.rSegment + '\t' +
           this.hostname;
};

// Rebuild an instance from the string produced by toSelfie().
FilterSingleWildcardRightAnchoredHostname.fromSelfie = function(s) {
    var args = s.split('\t');
    return new FilterSingleWildcardRightAnchoredHostname(args[0], args[1], args[2]);
};
2014-06-23 16:42:43 -06:00
/******************************************************************************/
// With many wildcards, a regex is best.
// Ref: regex escaper taken from:
// https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions
// modified for the purpose here.

// Filter with more than one wildcard, not anchored.
//   s: the filter text, wildcards included
//   tokenBeg: offset of the filter's token within `s`
var FilterManyWildcards = function(s, tokenBeg) {
    this.s = s;
    this.tokenBeg = tokenBeg;
    // Escape regex special characters (all but `*`), then turn each `*` into
    // `.*`. The resulting regex is anchored at the start of its input.
    this.re = new RegExp('^' + s.replace(/([.+?^=!:${}()|\[\]\/\\])/g, '\\$1').replace(/\*/g, '.*'));
};

// The regex is applied to the part of `url` starting where the filter text
// would start once the tokens are lined up.
FilterManyWildcards.prototype.match = function(url, tokenBeg) {
    return this.re.test(url.slice(tokenBeg - this.tokenBeg));
};

// Short id string of this filter class.
FilterManyWildcards.prototype.fid = '*+';

FilterManyWildcards.prototype.toString = function() {
    return this.s;
};

// Serialized form: filter text, TAB, token offset.
FilterManyWildcards.prototype.toSelfie = function() {
    return this.s + '\t' +
           this.tokenBeg;
};

// Rebuild an instance from the string produced by toSelfie().
FilterManyWildcards.fromSelfie = function(s) {
    var pos = s.indexOf('\t');
    return new FilterManyWildcards(s.slice(0, pos), atoi(s.slice(pos + 1)));
};
2014-08-28 07:59:05 -06:00
/******************************************************************************/
2014-06-23 16:42:43 -06:00
// Filter with more than one wildcard, not anchored, restricted to a page
// hostname.
//   s: the filter text, wildcards included
//   tokenBeg: offset of the filter's token within `s`
//   hostname: value of the `domain=` option
var FilterManyWildcardsHostname = function(s, tokenBeg, hostname) {
    this.s = s;
    this.tokenBeg = tokenBeg;
    // Escape regex special characters (all but `*`), then turn each `*` into
    // `.*`. The resulting regex is anchored at the start of its input.
    this.re = new RegExp('^' + s.replace(/([.+?^=!:${}()|\[\]\/\\])/g, '\\$1').replace(/\*/g, '.*'));
    this.hostname = hostname;
};

// Matches when `pageHostname` ends with the filter's hostname and the regex
// matches the part of `url` starting where the filter text would start once
// the tokens are lined up.
// NOTE(review): the hostname test is a plain suffix comparison, there is no
// check that the suffix starts on a label boundary.
FilterManyWildcardsHostname.prototype.match = function(url, tokenBeg) {
    return pageHostname.slice(-this.hostname.length) === this.hostname &&
           this.re.test(url.slice(tokenBeg - this.tokenBeg));
};

// Short id string of this filter class.
FilterManyWildcardsHostname.prototype.fid = '*+h';

FilterManyWildcardsHostname.prototype.toString = function() {
    return this.s + '$domain=' + this.hostname;
};

// Serialized form: filter text, token offset and hostname, TAB-separated.
FilterManyWildcardsHostname.prototype.toSelfie = function() {
    return this.s + '\t' +
           this.tokenBeg + '\t' +
           this.hostname;
};

// Rebuild an instance from the string produced by toSelfie().
FilterManyWildcardsHostname.fromSelfie = function(s) {
    var args = s.split('\t');
    return new FilterManyWildcardsHostname(args[0], atoi(args[1]), args[2]);
};
/******************************************************************************/
2014-09-19 08:59:44 -06:00
// TODO: Some buckets may grow quite large (see histogram excerpt below).
// Evaluate the gain from having an internal dictionary for such large
// buckets: the key would be created by concatenating the char preceding and
// following the token. The dict would contain smaller buckets, and there
// would be a special bucket for those filters for which a prefix, suffix, or
// both is missing.
// I used to do this, but at a higher level, during tokenization, and in the
// end I found out the overhead was too much. I believe it will be a gain
// here because the special treatment would be only for a few specific tokens,
// not systematically done for all tokens.
// key=?? ad count=657
// key=?? ads count=431
// key=?? mdn count=267
// key=?? google count=181
// key=?? pagead2 count=166
// key=?? doubleclick count=118
// key=?? g count=100
// key=?? doubleclick count=94
// key=?? js count=88
// key=?? adv count=88
2014-09-08 15:46:58 -06:00
// A collection of filters, optionally seeded with one or two filters.
//   f: the filter which produced the most recent successful match(), or null
//   filters: the filters held by the bucket
var FilterBucket = function(a, b) {
    this.f = null;
    this.filters = [];
    if ( a !== undefined ) {
        this.filters[0] = a;
        if ( b !== undefined ) {
            this.filters[1] = b;
        }
    }
};

FilterBucket.prototype.add = function(a) {
    // Idea (not implemented): if filter count > n, create a dictionary in
    // which filter buckets are keyed on a prefix-suffix string. There would
    // be a special bucket, always evaluated, for those filters which can't
    // supply a two-char key.
    this.filters.push(a);
};

// Try the filters from last to first. The first one to match is remembered in
// `this.f`, which is left untouched when nothing matches.
FilterBucket.prototype.match = function(url, tokenBeg) {
    var filters = this.filters;
    var i = filters.length;
    while ( i-- ) {
        if ( filters[i].match(url, tokenBeg) !== false ) {
            this.f = filters[i];
            return true;
        }
    }
    return false;
};

// Short id string of this filter class.
FilterBucket.prototype.fid = '[]';

// Text of the filter remembered by match(), or '' if there is none.
FilterBucket.prototype.toString = function() {
    if ( this.f !== null ) {
        return this.f.toString();
    }
    return '';
};

// Serialized form: only the number of filters held, as a string.
FilterBucket.prototype.toSelfie = function() {
    return this.filters.length.toString();
};

// Returns an empty bucket: the filters themselves are not part of the
// bucket's selfie (presumably re-added by the caller — not visible here).
FilterBucket.fromSelfie = function() {
    return new FilterBucket();
};
2014-06-23 16:42:43 -06:00
/******************************************************************************/
// Pick and instantiate the filter class best suited to a filter which has no
// hostname restriction.
//   details.f: the filter text
//   details.anchor: < 0 anchored at start, > 0 anchored at end, 0 not anchored
//   details.hostnameAnchored: truthy for a `||` filter (only consulted for
//       filters with no wildcard and no start/end anchor)
//   tokenBeg: offset of the token within the filter text
// Returns null for an anchored filter with more than one wildcard.
var makeFilter = function(details, tokenBeg) {
    var s = details.f;
    var wcOffset = s.indexOf('*');
    if ( wcOffset !== -1 ) {
        // More than one wildcard
        if ( s.indexOf('*', wcOffset + 1) !== -1 ) {
            return details.anchor === 0 ? new FilterManyWildcards(s, tokenBeg) : null;
        }
        // Exactly one wildcard
        var lSegment = s.slice(0, wcOffset);
        var rSegment = s.slice(wcOffset + 1);
        if ( details.anchor < 0 ) {
            return new FilterSingleWildcardLeftAnchored(lSegment, rSegment);
        }
        if ( details.anchor > 0 ) {
            return new FilterSingleWildcardRightAnchored(lSegment, rSegment);
        }
        if ( tokenBeg === 0 ) {
            return new FilterSingleWildcardPrefix0(lSegment, rSegment);
        }
        return new FilterSingleWildcard(lSegment, rSegment, tokenBeg);
    }
    // No wildcard
    if ( details.anchor < 0 ) {
        return new FilterPlainLeftAnchored(s);
    }
    if ( details.anchor > 0 ) {
        return new FilterPlainRightAnchored(s);
    }
    if ( details.hostnameAnchored ) {
        return new FilterPlainHnAnchored(s);
    }
    if ( tokenBeg === 0 ) {
        return new FilterPlainPrefix0(s);
    }
    if ( tokenBeg === 1 ) {
        return new FilterPlainPrefix1(s);
    }
    return new FilterPlain(s, tokenBeg);
};
/******************************************************************************/
// Pick and instantiate the filter class best suited to a filter which is
// restricted to a page hostname.
//   details.f: the filter text
//   details.anchor: < 0 anchored at start, > 0 anchored at end, 0 not anchored
//   tokenBeg: offset of the token within the filter text
//   hostname: the hostname the filter is restricted to
// Returns null for an anchored filter with more than one wildcard.
// `details.hostnameAnchored` is not consulted here.
var makeHostnameFilter = function(details, tokenBeg, hostname) {
    var s = details.f;
    var wcOffset = s.indexOf('*');
    if ( wcOffset !== -1 ) {
        // More than one wildcard
        if ( s.indexOf('*', wcOffset + 1) !== -1 ) {
            return details.anchor === 0 ? new FilterManyWildcardsHostname(s, tokenBeg, hostname) : null;
        }
        // Exactly one wildcard
        var lSegment = s.slice(0, wcOffset);
        var rSegment = s.slice(wcOffset + 1);
        if ( details.anchor < 0 ) {
            return new FilterSingleWildcardLeftAnchoredHostname(lSegment, rSegment, hostname);
        }
        if ( details.anchor > 0 ) {
            return new FilterSingleWildcardRightAnchoredHostname(lSegment, rSegment, hostname);
        }
        if ( tokenBeg === 0 ) {
            return new FilterSingleWildcardPrefix0Hostname(lSegment, rSegment, hostname);
        }
        return new FilterSingleWildcardHostname(lSegment, rSegment, tokenBeg, hostname);
    }
    // No wildcard
    if ( details.anchor < 0 ) {
        return new FilterPlainLeftAnchoredHostname(s, hostname);
    }
    if ( details.anchor > 0 ) {
        return new FilterPlainRightAnchoredHostname(s, hostname);
    }
    if ( tokenBeg === 0 ) {
        return new FilterPlainPrefix0Hostname(s, hostname);
    }
    if ( tokenBeg === 1 ) {
        return new FilterPlainPrefix1Hostname(s, hostname);
    }
    return new FilterPlainHostname(s, tokenBeg, hostname);
};
/******************************************************************************/
// Given a string, find a good token. Tokens which are too generic, i.e. very
// common with a high probability of ending up as a miss, are not
// good. Avoid if possible. This has a *significant* positive impact on
// performance.
// These "bad tokens" are collated manually.
var badTokens = {
    'com': true,
    'http': true,
    'https': true,
    'icon': true,
    'images': true,
    'img': true,
    'js': true,
    'net': true,
    'news': true,
    'www': true
};
var findFirstGoodToken = function(s) {
reGoodToken.lastIndex = 0;
var matches;
while ( matches = reGoodToken.exec(s) ) {
if ( badTokens[matches[0]] === undefined ) {
return matches;
}
}
// No good token found, just return the first token from left
reGoodToken.lastIndex = 0;
return reGoodToken.exec(s);
};
/******************************************************************************/
// Returns the `exec()` result of `reHostnameToken` against `s`, always
// searching from the start of the string. Returns null when there is no match.
// `reHostnameToken.lastIndex` is left as set by `exec()`; addFilter() reads it
// to find where the token ends.

var findHostnameToken = function(s) {
    var re = reHostnameToken;
    re.lastIndex = 0;
    var matches = re.exec(s);
    return matches;
};
/******************************************************************************/
// Strip every leading and trailing occurrence of the single character `c`
// from `s` and return the result. Occurrences inside the string are kept.

var trimChar = function(s, c) {
    var beg = 0;
    var end = s.length;
    // Skip the leading run of `c`
    while ( beg < end && s.charAt(beg) === c ) {
        beg += 1;
    }
    // Back up over the trailing run of `c`
    while ( end > beg && s.charAt(end - 1) === c ) {
        end -= 1;
    }
    return s.slice(beg, end);
};
/******************************************************************************/
/******************************************************************************/
// Parser for a single filter line. One instance is meant to be reused for
// many lines: `parse()` starts by calling `reset()`.

var FilterParser = function() {
    // These two arrays are allocated once; `reset()` empties them in place.
    this.hostnames = [];
    this.types = [];
    // All other fields are created by `reset()`.
    this.reset();
};
/******************************************************************************/
// Maps a type option as written in a filter to the type name used as key
// into `typeNameToTypeValue`. An option absent from this table is not a
// type option.

FilterParser.prototype.toNormalizedType = {
    // Same name on both sides
            'stylesheet': 'stylesheet',
                 'image': 'image',
                'object': 'object',
    // Handled the same way as `object`
     'object-subrequest': 'object',
                'script': 'script',
        'xmlhttprequest': 'xmlhttprequest',
    // Named differently internally
           'subdocument': 'sub_frame',
                 'other': 'other',
    // Special type, see parseOptType()
                 'popup': 'popup'
};
/******************************************************************************/
// Put the parser back into its pristine state, ready for the next filter.
// The `hostnames` and `types` arrays are emptied in place rather than
// replaced. Returns the parser itself.

FilterParser.prototype.reset = function() {
    // Purpose and priority of the filter
    this.action = BlockAction;
    this.important = 0;

    // Pattern, raw option string, and how the pattern is anchored
    this.f = '';
    this.fopts = '';
    this.anchor = 0;
    this.hostnameAnchored = false;
    this.hostnamePure = false;

    // Parsed options
    this.firstParty = false;
    this.thirdParty = false;
    this.notHostname = false;
    this.hostnames.length = 0;
    this.types.length = 0;

    // Classification of the line
    this.elemHiding = false;
    this.unsupported = false;

    return this;
};
/******************************************************************************/
// Record a request type option.
//
//   raw: the option as written in the filter, a key of `toNormalizedType`.
//   not: true when the option was negated with `~`.
//
// A plain option adds its own type value to `this.types`. A negated option
// adds the value of every entry of `typeNameToTypeValue` except the negated
// one and `popup`.

FilterParser.prototype.parseOptType = function(raw, not) {
    var excluded = this.toNormalizedType[raw];
    if ( !not ) {
        this.types.push(typeNameToTypeValue[excluded]);
        return;
    }
    for ( var name in typeNameToTypeValue ) {
        // https://github.com/gorhill/uBlock/issues/121
        // `popup` is a special type, it cannot be set for filters intended
        // for real net request types. Skipping it here is safe since there
        // is no such thing as a filter using `~popup`.
        if ( name === excluded || name === 'popup' ) {
            continue;
        }
        this.types.push(typeNameToTypeValue[name]);
    }
};
/******************************************************************************/
// Record the `third-party` option: the plain form flags the filter as
// third-party only, the negated form (`~third-party`) as first-party only.

FilterParser.prototype.parseOptParty = function(not) {
    var flag = not ? 'firstParty' : 'thirdParty';
    this[flag] = true;
};
/******************************************************************************/
// Parse the value of a `domain=` option, i.e. a `|`-separated list of
// hostnames, each optionally negated with a leading `~`.
//
// Hostnames are appended to `this.hostnames` with the `~` removed, and
// `this.notHostname` tells whether they are negated. Mixing negated and
// non-negated hostnames is not supported: the filter is then flagged as
// `unsupported` and parsing of the list stops.

FilterParser.prototype.parseOptHostnames = function(raw) {
    var hostnames = raw.split('|');
    var hostname, not;
    for ( var i = 0; i < hostnames.length; i++ ) {
        hostname = hostnames[i];
        not = hostname.charAt(0) === '~';
        if ( not ) {
            hostname = hostname.slice(1);
        }
        // https://github.com/gorhill/uBlock/issues/191
        // Well it doesn't seem to make a whole lot of sense to have both
        // non-negated hostnames mixed with negated hostnames.
        if ( this.hostnames.length !== 0 && not !== this.notHostname ) {
            // The message names the method which really emits it.
            console.error('FilterParser.parseOptHostnames(): ambiguous filter syntax: "%s"', this.f);
            this.unsupported = true;
            return;
        }
        this.notHostname = not;
        this.hostnames.push(hostname);
    }
};
/******************************************************************************/
// Parse one filter line into the fields of this parser, and return the
// parser itself.
//
// After the call:
// - `elemHiding` is true for an element hiding filter (nothing else is
//   parsed in that case);
// - `unsupported` is true for a regex-like pattern or an unknown option;
// - otherwise `f` holds the normalized pattern, `fopts` the raw option
//   string, and `action`, `anchor`, `hostnameAnchored`, `hostnamePure`,
//   `important`, plus whatever the parseOpt*() helpers set, describe the
//   filter.

FilterParser.prototype.parse = function(s) {
    // important!
    this.reset();

    // The whole line is a plain hostname: nothing more to parse
    if ( reHostnameRule.test(s) ) {
        this.f = s;
        this.hostnamePure = this.hostnameAnchored = true;
        return this;
    }

    // element hiding filter?
    if ( s.indexOf('##') >= 0 || s.indexOf('#@') >= 0 ) {
        this.elemHiding = true;
        return this;
    }

    // block or allow filter?
    if ( s.slice(0, 2) === '@@' ) {
        this.action = AllowAction;
        s = s.slice(2);
    }

    // options
    var pos = s.indexOf('$');
    if ( pos > 0 ) {
        this.fopts = s.slice(pos + 1);
        s = s.slice(0, pos);
    }

    // regex? (not supported)
    if ( s.charAt(0) === '/' && s.slice(-1) === '/' ) {
        this.unsupported = true;
        return this;
    }

    // hostname anchoring
    if ( s.slice(0, 2) === '||' ) {
        this.hostnameAnchored = true;
        s = s.slice(2);
    }

    // left-anchored
    if ( s.charAt(0) === '|' ) {
        this.anchor = -1;
        s = s.slice(1);
    }

    // right-anchored
    if ( s.slice(-1) === '|' ) {
        this.anchor = 1;
        s = s.slice(0, -1);
    }

    // normalize placeholders
    // TODO: transforming `^` into `*` is not a strict interpretation of
    // ABP syntax.
    s = s.replace(/\^/g, '*');
    s = s.replace(/\*\*+/g, '*');

    // remove leading and trailing wildcards
    s = trimChar(s, '*');

    // pure hostname-based?
    this.hostnamePure = this.hostnameAnchored && reHostnameRule.test(s);

    this.f = s;

    if ( !this.fopts ) {
        return this;
    }

    // parse options
    var opts = this.fopts.split(',');
    var opt, not;
    for ( var i = 0; i < opts.length; i++ ) {
        opt = opts[i];
        not = opt.charAt(0) === '~';
        if ( not ) {
            opt = opt.slice(1);
        }
        if ( opt === 'third-party' ) {
            this.parseOptParty(not);
            continue;
        }
        // Type options. `popup` needs no branch of its own: it is a key of
        // `toNormalizedType`, hence it is handled right here.
        if ( this.toNormalizedType.hasOwnProperty(opt) ) {
            this.parseOptType(opt, not);
            continue;
        }
        if ( opt.slice(0,7) === 'domain=' ) {
            this.parseOptHostnames(opt.slice(7));
            continue;
        }
        if ( opt === 'important' ) {
            this.important = Important;
            continue;
        }
        // Unknown option: the whole filter is rejected
        this.unsupported = true;
        break;
    }
    return this;
};
/******************************************************************************/
/******************************************************************************/
// Container for all network filters: it compiles filter lines into lookup
// structures and matches request URLs against them.

var FilterContainer = function() {
    // Used by matchTokens() to extract tokens from a URL
    this.reAnyToken = /[%0-9a-z]+/g;
    // The (at most) four categories looked up by matchTokens()
    this.buckets = new Array(4);
    // Hostname-only block filters, without option and with `third-party`
    this.blockedAnyPartyHostnames = new µb.LiquidDict();
    this.blocked3rdPartyHostnames = new µb.LiquidDict();
    // One parser instance is reused for all filters
    this.filterParser = new FilterParser();
    // Creates all remaining fields
    this.reset();
};
/******************************************************************************/
// Forget every filter and zero every counter, which brings the memory
// footprint of the container back to a minimum.

FilterContainer.prototype.reset = function() {
    this.frozen = false;

    // Counters
    this.processedFilterCount = 0;
    this.acceptedCount = 0;
    this.rejectedCount = 0;
    this.allowFilterCount = 0;
    this.blockFilterCount = 0;
    this.duplicateCount = 0;

    // Filters, and the lines seen so far (used to detect duplicates)
    this.categories = {};
    this.duplicates = {};

    // Helper objects
    this.blockedAnyPartyHostnames.reset();
    this.blocked3rdPartyHostnames.reset();
    this.filterParser.reset();
};
/******************************************************************************/
// To be called once all filters have been added: freezes both hostname
// dictionaries, discards the data needed only while adding filters, and
// marks the container as frozen.

FilterContainer.prototype.freeze = function() {
    this.blockedAnyPartyHostnames.freeze();
    this.blocked3rdPartyHostnames.freeze();
    // The set of lines seen so far is only useful to add()
    this.duplicates = {};
    this.filterParser.reset();
    this.frozen = true;
};
/******************************************************************************/
// Serialize the container into a plain object ("selfie") which fromSelfie()
// can load back.
//
// `categories` is serialized as text, one record per line, each record being
// `tag<TAB>payload`:
//   k1   encoded category key, starts a category
//   k2   encoded token key, starts the entry of a token
//   fid  any other tag is the `fid` of a filter, the payload is its selfie
// The entry of a token is a single record, except when it is a bucket
// (fid `[]`): the bucket record is then followed by one record per filter
// it contains.

FilterContainer.prototype.toSelfie = function() {
    // Keys are encoded because a `\n` or `\t` character in them would trip
    // the code at parse time.
    var serializeCategory = function(tokenToEntry) {
        var records = [];
        Object.keys(tokenToEntry).forEach(function(tokenKey) {
            var entry = tokenToEntry[tokenKey];
            records.push('k2\t' + encode(tokenKey));
            records.push(entry.fid + '\t' + entry.toSelfie());
            if ( entry.fid === '[]' ) {
                entry.filters.forEach(function(filter) {
                    records.push(filter.fid + '\t' + filter.toSelfie());
                });
            }
        });
        return records.join('\n');
    };

    var serializeCategories = function(categories) {
        var records = [];
        Object.keys(categories).forEach(function(categoryKey) {
            records.push('k1\t' + encode(categoryKey));
            records.push(serializeCategory(categories[categoryKey]));
        });
        return records.join('\n');
    };

    var selfie = {};
    selfie.processedFilterCount = this.processedFilterCount;
    selfie.acceptedCount = this.acceptedCount;
    selfie.rejectedCount = this.rejectedCount;
    selfie.allowFilterCount = this.allowFilterCount;
    selfie.blockFilterCount = this.blockFilterCount;
    selfie.duplicateCount = this.duplicateCount;
    selfie.categories = serializeCategories(this.categories);
    selfie.blockedAnyPartyHostnames = this.blockedAnyPartyHostnames.toSelfie();
    selfie.blocked3rdPartyHostnames = this.blocked3rdPartyHostnames.toSelfie();
    return selfie;
};
/******************************************************************************/
// Load the state of the container from a selfie created by toSelfie(). The
// container is marked as frozen.
//
// The `categories` text is read one line at a time, each line being a
// `tag<TAB>payload` record:
//   k1   starts a new category, the payload is its encoded key
//   k2   selects the token key for the records which follow
//   fid  any other tag selects the filter class used to re-create a filter
//        from the payload

FilterContainer.prototype.fromSelfie = function(selfie) {
    this.frozen = true;
    this.processedFilterCount = selfie.processedFilterCount;
    this.acceptedCount = selfie.acceptedCount;
    this.rejectedCount = selfie.rejectedCount;
    this.allowFilterCount = selfie.allowFilterCount;
    this.blockFilterCount = selfie.blockFilterCount;
    this.duplicateCount = selfie.duplicateCount;
    this.blockedAnyPartyHostnames.fromSelfie(selfie.blockedAnyPartyHostnames);
    this.blocked3rdPartyHostnames.fromSelfie(selfie.blocked3rdPartyHostnames);

    // Filter class to use for each `fid`
    var factories = {
         '[]': FilterBucket,
          'a': FilterPlain,
         'ah': FilterPlainHostname,
         '0a': FilterPlainPrefix0,
        '0ah': FilterPlainPrefix0Hostname,
         '1a': FilterPlainPrefix1,
        '1ah': FilterPlainPrefix1Hostname,
         '|a': FilterPlainLeftAnchored,
        '|ah': FilterPlainLeftAnchoredHostname,
         'a|': FilterPlainRightAnchored,
        'a|h': FilterPlainRightAnchoredHostname,
        'h|a': FilterPlainHnAnchored,
          '*': FilterSingleWildcard,
         '*h': FilterSingleWildcardHostname,
         '0*': FilterSingleWildcardPrefix0,
        '0*h': FilterSingleWildcardPrefix0Hostname,
         '|*': FilterSingleWildcardLeftAnchored,
        '|*h': FilterSingleWildcardLeftAnchoredHostname,
         '*|': FilterSingleWildcardRightAnchored,
        '*|h': FilterSingleWildcardRightAnchoredHostname,
         '*+': FilterManyWildcards,
        '*+h': FilterManyWildcardsHostname
    };

    var categories = this.categories;
    var text = selfie.categories;
    var textEnd = text.length;
    var tokenToEntry, tokenKey;
    // Entry of the current token, null until its first record is read
    var entry = null;
    var beg = 0, end, line, tab, tag, payload;

    while ( beg < textEnd ) {
        end = text.indexOf('\n', beg);
        if ( end === -1 ) {
            end = textEnd;
        }
        line = text.slice(beg, end);
        beg = end + 1;

        tab = line.indexOf('\t');
        tag = line.slice(0, tab);
        payload = line.slice(tab + 1);

        switch ( tag ) {
        case 'k1':
            tokenToEntry = categories[decode(payload)] = {};
            entry = null;
            break;
        case 'k2':
            tokenKey = decode(payload);
            entry = null;
            break;
        default:
            if ( entry === null ) {
                entry = tokenToEntry[tokenKey] = factories[tag].fromSelfie(payload);
            } else {
                // When token key is reused, it can't be anything
                // else than FilterBucket
                entry.add(factories[tag].fromSelfie(payload));
            }
            break;
        }
    }
};
/******************************************************************************/
// Convert a category, i.e. a combination of action/importance/party/type
// bits, into the one-character string used as key into `this.categories`.

FilterContainer.prototype.makeCategoryKey = function(category) {
    var key = String.fromCharCode(category);
    return key;
};
/******************************************************************************/
// Add one line of a filter list to the container.
//
// Returns true when the line ended up as a filter (or as a duplicate of a
// hostname-only filter), false when it was ignored: empty line, comment,
// unsupported syntax, element hiding filter, duplicate, or no usable token.
// The various counters are updated accordingly.

FilterContainer.prototype.add = function(s) {
    // ORDER OF TESTS IS IMPORTANT!

    // Ignore empty lines
    if ( reIgnoreEmpty.test(s) ) {
        return false;
    }

    // Ignore comments
    if ( reIgnoreComment.test(s) ) {
        return false;
    }

    var parsed = this.filterParser.parse(s);

    // Ignore rules with other conditions for now
    if ( parsed.unsupported ) {
        this.rejectedCount += 1;
        return false;
    }

    // Ignore element-hiding filters
    if ( parsed.elemHiding ) {
        return false;
    }

    this.processedFilterCount += 1;
    this.acceptedCount += 1;

    // Pure hostnames, use more efficient liquid dict
    if ( parsed.hostnamePure && parsed.action === BlockAction ) {
        if ( parsed.fopts === '' ) {
            if ( this.blockedAnyPartyHostnames.add(parsed.f) ) {
                this.blockFilterCount++;
            } else {
                this.duplicateCount++;
            }
            return true;
        }
        if ( parsed.fopts === 'third-party' ) {
            if ( this.blocked3rdPartyHostnames.add(parsed.f) ) {
                this.blockFilterCount++;
            } else {
                this.duplicateCount++;
            }
            return true;
        }
    }

    // Own-property test: a plain lookup would also see members inherited
    // from Object.prototype, and a filter such as `constructor` would be
    // discarded as a duplicate the first time it is seen.
    if ( Object.prototype.hasOwnProperty.call(this.duplicates, s) ) {
        this.duplicateCount++;
        return false;
    }
    this.duplicates[s] = true;

    // Read the action before calling addFilter(), which is free to modify
    // `parsed`: the filter must be counted according to what was written
    // in the filter list.
    var isAllowFilter = parsed.action !== BlockAction;

    var r = this.addFilter(parsed);
    if ( r === false ) {
        return false;
    }

    if ( isAllowFilter ) {
        this.allowFilterCount += 1;
    } else {
        this.blockFilterCount += 1;
    }
    return true;
};
/******************************************************************************/
// Create the filter object(s) for a parsed filter and store them into the
// proper categories.
//
//   parsed: the FilterParser instance holding the parsed filter. It is left
//           unchanged by this method.
//
// Returns false when no token could be found in the pattern or when no
// filter class can handle the pattern, true otherwise.

FilterContainer.prototype.addFilter = function(parsed) {
    // TODO: avoid duplicates

    var matches = parsed.hostnameAnchored ?
        findHostnameToken(parsed.f) :
        findFirstGoodToken(parsed.f);
    if ( !matches || !matches[0].length ) {
        return false;
    }
    var tokenBeg = matches.index;
    // The token ends where the regex used to find it stopped
    var tokenEnd = parsed.hostnameAnchored ?
        reHostnameToken.lastIndex :
        reGoodToken.lastIndex;
    var filter;

    var i = parsed.hostnames.length;

    // Applies to specific domains
    if ( i !== 0 && !parsed.notHostname ) {
        while ( i-- ) {
            filter = makeHostnameFilter(parsed, tokenBeg, parsed.hostnames[i]);
            if ( !filter ) {
                return false;
            }
            this.addFilterEntry(filter, parsed, AnyParty, tokenBeg, tokenEnd);
        }
        return true;
    }

    // Applies to all domains, with exception(s)
    // https://github.com/gorhill/uBlock/issues/191
    // Invert the purpose of the filter for negated hostnames
    if ( i !== 0 && parsed.notHostname ) {
        filter = makeFilter(parsed, tokenBeg);
        if ( !filter ) {
            return false;
        }
        this.addFilterEntry(filter, parsed, AnyParty, tokenBeg, tokenEnd);

        // Reverse purpose of filter. This is done by modifying `parsed`,
        // so the original values are saved and restored afterwards: the
        // caller still uses `parsed` once we are done with it.
        var savedAction = parsed.action;
        var savedImportant = parsed.important;
        parsed.action ^= ToggleAction;
        // https://github.com/gorhill/uBlock/issues/191#issuecomment-53654024
        // If it is a block filter, we need to reverse the order of
        // evaluation.
        if ( parsed.action === BlockAction ) {
            parsed.important = Important;
        }
        var added = true;
        while ( i-- ) {
            filter = makeHostnameFilter(parsed, tokenBeg, parsed.hostnames[i]);
            if ( !filter ) {
                added = false;
                break;
            }
            this.addFilterEntry(filter, parsed, AnyParty, tokenBeg, tokenEnd);
        }
        parsed.action = savedAction;
        parsed.important = savedImportant;
        return added;
    }

    // Applies to all domains without exceptions
    filter = makeFilter(parsed, tokenBeg);
    if ( !filter ) {
        return false;
    }
    if ( parsed.firstParty ) {
        this.addFilterEntry(filter, parsed, FirstParty, tokenBeg, tokenEnd);
    } else if ( parsed.thirdParty ) {
        this.addFilterEntry(filter, parsed, ThirdParty, tokenBeg, tokenEnd);
    } else {
        this.addFilterEntry(filter, parsed, AnyParty, tokenBeg, tokenEnd);
    }
    return true;
};
/******************************************************************************/
// Store a filter into every category it belongs to.
//
//   filter:             the filter object to store.
//   parsed:             the parsed filter it was created from.
//   party:              one of AnyParty, FirstParty, ThirdParty.
//   tokenBeg, tokenEnd: boundaries of the token within the pattern; the
//                       token is the key under which the filter is stored.
//
// There is one category per request type listed in `parsed.types`, or a
// single `AnyType` category when no type was specified.

FilterContainer.prototype.addFilterEntry = function(filter, parsed, party, tokenBeg, tokenEnd) {
    var tokenKey = parsed.f.slice(tokenBeg, tokenEnd);
    var bits = parsed.action | parsed.important | party;
    var types = parsed.types;
    if ( types.length === 0 ) {
        this.addToCategory(bits | AnyType, tokenKey, filter);
        return;
    }
    types.forEach(function(type) {
        this.addToCategory(bits | type, tokenKey, filter);
    }, this);
};
/******************************************************************************/
// Store a filter under a token key in a category, creating the category
// when needed.
//
// The first filter for a token is stored as is. When a second filter comes
// in for the same token, both are moved into a FilterBucket, to which any
// further filter for that token is added.

FilterContainer.prototype.addToCategory = function(category, tokenKey, filter) {
    var categoryKey = this.makeCategoryKey(category);
    var categoryBucket = this.categories[categoryKey];
    if ( !categoryBucket ) {
        categoryBucket = this.categories[categoryKey] = {};
    }
    // Own-property test, same as in matchTokens(): with a plain lookup, the
    // token `constructor` would yield a function inherited from
    // Object.prototype, which would then be stored as if it were a filter
    // and cause an exception at match time.
    if ( categoryBucket.hasOwnProperty(tokenKey) === false ) {
        categoryBucket[tokenKey] = filter;
        return;
    }
    var filterEntry = categoryBucket[tokenKey];
    if ( filterEntry.fid === '[]' ) {
        filterEntry.add(filter);
        return;
    }
    categoryBucket[tokenKey] = new FilterBucket(filterEntry, filter);
};
/******************************************************************************/
// Match a URL against the categories currently set in `this.buckets`.
//
// The URL is split into tokens. For each token, from left to right, each of
// the four buckets is looked up in order, and the filter (or bucket of
// filters) stored for that token, if any, is tested against the URL.
//
// Returns the first entry for which `match()` does not return false, or
// false when nothing matches.

FilterContainer.prototype.matchTokens = function(url) {
    var reToken = this.reAnyToken;
    var buckets = this.buckets;
    var found, tokenBeg, token, bucket, candidate, i;

    reToken.lastIndex = 0;
    for (;;) {
        found = reToken.exec(url);
        if ( !found ) {
            return false;
        }
        tokenBeg = found.index;
        token = url.slice(tokenBeg, reToken.lastIndex);
        // There are always four slots, unused ones are undefined
        for ( i = 0; i < 4; i++ ) {
            bucket = buckets[i];
            if ( bucket === undefined || bucket.hasOwnProperty(token) === false ) {
                continue;
            }
            candidate = bucket[token];
            if ( candidate.match(url, tokenBeg) !== false ) {
                return candidate;
            }
        }
    }
};
/******************************************************************************/
// This is where we test filters which have the form:
//
//   `||www.example.com^`
//
// LiquidDict is well optimized to deal with plain hostnames, so it is used
// for this sort of filter instead of the filter classes meant for more
// complex patterns.
//
// The request hostname is tested first, then each of its parent domains,
// obtained by removing the leftmost label. Returns the matching filter as a
// string, or false when there is no match.

FilterContainer.prototype.matchAnyPartyHostname = function(requestHostname) {
    var dict = this.blockedAnyPartyHostnames;
    var candidate = requestHostname;
    var dot;
    for (;;) {
        if ( dict.test(candidate) === true ) {
            return '||' + candidate + '^';
        }
        dot = candidate.indexOf('.');
        if ( dot === -1 ) {
            return false;
        }
        candidate = candidate.slice(dot + 1);
    }
};
/******************************************************************************/
// This is where we test filters which have the form:
//
//   `||www.example.com^$third-party`
//
// LiquidDict is well optimized to deal with plain hostnames, so it is used
// for this sort of filter instead of the filter classes meant for more
// complex patterns.
//
// The request hostname is tested first, then each of its parent domains,
// obtained by removing the leftmost label. Returns the matching filter as a
// string, or false when there is no match.

FilterContainer.prototype.match3rdPartyHostname = function(requestHostname) {
    var dict = this.blocked3rdPartyHostnames;
    var candidate = requestHostname;
    var dot;
    for (;;) {
        if ( dict.test(candidate) === true ) {
            return '||' + candidate + '^$third-party';
        }
        dot = candidate.indexOf('.');
        if ( dot === -1 ) {
            return false;
        }
        candidate = candidate.slice(dot + 1);
    }
};
/******************************************************************************/
// Specialized handlers

// https://github.com/gorhill/uBlock/issues/116
// Some types of requests are exceptional: they need custom handling, not the
// generic handling.
//
// Match a request against the filters which were created for exactly
// `requestType`: neither the filters applying to any type nor the
// hostname-only filters are tested.
//
//   pageDetails: `pageDomain` and `pageHostname` of the page, both optional.
//   requestURL:  URL of the request.
//   requestType: a key of `typeNameToTypeValue`.
//
// Returns '' when the request is not blocked by any filter, the block filter
// as a string when it is blocked (with a `$important` suffix for an
// `important` filter, same as matchString()), or the allow filter prefixed
// with `@@` when an allow filter overrides the block filter.

FilterContainer.prototype.matchStringExactType = function(pageDetails, requestURL, requestType) {
    var url = requestURL.toLowerCase();
    var pageDomain = pageDetails.pageDomain || '';
    var requestHostname = µb.URI.hostnameFromURI(requestURL);

    // Find out the relation between the page and request
    var party = ThirdParty;
    if ( requestHostname.slice(0 - pageDomain.length) === pageDomain ) {
        // Be sure to not confuse 'example.com' with 'anotherexample.com':
        // the page domain must be the whole hostname or start at a label
        // boundary. The index is computed from the start of the string
        // since `charAt()` does not support negative indices.
        var c = requestHostname.charAt(requestHostname.length - pageDomain.length - 1);
        if ( c === '' || c === '.' ) {
            party = FirstParty;
        }
    }

    var type = typeNameToTypeValue[requestType];
    var categories = this.categories;
    var buckets = this.buckets;

    // This will be used by hostname-based filters
    pageHostname = pageDetails.pageHostname || '';

    // The "any type" slots are not used here
    buckets[0] = buckets[1] = undefined;

    // https://github.com/gorhill/uBlock/issues/139
    // Test against important block filters
    buckets[2] = categories[this.makeCategoryKey(BlockAnyParty | Important | type)];
    buckets[3] = categories[this.makeCategoryKey(BlockAction | Important | type | party)];
    var bf = this.matchTokens(url);
    if ( bf !== false ) {
        return bf.toString() + '$important';
    }

    // Test against block filters
    // If there is no block filter, no need to test against allow filters
    buckets[2] = categories[this.makeCategoryKey(BlockAnyParty | type)];
    buckets[3] = categories[this.makeCategoryKey(BlockAction | type | party)];
    bf = this.matchTokens(url);
    if ( bf === false ) {
        return '';
    }

    // Test against allow filters
    buckets[2] = categories[this.makeCategoryKey(AllowAnyParty | type)];
    buckets[3] = categories[this.makeCategoryKey(AllowAction | type | party)];
    var af = this.matchTokens(url);
    if ( af !== false ) {
        return '@@' + af.toString();
    }

    return bf.toString();
};
/******************************************************************************/
// Match a request against all filters.
//
//   pageDetails: `pageDomain` and `pageHostname` of the page, both optional.
//   requestURL:  URL of the request.
//   requestType: a key of `typeNameToTypeValue`.
//
// Returns '' when the request is not blocked by any filter, the block filter
// as a string when it is blocked (with a `$important` suffix for an
// `important` filter), or the allow filter prefixed with `@@` when an allow
// filter overrides the block filter.

FilterContainer.prototype.matchString = function(pageDetails, requestURL, requestType) {
    // https://github.com/gorhill/httpswitchboard/issues/239
    // Convert url to lower case:
    //     `match-case` option not supported, but then, I saw only one
    //     occurrence of it in all the supported lists (bulgaria list).
    var url = requestURL.toLowerCase();

    // The logic here is simple:
    //
    // block = !whitelisted &&  blacklisted
    //   or equivalent
    // allow =  whitelisted || !blacklisted

    // Statistically, hits on a URL in order of likelihood:
    // 1. No hit
    // 2. Hit on a block filter
    // 3. Hit on an allow filter
    //
    // High likelihood of "no hit" means to optimize we need to reduce as much
    // as possible the number of filters to test.
    //
    // Then, because of the order of probabilities, we should test only
    // block filters first, and test allow filters if and only if there is a
    // hit on a block filter. Since there is a high likelihood of no hit,
    // testing allow filter by default is likely wasted work, hence allow
    // filters are tested *only* if there is a (unlikely) hit on a block
    // filter.

    var pageDomain = pageDetails.pageDomain || '';
    var requestHostname = µb.URI.hostnameFromURI(requestURL);

    // Find out the relation between the page and request
    var party = ThirdParty;
    if ( requestHostname.slice(0 - pageDomain.length) === pageDomain ) {
        // Be sure to not confuse 'example.com' with 'anotherexample.com':
        // the page domain must be the whole hostname or start at a label
        // boundary. The index is computed from the start of the string:
        // `charAt()` does not support negative indices, it returns '' for
        // them, which would make the test below always succeed.
        var c = requestHostname.charAt(requestHostname.length - pageDomain.length - 1);
        if ( c === '' || c === '.' ) {
            party = FirstParty;
        }
    }

    // This will be used by hostname-based filters
    pageHostname = pageDetails.pageHostname || '';

    var type = typeNameToTypeValue[requestType];
    var categories = this.categories;
    var buckets = this.buckets;

    // https://github.com/gorhill/uBlock/issues/139
    // Test against important block filters.
    // The purpose of the `important` option is to reverse the order of
    // evaluation. Normally, it is "evaluate block then evaluate allow", with
    // the `important` property it is "evaluate allow then evaluate block".
    buckets[0] = categories[this.makeCategoryKey(BlockAnyTypeAnyParty | Important)];
    buckets[1] = categories[this.makeCategoryKey(BlockAnyType | Important | party)];
    buckets[2] = categories[this.makeCategoryKey(BlockAnyParty | Important | type)];
    buckets[3] = categories[this.makeCategoryKey(BlockAction | Important | type | party)];
    var bf = this.matchTokens(url);
    if ( bf !== false ) {
        return bf.toString() + '$important';
    }

    // Test hostname-based block filters
    bf = this.matchAnyPartyHostname(requestHostname);
    if ( bf === false && party === ThirdParty ) {
        bf = this.match3rdPartyHostname(requestHostname);
    }

    // Test against block filters
    if ( bf === false ) {
        buckets[0] = categories[this.makeCategoryKey(BlockAnyTypeAnyParty)];
        buckets[1] = categories[this.makeCategoryKey(BlockAnyType | party)];
        buckets[2] = categories[this.makeCategoryKey(BlockAnyParty | type)];
        buckets[3] = categories[this.makeCategoryKey(BlockAction | type | party)];
        bf = this.matchTokens(url);
    }

    // If there is no block filter, no need to test against allow filters
    if ( bf === false ) {
        return '';
    }

    // Test against allow filters
    buckets[0] = categories[this.makeCategoryKey(AllowAnyTypeAnyParty)];
    buckets[1] = categories[this.makeCategoryKey(AllowAnyType | party)];
    buckets[2] = categories[this.makeCategoryKey(AllowAnyParty | type)];
    buckets[3] = categories[this.makeCategoryKey(AllowAction | type | party)];
    var af = this.matchTokens(url);
    if ( af !== false ) {
        return '@@' + af.toString();
    }

    return bf.toString();
};
/******************************************************************************/
// Number of filters in the container: block filters plus allow filters.

FilterContainer.prototype.getFilterCount = function() {
    var blockCount = this.blockFilterCount;
    var allowCount = this.allowFilterCount;
    return blockCount + allowCount;
};
/******************************************************************************/
return new FilterContainer();
/******************************************************************************/
})();