Raymond Hill 2018-10-23 14:01:08 -03:00
parent 4a442eece4
commit cabb0d36b6
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
4 changed files with 194 additions and 275 deletions

View File

@ -140,8 +140,8 @@ var µBlock = (function() { // jshint ignore:line
// Read-only
systemSettings: {
compiledMagic: 5, // Increase when compiled format changes
selfieMagic: 4 // Increase when selfie format changes
compiledMagic: 6, // Increase when compiled format changes
selfieMagic: 6 // Increase when selfie format changes
},
restoreBackupSettings: {

View File

@ -33,16 +33,15 @@
var µb = µBlock;
// fedcba9876543210
// | | | |||
// | | | |||
// | | | |||
// | | | |||
// | | | ||+---- bit 0: [BlockAction | AllowAction]
// | | | |+----- bit 1: `important`
// | | | +------ bit 2- 3: party [0 - 3]
// | | +-------- bit 4- 8: type [0 - 31]
// | +------------- bit 9-14: unused
// +------------------- bit 15: bad filter
// | | |||
// | | |||
// | | |||
// | | |||
// | | ||+---- bit 0: [BlockAction | AllowAction]
// | | |+----- bit 1: `important`
// | | +------ bit 2- 3: party [0 - 3]
// | +-------- bit 4- 8: type [0 - 31]
// +------------- bit 9-15: unused
var BlockAction = 0 << 0;
var AllowAction = 1 << 0;
@ -50,7 +49,6 @@ var Important = 1 << 1;
var AnyParty = 0 << 2;
var FirstParty = 1 << 2;
var ThirdParty = 2 << 2;
var BadFilter = 1 << 15;
var AnyType = 0 << 4;
var typeNameToTypeValue = {
@ -126,50 +124,6 @@ var pageHostnameRegister = '',
//var filterRegister = null;
//var categoryRegister = '';
/******************************************************************************/
// Debugging hook: `histogram(label, categories)` is called with the filter
// categories map but is deliberately a no-op here. The block comment below
// holds an alternative implementation which, when enabled, logs the bucket
// sizes (largest first, top 50) to the console.
var histogram = function() {};
/*
histogram = function(label, categories) {
var h = [],
categoryBucket;
for ( var k in categories ) {
// No need for hasOwnProperty() here: there is no prototype chain.
categoryBucket = categories[k];
for ( var kk in categoryBucket ) {
// No need for hasOwnProperty() here: there is no prototype chain.
filterBucket = categoryBucket[kk];
h.push({
k: k.charCodeAt(0).toString(2) + ' ' + kk,
n: filterBucket instanceof FilterBucket ? filterBucket.filters.length : 1
});
}
}
console.log('Histogram %s', label);
var total = h.length;
h.sort(function(a, b) { return b.n - a.n; });
// Find indices of entries of interest
var target = 2;
for ( var i = 0; i < total; i++ ) {
if ( h[i].n === target ) {
console.log('\tEntries with only %d filter(s) start at index %s (key = "%s")', target, i, h[i].k);
target -= 1;
}
}
h = h.slice(0, 50);
h.forEach(function(v) {
console.log('\tkey=%s count=%d', v.k, v.n);
});
console.log('\tTotal buckets count: %d', total);
};
*/
/******************************************************************************/
// Local helpers
// Be sure to not confuse 'example.com' with 'anotherexample.com'
@ -219,16 +173,20 @@ rawToRegexStr.escape4 = /\*/g;
rawToRegexStr.reTextHostnameAnchor1 = '^[a-z-]+://(?:[^/?#]+\\.)?';
rawToRegexStr.reTextHostnameAnchor2 = '^[a-z-]+://(?:[^/?#]+)?';
var filterFingerprinter = µb.CompiledLineWriter.fingerprint;
const filterDataSerialize = µb.CompiledLineIO.serialize;
var toLogDataInternal = function(categoryBits, tokenHash, filter) {
if ( filter === null ) { return undefined; }
var logData = filter.logData();
logData.compiled = filterFingerprinter([ categoryBits, tokenHash, logData.compiled ]);
let logData = filter.logData();
logData.compiled = filterDataSerialize([
categoryBits,
tokenHash,
logData.compiled
]);
if ( categoryBits & 0x001 ) {
logData.raw = '@@' + logData.raw;
}
var opts = [];
let opts = [];
if ( categoryBits & 0x002 ) {
opts.push('important');
}
@ -237,7 +195,7 @@ var toLogDataInternal = function(categoryBits, tokenHash, filter) {
} else if ( categoryBits & 0x004 ) {
opts.push('first-party');
}
var type = categoryBits & 0x1F0;
let type = categoryBits & 0x1F0;
if ( type !== 0 && type !== typeNameToTypeValue.data ) {
opts.push(typeValueToTypeName[type >>> 4]);
}
@ -300,16 +258,12 @@ var registerFilterClass = function(ctor) {
var fid = filterClassIdGenerator++;
ctor.fid = ctor.prototype.fid = fid;
filterClasses[fid] = ctor;
//console.log(ctor.name, fid);
};
// Rebuild a filter instance from its compiled form.
// `args[0]` is the class id (`fid`) used as the index into `filterClasses`;
// the whole `args` array is handed to that class's `load()`.
var filterFromCompiledData = function(args) {
    const fid = args[0];
    return filterClasses[fid].load(args);
};
//var filterClassHistogram = new Map();
/******************************************************************************/
var FilterTrue = function() {
@ -1424,7 +1378,7 @@ FilterParser.prototype.toNormalizedType = {
FilterParser.prototype.reset = function() {
this.action = BlockAction;
this.anchor = 0;
this.badFilter = 0;
this.badFilter = false;
this.dataType = undefined;
this.dataStr = undefined;
this.elemHiding = false;
@ -1594,7 +1548,7 @@ FilterParser.prototype.parseOptions = function(s) {
}
// https://github.com/uBlockOrigin/uAssets/issues/192
if ( opt === 'badfilter' ) {
this.badFilter = BadFilter;
this.badFilter = true;
continue;
}
// Unrecognized filter option: ignore whole filter.
@ -2000,17 +1954,12 @@ FilterContainer.prototype.reset = function() {
this.allowFilterCount = 0;
this.blockFilterCount = 0;
this.discardedCount = 0;
this.goodFilters = new Set();
this.badFilters = new Set();
this.duplicateBuster = new Set();
this.categories = new Map();
this.dataFilters = new Map();
this.filterParser.reset();
// Reuse filter instances whenever possible at load time.
this.fclassLast = null;
this.fdataLast = null;
this.filterLast = null;
// Runtime registers
this.cbRegister = undefined;
this.thRegister = undefined;
@ -2020,18 +1969,82 @@ FilterContainer.prototype.reset = function() {
/******************************************************************************/
// Build the runtime lookup structures from the serialized filter lines held
// in `this.goodFilters`, ignoring every line also present in
// `this.badFilters`, then mark the container as frozen.
//
// Each accepted line is unserialized and routed to one of:
//   - the redirect engine (type field equals the `redirect` type value),
//   - `this.dataFilters` (data-holding filters, chained per token hash),
//   - `this.categories` (all other filters, keyed by category bits and then
//     by token hash).
//
// NOTE(review): `histogram()` is a no-op stub in this file, and the calls to
// `removeBadFilters()` plus the `duplicateBuster` / `fclassLast` /
// `fdataLast` / `filterLast` resets appear to belong to an earlier load
// path; with bad filters already skipped in the loop they look redundant.
// Confirm before removing.
FilterContainer.prototype.freeze = function() {
histogram('allFilters', this.categories);
this.removeBadFilters();
this.duplicateBuster = new Set();
// Cache the class ids and helpers used on every iteration of the loop.
let filterPairId = FilterPair.fid,
filterBucketId = FilterBucket.fid,
filterDataHolderId = FilterDataHolder.fid,
redirectTypeValue = typeNameToTypeValue.redirect,
unserialize = µb.CompiledLineIO.unserialize;
for ( let line of this.goodFilters ) {
// A line which was also seen as a bad filter is dropped.
if ( this.badFilters.has(line) ) { continue; }
let args = unserialize(line);
let bits = args[0];
// Special cases: delegate to more specialized engines.
// Redirect engine.
// 0x1F0 isolates the type field (bits 4-8) of the category bits.
if ( (bits & 0x1F0) === redirectTypeValue ) {
µb.redirectEngine.fromCompiledRule(args[1]);
continue;
}
// Plain static filters.
let tokenHash = args[1];
let fdata = args[2];
// Special treatment: data-holding filters are stored separately
// because they require special matching algorithm (unlike other
// filters, ALL hits must be reported).
if ( fdata[0] === filterDataHolderId ) {
// The new entry becomes the head of the chain stored for this token hash.
let entry = new FilterDataHolderEntry(bits, tokenHash, fdata);
let bucket = this.dataFilters.get(tokenHash);
if ( bucket !== undefined ) {
entry.next = bucket;
}
this.dataFilters.set(tokenHash, entry);
continue;
}
// Lazily create the per-category map of token hash => filter entry.
let bucket = this.categories.get(bits);
if ( bucket === undefined ) {
bucket = new Map();
this.categories.set(bits, bucket);
}
let entry = bucket.get(tokenHash);
// Filters stored under the dot token hash are gathered into a single
// FilterHostnameDict instance.
if ( tokenHash === this.dotTokenHash ) {
if ( entry === undefined ) {
entry = new FilterHostnameDict();
bucket.set(this.dotTokenHash, entry);
}
entry.add(fdata);
continue;
}
// First filter for this token hash: store it directly.
if ( entry === undefined ) {
bucket.set(tokenHash, filterFromCompiledData(fdata));
continue;
}
// Already a FilterBucket: add to it.
if ( entry.fid === filterBucketId ) {
entry.add(fdata);
continue;
}
// Already a FilterPair: replace it with whatever `upgrade()` returns
// (presumably a larger container -- verify against FilterPair).
if ( entry.fid === filterPairId ) {
bucket.set(
tokenHash,
entry.upgrade(filterFromCompiledData(fdata))
);
continue;
}
// A single filter is already stored: combine both into a FilterPair.
bucket.set(
tokenHash,
new FilterPair(entry, filterFromCompiledData(fdata))
);
}
this.filterParser.reset();
this.fclassLast = null;
this.fdataLast = null;
this.filterLast = null;
// The serialized lines are dropped once consumed; `badFilters` is left as is.
this.goodFilters = new Set();
this.frozen = true;
//console.log(JSON.stringify(Array.from(filterClassHistogram)));
//this.tokenHistogram = new Map(Array.from(this.tokenHistogram).sort(function(a, b) {
// return a[0].localeCompare(b[0]) || (b[1] - a[1]);
//}));
};
/******************************************************************************/
@ -2125,9 +2138,6 @@ FilterContainer.prototype.compile = function(raw, writer) {
return false;
}
// 0 = network filters
writer.select(0);
// Pure hostnames, use more efficient dictionary lookup
// https://github.com/chrisaljoudi/uBlock/issues/665
// Create a dict keyed on request type etc.
@ -2207,10 +2217,16 @@ FilterContainer.prototype.compileToAtomicFilter = function(
fdata,
writer
) {
let descBits = parsed.action |
parsed.important |
parsed.party |
parsed.badFilter;
// 0 = network filters
// 1 = network filters: bad filters
if ( parsed.badFilter ) {
writer.select(1);
} else {
writer.select(0);
}
let descBits = parsed.action | parsed.important | parsed.party;
let type = parsed.types;
// Typeless
@ -2231,7 +2247,7 @@ FilterContainer.prototype.compileToAtomicFilter = function(
// Only static filter with an explicit type can be redirected. If we reach
// this point, it's because there is one or more explicit type.
if ( parsed.badFilter === 0 && parsed.redirect ) {
if ( parsed.badFilter === false && parsed.redirect ) {
let redirects = µb.redirectEngine.compileRuleFromStaticFilter(parsed.raw);
if ( Array.isArray(redirects) ) {
for ( let redirect of redirects ) {
@ -2244,138 +2260,30 @@ FilterContainer.prototype.compileToAtomicFilter = function(
/******************************************************************************/
FilterContainer.prototype.fromCompiledContent = function(reader) {
var badFilterBit = BadFilter,
filterPairId = FilterPair.fid,
filterBucketId = FilterBucket.fid,
filterDataHolderId = FilterDataHolder.fid,
redirectTypeValue = typeNameToTypeValue.redirect,
args, bits, bucket, entry,
tokenHash, fdata, fingerprint;
// 0 = network filters
reader.select(0);
while ( reader.next() === true ) {
args = reader.args();
bits = args[0];
if ( (bits & badFilterBit) !== 0 ) {
this.badFilters.add(args);
while ( reader.next() ) {
if ( this.goodFilters.has(reader.line) ) {
this.discardedCount += 1;
continue;
}
// Special cases: delegate to more specialized engines.
// Redirect engine.
if ( (bits & 0x1F0) === redirectTypeValue ) {
µb.redirectEngine.fromCompiledRule(args[1]);
continue;
}
this.goodFilters.add(reader.line);
this.acceptedCount += 1;
}
// Plain static filters.
fingerprint = reader.fingerprint();
tokenHash = args[1];
fdata = args[2];
// Special treatment: data-holding filters are stored separately
// because they require special matching algorithm (unlike other
// filters, ALL hits must be reported).
if ( fdata[0] === filterDataHolderId ) {
if ( this.duplicateBuster.has(fingerprint) ) {
// 1 = network filters: bad filters
// Since we are going to keep bad filter fingerprints around, we ensure
// they are "detached" from the parent string from which they are sliced.
// We keep bad filter fingerprints around to use them when user
// incrementally add filters (through "Block element" for example).
reader.select(1);
while ( reader.next() ) {
if ( this.badFilters.has(reader.line) ) {
this.discardedCount += 1;
continue;
}
this.duplicateBuster.add(fingerprint);
entry = new FilterDataHolderEntry(bits, tokenHash, fdata);
bucket = this.dataFilters.get(tokenHash);
if ( bucket !== undefined ) {
entry.next = bucket;
}
this.dataFilters.set(tokenHash, entry);
continue;
}
bucket = this.categories.get(bits);
if ( bucket === undefined ) {
bucket = new Map();
this.categories.set(bits, bucket);
}
entry = bucket.get(tokenHash);
if ( tokenHash === this.dotTokenHash ) {
if ( entry === undefined ) {
entry = new FilterHostnameDict();
bucket.set(this.dotTokenHash, entry);
}
if ( entry.add(fdata) === false ) {
this.discardedCount += 1;
}
continue;
}
if ( this.duplicateBuster.has(fingerprint) ) {
this.discardedCount += 1;
continue;
}
this.duplicateBuster.add(fingerprint);
if ( entry === undefined ) {
bucket.set(tokenHash, filterFromCompiledData(fdata));
continue;
}
if ( entry.fid === filterBucketId ) {
entry.add(fdata);
continue;
}
if ( entry.fid === filterPairId ) {
bucket.set(
tokenHash,
entry.upgrade(filterFromCompiledData(fdata))
);
continue;
}
bucket.set(
tokenHash,
new FilterPair(entry, filterFromCompiledData(fdata))
);
}
};
/******************************************************************************/
FilterContainer.prototype.removeBadFilters = function() {
var filterPairId = FilterPair.fid,
filterBucketId = FilterBucket.fid,
filterHostnameDictId = FilterHostnameDict.fid,
bits, tokenHash, fdata, bucket, entry;
for ( var args of this.badFilters ) {
bits = args[0] & ~BadFilter;
bucket = this.categories.get(bits);
if ( bucket === undefined ) { continue; }
tokenHash = args[1];
entry = bucket.get(tokenHash);
if ( entry === undefined ) { continue; }
fdata = args[2];
if ( entry.fid === filterPairId || entry.fid === filterBucketId ) {
entry.remove(fdata);
entry = entry.downgrade();
if ( entry !== undefined ) {
bucket.set(tokenHash, entry);
} else {
bucket.delete(tokenHash);
}
} else if ( entry.fid === filterHostnameDictId ) {
entry.remove(fdata);
if ( entry.size === 0 ) {
bucket.delete(tokenHash);
}
} else if ( arrayStrictEquals(entry.compile(), fdata) ) {
bucket.delete(tokenHash);
}
if ( bucket.size === 0 ) {
this.categories.delete(bits);
}
this.badFilters.add(µb.orphanizeString(reader.line));
this.acceptedCount += 1;
}
};

View File

@ -743,27 +743,26 @@
// Lower minimum update period to 1 day.
µBlock.extractFilterListMetadata = function(assetKey, raw) {
var listEntry = this.availableFilterLists[assetKey];
let listEntry = this.availableFilterLists[assetKey];
if ( listEntry === undefined ) { return; }
// Metadata expected to be found at the top of content.
var head = raw.slice(0, 1024),
matches, v;
let head = raw.slice(0, 1024);
// https://github.com/gorhill/uBlock/issues/313
// Always try to fetch the name if this is an external filter list.
if ( listEntry.title === '' || listEntry.group === 'custom' ) {
matches = head.match(/(?:^|\n)(?:!|# )[\t ]*Title[\t ]*:([^\n]+)/i);
let matches = head.match(/(?:^|\n)(?:!|# )[\t ]*Title[\t ]*:([^\n]+)/i);
if ( matches !== null ) {
// https://bugs.chromium.org/p/v8/issues/detail?id=2869
// JSON.stringify/JSON.parse is to work around String.slice()
// orphanizeString is to work around String.slice()
// potentially causing the whole raw filter list to be held in
// memory just because we cut out the title as a substring.
listEntry.title = JSON.parse(JSON.stringify(matches[1].trim()));
listEntry.title = this.orphanizeString(matches[1].trim());
}
}
// Extract update frequency information
matches = head.match(/(?:^|\n)(?:!|# )[\t ]*Expires[\t ]*:[\t ]*(\d+)[\t ]*(h)?/i);
let matches = head.match(/(?:^|\n)(?:!|# )[\t ]*Expires[\t ]*:[\t ]*(\d+)[\t ]*(h)?/i);
if ( matches !== null ) {
v = Math.max(parseInt(matches[1], 10), 1);
let v = Math.max(parseInt(matches[1], 10), 1);
if ( matches[2] !== undefined ) {
v = Math.ceil(v / 24);
}
@ -787,29 +786,28 @@
/******************************************************************************/
µBlock.compileFilters = function(rawText) {
var writer = new this.CompiledLineWriter();
let writer = new this.CompiledLineIO.Writer();
// Useful references:
// https://adblockplus.org/en/filter-cheatsheet
// https://adblockplus.org/en/filters
var staticNetFilteringEngine = this.staticNetFilteringEngine,
let staticNetFilteringEngine = this.staticNetFilteringEngine,
staticExtFilteringEngine = this.staticExtFilteringEngine,
reIsWhitespaceChar = /\s/,
reMaybeLocalIp = /^[\d:f]/,
reIsLocalhostRedirect = /\s+(?:0\.0\.0\.0|broadcasthost|localhost|local|ip6-\w+)\b/,
reLocalIp = /^(?:0\.0\.0\.0|127\.0\.0\.1|::1|fe80::1%lo0)/,
line, c, pos,
lineIter = new this.LineIterator(this.processDirectives(rawText));
while ( lineIter.eot() === false ) {
// rhill 2014-04-18: The trim is important here, as without it there
// could be a lingering `\r` which would cause problems in the
// following parsing code.
line = lineIter.next().trim();
let line = lineIter.next().trim();
if ( line.length === 0 ) { continue; }
// Strip comments
c = line.charAt(0);
let c = line.charAt(0);
if ( c === '!' || c === '[' ) { continue; }
// Parse or skip cosmetic filters
@ -828,7 +826,7 @@
// Don't remove:
// ...#blah blah blah
// because some ABP filters uses the `#` character (URL fragment)
pos = line.indexOf('#');
let pos = line.indexOf('#');
if ( pos !== -1 && reIsWhitespaceChar.test(line.charAt(pos - 1)) ) {
line = line.slice(0, pos).trim();
}
@ -860,7 +858,7 @@
µBlock.applyCompiledFilters = function(rawText, firstparty) {
if ( rawText === '' ) { return; }
var reader = new this.CompiledLineReader(rawText);
let reader = new this.CompiledLineIO.Reader(rawText);
this.staticNetFilteringEngine.fromCompiledContent(reader);
this.staticExtFilteringEngine.fromCompiledContent(reader, {
skipGenericCosmetic: this.userSettings.ignoreGenericCosmeticFilters,

View File

@ -224,18 +224,47 @@
/******************************************************************************/
µBlock.CompiledLineWriter = function() {
// Helpers to write and read compiled filter lists.
//
// Compiled content is a sequence of numbered blocks, each delimited by a
// `#block-start-<id>` line and a `#block-end-<id>` line, with one serialized
// entry per line in between.
//
//   serialize / unserialize: convert one entry to/from its line form.
//   Writer: constructor for an object accumulating lines per block id.
//   Reader: constructor for an object splitting `raw` into its blocks;
//           `raw` is the compiled text, `blockId` (optional) is the block
//           to select right away.
µBlock.CompiledLineIO = {
    serialize: JSON.stringify,
    unserialize: JSON.parse,
    blockStartPrefix: '#block-start-', // ensure no special regex characters
    blockEndPrefix: '#block-end-', // ensure no special regex characters

    Writer: function() {
        this.io = µBlock.CompiledLineIO;
        this.blockId = undefined;
        this.block = undefined;
        this.blocks = new Map();
        this.stringifier = this.io.serialize;
    },

    Reader: function(raw, blockId) {
        this.io = µBlock.CompiledLineIO;
        this.block = '';
        this.len = 0;
        this.offset = 0;
        this.line = '';
        this.parser = this.io.unserialize;
        this.blocks = new Map();
        let reBlockStart = new RegExp(
            '^' + this.io.blockStartPrefix + '(\\d+)\\n',
            'gm'
        );
        let match = reBlockStart.exec(raw);
        while ( match !== null ) {
            let beg = match.index + match[0].length;
            let end = raw.indexOf(this.io.blockEndPrefix + match[1], beg);
            // A missing end marker means the content is truncated or
            // malformed. Stop here: assigning -1 to `lastIndex` would restart
            // the search at offset 0 and the loop would never terminate.
            if ( end === -1 ) { break; }
            this.blocks.set(parseInt(match[1], 10), raw.slice(beg, end));
            reBlockStart.lastIndex = end;
            match = reBlockStart.exec(raw);
        }
        if ( blockId !== undefined ) {
            this.select(blockId);
        }
    }
};
µBlock.CompiledLineWriter.fingerprint = function(args) {
return JSON.stringify(args);
};
µBlock.CompiledLineWriter.prototype = {
µBlock.CompiledLineIO.Writer.prototype = {
push: function(args) {
this.block[this.block.length] = this.stringifier(args);
},
@ -248,50 +277,26 @@
}
},
toString: function() {
var result = [];
for ( var entry of this.blocks ) {
if ( entry[1].length === 0 ) { continue; }
let result = [];
for ( let [ id, lines ] of this.blocks ) {
if ( lines.length === 0 ) { continue; }
result.push(
'#block-start-' + entry[0],
entry[1].join('\n'),
'#block-end-' + entry[0]
this.io.blockStartPrefix + id,
lines.join('\n'),
this.io.blockEndPrefix + id
);
}
return result.join('\n');
}
};
/******************************************************************************/
// Reader for compiled filter lists.
//
// Splits `raw` into its numbered blocks, each delimited by a
// `#block-start-<id>` line and a `#block-end-<id>` marker, and stores the
// text of each block in `this.blocks`, keyed by numeric block id.
//
//   raw: the compiled content, as a string.
//   blockId: optional; when provided, that block is selected right away.
µBlock.CompiledLineReader = function(raw, blockId) {
    this.block = '';
    this.len = 0;
    this.offset = 0;
    this.line = '';
    this.parser = JSON.parse;
    this.blocks = new Map();
    const reBlockStart = /^#block-start-(\d+)\n/gm;
    let match = reBlockStart.exec(raw);
    while ( match !== null ) {
        const beg = match.index + match[0].length;
        const end = raw.indexOf('#block-end-' + match[1], beg);
        // A missing end marker means the content is truncated or malformed.
        // Stop here: assigning -1 to `lastIndex` would restart the search at
        // offset 0 and the loop would never terminate.
        if ( end === -1 ) { break; }
        this.blocks.set(parseInt(match[1], 10), raw.slice(beg, end));
        reBlockStart.lastIndex = end;
        match = reBlockStart.exec(raw);
    }
    if ( blockId !== undefined ) {
        this.select(blockId);
    }
};
µBlock.CompiledLineReader.prototype = {
µBlock.CompiledLineIO.Reader.prototype = {
next: function() {
if ( this.offset === this.len ) {
this.line = '';
return false;
}
var pos = this.block.indexOf('\n', this.offset);
let pos = this.block.indexOf('\n', this.offset);
if ( pos !== -1 ) {
this.line = this.block.slice(this.offset, pos);
this.offset = pos + 1;
@ -466,3 +471,11 @@
return decomposed;
};
})();
/******************************************************************************/
// TODO: evaluate using TextEncoder/TextDecoder
// Return a copy of `s` produced by a JSON round trip. Callers use this on
// substrings they keep around so that the result does not keep the (much
// larger) string it was sliced from alive in memory.
µBlock.orphanizeString = function(s) {
    const serialized = JSON.stringify(s);
    return JSON.parse(serialized);
};