This commit is contained in:
gorhill 2017-05-19 08:45:19 -04:00
parent 92f94c7765
commit a222e23e49
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
4 changed files with 231 additions and 202 deletions

View File

@ -121,8 +121,8 @@ var µBlock = (function() { // jshint ignore:line
// read-only // read-only
systemSettings: { systemSettings: {
compiledMagic: 'lcmfjiajoqwe', compiledMagic: 'alufjifllsxz',
selfieMagic: 'lcmfjiajoqwe' selfieMagic: 'alufjifllsxz'
}, },
restoreBackupSettings: { restoreBackupSettings: {

View File

@ -210,33 +210,33 @@ rawToRegexStr.escape4 = /\*/g;
// If using native Map, we use numerical keys, otherwise for // If using native Map, we use numerical keys, otherwise for
// Object-based map we use string-based keys. // Object-based map we use string-based keys.
var exportMapKey = function(k) { var exportInt = function(k) {
return k.toString(32); return k.toString(32);
}; };
var importMapKey = function(k) { var importInt = function(k) {
return parseInt(k,32); return parseInt(k,32);
}; };
var toLogDataInternal = function(key, token, filter) { var toLogDataInternal = function(categoryBits, tokenHash, filter) {
if ( filter === null ) { return undefined; } if ( filter === null ) { return undefined; }
var logData = filter.logData(); var logData = filter.logData();
logData.compiled = exportMapKey(key) + '\v' + logData.compiled = exportInt(categoryBits) + '\v' +
token + '\v' + exportInt(tokenHash) + '\v' +
logData.compiled; logData.compiled;
if ( key & 0x001 ) { if ( categoryBits & 0x001 ) {
logData.raw = '@@' + logData.raw; logData.raw = '@@' + logData.raw;
} }
var opts = []; var opts = [];
if ( key & 0x002 ) { if ( categoryBits & 0x002 ) {
opts.push('important'); opts.push('important');
} }
if ( key & 0x008 ) { if ( categoryBits & 0x008 ) {
opts.push('third-party'); opts.push('third-party');
} else if ( key & 0x004 ) { } else if ( categoryBits & 0x004 ) {
opts.push('first-party'); opts.push('first-party');
} }
var type = (key >>> 4) & 0x1F; var type = (categoryBits >>> 4) & 0x1F;
if ( type !== 0 && type !== 16 /* data */ ) { if ( type !== 0 && type !== 16 /* data */ ) {
opts.push(typeValueToTypeName[type]); opts.push(typeValueToTypeName[type]);
} }
@ -1008,27 +1008,29 @@ registerFilterClass(FilterDataHolder);
// Helper class for storing instances of FilterDataHolder. // Helper class for storing instances of FilterDataHolder.
var FilterDataHolderEntry = function(key, token, fdata) { var FilterDataHolderEntry = function(categoryBits, tokenHash, fdata) {
this.keyBits = key; this.categoryBits = categoryBits;
this.token = token; this.tokenHash = tokenHash;
this.filter = filterFromCompiledData(fdata); this.filter = filterFromCompiledData(fdata);
this.next = undefined; this.next = undefined;
}; };
FilterDataHolderEntry.prototype.logData = function() { FilterDataHolderEntry.prototype.logData = function() {
return toLogDataInternal(this.keyBits, this.token, this.filter); return toLogDataInternal(this.categoryBits, this.tokenHash, this.filter);
}; };
FilterDataHolderEntry.prototype.compile = function() { FilterDataHolderEntry.prototype.compile = function() {
return this.keyBits + '\t' + this.token + '\t' + this.filter.compile(); return exportInt(this.categoryBits) + '\t' +
exportInt(this.tokenHash) + '\t' +
this.filter.compile();
}; };
FilterDataHolderEntry.load = function(s) { FilterDataHolderEntry.load = function(s) {
var pos1 = s.indexOf('\t'), var pos1 = s.indexOf('\t'),
pos2 = s.indexOf('\t', pos1 + 1); pos2 = s.indexOf('\t', pos1 + 1);
return new FilterDataHolderEntry( return new FilterDataHolderEntry(
parseInt(s, 10), importInt(s),
s.slice(pos1 + 1, pos2), importInt(s.slice(pos1 + 1, pos2)),
s.slice(pos2 + 1) s.slice(pos2 + 1)
); );
}; };
@ -1254,6 +1256,7 @@ var FilterParser = function() {
this.reWebsocketAny = /^ws[s*]?(?::\/?\/?)?\*?$/; this.reWebsocketAny = /^ws[s*]?(?::\/?\/?)?\*?$/;
this.reBadCSP = /(?:^|;)\s*report-(?:to|uri)\b/; this.reBadCSP = /(?:^|;)\s*report-(?:to|uri)\b/;
this.domainOpt = ''; this.domainOpt = '';
this.noTokenHash = µb.urlTokenizer.tokenHashFromString('*');
this.reset(); this.reset();
}; };
@ -1305,6 +1308,7 @@ FilterParser.prototype.reset = function() {
this.raw = ''; this.raw = '';
this.redirect = false; this.redirect = false;
this.token = '*'; this.token = '*';
this.tokenHash = this.noTokenHash;
this.tokenBeg = 0; this.tokenBeg = 0;
this.types = 0; this.types = 0;
this.important = 0; this.important = 0;
@ -1803,6 +1807,7 @@ FilterParser.prototype.makeToken = function() {
if ( matches !== null ) { if ( matches !== null ) {
this.token = matches[0]; this.token = matches[0];
this.tokenHash = µb.urlTokenizer.tokenHashFromString(this.token);
this.tokenBeg = matches.index; this.tokenBeg = matches.index;
} }
}; };
@ -1814,6 +1819,9 @@ var FilterContainer = function() {
this.reIsGeneric = /[\^\*]/; this.reIsGeneric = /[\^\*]/;
this.filterParser = new FilterParser(); this.filterParser = new FilterParser();
this.urlTokenizer = µb.urlTokenizer; this.urlTokenizer = µb.urlTokenizer;
this.noTokenHash = this.urlTokenizer.tokenHashFromString('*');
this.dotTokenHash = this.urlTokenizer.tokenHashFromString('.');
this.exportedDotTokenHash = exportInt(this.dotTokenHash);
this.reset(); this.reset();
}; };
@ -1841,8 +1849,8 @@ FilterContainer.prototype.reset = function() {
this.filterLast = null; this.filterLast = null;
// Runtime registers // Runtime registers
this.keyRegister = undefined; this.cbRegister = undefined;
this.tokenRegister = undefined; this.thRegister = undefined;
this.fRegister = null; this.fRegister = null;
}; };
@ -1873,7 +1881,7 @@ FilterContainer.prototype.toSelfie = function() {
for (;;) { for (;;) {
entry = iterator.next(); entry = iterator.next();
if ( entry.done === true ) { break; } if ( entry.done === true ) { break; }
selfie.push('k2\t' + entry.value[0]); // token selfie.push('k2\t' + exportInt(entry.value[0])); // token hash
selfie.push(entry.value[1].compile()); selfie.push(entry.value[1].compile());
} }
return selfie.join('\n'); return selfie.join('\n');
@ -1886,7 +1894,7 @@ FilterContainer.prototype.toSelfie = function() {
for (;;) { for (;;) {
entry = iterator.next(); entry = iterator.next();
if ( entry.done === true ) { break; } if ( entry.done === true ) { break; }
selfie.push('k1\t' + exportMapKey(entry.value[0])); // key selfie.push('k1\t' + exportInt(entry.value[0])); // category bits
selfie.push(categoryToSelfie(entry.value[1])); selfie.push(categoryToSelfie(entry.value[1]));
} }
return selfie.join('\n'); return selfie.join('\n');
@ -1931,34 +1939,34 @@ FilterContainer.prototype.fromSelfie = function(selfie) {
this.blockFilterCount = selfie.blockFilterCount; this.blockFilterCount = selfie.blockFilterCount;
this.discardedCount = selfie.discardedCount; this.discardedCount = selfie.discardedCount;
var catKey, tokenKey, var categoryBits, tokenHash,
map = this.categories, submap, map = this.categories, submap,
lineIter = new µb.LineIterator(selfie.categories), lineIter = new µb.LineIterator(selfie.categories),
line; line;
while ( lineIter.eot() === false ) { while ( lineIter.eot() === false ) {
line = lineIter.next(); line = lineIter.next();
if ( line.startsWith('k1\t') ) { if ( line.startsWith('k1\t') ) { // category bits
catKey = importMapKey(line.slice(3)); categoryBits = importInt(line.slice(3));
submap = new Map(); submap = new Map();
map.set(catKey, submap); map.set(categoryBits, submap);
continue; continue;
} }
if ( line.startsWith('k2\t') ) { if ( line.startsWith('k2\t') ) { // token hash
tokenKey = line.slice(3); tokenHash = importInt(line.slice(3));
continue; continue;
} }
submap.set(tokenKey, filterFromCompiledData(line)); submap.set(tokenHash, filterFromCompiledData(line));
} }
var i = selfie.dataFilters.length, var i = selfie.dataFilters.length,
entry, bucket; entry, bucket;
while ( i-- ) { while ( i-- ) {
entry = FilterDataHolderEntry.load(selfie.dataFilters[i]); entry = FilterDataHolderEntry.load(selfie.dataFilters[i]);
bucket = this.dataFilters.get(entry.token); bucket = this.dataFilters.get(entry.tokenHash);
if ( bucket !== undefined ) { if ( bucket !== undefined ) {
entry.next = bucket; entry.next = bucket;
} }
this.dataFilters.set(entry.token, entry); this.dataFilters.set(entry.tokenHash, entry);
} }
}; };
@ -2019,7 +2027,10 @@ FilterContainer.prototype.compile = function(raw, out) {
} else if ( parsed.anchor === 0x5 ) { } else if ( parsed.anchor === 0x5 ) {
// https://github.com/gorhill/uBlock/issues/1669 // https://github.com/gorhill/uBlock/issues/1669
fdata += FilterGenericHnAndRightAnchored.compile(parsed); fdata += FilterGenericHnAndRightAnchored.compile(parsed);
} else if ( this.reIsGeneric.test(parsed.f) || parsed.token === '*' ) { } else if (
this.reIsGeneric.test(parsed.f) ||
parsed.tokenHash === parsed.noTokenHash
) {
if ( parsed.anchor === 0x4 ) { if ( parsed.anchor === 0x4 ) {
fdata += FilterGenericHnAnchored.compile(parsed); fdata += FilterGenericHnAnchored.compile(parsed);
} else { } else {
@ -2057,14 +2068,14 @@ FilterContainer.prototype.compileHostnameOnlyFilter = function(parsed, out) {
//} //}
var route = parsed.badFilter ? 0x01 : 0x00, var route = parsed.badFilter ? 0x01 : 0x00,
keyShard = parsed.action | parsed.important | parsed.party; categoryBits = parsed.action | parsed.important | parsed.party;
var type = parsed.types; var type = parsed.types;
if ( type === 0 ) { if ( type === 0 ) {
out.push( out.push(
route, route,
exportMapKey(keyShard) + '\v' + exportInt(categoryBits) + '\v' +
'.\v' + this.exportedDotTokenHash + '\v' +
parsed.f parsed.f
); );
return true; return true;
@ -2075,8 +2086,8 @@ FilterContainer.prototype.compileHostnameOnlyFilter = function(parsed, out) {
if ( type & 1 ) { if ( type & 1 ) {
out.push( out.push(
route, route,
exportMapKey(keyShard | (bitOffset << 4)) + '\v' + exportInt(categoryBits | (bitOffset << 4)) + '\v' +
'.\v' + this.exportedDotTokenHash + '\v' +
parsed.f parsed.f
); );
} }
@ -2090,13 +2101,13 @@ FilterContainer.prototype.compileHostnameOnlyFilter = function(parsed, out) {
FilterContainer.prototype.compileToAtomicFilter = function(fdata, parsed, out) { FilterContainer.prototype.compileToAtomicFilter = function(fdata, parsed, out) {
var route = parsed.badFilter ? 0x01 : 0x00, var route = parsed.badFilter ? 0x01 : 0x00,
bits = parsed.action | parsed.important | parsed.party, categoryBits = parsed.action | parsed.important | parsed.party,
type = parsed.types; type = parsed.types;
if ( type === 0 ) { if ( type === 0 ) {
out.push( out.push(
route, route,
exportMapKey(bits) + '\v' + exportInt(categoryBits) + '\v' +
parsed.token + '\v' + exportInt(parsed.tokenHash) + '\v' +
fdata fdata
); );
return; return;
@ -2106,8 +2117,8 @@ FilterContainer.prototype.compileToAtomicFilter = function(fdata, parsed, out) {
if ( type & 1 ) { if ( type & 1 ) {
out.push( out.push(
route, route,
exportMapKey(bits | (bitOffset << 4)) + '\v' + exportInt(categoryBits | (bitOffset << 4)) + '\v' +
parsed.token + '\v' + exportInt(parsed.tokenHash) + '\v' +
fdata fdata
); );
} }
@ -2138,7 +2149,7 @@ FilterContainer.prototype.compileToAtomicFilter = function(fdata, parsed, out) {
/******************************************************************************/ /******************************************************************************/
FilterContainer.prototype.fromCompiledContent = function(lineIter) { FilterContainer.prototype.fromCompiledContent = function(lineIter) {
var line, lineBits, hash, token, fdata, var line, lineBits, categoryBits, tokenHash, fdata,
bucket, entry, filter, bucket, entry, filter,
fieldIter = new µb.FieldIterator('\v'), fieldIter = new µb.FieldIterator('\v'),
dataFilterFid = FilterDataHolder.fidPrefix, dataFilterFid = FilterDataHolder.fidPrefix,
@ -2159,8 +2170,8 @@ FilterContainer.prototype.fromCompiledContent = function(lineIter) {
continue; continue;
} }
hash = importMapKey(fieldIter.first(line)); categoryBits = importInt(fieldIter.first(line));
token = fieldIter.next(); tokenHash = importInt(fieldIter.next());
fdata = fieldIter.remainder(); fdata = fieldIter.remainder();
// Special cases: delegate to more specialized engines. // Special cases: delegate to more specialized engines.
@ -2182,26 +2193,26 @@ FilterContainer.prototype.fromCompiledContent = function(lineIter) {
continue; continue;
} }
this.duplicateBuster.add(line); this.duplicateBuster.add(line);
entry = new FilterDataHolderEntry(hash, token, fdata); entry = new FilterDataHolderEntry(categoryBits, tokenHash, fdata);
bucket = this.dataFilters.get(token); bucket = this.dataFilters.get(tokenHash);
if ( bucket !== undefined ) { if ( bucket !== undefined ) {
entry.next = bucket; entry.next = bucket;
} }
this.dataFilters.set(token, entry); this.dataFilters.set(tokenHash, entry);
continue; continue;
} }
bucket = this.categories.get(hash); bucket = this.categories.get(categoryBits);
if ( bucket === undefined ) { if ( bucket === undefined ) {
bucket = new Map(); bucket = new Map();
this.categories.set(hash, bucket); this.categories.set(categoryBits, bucket);
} }
entry = bucket.get(token); entry = bucket.get(tokenHash);
if ( token === '.' ) { if ( tokenHash === this.dotTokenHash ) {
if ( entry === undefined ) { if ( entry === undefined ) {
entry = new FilterHostnameDict(); entry = new FilterHostnameDict();
bucket.set('.', entry); bucket.set(this.dotTokenHash, entry);
} }
if ( entry.add(fdata) === false ) { if ( entry.add(fdata) === false ) {
this.discardedCount += 1; this.discardedCount += 1;
@ -2215,18 +2226,18 @@ FilterContainer.prototype.fromCompiledContent = function(lineIter) {
} }
this.duplicateBuster.add(line); this.duplicateBuster.add(line);
//this.tokenHistogram.set(token, (this.tokenHistogram.get(token) || 0) + 1); //this.tokenHistogram.set(tokenHash, (this.tokenHistogram.get(tokenHash) || 0) + 1);
filter = filterFromCompiledData(fdata); filter = filterFromCompiledData(fdata);
if ( entry === undefined ) { if ( entry === undefined ) {
bucket.set(token, filter); bucket.set(tokenHash, filter);
continue; continue;
} }
if ( entry.fidPrefix === buckerFilterFid ) { if ( entry.fidPrefix === buckerFilterFid ) {
entry.add(filter); entry.add(filter);
continue; continue;
} }
bucket.set(token, new FilterBucket(entry, filter)); bucket.set(tokenHash, new FilterBucket(entry, filter));
} }
}; };
@ -2237,16 +2248,16 @@ FilterContainer.prototype.fromCompiledContent = function(lineIter) {
FilterContainer.prototype.removeBadFilters = function() { FilterContainer.prototype.removeBadFilters = function() {
var lines = µb.setToArray(this.badFilters), var lines = µb.setToArray(this.badFilters),
fieldIter = new µb.FieldIterator('\v'), fieldIter = new µb.FieldIterator('\v'),
hash, token, fdata, bucket, entry, categoryBits, tokenHash, fdata, bucket, entry,
i = lines.length; i = lines.length;
while ( i-- ) { while ( i-- ) {
hash = importMapKey(fieldIter.first(lines[i])); categoryBits = importInt(fieldIter.first(lines[i]));
bucket = this.categories.get(hash); bucket = this.categories.get(categoryBits);
if ( bucket === undefined ) { if ( bucket === undefined ) {
continue; continue;
} }
token = fieldIter.next(); tokenHash = importInt(fieldIter.next());
entry = bucket.get(token); entry = bucket.get(tokenHash);
if ( entry === undefined ) { if ( entry === undefined ) {
continue; continue;
} }
@ -2254,24 +2265,24 @@ FilterContainer.prototype.removeBadFilters = function() {
if ( entry instanceof FilterBucket ) { if ( entry instanceof FilterBucket ) {
entry.remove(fdata); entry.remove(fdata);
if ( entry.filters.length === 1 ) { if ( entry.filters.length === 1 ) {
bucket.set(token, entry.filters[0]); bucket.set(tokenHash, entry.filters[0]);
} }
continue; continue;
} }
if ( entry instanceof FilterHostnameDict ) { if ( entry instanceof FilterHostnameDict ) {
entry.remove(fdata); entry.remove(fdata);
if ( entry.size === 0 ) { if ( entry.size === 0 ) {
bucket.delete(token); bucket.delete(tokenHash);
if ( bucket.size === 0 ) { if ( bucket.size === 0 ) {
this.categories.delete(hash); this.categories.delete(categoryBits);
} }
} }
continue; continue;
} }
if ( entry.compile() === fdata ) { if ( entry.compile() === fdata ) {
bucket.delete(token); bucket.delete(tokenHash);
if ( bucket.size === 0 ) { if ( bucket.size === 0 ) {
this.categories.delete(hash); this.categories.delete(categoryBits);
} }
continue; continue;
} }
@ -2293,20 +2304,20 @@ FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out
toRemove = new Map(); toRemove = new Map();
var entry, f, var entry, f,
tokens = this.urlTokenizer.getTokens(), tokenHashes = this.urlTokenizer.getTokens(),
tokenEntry, token, tokenHash, tokenOffset,
i = 0; i = 0;
while ( i < 16 ) { while ( i < 32 ) {
tokenEntry = tokens[i++]; tokenHash = tokenHashes[i++];
token = tokenEntry.token; if ( tokenHash === 0 ) { break; }
if ( !token ) { break; } tokenOffset = tokenHashes[i++];
entry = this.dataFilters.get(token); entry = this.dataFilters.get(tokenHash);
while ( entry !== undefined ) { while ( entry !== undefined ) {
f = entry.filter; f = entry.filter;
if ( f.match(url, tokenEntry.beg) === true ) { if ( f.match(url, tokenOffset) === true ) {
if ( entry.keyBits & 0x001 ) { if ( entry.categoryBits & 0x001 ) {
toRemove.set(f.dataStr, entry); toRemove.set(f.dataStr, entry);
} else if ( entry.keyBits & 0x002 ) { } else if ( entry.categoryBits & 0x002 ) {
toAddImportant.set(f.dataStr, entry); toAddImportant.set(f.dataStr, entry);
} else { } else {
toAdd.set(f.dataStr, entry); toAdd.set(f.dataStr, entry);
@ -2315,13 +2326,13 @@ FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out
entry = entry.next; entry = entry.next;
} }
} }
entry = this.dataFilters.get('*'); entry = this.dataFilters.get(this.noTokenHash);
while ( entry !== undefined ) { while ( entry !== undefined ) {
f = entry.filter; f = entry.filter;
if ( f.match(url, tokenEntry.beg) === true ) { if ( f.match(url) === true ) {
if ( entry.keyBits & 0x001 ) { if ( entry.categoryBits & 0x001 ) {
toRemove.set(f.dataStr, entry); toRemove.set(f.dataStr, entry);
} else if ( entry.keyBits & 0x002 ) { } else if ( entry.categoryBits & 0x002 ) {
toAddImportant.set(f.dataStr, entry); toAddImportant.set(f.dataStr, entry);
} else { } else {
toAdd.set(f.dataStr, entry); toAdd.set(f.dataStr, entry);
@ -2397,32 +2408,32 @@ FilterContainer.prototype.matchAndFetchData = function(dataType, requestURL, out
FilterContainer.prototype.matchTokens = function(bucket, url) { FilterContainer.prototype.matchTokens = function(bucket, url) {
// Hostname-only filters // Hostname-only filters
var f = bucket.get('.'); var f = bucket.get(this.dotTokenHash);
if ( f !== undefined && f.match() ) { if ( f !== undefined && f.match() ) {
this.tokenRegister = '.'; this.thRegister = this.dotTokenHash;
this.fRegister = f; this.fRegister = f;
return true; return true;
} }
var tokens = this.urlTokenizer.getTokens(), var tokenHashes = this.urlTokenizer.getTokens(),
tokenEntry, token, tokenHash, tokenOffset,
i = 0; i = 0;
for (;;) { for (;;) {
tokenEntry = tokens[i++]; tokenHash = tokenHashes[i++];
token = tokenEntry.token; if ( tokenHash === 0 ) { break; }
if ( !token ) { break; } tokenOffset = tokenHashes[i++];
f = bucket.get(token); f = bucket.get(tokenHash);
if ( f !== undefined && f.match(url, tokenEntry.beg) ) { if ( f !== undefined && f.match(url, tokenOffset) === true ) {
this.tokenRegister = token; this.thRegister = tokenHash;
this.fRegister = f; this.fRegister = f;
return true; return true;
} }
} }
// Untokenizable filters // Untokenizable filters
f = bucket.get('*'); f = bucket.get(this.noTokenHash);
if ( f !== undefined && f.match(url) ) { if ( f !== undefined && f.match(url) === true ) {
this.tokenRegister = '*'; this.thRegister = this.noTokenHash;
this.fRegister = f; this.fRegister = f;
return true; return true;
} }
@ -2456,11 +2467,11 @@ FilterContainer.prototype.matchStringGenericHide = function(context, requestURL)
bucket = this.categories.get(genericHideImportant); bucket = this.categories.get(genericHideImportant);
if ( bucket && this.matchTokens(bucket, url) ) { if ( bucket && this.matchTokens(bucket, url) ) {
this.keyRegister = genericHideImportant; this.cbRegister = genericHideImportant;
return 1; return 1;
} }
this.keyRegister = genericHideException; this.cbRegister = genericHideException;
return 2; return 2;
}; };
@ -2489,39 +2500,39 @@ FilterContainer.prototype.matchStringExactType = function(context, requestURL, r
var party = isFirstParty(context.pageDomain, requestHostnameRegister) ? FirstParty : ThirdParty, var party = isFirstParty(context.pageDomain, requestHostnameRegister) ? FirstParty : ThirdParty,
categories = this.categories, categories = this.categories,
key, bucket; catBits, bucket;
this.fRegister = null; this.fRegister = null;
// https://github.com/chrisaljoudi/uBlock/issues/139 // https://github.com/chrisaljoudi/uBlock/issues/139
// Test against important block filters // Test against important block filters
key = BlockAnyParty | Important | type; catBits = BlockAnyParty | Important | type;
if ( (bucket = categories.get(key)) ) { if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) { if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key; this.cbRegister = catBits;
return 1; return 1;
} }
} }
key = BlockAction | Important | type | party; catBits = BlockAction | Important | type | party;
if ( (bucket = categories.get(key)) ) { if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) { if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key; this.cbRegister = catBits;
return 1; return 1;
} }
} }
// Test against block filters // Test against block filters
key = BlockAnyParty | type; catBits = BlockAnyParty | type;
if ( (bucket = categories.get(key)) ) { if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) { if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key; this.cbRegister = catBits;
} }
} }
if ( this.fRegister === null ) { if ( this.fRegister === null ) {
key = BlockAction | type | party; catBits = BlockAction | type | party;
if ( (bucket = categories.get(key)) ) { if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) { if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key; this.cbRegister = catBits;
} }
} }
} }
@ -2532,17 +2543,17 @@ FilterContainer.prototype.matchStringExactType = function(context, requestURL, r
} }
// Test against allow filters // Test against allow filters
key = AllowAnyParty | type; catBits = AllowAnyParty | type;
if ( (bucket = categories.get(key)) ) { if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) { if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key; this.cbRegister = catBits;
return 2; return 2;
} }
} }
key = AllowAction | type | party; catBits = AllowAction | type | party;
if ( (bucket = categories.get(key)) ) { if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) { if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key; this.cbRegister = catBits;
return 2; return 2;
} }
} }
@ -2597,68 +2608,68 @@ FilterContainer.prototype.matchString = function(context) {
? FirstParty ? FirstParty
: ThirdParty; : ThirdParty;
var categories = this.categories, var categories = this.categories,
key, bucket; catBits, bucket;
// https://github.com/chrisaljoudi/uBlock/issues/139 // https://github.com/chrisaljoudi/uBlock/issues/139
// Test against important block filters. // Test against important block filters.
// The purpose of the `important` option is to reverse the order of // The purpose of the `important` option is to reverse the order of
// evaluation. Normally, it is "evaluate block then evaluate allow", with // evaluation. Normally, it is "evaluate block then evaluate allow", with
// the `important` property it is "evaluate allow then evaluate block". // the `important` property it is "evaluate allow then evaluate block".
key = BlockAnyTypeAnyParty | Important; catBits = BlockAnyTypeAnyParty | Important;
if ( (bucket = categories.get(key)) ) { if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) { if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key; this.cbRegister = catBits;
return 1; return 1;
} }
} }
key = BlockAnyType | Important | party; catBits = BlockAnyType | Important | party;
if ( (bucket = categories.get(key)) ) { if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) { if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key; this.cbRegister = catBits;
return 1; return 1;
} }
} }
key = BlockAnyParty | Important | type; catBits = BlockAnyParty | Important | type;
if ( (bucket = categories.get(key)) ) { if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) { if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key; this.cbRegister = catBits;
return 1; return 1;
} }
} }
key = BlockAction | Important | type | party; catBits = BlockAction | Important | type | party;
if ( (bucket = categories.get(key)) ) { if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) { if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key; this.cbRegister = catBits;
return 1; return 1;
} }
} }
// Test against block filters // Test against block filters
key = BlockAnyTypeAnyParty; catBits = BlockAnyTypeAnyParty;
if ( (bucket = categories.get(key)) ) { if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) { if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key; this.cbRegister = catBits;
} }
} }
if ( this.fRegister === null ) { if ( this.fRegister === null ) {
key = BlockAnyType | party; catBits = BlockAnyType | party;
if ( (bucket = categories.get(key)) ) { if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) { if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key; this.cbRegister = catBits;
} }
} }
if ( this.fRegister === null ) { if ( this.fRegister === null ) {
key = BlockAnyParty | type; catBits = BlockAnyParty | type;
if ( (bucket = categories.get(key)) ) { if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) { if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key; this.cbRegister = catBits;
} }
} }
if ( this.fRegister === null ) { if ( this.fRegister === null ) {
key = BlockAction | type | party; catBits = BlockAction | type | party;
if ( (bucket = categories.get(key)) ) { if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) { if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key; this.cbRegister = catBits;
} }
} }
} }
@ -2671,31 +2682,31 @@ FilterContainer.prototype.matchString = function(context) {
} }
// Test against allow filters // Test against allow filters
key = AllowAnyTypeAnyParty; catBits = AllowAnyTypeAnyParty;
if ( (bucket = categories.get(key)) ) { if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) { if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key; this.cbRegister = catBits;
return 2; return 2;
} }
} }
key = AllowAnyType | party; catBits = AllowAnyType | party;
if ( (bucket = categories.get(key)) ) { if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) { if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key; this.cbRegister = catBits;
return 2; return 2;
} }
} }
key = AllowAnyParty | type; catBits = AllowAnyParty | type;
if ( (bucket = categories.get(key)) ) { if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) { if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key; this.cbRegister = catBits;
return 2; return 2;
} }
} }
key = AllowAction | type | party; catBits = AllowAction | type | party;
if ( (bucket = categories.get(key)) ) { if ( (bucket = categories.get(catBits)) ) {
if ( this.matchTokens(bucket, url) ) { if ( this.matchTokens(bucket, url) ) {
this.keyRegister = key; this.cbRegister = catBits;
return 2; return 2;
} }
} }
@ -2707,10 +2718,10 @@ FilterContainer.prototype.matchString = function(context) {
FilterContainer.prototype.toLogData = function() { FilterContainer.prototype.toLogData = function() {
if ( this.fRegister === null ) { return; } if ( this.fRegister === null ) { return; }
var logData = toLogDataInternal(this.keyRegister, this.tokenRegister, this.fRegister); var logData = toLogDataInternal(this.cbRegister, this.thRegister, this.fRegister);
logData.source = 'static'; logData.source = 'static';
logData.token = this.tokenRegister; logData.tokenHash = this.thRegister;
logData.result = this.fRegister === null ? 0 : (this.keyRegister & 1 ? 2 : 1); logData.result = this.fRegister === null ? 0 : (this.cbRegister & 1 ? 2 : 1);
return logData; return logData;
}; };

View File

@ -639,11 +639,13 @@ vAPI.tabs.onPopupUpdated = (function() {
if ( if (
logData === undefined || logData === undefined ||
logData.source !== 'static' || logData.source !== 'static' ||
logData.token === '*' logData.token === µb.staticNetFilteringEngine.noTokenHash
) { ) {
return 0; return 0;
} }
if ( logData.token === '.' ) { return result; } if ( logData.token === µb.staticNetFilteringEngine.dotTokenHash ) {
return result;
}
var re = new RegExp(logData.regex), var re = new RegExp(logData.regex),
matches = re.exec(popunderURL); matches = re.exec(popunderURL);
if ( matches === null ) { return ''; } if ( matches === null ) { return ''; }

View File

@ -29,6 +29,17 @@
// - Tokenize only on demand. // - Tokenize only on demand.
// - To potentially avoid tokenizing when same URL is fed to tokenizer. // - To potentially avoid tokenizing when same URL is fed to tokenizer.
// - Benchmarking shows this to be a common occurrence. // - Benchmarking shows this to be a common occurrence.
//
// https://github.com/gorhill/uBlock/issues/2630
// Slice the input URL into a list of integer-safe token values instead of a
// list of substrings. The assumption is that dealing only with numeric
// values results in fewer underlying memory allocations and, as a
// consequence, less work for the garbage collector down the road.
// Another assumption is that using a numeric key for Map() is more
// efficient than using a string key (but that is something I would have to
// benchmark).
// Benchmark for string-based tokens vs. integer-safe token values:
// https://gorhill.github.io/obj-vs-set-vs-map/tokenize-to-str-vs-to-int.html
µBlock.urlTokenizer = { µBlock.urlTokenizer = {
setURL: function(url) { setURL: function(url) {
@ -43,71 +54,76 @@
// Tokenize on demand. // Tokenize on demand.
getTokens: function() { getTokens: function() {
if ( this._tokenized === false ) { if ( this._tokenized === false ) {
if ( this._gcAfter === undefined ) {
this._gcAfter = Date.now() + 1499;
}
this._tokenize(); this._tokenize();
this._tokenized = true; this._tokenized = true;
} }
return this._tokens; return this._tokens;
}, },
isTokenized: function() { tokenHashFromString: function(s) {
return this._tokens !== null && this._tokens[0].token !== ''; var l = s.length;
}, if ( l === 0 ) { return 0; }
if ( l === 1 ) {
_Entry: function() { if ( s === '*' ) { return 63; }
this.beg = 0; if ( s === '.' ) { return 62; }
this.token = undefined;
},
// https://github.com/chrisaljoudi/uBlock/issues/1118
// We limit to a maximum number of tokens.
_init: function() {
this._tokens = new Array(2048);
for ( var i = 0; i < 2048; i++ ) {
this._tokens[i] = new this._Entry();
} }
this._init = null; var vtc = this._validTokenChars,
th = vtc[s.charCodeAt(0)];
for ( var i = 1; i !== 8 && i !== l; i++ ) {
th = th * 64 + vtc[s.charCodeAt(i)];
}
return th;
}, },
_tokenize: function() { _tokenize: function() {
var tokens = this._tokens, var tokens = this._tokens,
re = this._reAnyToken, url = this._urlOut,
url = this._urlOut; l = url.length;
var matches, entry; if ( l === 0 ) { tokens[0] = 0; return; }
re.lastIndex = 0; // https://github.com/chrisaljoudi/uBlock/issues/1118
for ( var i = 0; i < 2047; i++ ) { // We limit to a maximum number of tokens.
matches = re.exec(url); if ( l > 2048 ) {
if ( matches === null ) { break; } url = url.slice(0, 2048);
entry = tokens[i]; l = 2048;
entry.beg = matches.index;
entry.token = matches[0];
} }
tokens[i].token = ''; // Sentinel var i = 0, j = 0, v, n, ti, th,
// Could no-longer-used-but-still-referenced string fragments vtc = this._validTokenChars;
// contribute to memory fragmentation in the long-term? The code below for (;;) {
// is to address this: left over unused string fragments are removed at for (;;) {
// regular interval. if ( i === l ) { tokens[j] = 0; return; }
if ( Date.now() < this._gcAfter ) { return; } v = vtc[url.charCodeAt(i++)];
this._gcAfter = undefined; if ( v !== 0 ) { break; }
for ( i += 1; i < 2047; i++ ) { }
entry = tokens[i]; th = v; ti = i - 1; n = 1;
if ( entry.token === undefined ) { break; } for (;;) {
entry.token = undefined; if ( i === l ) { break; }
v = vtc[url.charCodeAt(i++)];
if ( v === 0 ) { break; }
if ( n !== 8 ) {
th = th * 64 + v;
n += 1;
}
}
tokens[j++] = th;
tokens[j++] = ti;
} }
}, },
_urlIn: '', _urlIn: '',
_urlOut: '', _urlOut: '',
_tokenized: false, _tokenized: false,
_tokens: null, _tokens: [ 0 ],
_reAnyToken: /[%0-9a-z]+/g, _validTokenChars: (function() {
_gcAfter: undefined var vtc = new Uint8Array(128),
chars = '0123456789%abcdefghijklmnopqrstuvwxyz',
i = chars.length;
while ( i-- ) {
vtc[chars.charCodeAt(i)] = i + 1;
}
return vtc;
})()
}; };
µBlock.urlTokenizer._init();
/******************************************************************************/ /******************************************************************************/
µBlock.formatCount = function(count) { µBlock.formatCount = function(count) {