mirror of https://github.com/gorhill/uBlock.git
Use buffer-like approach for filterUnits array
filterUnits is now treated as a pre-allocated buffer which grows in chunks, so as to minimize memory allocations. Entries are never released, only nulled. Additionally, urlTokenizer is moved into the static network filtering engine, since it is not used anywhere else.
This commit is contained in:
parent 76887c0716
commit 0196993828
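
The shape of the change, before the diffs: filterUnits becomes a pre-allocated array paired with an explicit write pointer, so registering a filter is an indexed write instead of a push that may reallocate, and reset just rewinds the pointer. A minimal standalone sketch of the pattern (names are illustrative, not code from this commit):

// Sketch of the buffer pattern used throughout the diff below, assuming the
// same 4096-entry growth chunk. The array never shrinks: resetting rewinds
// the write pointer and nulls the live slots so their contents can be
// garbage-collected, while the backing storage is reused across reloads.
const buf = new Array(4096).fill(null);
let writePtr = 0;

const bufResize = newSize => {
    if ( newSize <= buf.length ) { return; }
    const size = (newSize + 0x0FFF) & ~0x0FFF; // round up to a 4096 multiple
    for ( let i = buf.length; i < size; i++ ) { buf[i] = null; }
};

const bufAdd = item => {
    const i = writePtr;
    writePtr += 1;
    if ( writePtr > buf.length ) { bufResize(writePtr); }
    buf[i] = item;
    return i; // integer handle, analogous to the engine's unit index
};

const bufReset = ( ) => {
    for ( let i = 0; i < writePtr; i++ ) { buf[i] = null; } // drop references
    writePtr = 0; // capacity is retained
};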
@@ -401,7 +401,7 @@ if (
     const µb = µBlock;
     const writer = new µb.CompiledLineIO.Writer();
     const parser = new vAPI.StaticFilteringParser();
-    parser.setMaxTokenLength(µb.urlTokenizer.MAX_TOKEN_LENGTH);
+    parser.setMaxTokenLength(µb.staticNetFilteringEngine.MAX_TOKEN_LENGTH);
     parser.analyze(rawFilter);

     if ( µb.staticNetFilteringEngine.compile(parser, writer) === false ) {
@@ -30,7 +30,6 @@
 /******************************************************************************/

 const µb = µBlock;
-const urlTokenizer = µb.urlTokenizer;

 // fedcba9876543210
 // |      |     || |
@@ -281,11 +280,33 @@ const isSeparatorChar = c => (charClassMap[c] & CHAR_CLASS_SEPARATOR) !== 0;

 /******************************************************************************/

-const filterUnits = [ null ];
+// Initial size should be enough for default set of filter lists.
+const filterUnits = JSON.parse(`[${'null,'.repeat(65535)}null]`);
+let filterUnitWritePtr = 1;
+const FILTER_UNITS_MIN = filterUnitWritePtr;
+
+const filterUnitAdd = function(f) {
+    const i = filterUnitWritePtr;
+    filterUnitWritePtr += 1;
+    if ( filterUnitWritePtr > filterUnits.length ) {
+        filterUnitBufferResize(filterUnitWritePtr);
+    }
+    filterUnits[i] = f;
+    return i;
+};
+
+const filterUnitBufferResize = function(newSize) {
+    if ( newSize <= filterUnits.length ) { return; }
+    const size = (newSize + 0x0FFF) & ~0x0FFF;
+    for ( let i = filterUnits.length; i < size; i++ ) {
+        filterUnits[i] = null;
+    }
+};

 // Initial size should be enough for default set of filter lists.
 const filterSequences = JSON.parse(`[${'0,'.repeat(163839)}0]`);
 let filterSequenceWritePtr = 3;
+const FILTER_SEQUENCES_MIN = filterSequenceWritePtr;

 const filterSequenceAdd = function(a, b) {
     const i = filterSequenceWritePtr;
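
The masking in filterUnitBufferResize rounds the requested size up to the next multiple of 0x1000 (4096), so capacity grows in fixed chunks rather than entry by entry. A quick check of the arithmetic:

// (newSize + 0x0FFF) & ~0x0FFF yields the smallest multiple of 4096 >= newSize:
console.log((65537 + 0x0FFF) & ~0x0FFF); // 69632 (17 × 4096)
console.log((4096 + 0x0FFF) & ~0x0FFF);  // 4096  (already a multiple)
console.log((4097 + 0x0FFF) & ~0x0FFF);  // 8192  (next chunk)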
@@ -354,27 +375,15 @@ const registerFilterClass = function(ctor) {
     filterClasses[fid] = ctor;
 };

-const filterFromCtor = function(ctor, ...args) {
-    if ( ctor.filterUnit !== undefined ) {
-        return ctor.filterUnit;
-    }
-    const f = new ctor(...args);
-    const iunit = filterUnits.length;
-    filterUnits.push(f);
-    return iunit;
-};
+const filterUnitFromCtor = (ctor, ...args) => filterUnitAdd(new ctor(...args));

-const filterUnitFromFilter = function(f) {
-    const iunit = filterUnits.length;
-    filterUnits.push(f);
-    return iunit;
-};
+const filterUnitFromFilter = f => filterUnitAdd(f);

 const filterUnitFromCompiled = function(args) {
     const ctor = filterClasses[args[0]];
     const keygen = ctor.keyFromArgs;
     if ( keygen === undefined ) {
-        return filterUnits.push(ctor.fromCompiled(args)) - 1;
+        return filterUnitAdd(ctor.fromCompiled(args));
     }
     let key = ctor.fidstr;
     const keyargs = keygen(args);
@@ -382,10 +391,9 @@ const filterUnitFromCompiled = function(args) {
         key += `\t${keyargs}`;
     }
     let iunit = filterArgsToUnit.get(key);
-    if ( iunit === undefined ) {
-        iunit = filterUnits.push(ctor.fromCompiled(args)) - 1;
-        filterArgsToUnit.set(key, iunit);
-    }
+    if ( iunit !== undefined ) { return iunit; }
+    iunit = filterUnitAdd(ctor.fromCompiled(args));
+    filterArgsToUnit.set(key, iunit);
     return iunit;
 };
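
The rewrite keeps filterUnitFromCompiled's interning behavior: when a filter class exposes keyFromArgs, a content-derived key maps equal compiled filters to one shared unit index. A reduced sketch of the idea (hypothetical names; a plain push stands in for filterUnitAdd):

// One unit per distinct key: equal compiled filters share an index.
const units = [];
const argsToUnit = new Map();

const unitFromCompiled = (key, create) => {
    let iunit = argsToUnit.get(key);
    if ( iunit !== undefined ) { return iunit; } // reuse the interned unit
    iunit = units.push(create()) - 1;
    argsToUnit.set(key, iunit);
    return iunit;
};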
@@ -2170,7 +2178,7 @@ const FilterBucket = class extends FilterCollection {
             i = inext;
         }
         bucket.originTestUnit =
-            filterFromCtor(FilterOriginHitSet, domainOpts.join('|'));
+            filterUnitFromCtor(FilterOriginHitSet, domainOpts.join('|'));
         return bucket;
     }
 };
@@ -2207,10 +2215,187 @@ const FilterBucketOfOriginHits = class extends FilterBucket {

 registerFilterClass(FilterBucketOfOriginHits);

 /******************************************************************************/
 /******************************************************************************/

-const FILTER_UNITS_MIN = filterUnits.length;
-const FILTER_SEQUENCES_MIN = filterSequenceWritePtr;
+// https://github.com/gorhill/uBlock/issues/2630
+// Slice input URL into a list of safe-integer token values, instead of a list
+// of substrings. The assumption is that with dealing only with numeric
+// values, less underlying memory allocations, and also as a consequence
+// less work for the garbage collector down the road.
+// Another assumption is that using a numeric-based key value for Map() is
+// more efficient than string-based key value (but that is something I would
+// have to benchmark).
+// Benchmark for string-based tokens vs. safe-integer token values:
+// https://gorhill.github.io/obj-vs-set-vs-map/tokenize-to-str-vs-to-int.html
+
+const urlTokenizer = new (class {
+    constructor() {
+        this._chars = '0123456789%abcdefghijklmnopqrstuvwxyz';
+        this._validTokenChars = new Uint8Array(128);
+        for ( let i = 0, n = this._chars.length; i < n; i++ ) {
+            this._validTokenChars[this._chars.charCodeAt(i)] = i + 1;
+        }
+        // Four upper bits of token hash are reserved for built-in predefined
+        // token hashes, which should never end up being used when tokenizing
+        // any arbitrary string.
+        this.dotTokenHash = 0x10000000;
+        this.anyTokenHash = 0x20000000;
+        this.anyHTTPSTokenHash = 0x30000000;
+        this.anyHTTPTokenHash = 0x40000000;
+        this.noTokenHash = 0x50000000;
+        this.emptyTokenHash = 0xF0000000;
+
+        this._urlIn = '';
+        this._urlOut = '';
+        this._tokenized = false;
+        this._hasQuery = 0;
+        // https://www.reddit.com/r/uBlockOrigin/comments/dzw57l/
+        // Remember: 1 token needs two slots
+        this._tokens = new Uint32Array(2064);
+
+        this.knownTokens = new Uint8Array(65536);
+        this.resetKnownTokens();
+        this.MAX_TOKEN_LENGTH = 7;
+    }
+
+    setURL(url) {
+        if ( url !== this._urlIn ) {
+            this._urlIn = url;
+            this._urlOut = url.toLowerCase();
+            this._hasQuery = 0;
+            this._tokenized = false;
+        }
+        return this._urlOut;
+    }
+
+    resetKnownTokens() {
+        this.knownTokens.fill(0);
+        this.addKnownToken(this.dotTokenHash);
+        this.addKnownToken(this.anyTokenHash);
+        this.addKnownToken(this.anyHTTPSTokenHash);
+        this.addKnownToken(this.anyHTTPTokenHash);
+        this.addKnownToken(this.noTokenHash);
+    }
+
+    addKnownToken(th) {
+        this.knownTokens[th & 0xFFFF ^ th >>> 16] = 1;
+    }
+
+    // Tokenize on demand.
+    getTokens(encodeInto) {
+        if ( this._tokenized ) { return this._tokens; }
+        let i = this._tokenize(encodeInto);
+        this._tokens[i+0] = this.anyTokenHash;
+        this._tokens[i+1] = 0;
+        i += 2;
+        if ( this._urlOut.startsWith('https://') ) {
+            this._tokens[i+0] = this.anyHTTPSTokenHash;
+            this._tokens[i+1] = 0;
+            i += 2;
+        } else if ( this._urlOut.startsWith('http://') ) {
+            this._tokens[i+0] = this.anyHTTPTokenHash;
+            this._tokens[i+1] = 0;
+            i += 2;
+        }
+        this._tokens[i+0] = this.noTokenHash;
+        this._tokens[i+1] = 0;
+        this._tokens[i+2] = 0;
+        this._tokenized = true;
+        return this._tokens;
+    }
+
+    hasQuery() {
+        if ( this._hasQuery === 0 ) {
+            const i = this._urlOut.indexOf('?');
+            this._hasQuery = i !== -1 ? i + 1 : -1;
+        }
+        return this._hasQuery > 0;
+    }
+
+    tokenHashFromString(s) {
+        const l = s.length;
+        if ( l === 0 ) { return this.emptyTokenHash; }
+        const vtc = this._validTokenChars;
+        let th = vtc[s.charCodeAt(0)];
+        for ( let i = 1; i !== 7 && i !== l; i++ ) {
+            th = th << 4 ^ vtc[s.charCodeAt(i)];
+        }
+        return th;
+    }
+
+    stringFromTokenHash(th) {
+        if ( th === 0 ) { return ''; }
+        return th.toString(16);
+    }
+
+    toSelfie() {
+        return µBlock.base64.encode(
+            this.knownTokens.buffer,
+            this.knownTokens.byteLength
+        );
+    }
+
+    fromSelfie(selfie) {
+        return µBlock.base64.decode(selfie, this.knownTokens.buffer);
+    }
+
+    // https://github.com/chrisaljoudi/uBlock/issues/1118
+    // We limit to a maximum number of tokens.
+
+    _tokenize(encodeInto) {
+        const tokens = this._tokens;
+        let url = this._urlOut;
+        let l = url.length;
+        if ( l === 0 ) { return 0; }
+        if ( l > 2048 ) {
+            url = url.slice(0, 2048);
+            l = 2048;
+        }
+        encodeInto.haystackLen = l;
+        let j = 0;
+        let hasq = -1;
+        mainLoop: {
+            const knownTokens = this.knownTokens;
+            const vtc = this._validTokenChars;
+            const charCodes = encodeInto.haystack;
+            let i = 0, n = 0, ti = 0, th = 0;
+            for (;;) {
+                for (;;) {
+                    if ( i === l ) { break mainLoop; }
+                    const cc = url.charCodeAt(i);
+                    charCodes[i] = cc;
+                    i += 1;
+                    th = vtc[cc];
+                    if ( th !== 0 ) { break; }
+                    if ( cc === 0x3F /* '?' */ ) { hasq = i; }
+                }
+                ti = i - 1; n = 1;
+                for (;;) {
+                    if ( i === l ) { break; }
+                    const cc = url.charCodeAt(i);
+                    charCodes[i] = cc;
+                    i += 1;
+                    const v = vtc[cc];
+                    if ( v === 0 ) {
+                        if ( cc === 0x3F /* '?' */ ) { hasq = i; }
+                        break;
+                    }
+                    if ( n === 7 ) { continue; }
+                    th = th << 4 ^ v;
+                    n += 1;
+                }
+                if ( knownTokens[th & 0xFFFF ^ th >>> 16] !== 0 ) {
+                    tokens[j+0] = th;
+                    tokens[j+1] = ti;
+                    j += 2;
+                }
+            }
+        }
+        this._hasQuery = hasq;
+        return j;
+    }
+})();

 /******************************************************************************/
 /******************************************************************************/
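
To make the token-hash scheme concrete: each of the 37 valid characters maps to a nonzero value (its index in _chars plus one), and up to MAX_TOKEN_LENGTH values are folded together with a 4-bit shift and xor. Since values can exceed 15, neighbors overlap and the result is a lossy hash, not a reversible encoding. Working through tokenHashFromString('ads') by hand (my arithmetic, not from the commit):

// 'a' -> 12 (0xC), 'd' -> 15 (0xF), 's' -> 30 (0x1E)
let th = 0xC;
th = th << 4 ^ 0xF;  // 0xCF
th = th << 4 ^ 0x1E; // 0xCEE
console.log(th.toString(16)); // "cee"
// knownTokens folds the 32-bit hash onto a 16-bit bitmap index:
console.log(th & 0xFFFF ^ th >>> 16); // 0xCEE (3310); high half is zero here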
@@ -2821,6 +3006,7 @@ FilterParser.parse = (( ) => {
 /******************************************************************************/

 const FilterContainer = function() {
+    this.MAX_TOKEN_LENGTH = urlTokenizer.MAX_TOKEN_LENGTH;
     this.noTokenHash = urlTokenizer.noTokenHash;
     this.dotTokenHash = urlTokenizer.dotTokenHash;
     this.anyTokenHash = urlTokenizer.anyTokenHash;
@@ -2869,7 +3055,7 @@ FilterContainer.prototype.reset = function() {
     bidiTrie.reset();
     filterArgsToUnit.clear();

-    filterUnits.length = FILTER_UNITS_MIN;
+    filterUnitWritePtr = FILTER_UNITS_MIN;
     filterSequenceWritePtr = FILTER_SEQUENCES_MIN;

     // Cancel potentially pending optimization run.
@@ -2914,7 +3100,7 @@ FilterContainer.prototype.freeze = function() {

         if ( tokenHash === this.dotTokenHash ) {
             if ( iunit === undefined ) {
-                iunit = filterFromCtor(FilterHostnameDict);
+                iunit = filterUnitFromCtor(FilterHostnameDict);
                 bucket.set(this.dotTokenHash, iunit);
             }
             filterUnits[iunit].add(fdata);
@@ -2923,7 +3109,7 @@ FilterContainer.prototype.freeze = function() {

         if ( tokenHash === this.anyTokenHash ) {
             if ( iunit === undefined ) {
-                iunit = filterFromCtor(FilterJustOrigin);
+                iunit = filterUnitFromCtor(FilterJustOrigin);
                 bucket.set(this.anyTokenHash, iunit);
             }
             filterUnits[iunit].add(fdata);
@@ -2932,7 +3118,7 @@ FilterContainer.prototype.freeze = function() {

         if ( tokenHash === this.anyHTTPSTokenHash ) {
             if ( iunit === undefined ) {
-                iunit = filterFromCtor(FilterHTTPSJustOrigin);
+                iunit = filterUnitFromCtor(FilterHTTPSJustOrigin);
                 bucket.set(this.anyHTTPSTokenHash, iunit);
             }
             filterUnits[iunit].add(fdata);
@@ -2941,7 +3127,7 @@ FilterContainer.prototype.freeze = function() {

         if ( tokenHash === this.anyHTTPTokenHash ) {
             if ( iunit === undefined ) {
-                iunit = filterFromCtor(FilterHTTPJustOrigin);
+                iunit = filterUnitFromCtor(FilterHTTPJustOrigin);
                 bucket.set(this.anyHTTPTokenHash, iunit);
             }
             filterUnits[iunit].add(fdata);
@@ -2961,7 +3147,7 @@ FilterContainer.prototype.freeze = function() {
             f.unshift(inewunit);
             continue;
         }
-        const ibucketunit = filterFromCtor(FilterBucket);
+        const ibucketunit = filterUnitFromCtor(FilterBucket);
         f = filterUnits[ibucketunit];
         f.unshift(iunit);
         f.unshift(inewunit);
@@ -2996,6 +3182,10 @@ FilterContainer.prototype.freeze = function() {
         }
         FilterHostnameDict.optimize();
         bidiTrieOptimize();
+        // Be sure unused filters can be garbage collected.
+        for ( let i = filterUnitWritePtr, n = filterUnits.length; i < n; i++ ) {
+            filterUnits[i] = null;
+        }
     }, { timeout: 15000 });

     log.info(`staticNetFilteringEngine.freeze() took ${Date.now()-t0} ms`);
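
The added loop matters because the buffer is never truncated: after a shrink, slots between the new write pointer and the old one would otherwise keep the previous configuration's filters reachable. In miniature:

// Why the tail must be nulled: rewinding a write pointer does not release
// what the abandoned slots still reference.
const pool = new Array(8).fill(null);
let ptr = 0;
const add = x => { pool[ptr] = x; ptr += 1; };

add(new Uint8Array(1 << 20)); // 1 MiB stand-in for a filter
add(new Uint8Array(1 << 20));
ptr = 1; // "reset" to a smaller configuration
// pool[1] still pins 1 MiB until it is nulled:
for ( let i = ptr; i < pool.length; i++ ) { pool[i] = null; }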
@@ -3048,7 +3238,7 @@ FilterContainer.prototype.toSelfie = function(path) {
             discardedCount: this.discardedCount,
             categories: categoriesToSelfie(),
             urlTokenizer: urlTokenizer.toSelfie(),
-            filterUnits: filterUnits.map(f =>
+            filterUnits: filterUnits.slice(0, filterUnitWritePtr).map(f =>
                 f !== null ? f.toSelfie() : null
             ),
         })
@@ -3082,11 +3272,11 @@ FilterContainer.prototype.fromSelfie = function(path) {
             const size = µb.base64.decodeSize(details.content) >> 2;
             if ( size === 0 ) { return false; }
             filterSequenceBufferResize(size);
-            filterSequenceWritePtr = size;
             const buf32 = µb.base64.decode(details.content);
             for ( let i = 0; i < size; i++ ) {
                 filterSequences[i] = buf32[i];
             }
+            filterSequenceWritePtr = size;
             return true;
         }),
         µb.assets.get(`${path}/main`).then(details => {
@@ -3105,6 +3295,8 @@ FilterContainer.prototype.fromSelfie = function(path) {
             urlTokenizer.fromSelfie(selfie.urlTokenizer);
             {
                 const fselfies = selfie.filterUnits;
+                filterUnitWritePtr = fselfies.length;
+                filterUnitBufferResize(filterUnitWritePtr);
                 for ( let i = 0, n = fselfies.length; i < n; i++ ) {
                     const f = fselfies[i];
                     filterUnits[i] = f !== null ? filterFromSelfie(f) : null;
@@ -849,7 +849,7 @@ self.addEventListener('hiddenSettingsChanged', ( ) => {
     const lineIter = new this.LineIterator(this.preparseDirectives.prune(rawText));
     const parser = new vAPI.StaticFilteringParser();

-    parser.setMaxTokenLength(this.urlTokenizer.MAX_TOKEN_LENGTH);
+    parser.setMaxTokenLength(staticNetFilteringEngine.MAX_TOKEN_LENGTH);

     while ( lineIter.eot() === false ) {
         let line = lineIter.next();
src/js/utils.js (188 changed lines)
@@ -23,194 +23,6 @@

 /******************************************************************************/

-// A standalone URL tokenizer will allow us to use URL tokens in more than
-// just static filtering engine. This opens the door to optimize other
-// filtering engine parts aside static filtering. This also allows:
-// - Tokenize only on demand.
-// - To potentially avoid tokenizing when same URL is fed to tokenizer.
-//   - Benchmarking shows this to be a common occurrence.
-//
-// https://github.com/gorhill/uBlock/issues/2630
-// Slice input URL into a list of safe-integer token values, instead of a list
-// of substrings. The assumption is that with dealing only with numeric
-// values, less underlying memory allocations, and also as a consequence
-// less work for the garbage collector down the road.
-// Another assumption is that using a numeric-based key value for Map() is
-// more efficient than string-based key value (but that is something I would
-// have to benchmark).
-// Benchmark for string-based tokens vs. safe-integer token values:
-// https://gorhill.github.io/obj-vs-set-vs-map/tokenize-to-str-vs-to-int.html
-
-µBlock.urlTokenizer = new (class {
-    constructor() {
-        this._chars = '0123456789%abcdefghijklmnopqrstuvwxyz';
-        this._validTokenChars = new Uint8Array(128);
-        for ( let i = 0, n = this._chars.length; i < n; i++ ) {
-            this._validTokenChars[this._chars.charCodeAt(i)] = i + 1;
-        }
-        // Four upper bits of token hash are reserved for built-in predefined
-        // token hashes, which should never end up being used when tokenizing
-        // any arbitrary string.
-        this.dotTokenHash = 0x10000000;
-        this.anyTokenHash = 0x20000000;
-        this.anyHTTPSTokenHash = 0x30000000;
-        this.anyHTTPTokenHash = 0x40000000;
-        this.noTokenHash = 0x50000000;
-        this.emptyTokenHash = 0xF0000000;
-
-        this._urlIn = '';
-        this._urlOut = '';
-        this._tokenized = false;
-        this._hasQuery = 0;
-        // https://www.reddit.com/r/uBlockOrigin/comments/dzw57l/
-        // Remember: 1 token needs two slots
-        this._tokens = new Uint32Array(2064);
-
-        this.knownTokens = new Uint8Array(65536);
-        this.resetKnownTokens();
-        this.MAX_TOKEN_LENGTH = 7;
-    }
-
-    setURL(url) {
-        if ( url !== this._urlIn ) {
-            this._urlIn = url;
-            this._urlOut = url.toLowerCase();
-            this._hasQuery = 0;
-            this._tokenized = false;
-        }
-        return this._urlOut;
-    }
-
-    resetKnownTokens() {
-        this.knownTokens.fill(0);
-        this.addKnownToken(this.dotTokenHash);
-        this.addKnownToken(this.anyTokenHash);
-        this.addKnownToken(this.anyHTTPSTokenHash);
-        this.addKnownToken(this.anyHTTPTokenHash);
-        this.addKnownToken(this.noTokenHash);
-    }
-
-    addKnownToken(th) {
-        this.knownTokens[th & 0xFFFF ^ th >>> 16] = 1;
-    }
-
-    // Tokenize on demand.
-    getTokens(encodeInto) {
-        if ( this._tokenized ) { return this._tokens; }
-        let i = this._tokenize(encodeInto);
-        this._tokens[i+0] = this.anyTokenHash;
-        this._tokens[i+1] = 0;
-        i += 2;
-        if ( this._urlOut.startsWith('https://') ) {
-            this._tokens[i+0] = this.anyHTTPSTokenHash;
-            this._tokens[i+1] = 0;
-            i += 2;
-        } else if ( this._urlOut.startsWith('http://') ) {
-            this._tokens[i+0] = this.anyHTTPTokenHash;
-            this._tokens[i+1] = 0;
-            i += 2;
-        }
-        this._tokens[i+0] = this.noTokenHash;
-        this._tokens[i+1] = 0;
-        this._tokens[i+2] = 0;
-        this._tokenized = true;
-        return this._tokens;
-    }
-
-    hasQuery() {
-        if ( this._hasQuery === 0 ) {
-            const i = this._urlOut.indexOf('?');
-            this._hasQuery = i !== -1 ? i + 1 : -1;
-        }
-        return this._hasQuery > 0;
-    }
-
-    tokenHashFromString(s) {
-        const l = s.length;
-        if ( l === 0 ) { return this.emptyTokenHash; }
-        const vtc = this._validTokenChars;
-        let th = vtc[s.charCodeAt(0)];
-        for ( let i = 1; i !== 7 && i !== l; i++ ) {
-            th = th << 4 ^ vtc[s.charCodeAt(i)];
-        }
-        return th;
-    }
-
-    stringFromTokenHash(th) {
-        if ( th === 0 ) { return ''; }
-        return th.toString(16);
-    }
-
-    toSelfie() {
-        return µBlock.base64.encode(
-            this.knownTokens.buffer,
-            this.knownTokens.byteLength
-        );
-    }
-
-    fromSelfie(selfie) {
-        return µBlock.base64.decode(selfie, this.knownTokens.buffer);
-    }
-
-    // https://github.com/chrisaljoudi/uBlock/issues/1118
-    // We limit to a maximum number of tokens.
-
-    _tokenize(encodeInto) {
-        const tokens = this._tokens;
-        let url = this._urlOut;
-        let l = url.length;
-        if ( l === 0 ) { return 0; }
-        if ( l > 2048 ) {
-            url = url.slice(0, 2048);
-            l = 2048;
-        }
-        encodeInto.haystackLen = l;
-        let j = 0;
-        let hasq = -1;
-        mainLoop: {
-            const knownTokens = this.knownTokens;
-            const vtc = this._validTokenChars;
-            const charCodes = encodeInto.haystack;
-            let i = 0, n = 0, ti = 0, th = 0;
-            for (;;) {
-                for (;;) {
-                    if ( i === l ) { break mainLoop; }
-                    const cc = url.charCodeAt(i);
-                    charCodes[i] = cc;
-                    i += 1;
-                    th = vtc[cc];
-                    if ( th !== 0 ) { break; }
-                    if ( cc === 0x3F /* '?' */ ) { hasq = i; }
-                }
-                ti = i - 1; n = 1;
-                for (;;) {
-                    if ( i === l ) { break; }
-                    const cc = url.charCodeAt(i);
-                    charCodes[i] = cc;
-                    i += 1;
-                    const v = vtc[cc];
-                    if ( v === 0 ) {
-                        if ( cc === 0x3F /* '?' */ ) { hasq = i; }
-                        break;
-                    }
-                    if ( n === 7 ) { continue; }
-                    th = th << 4 ^ v;
-                    n += 1;
-                }
-                if ( knownTokens[th & 0xFFFF ^ th >>> 16] !== 0 ) {
-                    tokens[j+0] = th;
-                    tokens[j+1] = ti;
-                    j += 2;
-                }
-            }
-        }
-        this._hasQuery = hasq;
-        return j;
-    }
-})();
-
-/******************************************************************************/
-
 µBlock.formatCount = function(count) {
     if ( typeof count !== 'number' ) {
         return '';