Minor fine-tuning of URL tokenizer

This commit is contained in:
Raymond Hill 2019-10-31 11:15:00 -04:00
parent d8975ee580
commit 11c56ab540
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
1 changed file with 6 additions and 13 deletions

View File

@@ -66,9 +66,6 @@
this.knownTokens = new Uint8Array(65536); this.knownTokens = new Uint8Array(65536);
this.resetKnownTokens(); this.resetKnownTokens();
this.MAX_TOKEN_LENGTH = 7; this.MAX_TOKEN_LENGTH = 7;
this.charCodes = new Uint8Array(2048);
this.charCodeCount = 0;
} }
setURL(url) { setURL(url) {
@@ -149,7 +146,7 @@
_tokenize(encodeInto) { _tokenize(encodeInto) {
const tokens = this._tokens; const tokens = this._tokens;
let url = this._urlOut; let url = this._urlOut;
let l = url.length | 0; let l = url.length;
if ( l === 0 ) { return 0; } if ( l === 0 ) { return 0; }
if ( l > 2048 ) { if ( l > 2048 ) {
url = url.slice(0, 2048); url = url.slice(0, 2048);
@@ -159,22 +156,18 @@
const knownTokens = this.knownTokens; const knownTokens = this.knownTokens;
const vtc = this._validTokenChars; const vtc = this._validTokenChars;
const charCodes = encodeInto.haystack; const charCodes = encodeInto.haystack;
let i = 0, j = 0, c, v, n, ti, th; let i = 0, j = 0, n, ti, th;
for (;;) { for (;;) {
for (;;) { for (;;) {
if ( i === l ) { return j; } if ( i === l ) { return j; }
c = url.charCodeAt(i) | 0; th = vtc[(charCodes[i] = url.charCodeAt(i))];
charCodes[i] = c;
v = vtc[c];
i += 1; i += 1;
if ( v !== 0 ) { break; } if ( th !== 0 ) { break; }
} }
th = v; ti = i - 1; n = 1; ti = i - 1; n = 1;
for (;;) { for (;;) {
if ( i === l ) { break; } if ( i === l ) { break; }
c = url.charCodeAt(i) | 0; const v = vtc[(charCodes[i] = url.charCodeAt(i))];
charCodes[i] = c;
v = vtc[c];
i += 1; i += 1;
if ( v === 0 ) { break; } if ( v === 0 ) { break; }
if ( n === 7 ) { continue; } if ( n === 7 ) { continue; }