Improve the logger's accuracy regarding the matched part of the URL

Related issue:
- https://github.com/gorhill/uBlock/issues/3037

This takes care of the specific case reported. There are
other edge cases which are likely not addressed though, i.e.
those involving wildcards -- those should be rather rare, and
at this point I would rather leave them unaddressed so as not
to risk regressions (as they are less trivial to address).
This commit is contained in:
Raymond Hill 2021-07-11 10:11:26 -04:00
parent c2859712fb
commit 717d8593c5
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
1 changed file with 11 additions and 3 deletions

View File

@ -139,6 +139,8 @@ const typeValueToTypeName = [
//const typeValueFromCatBits = catBits => (catBits >>> TypeBitsOffset) & 0b11111; //const typeValueFromCatBits = catBits => (catBits >>> TypeBitsOffset) & 0b11111;
const MAX_TOKEN_LENGTH = 7;
/******************************************************************************/ /******************************************************************************/
// See the following as short-lived registers, used during evaluation. They are // See the following as short-lived registers, used during evaluation. They are
@ -574,6 +576,12 @@ const FilterPatternPlain = class {
const s = bidiTrie.extractString(this.i, this.n); const s = bidiTrie.extractString(this.i, this.n);
details.pattern.push(s); details.pattern.push(s);
details.regex.push(restrFromPlainPattern(s)); details.regex.push(restrFromPlainPattern(s));
// https://github.com/gorhill/uBlock/issues/3037
// Make sure the logger reflects accurately internal match, taking
// into account MAX_TOKEN_LENGTH.
if ( /^[0-9a-z%]{1,6}$/i.exec(s.slice(this.tokenBeg)) !== null ) {
details.regex.push('(?![0-9A-Za-z%])');
}
} }
toSelfie() { toSelfie() {
@ -2474,7 +2482,7 @@ const urlTokenizer = new (class {
this.knownTokens = new Uint8Array(65536); this.knownTokens = new Uint8Array(65536);
this.resetKnownTokens(); this.resetKnownTokens();
this.MAX_TOKEN_LENGTH = 7; this.MAX_TOKEN_LENGTH = MAX_TOKEN_LENGTH;
} }
setURL(url) { setURL(url) {
@ -2536,7 +2544,7 @@ const urlTokenizer = new (class {
if ( l === 0 ) { return this.emptyTokenHash; } if ( l === 0 ) { return this.emptyTokenHash; }
const vtc = this._validTokenChars; const vtc = this._validTokenChars;
let th = vtc[s.charCodeAt(0)]; let th = vtc[s.charCodeAt(0)];
for ( let i = 1; i !== 7 && i !== l; i++ ) { for ( let i = 1; i !== 7 /* MAX_TOKEN_LENGTH */ && i !== l; i++ ) {
th = th << 4 ^ vtc[s.charCodeAt(i)]; th = th << 4 ^ vtc[s.charCodeAt(i)];
} }
return th; return th;
@ -2599,7 +2607,7 @@ const urlTokenizer = new (class {
if ( cc === 0x3F /* '?' */ ) { hasq = i; } if ( cc === 0x3F /* '?' */ ) { hasq = i; }
break; break;
} }
if ( n === 7 ) { continue; } if ( n === 7 /* MAX_TOKEN_LENGTH */ ) { continue; }
th = th << 4 ^ v; th = th << 4 ^ v;
n += 1; n += 1;
} }