From 5cbe583c0aa08c9698857862db92cfff8b9c7634 Mon Sep 17 00:00:00 2001 From: gorhill Date: Tue, 23 Aug 2016 10:33:28 -0400 Subject: [PATCH] fix #1929 --- src/js/logger-ui.js | 18 +++++++-- src/js/static-net-filtering.js | 72 ++++++++++++++++++---------------- 2 files changed, 53 insertions(+), 37 deletions(-) diff --git a/src/js/logger-ui.js b/src/js/logger-ui.js index 398b664fb..ae3d90dc7 100644 --- a/src/js/logger-ui.js +++ b/src/js/logger-ui.js @@ -309,14 +309,24 @@ var filterDecompiler = (function() { case 'a|h': case '_': case '_h': + reStr = tfields[0] + .replace(reEscape, '\\$&') + .replace(reWildcards, '.*?') + .replace(reSeparator, '(?:[^%.0-9a-z_-]|$)'); + break; case '||a': case '||ah': case '||_': case '||_h': - reStr = tfields[0] - .replace(reEscape, '\\$&') - .replace(reWildcards, '.*') - .replace(reSeparator, '(?:[^%.0-9a-z_-]|$)'); + reStr = ''; + if ( tfields[0].charCodeAt(0) === 0x2A ) { + reStr = '[0-9a-z.-]*?'; + tfields[0] = tfields[0].slice(1); + } + reStr += tfields[0] + .replace(reEscape, '\\$&') + .replace(reWildcards, '.*?') + .replace(reSeparator, '(?:[^%.0-9a-z_-]|$)'); break; case '//': case '//h': diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index bdca79f3a..e06589132 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -105,8 +105,6 @@ var AllowAnyTypeAnyParty = AllowAction | AnyType | AnyParty; var AllowAnyType = AllowAction | AnyType; var AllowAnyParty = AllowAction | AnyParty; -var reURLPostHostnameAnchors = /[\/?#]/; - // ABP filters: https://adblockplus.org/en/filters // regex tester: http://regex101.com/ @@ -206,7 +204,11 @@ var strToRegex = function(s, anchor, flags) { if ( s === '*' ) { return alwaysTruePseudoRegex; } - + var anchorToHnStart; + if ( s.startsWith('||') ) { + s = s.slice(2); + anchorToHnStart = s.charCodeAt(0) === 0x2A; + } // https://www.loggly.com/blog/five-invaluable-techniques-to-improve-regex-performance/ // https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions // Also: remove leading/trailing wildcards -- there is no point. @@ -220,7 +222,9 @@ var strToRegex = function(s, anchor, flags) { } else if ( anchor > 0 ) { reStr += '$'; } - + if ( anchorToHnStart ) { + reStr = '[0-9a-z.-]*?' + reStr; + } //console.debug('µBlock.staticNetFilteringEngine: created RegExp("%s")', reStr); return new RegExp(reStr, flags); }; @@ -229,6 +233,26 @@ var toHex = function(n) { return n.toString(16); }; +// First character of match must be within the hostname part of the url. +var isHnAnchored = function(url, matchStart) { + var hnStart = url.indexOf('://'); + if ( hnStart === -1 ) { + return false; + } + hnStart += 3; + if ( matchStart <= hnStart ) { + return true; + } + if ( reURLPostHostnameAnchors.test(url.slice(hnStart, matchStart)) ) { + return false; + } + // https://github.com/gorhill/uBlock/issues/1929 + // Match only hostname label boundaries. + return url.charCodeAt(matchStart - 1) === 0x2E; +}; + +var reURLPostHostnameAnchors = /[\/?#]/; + /******************************************************************************/ // Hostname test helpers: the optimal test function is picked @@ -699,13 +723,8 @@ var FilterPlainHnAnchored = function(s) { }; FilterPlainHnAnchored.prototype.match = function(url, tokenBeg) { - if ( url.startsWith(this.s, tokenBeg) === false ) { - return false; - } - // Valid only if hostname-valid characters to the left of token - var pos = url.indexOf('://'); - return pos !== -1 && - reURLPostHostnameAnchors.test(url.slice(pos + 3, tokenBeg)) === false; + return url.startsWith(this.s, tokenBeg) && + isHnAnchored(url, tokenBeg); }; FilterPlainHnAnchored.fid = @@ -739,16 +758,9 @@ var FilterPlainHnAnchoredHostname = function(s, domainOpt) { }; FilterPlainHnAnchoredHostname.prototype.match = function(url, tokenBeg) { - if ( - url.startsWith(this.s, tokenBeg) === false || - this.hostnameTest(this) === false - ) { - return false; - } - // Valid only if hostname-valid characters to the left of token - var pos = url.indexOf('://'); - return pos !== -1 && - reURLPostHostnameAnchors.test(url.slice(pos + 3, tokenBeg)) === false; + return url.startsWith(this.s, tokenBeg) && + this.hostnameTest(this) && + isHnAnchored(url, tokenBeg); }; FilterPlainHnAnchoredHostname.fid = @@ -852,18 +864,10 @@ var FilterGenericHnAnchored = function(s) { FilterGenericHnAnchored.prototype.match = function(url) { if ( this.re === null ) { - this.re = strToRegex(this.s, 0); + this.re = strToRegex('||' + this.s, 0); } - // Quick test first - if ( this.re.test(url) === false ) { - return false; - } - // Valid only if begininning of match is within the hostname - // part of the url - var match = this.re.exec(url); - var pos = url.indexOf('://'); - return pos !== -1 && - reURLPostHostnameAnchors.test(url.slice(pos + 3, match.index)) === false; + var matchStart = url.search(this.re); + return matchStart !== -1 && isHnAnchored(url, matchStart); }; FilterGenericHnAnchored.fid = @@ -2228,11 +2232,13 @@ FilterContainer.prototype.filterRegexFromCompiled = function(compiled, flags) { case '1ah': case '_': case '_h': + re = strToRegex(tfields[0], 0, flags); + break; case '||a': case '||ah': case '||_': case '||_h': - re = strToRegex(tfields[0], 0, flags); + re = strToRegex('||' + tfields[0], 0, flags); break; case '|a': case '|ah':