From 04d84cf92aeb40be1093b33f0383cd3705f8b169 Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Wed, 3 Jan 2018 13:59:38 -0500 Subject: [PATCH] fix #3397 --- src/js/contentscript.js | 3 +- src/js/messaging.js | 5 +- src/js/text-encode.js | 119 ++++++++++++++++++++++++++++++++-------- src/js/traffic.js | 18 +++--- 4 files changed, 110 insertions(+), 35 deletions(-) diff --git a/src/js/contentscript.js b/src/js/contentscript.js index 4c50ceda2..eb97b75fa 100644 --- a/src/js/contentscript.js +++ b/src/js/contentscript.js @@ -1403,7 +1403,8 @@ vAPI.domSurveyor = (function() { { what: 'retrieveContentScriptParameters', url: window.location.href, - isRootFrame: window === window.top + isRootFrame: window === window.top, + charset: document.characterSet }, bootstrapPhase1 ); diff --git a/src/js/messaging.js b/src/js/messaging.js index eefb95788..4eefbeb9c 100644 --- a/src/js/messaging.js +++ b/src/js/messaging.js @@ -515,7 +515,10 @@ var onMessage = function(request, sender, callback) { µb.cosmeticFilteringEngine.retrieveDomainSelectors(request, response); // If response body filtering is supported, than the scriptlets have // already been injected. - if ( µb.canFilterResponseBody === false ) { + if ( + µb.canFilterResponseBody === false || + µb.textEncode.normalizeCharset(request.charset) === undefined + ) { response.scriptlets = µb.scriptletFilteringEngine.retrieve(request); } if ( request.isRootFrame && µb.logger.isEnabled() ) { diff --git a/src/js/text-encode.js b/src/js/text-encode.js index 12568cfed..60a4c8d94 100644 --- a/src/js/text-encode.js +++ b/src/js/text-encode.js @@ -25,6 +25,39 @@ µBlock.textEncode = (function() { + var normalizedCharset = new Map([ + [ 'utf8', 'utf-8' ], + [ 'unicode-1-1-utf-8', 'utf-8' ], + [ 'utf-8', 'utf-8' ], + [ 'windows-1250', 'windows-1250' ], + [ 'cp1250', 'windows-1250' ], + [ 'x-cp1250', 'windows-1250' ], + [ 'windows-1251', 'windows-1251' ], + [ 'cp1251', 'windows-1251' ], + [ 'x-cp1251', 'windows-1251' ], + ]); + + // http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT + var cp1250_range0 = new Uint8Array([ + /* 0x0100 */ 0x00, 0x00, 0xC3, 0xE3, 0xA5, 0xB9, 0xC6, 0xE6, + /* 0x0108 */ 0x00, 0x00, 0x00, 0x00, 0xC8, 0xE8, 0xCF, 0xEF, + /* 0x0110 */ 0xD0, 0xF0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x0118 */ 0xCA, 0xEA, 0xCC, 0xEC, 0x00, 0x00, 0x00, 0x00, + /* 0x0120 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x0128 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x0130 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x0138 */ 0x00, 0xC5, 0xE5, 0x00, 0x00, 0xBC, 0xBE, 0x00, + /* 0x0140 */ 0x00, 0xA3, 0xB3, 0xD1, 0xF1, 0x00, 0x00, 0xD2, + /* 0x0148 */ 0xF2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x0150 */ 0xD5, 0xF5, 0x00, 0x00, 0xC0, 0xE0, 0x00, 0x00, + /* 0x0158 */ 0xD8, 0xF8, 0x8C, 0x9C, 0x00, 0x00, 0xAA, 0xBA, + /* 0x0160 */ 0x8A, 0x9A, 0xDE, 0xFE, 0x8D, 0x9D, 0x00, 0x00, + /* 0x0168 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xD9, 0xF9, + /* 0x0170 */ 0xDB, 0xFB, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x0178 */ 0x00, 0x8F, 0x9F, 0xAF, 0xBF, 0x8E, 0x9E, 0x00 + ]); + + // http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT var cp1251_range0 = new Uint8Array([ /* 0x0400 */ 0x00, 0xA8, 0x80, 0x81, 0xAA, 0xBD, 0xB2, 0xAF, /* 0x0408 */ 0xA3, 0x8A, 0x8C, 0x8E, 0x8D, 0x00, 0xA1, 0x8F, @@ -47,7 +80,7 @@ /* 0x0490 */ 0xA5, 0xB4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ]); - var cp1251_range1 = new Uint8Array([ + var cp125x_range0 = new Uint8Array([ /* 0x2010 */ 0x00, 0x00, 0x00, 0x96, 0x97, 0x00, 0x00, 0x00, /* 0x2018 */ 0x91, 0x92, 0x82, 0x00, 0x93, 0x94, 0x84, 0x00, /* 0x2020 */ 0x86, 0x87, 0x95, 0x00, 0x00, 0x00, 0x85, 0x00, @@ -57,6 +90,51 @@ ]); var encoders = { + 'windows-1250': function(buf) { + var i = 0, n = buf.byteLength, o = 0, c; + while ( i < n ) { + c = buf[i++]; + if ( c < 0x80 ) { + buf[o++] = c; + } else { + if ( (c & 0xE0) === 0xC0 ) { + c = (c & 0x1F) << 6; + c |= (buf[i++] & 0x3F); + } else if ( (c & 0xF0) === 0xE0 ) { + c = (c & 0x0F) << 12; + c |= (buf[i++] & 0x3F) << 6; + c |= (buf[i++] & 0x3F); + } else if ( (c & 0xF8) === 0xF0 ) { + c = (c & 0x07) << 18; + c |= (buf[i++] & 0x3F) << 12; + c |= (buf[i++] & 0x3F) << 6; + c |= (buf[i++] & 0x3F); + } + if ( c < 0x100 ) { + buf[o++] = c; + } else if ( c >= 0x100 && c < 0x180 ) { + buf[o++] = cp1250_range0[c - 0x100]; + } else if ( c >= 0x2010 && c < 0x2040 ) { + buf[o++] = cp125x_range0[c - 0x2010]; + } else if ( c === 0x02C7 ) { + buf[o++] = 0xA1; + } else if ( c === 0x02D8 ) { + buf[o++] = 0xA2; + } else if ( c === 0x02D9 ) { + buf[o++] = 0xFF; + } else if ( c === 0x02DB ) { + buf[o++] = 0xB2; + } else if ( c === 0x02DD ) { + buf[o++] = 0xBD; + } else if ( c === 0x20AC ) { + buf[o++] = 0x88; + } else if ( c === 0x2122 ) { + buf[o++] = 0x99; + } + } + } + return buf.slice(0, o); + }, 'windows-1251': function(buf) { var i = 0, n = buf.byteLength, o = 0, c; while ( i < n ) { @@ -71,24 +149,24 @@ c = (c & 0x0F) << 12; c |= (buf[i++] & 0x3F) << 6; c |= (buf[i++] & 0x3F); - } else if ( (c & 0xF0) === 0xF0 ) { + } else if ( (c & 0xF8) === 0xF0 ) { c = (c & 0x07) << 18; c |= (buf[i++] & 0x3F) << 12; c |= (buf[i++] & 0x3F) << 6; c |= (buf[i++] & 0x3F); } - if ( c >= 0x400 && c < 0x4A0 ) { + if ( c < 0x100 ) { + buf[o++] = c; + } else if ( c >= 0x400 && c < 0x4A0 ) { buf[o++] = cp1251_range0[c - 0x400]; } else if ( c >= 0x2010 && c < 0x2040 ) { - buf[o++] = cp1251_range1[c - 0x2010]; + buf[o++] = cp125x_range0[c - 0x2010]; } else if ( c === 0x20AC ) { buf[o++] = 0x88; } else if ( c === 0x2116 ) { buf[o++] = 0xB9; } else if ( c === 0x2122 ) { buf[o++] = 0x99; - } else if ( c < 0xD800 || c >= 0xE000 ) { - buf[o++] = c; } } } @@ -96,22 +174,17 @@ } }; - var api = {}; - - api.normalizedCharset = new Map([ - [ 'utf8', 'utf-8' ], - [ 'unicode-1-1-utf-8', 'utf-8' ], - [ 'utf-8', 'utf-8' ], - [ 'windows-1251', 'windows-1251' ], - [ 'cp1251', 'windows-1251' ], - [ 'x-cp1251', 'windows-1251' ], - ]); - - api.encode = function(charset, buf) { - return encoders.hasOwnProperty(charset) ? - encoders[charset](buf) : - buf; + return { + encode: function(charset, buf) { + return encoders.hasOwnProperty(charset) ? + encoders[charset](buf) : + buf; + }, + normalizeCharset: function(charset) { + if ( charset === undefined ) { + return 'utf-8'; + } + return normalizedCharset.get(charset.toLowerCase()); + } }; - - return api; })(); diff --git a/src/js/traffic.js b/src/js/traffic.js index 0734c1e1a..548caa5a9 100644 --- a/src/js/traffic.js +++ b/src/js/traffic.js @@ -511,7 +511,7 @@ var onHeadersReceived = function(details) { } if ( isDoc && µb.canFilterResponseBody ) { - filterDocument(details); + filterDocument(pageStore, details); } // https://github.com/gorhill/uBlock/issues/2813 @@ -579,6 +579,9 @@ var filterDocument = (function() { domParser, xmlSerializer, textDecoderCharset, textDecoder, textEncoder; + var reContentTypeDocument = /^(?:text\/html|application\/xhtml+xml)/i, + reContentTypeCharset = /charset=['"]?([^'" ]+)/i; + // Purpose of following helper is to disconnect from watching the stream // if all the following conditions are fulfilled: // - Only need to inject scriptlets. @@ -774,7 +777,7 @@ var filterDocument = (function() { filterers.delete(this); }; - return function(details) { + return function(pageStore, details) { var hostname = µb.URI.hostnameFromURI(details.url); if ( hostname === '' ) { return; } @@ -808,12 +811,10 @@ var filterDocument = (function() { if ( reContentTypeDocument.test(contentType) === false ) { return; } var match = reContentTypeCharset.exec(contentType); if ( match !== null ) { - var charset = match[1].toLowerCase(); + var charset = µb.textEncode.normalizeCharset(match[1]); + if ( charset === undefined ) { return; } if ( charset !== 'utf-8' ) { - request.charset = µb.textEncode.normalizedCharset.get(charset); - if ( request.charset === 'utf-8' ) { - request.charset = undefined; - } + request.charset = charset; } } } @@ -829,9 +830,6 @@ var filterDocument = (function() { }; })(); -var reContentTypeDocument = /^(?:text\/html|application\/xhtml+xml)/i; -var reContentTypeCharset = /charset=['"]?([^'" ]+)/i; - /******************************************************************************/ var injectCSP = function(pageStore, details) {