This commit is contained in:
Raymond Hill 2018-01-03 13:59:38 -05:00
parent 5a468be661
commit 04d84cf92a
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
4 changed files with 110 additions and 35 deletions

View File

@ -1403,7 +1403,8 @@ vAPI.domSurveyor = (function() {
{
what: 'retrieveContentScriptParameters',
url: window.location.href,
isRootFrame: window === window.top
isRootFrame: window === window.top,
charset: document.characterSet
},
bootstrapPhase1
);

View File

@ -515,7 +515,10 @@ var onMessage = function(request, sender, callback) {
µb.cosmeticFilteringEngine.retrieveDomainSelectors(request, response);
// If response body filtering is supported, then the scriptlets have
// already been injected.
if ( µb.canFilterResponseBody === false ) {
if (
µb.canFilterResponseBody === false ||
µb.textEncode.normalizeCharset(request.charset) === undefined
) {
response.scriptlets = µb.scriptletFilteringEngine.retrieve(request);
}
if ( request.isRootFrame && µb.logger.isEnabled() ) {

View File

@ -25,6 +25,39 @@
µBlock.textEncode = (function() {
// Maps charset labels, including common aliases, to one canonical charset
// name. Keys are all lowercase: callers are expected to lowercase a label
// before looking it up. A label with no entry here yields `undefined` from
// `Map.get`.
var normalizedCharset = new Map([
[ 'utf8', 'utf-8' ],
[ 'unicode-1-1-utf-8', 'utf-8' ],
[ 'utf-8', 'utf-8' ],
[ 'windows-1250', 'windows-1250' ],
[ 'cp1250', 'windows-1250' ],
[ 'x-cp1250', 'windows-1250' ],
[ 'windows-1251', 'windows-1251' ],
[ 'cp1251', 'windows-1251' ],
[ 'x-cp1251', 'windows-1251' ],
]);
// http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT
// Lookup table for Unicode code points 0x0100-0x017F: the entry at index
// (code point - 0x0100) is the corresponding windows-1250 byte. Each row
// covers eight consecutive code points, starting at the code point named in
// the leading comment. An entry of 0x00 marks a code point for which the
// table holds no windows-1250 byte.
var cp1250_range0 = new Uint8Array([
/* 0x0100 */ 0x00, 0x00, 0xC3, 0xE3, 0xA5, 0xB9, 0xC6, 0xE6,
/* 0x0108 */ 0x00, 0x00, 0x00, 0x00, 0xC8, 0xE8, 0xCF, 0xEF,
/* 0x0110 */ 0xD0, 0xF0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/* 0x0118 */ 0xCA, 0xEA, 0xCC, 0xEC, 0x00, 0x00, 0x00, 0x00,
/* 0x0120 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/* 0x0128 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/* 0x0130 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/* 0x0138 */ 0x00, 0xC5, 0xE5, 0x00, 0x00, 0xBC, 0xBE, 0x00,
/* 0x0140 */ 0x00, 0xA3, 0xB3, 0xD1, 0xF1, 0x00, 0x00, 0xD2,
/* 0x0148 */ 0xF2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/* 0x0150 */ 0xD5, 0xF5, 0x00, 0x00, 0xC0, 0xE0, 0x00, 0x00,
/* 0x0158 */ 0xD8, 0xF8, 0x8C, 0x9C, 0x00, 0x00, 0xAA, 0xBA,
/* 0x0160 */ 0x8A, 0x9A, 0xDE, 0xFE, 0x8D, 0x9D, 0x00, 0x00,
/* 0x0168 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xD9, 0xF9,
/* 0x0170 */ 0xDB, 0xFB, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/* 0x0178 */ 0x00, 0x8F, 0x9F, 0xAF, 0xBF, 0x8E, 0x9E, 0x00
]);
// http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT
var cp1251_range0 = new Uint8Array([
/* 0x0400 */ 0x00, 0xA8, 0x80, 0x81, 0xAA, 0xBD, 0xB2, 0xAF,
/* 0x0408 */ 0xA3, 0x8A, 0x8C, 0x8E, 0x8D, 0x00, 0xA1, 0x8F,
@ -47,7 +80,7 @@
/* 0x0490 */ 0xA5, 0xB4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
]);
var cp1251_range1 = new Uint8Array([
var cp125x_range0 = new Uint8Array([
/* 0x2010 */ 0x00, 0x00, 0x00, 0x96, 0x97, 0x00, 0x00, 0x00,
/* 0x2018 */ 0x91, 0x92, 0x82, 0x00, 0x93, 0x94, 0x84, 0x00,
/* 0x2020 */ 0x86, 0x87, 0x95, 0x00, 0x00, 0x00, 0x85, 0x00,
@ -57,6 +90,51 @@
]);
var encoders = {
'windows-1250': function(buf) {
var i = 0, n = buf.byteLength, o = 0, c;
while ( i < n ) {
c = buf[i++];
if ( c < 0x80 ) {
buf[o++] = c;
} else {
if ( (c & 0xE0) === 0xC0 ) {
c = (c & 0x1F) << 6;
c |= (buf[i++] & 0x3F);
} else if ( (c & 0xF0) === 0xE0 ) {
c = (c & 0x0F) << 12;
c |= (buf[i++] & 0x3F) << 6;
c |= (buf[i++] & 0x3F);
} else if ( (c & 0xF8) === 0xF0 ) {
c = (c & 0x07) << 18;
c |= (buf[i++] & 0x3F) << 12;
c |= (buf[i++] & 0x3F) << 6;
c |= (buf[i++] & 0x3F);
}
if ( c < 0x100 ) {
buf[o++] = c;
} else if ( c >= 0x100 && c < 0x180 ) {
buf[o++] = cp1250_range0[c - 0x100];
} else if ( c >= 0x2010 && c < 0x2040 ) {
buf[o++] = cp125x_range0[c - 0x2010];
} else if ( c === 0x02C7 ) {
buf[o++] = 0xA1;
} else if ( c === 0x02D8 ) {
buf[o++] = 0xA2;
} else if ( c === 0x02D9 ) {
buf[o++] = 0xFF;
} else if ( c === 0x02DB ) {
buf[o++] = 0xB2;
} else if ( c === 0x02DD ) {
buf[o++] = 0xBD;
} else if ( c === 0x20AC ) {
buf[o++] = 0x88;
} else if ( c === 0x2122 ) {
buf[o++] = 0x99;
}
}
}
return buf.slice(0, o);
},
'windows-1251': function(buf) {
var i = 0, n = buf.byteLength, o = 0, c;
while ( i < n ) {
@ -71,24 +149,24 @@
c = (c & 0x0F) << 12;
c |= (buf[i++] & 0x3F) << 6;
c |= (buf[i++] & 0x3F);
} else if ( (c & 0xF0) === 0xF0 ) {
} else if ( (c & 0xF8) === 0xF0 ) {
c = (c & 0x07) << 18;
c |= (buf[i++] & 0x3F) << 12;
c |= (buf[i++] & 0x3F) << 6;
c |= (buf[i++] & 0x3F);
}
if ( c >= 0x400 && c < 0x4A0 ) {
if ( c < 0x100 ) {
buf[o++] = c;
} else if ( c >= 0x400 && c < 0x4A0 ) {
buf[o++] = cp1251_range0[c - 0x400];
} else if ( c >= 0x2010 && c < 0x2040 ) {
buf[o++] = cp1251_range1[c - 0x2010];
buf[o++] = cp125x_range0[c - 0x2010];
} else if ( c === 0x20AC ) {
buf[o++] = 0x88;
} else if ( c === 0x2116 ) {
buf[o++] = 0xB9;
} else if ( c === 0x2122 ) {
buf[o++] = 0x99;
} else if ( c < 0xD800 || c >= 0xE000 ) {
buf[o++] = c;
}
}
}
@ -96,22 +174,17 @@
}
};
var api = {};
api.normalizedCharset = new Map([
[ 'utf8', 'utf-8' ],
[ 'unicode-1-1-utf-8', 'utf-8' ],
[ 'utf-8', 'utf-8' ],
[ 'windows-1251', 'windows-1251' ],
[ 'cp1251', 'windows-1251' ],
[ 'x-cp1251', 'windows-1251' ],
]);
api.encode = function(charset, buf) {
return encoders.hasOwnProperty(charset) ?
encoders[charset](buf) :
buf;
return {
encode: function(charset, buf) {
return encoders.hasOwnProperty(charset) ?
encoders[charset](buf) :
buf;
},
normalizeCharset: function(charset) {
if ( charset === undefined ) {
return 'utf-8';
}
return normalizedCharset.get(charset.toLowerCase());
}
};
return api;
})();

View File

@ -511,7 +511,7 @@ var onHeadersReceived = function(details) {
}
if ( isDoc && µb.canFilterResponseBody ) {
filterDocument(details);
filterDocument(pageStore, details);
}
// https://github.com/gorhill/uBlock/issues/2813
@ -579,6 +579,9 @@ var filterDocument = (function() {
domParser, xmlSerializer,
textDecoderCharset, textDecoder, textEncoder;
// Matches a Content-Type header value denoting an HTML or XHTML document.
// The `+` of `xhtml+xml` is escaped: unescaped it is a quantifier on the
// preceding `l`, and the pattern would never match the real media type
// `application/xhtml+xml`.
var reContentTypeDocument = /^(?:text\/html|application\/xhtml\+xml)/i,
    // Captures the value of the `charset` parameter, optionally quoted.
    // The capture stops at a quote, a space, or `;`, so a parameter
    // following the charset is not swallowed into the charset name.
    reContentTypeCharset = /charset=['"]?([^'" ;]+)/i;
// Purpose of following helper is to disconnect from watching the stream
// if all the following conditions are fulfilled:
// - Only need to inject scriptlets.
@ -774,7 +777,7 @@ var filterDocument = (function() {
filterers.delete(this);
};
return function(details) {
return function(pageStore, details) {
var hostname = µb.URI.hostnameFromURI(details.url);
if ( hostname === '' ) { return; }
@ -808,12 +811,10 @@ var filterDocument = (function() {
if ( reContentTypeDocument.test(contentType) === false ) { return; }
var match = reContentTypeCharset.exec(contentType);
if ( match !== null ) {
var charset = match[1].toLowerCase();
var charset = µb.textEncode.normalizeCharset(match[1]);
if ( charset === undefined ) { return; }
if ( charset !== 'utf-8' ) {
request.charset = µb.textEncode.normalizedCharset.get(charset);
if ( request.charset === 'utf-8' ) {
request.charset = undefined;
}
request.charset = charset;
}
}
}
@ -829,9 +830,6 @@ var filterDocument = (function() {
};
})();
var reContentTypeDocument = /^(?:text\/html|application\/xhtml+xml)/i;
var reContentTypeCharset = /charset=['"]?([^'" ]+)/i;
/******************************************************************************/
var injectCSP = function(pageStore, details) {