This commit is contained in:
Raymond Hill 2018-01-03 13:59:38 -05:00
parent 5a468be661
commit 04d84cf92a
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
4 changed files with 110 additions and 35 deletions

View File

@ -1403,7 +1403,8 @@ vAPI.domSurveyor = (function() {
{ {
what: 'retrieveContentScriptParameters', what: 'retrieveContentScriptParameters',
url: window.location.href, url: window.location.href,
isRootFrame: window === window.top isRootFrame: window === window.top,
charset: document.characterSet
}, },
bootstrapPhase1 bootstrapPhase1
); );

View File

@ -515,7 +515,10 @@ var onMessage = function(request, sender, callback) {
µb.cosmeticFilteringEngine.retrieveDomainSelectors(request, response); µb.cosmeticFilteringEngine.retrieveDomainSelectors(request, response);
// If response body filtering is supported, then the scriptlets have // If response body filtering is supported, then the scriptlets have
// already been injected. // already been injected.
if ( µb.canFilterResponseBody === false ) { if (
µb.canFilterResponseBody === false ||
µb.textEncode.normalizeCharset(request.charset) === undefined
) {
response.scriptlets = µb.scriptletFilteringEngine.retrieve(request); response.scriptlets = µb.scriptletFilteringEngine.retrieve(request);
} }
if ( request.isRootFrame && µb.logger.isEnabled() ) { if ( request.isRootFrame && µb.logger.isEnabled() ) {

View File

@ -25,6 +25,39 @@
µBlock.textEncode = (function() { µBlock.textEncode = (function() {
// Lookup from a lowercase charset label to the canonical charset name used
// to select an encoder. Labels absent from this map are not supported.
var normalizedCharset = (function() {
    // Each entry: [ canonical name, [ accepted labels... ] ].
    var labelsByCanonicalName = [
        [ 'utf-8', [ 'utf8', 'unicode-1-1-utf-8', 'utf-8' ] ],
        [ 'windows-1250', [ 'windows-1250', 'cp1250', 'x-cp1250' ] ],
        [ 'windows-1251', [ 'windows-1251', 'cp1251', 'x-cp1251' ] ]
    ];
    var lookup = new Map();
    labelsByCanonicalName.forEach(function(entry) {
        var canonicalName = entry[0];
        entry[1].forEach(function(label) {
            lookup.set(label, canonicalName);
        });
    });
    return lookup;
})();
// Unicode -> windows-1250 byte lookup for code points 0x0100 through 0x017F.
// The windows-1250 encoder indexes this table with (code point - 0x0100);
// the comment leading each row is the code point of that row's first entry.
// A 0x00 entry is presumably a code point with no windows-1250 equivalent
// (verify against the mapping below); the encoder writes the table value
// out unconditionally, so such code points become a 0x00 byte.
// http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT
var cp1250_range0 = new Uint8Array([
/* 0x0100 */ 0x00, 0x00, 0xC3, 0xE3, 0xA5, 0xB9, 0xC6, 0xE6,
/* 0x0108 */ 0x00, 0x00, 0x00, 0x00, 0xC8, 0xE8, 0xCF, 0xEF,
/* 0x0110 */ 0xD0, 0xF0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/* 0x0118 */ 0xCA, 0xEA, 0xCC, 0xEC, 0x00, 0x00, 0x00, 0x00,
/* 0x0120 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/* 0x0128 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/* 0x0130 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/* 0x0138 */ 0x00, 0xC5, 0xE5, 0x00, 0x00, 0xBC, 0xBE, 0x00,
/* 0x0140 */ 0x00, 0xA3, 0xB3, 0xD1, 0xF1, 0x00, 0x00, 0xD2,
/* 0x0148 */ 0xF2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/* 0x0150 */ 0xD5, 0xF5, 0x00, 0x00, 0xC0, 0xE0, 0x00, 0x00,
/* 0x0158 */ 0xD8, 0xF8, 0x8C, 0x9C, 0x00, 0x00, 0xAA, 0xBA,
/* 0x0160 */ 0x8A, 0x9A, 0xDE, 0xFE, 0x8D, 0x9D, 0x00, 0x00,
/* 0x0168 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xD9, 0xF9,
/* 0x0170 */ 0xDB, 0xFB, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/* 0x0178 */ 0x00, 0x8F, 0x9F, 0xAF, 0xBF, 0x8E, 0x9E, 0x00
]);
// http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT
var cp1251_range0 = new Uint8Array([ var cp1251_range0 = new Uint8Array([
/* 0x0400 */ 0x00, 0xA8, 0x80, 0x81, 0xAA, 0xBD, 0xB2, 0xAF, /* 0x0400 */ 0x00, 0xA8, 0x80, 0x81, 0xAA, 0xBD, 0xB2, 0xAF,
/* 0x0408 */ 0xA3, 0x8A, 0x8C, 0x8E, 0x8D, 0x00, 0xA1, 0x8F, /* 0x0408 */ 0xA3, 0x8A, 0x8C, 0x8E, 0x8D, 0x00, 0xA1, 0x8F,
@ -47,7 +80,7 @@
/* 0x0490 */ 0xA5, 0xB4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0490 */ 0xA5, 0xB4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
]); ]);
var cp1251_range1 = new Uint8Array([ var cp125x_range0 = new Uint8Array([
/* 0x2010 */ 0x00, 0x00, 0x00, 0x96, 0x97, 0x00, 0x00, 0x00, /* 0x2010 */ 0x00, 0x00, 0x00, 0x96, 0x97, 0x00, 0x00, 0x00,
/* 0x2018 */ 0x91, 0x92, 0x82, 0x00, 0x93, 0x94, 0x84, 0x00, /* 0x2018 */ 0x91, 0x92, 0x82, 0x00, 0x93, 0x94, 0x84, 0x00,
/* 0x2020 */ 0x86, 0x87, 0x95, 0x00, 0x00, 0x00, 0x85, 0x00, /* 0x2020 */ 0x86, 0x87, 0x95, 0x00, 0x00, 0x00, 0x85, 0x00,
@ -57,6 +90,51 @@
]); ]);
var encoders = { var encoders = {
'windows-1250': function(buf) {
var i = 0, n = buf.byteLength, o = 0, c;
while ( i < n ) {
c = buf[i++];
if ( c < 0x80 ) {
buf[o++] = c;
} else {
if ( (c & 0xE0) === 0xC0 ) {
c = (c & 0x1F) << 6;
c |= (buf[i++] & 0x3F);
} else if ( (c & 0xF0) === 0xE0 ) {
c = (c & 0x0F) << 12;
c |= (buf[i++] & 0x3F) << 6;
c |= (buf[i++] & 0x3F);
} else if ( (c & 0xF8) === 0xF0 ) {
c = (c & 0x07) << 18;
c |= (buf[i++] & 0x3F) << 12;
c |= (buf[i++] & 0x3F) << 6;
c |= (buf[i++] & 0x3F);
}
if ( c < 0x100 ) {
buf[o++] = c;
} else if ( c >= 0x100 && c < 0x180 ) {
buf[o++] = cp1250_range0[c - 0x100];
} else if ( c >= 0x2010 && c < 0x2040 ) {
buf[o++] = cp125x_range0[c - 0x2010];
} else if ( c === 0x02C7 ) {
buf[o++] = 0xA1;
} else if ( c === 0x02D8 ) {
buf[o++] = 0xA2;
} else if ( c === 0x02D9 ) {
buf[o++] = 0xFF;
} else if ( c === 0x02DB ) {
buf[o++] = 0xB2;
} else if ( c === 0x02DD ) {
buf[o++] = 0xBD;
} else if ( c === 0x20AC ) {
buf[o++] = 0x88;
} else if ( c === 0x2122 ) {
buf[o++] = 0x99;
}
}
}
return buf.slice(0, o);
},
'windows-1251': function(buf) { 'windows-1251': function(buf) {
var i = 0, n = buf.byteLength, o = 0, c; var i = 0, n = buf.byteLength, o = 0, c;
while ( i < n ) { while ( i < n ) {
@ -71,24 +149,24 @@
c = (c & 0x0F) << 12; c = (c & 0x0F) << 12;
c |= (buf[i++] & 0x3F) << 6; c |= (buf[i++] & 0x3F) << 6;
c |= (buf[i++] & 0x3F); c |= (buf[i++] & 0x3F);
} else if ( (c & 0xF0) === 0xF0 ) { } else if ( (c & 0xF8) === 0xF0 ) {
c = (c & 0x07) << 18; c = (c & 0x07) << 18;
c |= (buf[i++] & 0x3F) << 12; c |= (buf[i++] & 0x3F) << 12;
c |= (buf[i++] & 0x3F) << 6; c |= (buf[i++] & 0x3F) << 6;
c |= (buf[i++] & 0x3F); c |= (buf[i++] & 0x3F);
} }
if ( c >= 0x400 && c < 0x4A0 ) { if ( c < 0x100 ) {
buf[o++] = c;
} else if ( c >= 0x400 && c < 0x4A0 ) {
buf[o++] = cp1251_range0[c - 0x400]; buf[o++] = cp1251_range0[c - 0x400];
} else if ( c >= 0x2010 && c < 0x2040 ) { } else if ( c >= 0x2010 && c < 0x2040 ) {
buf[o++] = cp1251_range1[c - 0x2010]; buf[o++] = cp125x_range0[c - 0x2010];
} else if ( c === 0x20AC ) { } else if ( c === 0x20AC ) {
buf[o++] = 0x88; buf[o++] = 0x88;
} else if ( c === 0x2116 ) { } else if ( c === 0x2116 ) {
buf[o++] = 0xB9; buf[o++] = 0xB9;
} else if ( c === 0x2122 ) { } else if ( c === 0x2122 ) {
buf[o++] = 0x99; buf[o++] = 0x99;
} else if ( c < 0xD800 || c >= 0xE000 ) {
buf[o++] = c;
} }
} }
} }
@ -96,22 +174,17 @@
} }
}; };
var api = {}; return {
encode: function(charset, buf) {
api.normalizedCharset = new Map([ return encoders.hasOwnProperty(charset) ?
[ 'utf8', 'utf-8' ], encoders[charset](buf) :
[ 'unicode-1-1-utf-8', 'utf-8' ], buf;
[ 'utf-8', 'utf-8' ], },
[ 'windows-1251', 'windows-1251' ], normalizeCharset: function(charset) {
[ 'cp1251', 'windows-1251' ], if ( charset === undefined ) {
[ 'x-cp1251', 'windows-1251' ], return 'utf-8';
]); }
return normalizedCharset.get(charset.toLowerCase());
api.encode = function(charset, buf) { }
return encoders.hasOwnProperty(charset) ?
encoders[charset](buf) :
buf;
}; };
return api;
})(); })();

View File

@ -511,7 +511,7 @@ var onHeadersReceived = function(details) {
} }
if ( isDoc && µb.canFilterResponseBody ) { if ( isDoc && µb.canFilterResponseBody ) {
filterDocument(details); filterDocument(pageStore, details);
} }
// https://github.com/gorhill/uBlock/issues/2813 // https://github.com/gorhill/uBlock/issues/2813
@ -579,6 +579,9 @@ var filterDocument = (function() {
domParser, xmlSerializer, domParser, xmlSerializer,
textDecoderCharset, textDecoder, textEncoder; textDecoderCharset, textDecoder, textEncoder;
// Matches Content-Type values denoting an HTML or XHTML document. The `+`
// of `xhtml+xml` has to be escaped: left bare it is a quantifier, and the
// XHTML media type would never match.
var reContentTypeDocument = /^(?:text\/html|application\/xhtml\+xml)/i,
    // Captures the value of the charset parameter. The capture stops at a
    // quote, a space or a `;` so that a parameter following the charset is
    // not swallowed into the charset name.
    reContentTypeCharset = /charset=['"]?([^'" ;]+)/i;
// Purpose of following helper is to disconnect from watching the stream // Purpose of following helper is to disconnect from watching the stream
// if all the following conditions are fulfilled: // if all the following conditions are fulfilled:
// - Only need to inject scriptlets. // - Only need to inject scriptlets.
@ -774,7 +777,7 @@ var filterDocument = (function() {
filterers.delete(this); filterers.delete(this);
}; };
return function(details) { return function(pageStore, details) {
var hostname = µb.URI.hostnameFromURI(details.url); var hostname = µb.URI.hostnameFromURI(details.url);
if ( hostname === '' ) { return; } if ( hostname === '' ) { return; }
@ -808,12 +811,10 @@ var filterDocument = (function() {
if ( reContentTypeDocument.test(contentType) === false ) { return; } if ( reContentTypeDocument.test(contentType) === false ) { return; }
var match = reContentTypeCharset.exec(contentType); var match = reContentTypeCharset.exec(contentType);
if ( match !== null ) { if ( match !== null ) {
var charset = match[1].toLowerCase(); var charset = µb.textEncode.normalizeCharset(match[1]);
if ( charset === undefined ) { return; }
if ( charset !== 'utf-8' ) { if ( charset !== 'utf-8' ) {
request.charset = µb.textEncode.normalizedCharset.get(charset); request.charset = charset;
if ( request.charset === 'utf-8' ) {
request.charset = undefined;
}
} }
} }
} }
@ -829,9 +830,6 @@ var filterDocument = (function() {
}; };
})(); })();
// Matches Content-Type values denoting an HTML or XHTML document. The `+`
// of `xhtml+xml` has to be escaped: left bare it is a quantifier, and the
// XHTML media type would never match.
var reContentTypeDocument = /^(?:text\/html|application\/xhtml\+xml)/i;
// Captures the value of the charset parameter. The capture stops at a quote,
// a space or a `;` so that a parameter following the charset is not
// swallowed into the charset name.
var reContentTypeCharset = /charset=['"]?([^'" ;]+)/i;
/******************************************************************************/ /******************************************************************************/
var injectCSP = function(pageStore, details) { var injectCSP = function(pageStore, details) {