mirror of https://github.com/gorhill/uBlock.git
add support for iso-8859-1/windows-1252 encoding (https://github.com/gorhill/uBlock/issues/3391#issuecomment-354868704)
This commit is contained in:
parent
fcd2124ad3
commit
a0375bb6a3
|
@ -517,6 +517,7 @@ var onMessage = function(request, sender, callback) {
|
||||||
// already been injected.
|
// already been injected.
|
||||||
if (
|
if (
|
||||||
µb.canFilterResponseBody === false ||
|
µb.canFilterResponseBody === false ||
|
||||||
|
µb.textEncode === undefined ||
|
||||||
µb.textEncode.normalizeCharset(request.charset) === undefined
|
µb.textEncode.normalizeCharset(request.charset) === undefined
|
||||||
) {
|
) {
|
||||||
response.scriptlets = µb.scriptletFilteringEngine.retrieve(request);
|
response.scriptlets = µb.scriptletFilteringEngine.retrieve(request);
|
||||||
|
|
|
@ -25,16 +25,40 @@
|
||||||
|
|
||||||
µBlock.textEncode = (function() {
|
µBlock.textEncode = (function() {
|
||||||
|
|
||||||
|
if ( µBlock.canFilterResponseBody !== true ) { return; }
|
||||||
|
|
||||||
|
// charset aliases extracted from:
|
||||||
|
// https://github.com/inexorabletash/text-encoding/blob/b4e5bc26e26e51f56e3daa9f13138c79f49d3c34/lib/encoding.js#L342
|
||||||
var normalizedCharset = new Map([
|
var normalizedCharset = new Map([
|
||||||
[ 'utf8', 'utf-8' ],
|
[ 'utf8', 'utf-8' ],
|
||||||
[ 'unicode-1-1-utf-8', 'utf-8' ],
|
[ 'unicode-1-1-utf-8', 'utf-8' ],
|
||||||
[ 'utf-8', 'utf-8' ],
|
[ 'utf-8', 'utf-8' ],
|
||||||
|
|
||||||
[ 'windows-1250', 'windows-1250' ],
|
[ 'windows-1250', 'windows-1250' ],
|
||||||
[ 'cp1250', 'windows-1250' ],
|
[ 'cp1250', 'windows-1250' ],
|
||||||
[ 'x-cp1250', 'windows-1250' ],
|
[ 'x-cp1250', 'windows-1250' ],
|
||||||
|
|
||||||
[ 'windows-1251', 'windows-1251' ],
|
[ 'windows-1251', 'windows-1251' ],
|
||||||
[ 'cp1251', 'windows-1251' ],
|
[ 'cp1251', 'windows-1251' ],
|
||||||
[ 'x-cp1251', 'windows-1251' ],
|
[ 'x-cp1251', 'windows-1251' ],
|
||||||
|
|
||||||
|
[ 'windows-1252', 'windows-1252' ],
|
||||||
|
[ 'ansi_x3.4-1968', 'windows-1252' ],
|
||||||
|
[ 'ascii', 'windows-1252' ],
|
||||||
|
[ 'cp1252', 'windows-1252' ],
|
||||||
|
[ 'cp819', 'windows-1252' ],
|
||||||
|
[ 'csisolatin1', 'windows-1252' ],
|
||||||
|
[ 'ibm819', 'windows-1252' ],
|
||||||
|
[ 'iso-8859-1', 'windows-1252' ],
|
||||||
|
[ 'iso-ir-100', 'windows-1252' ],
|
||||||
|
[ 'iso8859-1', 'windows-1252' ],
|
||||||
|
[ 'iso88591', 'windows-1252' ],
|
||||||
|
[ 'iso_8859-1', 'windows-1252' ],
|
||||||
|
[ 'iso_8859-1:1987', 'windows-1252' ],
|
||||||
|
[ 'l1', 'windows-1252' ],
|
||||||
|
[ 'latin1', 'windows-1252' ],
|
||||||
|
[ 'us-ascii', 'windows-1252' ],
|
||||||
|
[ 'x-cp1252', 'windows-1252' ],
|
||||||
]);
|
]);
|
||||||
|
|
||||||
// http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT
|
// http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT
|
||||||
|
@ -77,7 +101,17 @@
|
||||||
/* 0x0478 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
/* 0x0478 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
/* 0x0480 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
/* 0x0480 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
/* 0x0488 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
/* 0x0488 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
/* 0x0490 */ 0xA5, 0xB4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
/* 0x0490 */ 0xA5, 0xB4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
|
||||||
|
]);
|
||||||
|
|
||||||
|
// https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
|
||||||
|
var cp1252_range0 = new Uint8Array([
|
||||||
|
/* 0x0150 */ 0x00, 0x00, 0x8C, 0x9C, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
/* 0x0158 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
/* 0x0160 */ 0x8A, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
/* 0x0168 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
/* 0x0170 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
/* 0x0178 */ 0x9F, 0x00, 0x00, 0x00, 0x00, 0x8E, 0x9E, 0x00
|
||||||
]);
|
]);
|
||||||
|
|
||||||
var cp125x_range0 = new Uint8Array([
|
var cp125x_range0 = new Uint8Array([
|
||||||
|
@ -171,6 +205,47 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return buf.slice(0, o);
|
return buf.slice(0, o);
|
||||||
|
},
|
||||||
|
'windows-1252': function(buf) {
|
||||||
|
var i = 0, n = buf.byteLength, o = 0, c;
|
||||||
|
while ( i < n ) {
|
||||||
|
c = buf[i++];
|
||||||
|
if ( c < 0x80 ) {
|
||||||
|
buf[o++] = c;
|
||||||
|
} else {
|
||||||
|
if ( (c & 0xE0) === 0xC0 ) {
|
||||||
|
c = (c & 0x1F) << 6;
|
||||||
|
c |= (buf[i++] & 0x3F);
|
||||||
|
} else if ( (c & 0xF0) === 0xE0 ) {
|
||||||
|
c = (c & 0x0F) << 12;
|
||||||
|
c |= (buf[i++] & 0x3F) << 6;
|
||||||
|
c |= (buf[i++] & 0x3F);
|
||||||
|
} else if ( (c & 0xF8) === 0xF0 ) {
|
||||||
|
c = (c & 0x07) << 18;
|
||||||
|
c |= (buf[i++] & 0x3F) << 12;
|
||||||
|
c |= (buf[i++] & 0x3F) << 6;
|
||||||
|
c |= (buf[i++] & 0x3F);
|
||||||
|
}
|
||||||
|
if ( c < 0x100 ) {
|
||||||
|
buf[o++] = c;
|
||||||
|
} else if ( c >= 0x150 && c < 0x180 ) {
|
||||||
|
buf[o++] = cp1252_range0[c - 0x150];
|
||||||
|
} else if ( c >= 0x2010 && c < 0x2040 ) {
|
||||||
|
buf[o++] = cp125x_range0[c - 0x2010];
|
||||||
|
} else if ( c === 0x192 ) {
|
||||||
|
buf[o++] = 0x83;
|
||||||
|
} else if ( c === 0x2C6 ) {
|
||||||
|
buf[o++] = 0x88;
|
||||||
|
} else if ( c === 0x2DC ) {
|
||||||
|
buf[o++] = 0x98;
|
||||||
|
} else if ( c === 0x20AC ) {
|
||||||
|
buf[o++] = 0x80;
|
||||||
|
} else if ( c === 0x2122 ) {
|
||||||
|
buf[o++] = 0x99;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return buf.slice(0, o);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -577,7 +577,7 @@ var filterDocument = (function() {
|
||||||
var µb = µBlock,
|
var µb = µBlock,
|
||||||
filterers = new Map(),
|
filterers = new Map(),
|
||||||
domParser, xmlSerializer,
|
domParser, xmlSerializer,
|
||||||
textDecoderCharset, textDecoder, textEncoder;
|
utf8TextDecoder, textDecoder, textEncoder;
|
||||||
|
|
||||||
var reContentTypeDocument = /^(?:text\/html|application\/xhtml+xml)/i,
|
var reContentTypeDocument = /^(?:text\/html|application\/xhtml+xml)/i,
|
||||||
reContentTypeCharset = /charset=['"]?([^'" ]+)/i;
|
reContentTypeCharset = /charset=['"]?([^'" ]+)/i;
|
||||||
|
@ -737,29 +737,17 @@ var filterDocument = (function() {
|
||||||
textEncoder = new TextEncoder();
|
textEncoder = new TextEncoder();
|
||||||
}
|
}
|
||||||
|
|
||||||
// In case of unknown charset, assume utf-8.
|
var doc;
|
||||||
if (
|
|
||||||
filterer.charset === undefined && textDecoderCharset !== 'utf-8' ||
|
|
||||||
filterer.charset !== undefined && filterer.charset !== textDecoderCharset
|
|
||||||
) {
|
|
||||||
textDecoder = undefined;
|
|
||||||
}
|
|
||||||
if ( textDecoder === undefined ) {
|
|
||||||
try {
|
|
||||||
textDecoder = new TextDecoder(filterer.charset);
|
|
||||||
textDecoderCharset = filterer.charset || 'utf-8';
|
|
||||||
} catch(ex) {
|
|
||||||
textDecoder = new TextDecoder();
|
|
||||||
textDecoderCharset = 'utf-8';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var doc = domParser.parseFromString(
|
|
||||||
textDecoder.decode(filterer.buffer),
|
|
||||||
'text/html'
|
|
||||||
);
|
|
||||||
|
|
||||||
|
// If stream encoding is still unknown, try to extract from document.
|
||||||
if ( filterer.charset === undefined ) {
|
if ( filterer.charset === undefined ) {
|
||||||
|
if ( utf8TextDecoder === undefined ) {
|
||||||
|
utf8TextDecoder = new TextDecoder();
|
||||||
|
}
|
||||||
|
doc = domParser.parseFromString(
|
||||||
|
utf8TextDecoder.decode(filterer.buffer.slice(0, 1024)),
|
||||||
|
'text/html'
|
||||||
|
);
|
||||||
filterer.charset = µb.textEncode.normalizeCharset(charsetFromDoc(doc));
|
filterer.charset = µb.textEncode.normalizeCharset(charsetFromDoc(doc));
|
||||||
if ( filterer.charset === undefined ) {
|
if ( filterer.charset === undefined ) {
|
||||||
streamClose(filterer);
|
streamClose(filterer);
|
||||||
|
@ -767,6 +755,21 @@ var filterDocument = (function() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (
|
||||||
|
textDecoder !== undefined &&
|
||||||
|
textDecoder.encoding !== filterer.charset
|
||||||
|
) {
|
||||||
|
textDecoder = undefined;
|
||||||
|
}
|
||||||
|
if ( textDecoder === undefined ) {
|
||||||
|
textDecoder = new TextDecoder(filterer.charset);
|
||||||
|
}
|
||||||
|
|
||||||
|
doc = domParser.parseFromString(
|
||||||
|
textDecoder.decode(filterer.buffer),
|
||||||
|
'text/html'
|
||||||
|
);
|
||||||
|
|
||||||
var modified = false;
|
var modified = false;
|
||||||
if ( filterer.selectors !== undefined ) {
|
if ( filterer.selectors !== undefined ) {
|
||||||
if ( µb.htmlFilteringEngine.apply(doc, filterer) ) {
|
if ( µb.htmlFilteringEngine.apply(doc, filterer) ) {
|
||||||
|
|
Loading…
Reference in New Issue