This commit is contained in:
Raymond Hill 2018-01-02 23:06:16 -05:00
parent 636dcf7ee4
commit 455bf281f0
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
3 changed files with 135 additions and 13 deletions

View File

@ -34,6 +34,7 @@
<script src="js/pagestore.js"></script>
<script src="js/tab.js"></script>
<script src="js/traffic.js"></script>
<script src="js/text-encode.js"></script>
<script src="js/contextmenu.js"></script>
<script src="js/reverselookup.js"></script>
<script src="js/rpcreceiver.js"></script>

117
src/js/text-encode.js Normal file
View File

@ -0,0 +1,117 @@
/*******************************************************************************
uBlock Origin - a browser extension to block requests.
Copyright (C) 2018 Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see {http://www.gnu.org/licenses/}.
Home: https://github.com/gorhill/uBlock
*/
'use strict';
/******************************************************************************/
µBlock.textEncode = (function() {
var cp1251_range0 = new Uint8Array([
/* 0x0400 */ 0x00, 0xA8, 0x80, 0x81, 0xAA, 0xBD, 0xB2, 0xAF,
/* 0x0408 */ 0xA3, 0x8A, 0x8C, 0x8E, 0x8D, 0x00, 0xA1, 0x8F,
/* 0x0410 */ 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
/* 0x0418 */ 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
/* 0x0420 */ 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
/* 0x0428 */ 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
/* 0x0430 */ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
/* 0x0438 */ 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
/* 0x0440 */ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
/* 0x0448 */ 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
/* 0x0450 */ 0x00, 0xB8, 0x90, 0x83, 0xBA, 0xBE, 0xB3, 0xBF,
/* 0x0458 */ 0xBC, 0x9A, 0x9C, 0x9E, 0x9D, 0x00, 0xA2, 0x9F,
/* 0x0460 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/* 0x0468 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/* 0x0470 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/* 0x0478 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/* 0x0480 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/* 0x0488 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/* 0x0490 */ 0xA5, 0xB4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
]);
var cp1251_range1 = new Uint8Array([
/* 0x2010 */ 0x00, 0x00, 0x00, 0x96, 0x97, 0x00, 0x00, 0x00,
/* 0x2018 */ 0x91, 0x92, 0x82, 0x00, 0x93, 0x94, 0x84, 0x00,
/* 0x2020 */ 0x86, 0x87, 0x95, 0x00, 0x00, 0x00, 0x85, 0x00,
/* 0x2028 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/* 0x2030 */ 0x89, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
/* 0x2038 */ 0x00, 0x8B, 0x9B, 0x00, 0x00, 0x00, 0x00, 0x00
]);
var encoders = {
'windows-1251': function(buf) {
var i = 0, n = buf.byteLength, o = 0, c;
while ( i < n ) {
c = buf[i++];
if ( c < 0x80 ) {
buf[o++] = c;
} else {
if ( (c & 0xE0) === 0xC0 ) {
c = (c & 0x1F) << 6;
c |= (buf[i++] & 0x3F);
} else if ( (c & 0xF0) === 0xE0 ) {
c = (c & 0x0F) << 12;
c |= (buf[i++] & 0x3F) << 6;
c |= (buf[i++] & 0x3F);
} else if ( (c & 0xF0) === 0xF0 ) {
c = (c & 0x07) << 18;
c |= (buf[i++] & 0x3F) << 12;
c |= (buf[i++] & 0x3F) << 6;
c |= (buf[i++] & 0x3F);
}
if ( c >= 0x400 && c < 0x4A0 ) {
buf[o++] = cp1251_range0[c - 0x400];
} else if ( c >= 0x2010 && c < 0x2040 ) {
buf[o++] = cp1251_range1[c - 0x2010];
} else if ( c === 0x20AC ) {
buf[o++] = 0x88;
} else if ( c === 0x2116 ) {
buf[o++] = 0xB9;
} else if ( c === 0x2122 ) {
buf[o++] = 0x99;
} else if ( c < 0xD800 || c >= 0xE000 ) {
buf[o++] = c;
}
}
}
return buf.slice(0, o);
}
};
var api = {};
api.normalizedCharset = new Map([
[ 'utf8', 'utf-8' ],
[ 'unicode-1-1-utf-8', 'utf-8' ],
[ 'utf-8', 'utf-8' ],
[ 'windows-1251', 'windows-1251' ],
[ 'cp1251', 'windows-1251' ],
[ 'x-cp1251', 'windows-1251' ],
]);
api.encode = function(charset, buf) {
return encoders.hasOwnProperty(charset) ?
encoders[charset](buf) :
buf;
};
return api;
})();

View File

@ -1,7 +1,7 @@
/*******************************************************************************
uBlock Origin - a browser extension to block requests.
Copyright (C) 2014-2017 Raymond Hill
Copyright (C) 2014-2018 Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -750,23 +750,24 @@ var filterDocument = (function() {
return;
}
// If the charset of the document was not utf-8, we need to change it
// to utf-8.
if ( textDecoderCharset !== undefined ) {
var meta = doc.createElement('meta');
meta.setAttribute('charset', 'utf-8');
doc.head.insertBefore(meta, doc.head.firstChild);
}
// https://stackoverflow.com/questions/6088972/get-doctype-of-an-html-as-string-with-javascript/10162353#10162353
var doctypeStr = doc.doctype instanceof Object ?
xmlSerializer.serializeToString(doc.doctype) + '\n' :
'';
streamClose(
filterer,
textEncoder.encode(doctypeStr + doc.documentElement.outerHTML)
// https://github.com/gorhill/uBlock/issues/3391
var encodedStream = textEncoder.encode(
doctypeStr +
doc.documentElement.outerHTML
);
if ( textDecoderCharset !== undefined ) {
encodedStream = µb.textEncode.encode(
textDecoderCharset,
encodedStream
);
}
streamClose(filterer, encodedStream);
};
var onStreamError = function() {
@ -809,7 +810,10 @@ var filterDocument = (function() {
if ( match !== null ) {
var charset = match[1].toLowerCase();
if ( charset !== 'utf-8' ) {
request.charset = charset;
request.charset = µb.textEncode.normalizedCharset.get(charset);
if ( request.charset === 'utf-8' ) {
request.charset = undefined;
}
}
}
}