mirror of https://github.com/gorhill/uBlock.git
Assume UTF-8 when no encoding can be looked up.
This makes HTML filtering and the `replace=` filter option less likely to be bypassed by uBO: the body response filterer previously required an encoding to be explicitly declared before it would act on the response body. UTF-8 usage is currently reported at ~98.2%: https://w3techs.com/technologies/history_overview/character_encoding
This commit is contained in:
parent
b95a1e987f
commit
63acdcbdeb
|
@ -749,7 +749,7 @@ const bodyFilterer = (( ) => {
|
|||
/* t */ if ( bytes[i+6] !== 0x74 ) { continue; }
|
||||
break;
|
||||
}
|
||||
if ( (i - 40) >= 65536 ) { return; }
|
||||
if ( (i + 40) >= 65536 ) { return; }
|
||||
i += 8;
|
||||
// find first alpha character
|
||||
let j = -1;
|
||||
|
@ -827,13 +827,17 @@ const bodyFilterer = (( ) => {
|
|||
}
|
||||
if ( this.status !== 'finishedtransferringdata' ) { return; }
|
||||
|
||||
// If encoding is still unknown, try to extract from stream data
|
||||
// If encoding is still unknown, try to extract from stream data.
|
||||
// Just assume utf-8 if ultimately no encoding can be looked up.
|
||||
if ( session.charset === undefined ) {
|
||||
const charsetFound = charsetFromStream(session.buffer);
|
||||
if ( charsetFound === undefined ) { return streamClose(session); }
|
||||
const charsetUsed = textEncode.normalizeCharset(charsetFound);
|
||||
if ( charsetUsed === undefined ) { return streamClose(session); }
|
||||
session.charset = charsetUsed;
|
||||
if ( charsetFound !== undefined ) {
|
||||
const charsetUsed = textEncode.normalizeCharset(charsetFound);
|
||||
if ( charsetUsed === undefined ) { return streamClose(session); }
|
||||
session.charset = charsetUsed;
|
||||
} else {
|
||||
session.charset = 'utf-8';
|
||||
}
|
||||
}
|
||||
|
||||
while ( session.jobs.length !== 0 ) {
|
||||
|
|
Loading…
Reference in New Issue