mirror of https://github.com/gorhill/uBlock.git
Assume UTF-8 when no encoding can be looked up.
This makes HTML filtering and the `replace=` filter option less likely to be silently skipped by uBO: previously, the body response filterer acted on a response body only when an encoding was expressly declared. UTF-8 usage is currently reported as ~98.2%, which makes it a reasonable default: https://w3techs.com/technologies/history_overview/character_encoding
This commit is contained in:
parent
b95a1e987f
commit
63acdcbdeb
|
@ -749,7 +749,7 @@ const bodyFilterer = (( ) => {
|
||||||
/* t */ if ( bytes[i+6] !== 0x74 ) { continue; }
|
/* t */ if ( bytes[i+6] !== 0x74 ) { continue; }
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if ( (i - 40) >= 65536 ) { return; }
|
if ( (i + 40) >= 65536 ) { return; }
|
||||||
i += 8;
|
i += 8;
|
||||||
// find first alpha character
|
// find first alpha character
|
||||||
let j = -1;
|
let j = -1;
|
||||||
|
@ -827,13 +827,17 @@ const bodyFilterer = (( ) => {
|
||||||
}
|
}
|
||||||
if ( this.status !== 'finishedtransferringdata' ) { return; }
|
if ( this.status !== 'finishedtransferringdata' ) { return; }
|
||||||
|
|
||||||
// If encoding is still unknown, try to extract from stream data
|
// If encoding is still unknown, try to extract from stream data.
|
||||||
|
// Just assume utf-8 if ultimately no encoding can be looked up.
|
||||||
if ( session.charset === undefined ) {
|
if ( session.charset === undefined ) {
|
||||||
const charsetFound = charsetFromStream(session.buffer);
|
const charsetFound = charsetFromStream(session.buffer);
|
||||||
if ( charsetFound === undefined ) { return streamClose(session); }
|
if ( charsetFound !== undefined ) {
|
||||||
const charsetUsed = textEncode.normalizeCharset(charsetFound);
|
const charsetUsed = textEncode.normalizeCharset(charsetFound);
|
||||||
if ( charsetUsed === undefined ) { return streamClose(session); }
|
if ( charsetUsed === undefined ) { return streamClose(session); }
|
||||||
session.charset = charsetUsed;
|
session.charset = charsetUsed;
|
||||||
|
} else {
|
||||||
|
session.charset = 'utf-8';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
while ( session.jobs.length !== 0 ) {
|
while ( session.jobs.length !== 0 ) {
|
||||||
|
|
Loading…
Reference in New Issue