mirror of https://github.com/gorhill/uBlock.git
Fix various code paths in bodyFilterer
Related to extracting charset of response body.
This commit is contained in:
parent
1cf634cce0
commit
5f78d83fea
|
@ -637,14 +637,11 @@ function textResponseFilterer(session, directives) {
|
|||
));
|
||||
applied.push(directive);
|
||||
}
|
||||
|
||||
if ( applied && logger.enabled ) {
|
||||
if ( applied.length === 0 ) { return; }
|
||||
if ( logger.enabled !== true ) { return; }
|
||||
session.setRealm('network')
|
||||
.pushFilters(applied.map(a => a.logData()))
|
||||
.toLogger();
|
||||
}
|
||||
|
||||
return applied.length !== 0;
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
|
@ -660,11 +657,8 @@ function htmlResponseFilterer(session, selectors) {
|
|||
session.mime
|
||||
);
|
||||
|
||||
if ( selectors !== undefined ) {
|
||||
if ( htmlFilteringEngine.apply(doc, session, selectors) !== true ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if ( selectors === undefined ) { return; }
|
||||
if ( htmlFilteringEngine.apply(doc, session, selectors) !== true ) { return; }
|
||||
|
||||
// https://stackoverflow.com/questions/6088972/get-doctype-of-an-html-as-string-with-javascript/10162353#10162353
|
||||
const doctypeStr = [
|
||||
|
@ -674,8 +668,6 @@ function htmlResponseFilterer(session, selectors) {
|
|||
doc.documentElement.outerHTML,
|
||||
].join('\n');
|
||||
session.setString(doctypeStr);
|
||||
|
||||
return true;
|
||||
}
|
||||
htmlResponseFilterer.domParser = null;
|
||||
htmlResponseFilterer.xmlSerializer = null;
|
||||
|
@ -692,7 +684,7 @@ htmlResponseFilterer.xmlSerializer = null;
|
|||
|
||||
const bodyFilterer = (( ) => {
|
||||
const sessions = new Map();
|
||||
const reContentTypeDocument = /^(?:text\/html|application\/xhtml\+xml)/i;
|
||||
const reContentTypeDocument = /^([^;]+)(?:;|$)/i;
|
||||
const reContentTypeCharset = /charset=['"]?([^'" ]+)/i;
|
||||
const otherValidMimes = new Set([
|
||||
'application/javascript',
|
||||
|
@ -700,14 +692,27 @@ const bodyFilterer = (( ) => {
|
|||
'application/xml',
|
||||
'application/xhtml+xml',
|
||||
]);
|
||||
const NOT_TEXT_TYPES = fc.FONT | fc.IMAGE | fc.MEDIA | fc.WEBSOCKET;
|
||||
const BINARY_TYPES = fc.FONT | fc.IMAGE | fc.MEDIA | fc.WEBSOCKET;
|
||||
const MAX_BUFFER_LENGTH = 3 * 1024 * 1024;
|
||||
|
||||
let textDecoder, textEncoder;
|
||||
|
||||
const contentTypeFromDetails = details => {
|
||||
switch ( details.type ) {
|
||||
case 'script':
|
||||
return 'text/javascript; charset=utf-8';
|
||||
case 'stylesheet':
|
||||
return 'text/css';
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return '';
|
||||
};
|
||||
|
||||
const mimeFromContentType = contentType => {
|
||||
const match = reContentTypeDocument.exec(contentType);
|
||||
if ( match === null ) { return; }
|
||||
return match[0].toLowerCase();
|
||||
return match[1].toLowerCase();
|
||||
};
|
||||
|
||||
const charsetFromContentType = contentType => {
|
||||
|
@ -716,6 +721,19 @@ const bodyFilterer = (( ) => {
|
|||
return match[1].toLowerCase();
|
||||
};
|
||||
|
||||
const charsetFromMime = mime => {
|
||||
switch ( mime ) {
|
||||
case 'application/xml':
|
||||
case 'application/xhtml+xml':
|
||||
case 'text/html':
|
||||
case 'text/css':
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 'utf-8';
|
||||
};
|
||||
|
||||
const charsetFromStream = bytes => {
|
||||
if ( bytes.length < 3 ) { return; }
|
||||
if ( bytes[0] === 0xEF && bytes[1] === 0xBB && bytes[2] === 0xBF ) {
|
||||
|
@ -796,6 +814,11 @@ const bodyFilterer = (( ) => {
|
|||
buffer.set(session.buffer);
|
||||
buffer.set(new Uint8Array(ev.data), session.buffer.byteLength);
|
||||
session.buffer = buffer;
|
||||
if ( session.buffer.length >= MAX_BUFFER_LENGTH ) {
|
||||
sessions.delete(this);
|
||||
this.write(session.buffer);
|
||||
this.disconnect();
|
||||
}
|
||||
};
|
||||
|
||||
const onStreamStop = function() {
|
||||
|
@ -816,12 +839,11 @@ const bodyFilterer = (( ) => {
|
|||
session.charset = charsetUsed;
|
||||
}
|
||||
|
||||
let modified = false;
|
||||
while ( session.jobs.length !== 0 ) {
|
||||
const job = session.jobs.shift();
|
||||
modified = job.fn(session, ...job.args) || modified;
|
||||
job.fn(session, ...job.args);
|
||||
}
|
||||
if ( modified !== true ) { return streamClose(session); }
|
||||
if ( session.modified !== true ) { return streamClose(session); }
|
||||
|
||||
if ( textEncoder === undefined ) {
|
||||
textEncoder = new TextEncoder();
|
||||
|
@ -840,7 +862,7 @@ const bodyFilterer = (( ) => {
|
|||
};
|
||||
|
||||
return class Session extends µb.FilteringContext {
|
||||
constructor(fctxt, details, mime, charset) {
|
||||
constructor(fctxt, mime, charset) {
|
||||
super(fctxt);
|
||||
this.entity = entityFromDomain(this.getDomain());
|
||||
this.stream = null;
|
||||
|
@ -848,6 +870,7 @@ const bodyFilterer = (( ) => {
|
|||
this.mime = mime;
|
||||
this.charset = charset;
|
||||
this.str = null;
|
||||
this.modified = false;
|
||||
this.jobs = [];
|
||||
}
|
||||
getString() {
|
||||
|
@ -865,6 +888,7 @@ const bodyFilterer = (( ) => {
|
|||
}
|
||||
setString(s) {
|
||||
this.str = s;
|
||||
this.modified = true;
|
||||
}
|
||||
addJob(job) {
|
||||
this.jobs.push(job);
|
||||
|
@ -881,7 +905,7 @@ const bodyFilterer = (( ) => {
|
|||
static canFilter(fctxt, details) {
|
||||
if ( µb.canFilterResponseData !== true ) { return; }
|
||||
|
||||
if ( (fctxt.itype & NOT_TEXT_TYPES) !== 0 ) { return; }
|
||||
if ( (fctxt.itype & BINARY_TYPES) !== 0 ) { return; }
|
||||
|
||||
if ( fctxt.method !== fc.METHOD_GET ) {
|
||||
if ( fctxt.method !== fc.METHOD_POST ) {
|
||||
|
@ -891,9 +915,7 @@ const bodyFilterer = (( ) => {
|
|||
|
||||
// https://github.com/gorhill/uBlock/issues/3478
|
||||
const statusCode = details.statusCode || 0;
|
||||
if ( statusCode !== 0 && (statusCode < 200 || statusCode >= 300) ) {
|
||||
return;
|
||||
}
|
||||
if ( statusCode === 0 ) { return; }
|
||||
|
||||
const hostname = fctxt.getHostname();
|
||||
if ( hostname === '' ) { return; }
|
||||
|
@ -907,8 +929,9 @@ const bodyFilterer = (( ) => {
|
|||
}
|
||||
}
|
||||
|
||||
const contentType = headerValueFromName('content-type', headers);
|
||||
let mime = 'text/plain', charset;
|
||||
let mime = 'text/plain', charset = 'utf-8';
|
||||
const contentType = headerValueFromName('content-type', headers) ||
|
||||
contentTypeFromDetails(details);
|
||||
if ( contentType !== '' ) {
|
||||
mime = mimeFromContentType(contentType);
|
||||
if ( mime === undefined ) { return; }
|
||||
|
@ -916,6 +939,8 @@ const bodyFilterer = (( ) => {
|
|||
if ( charset !== undefined ) {
|
||||
charset = textEncode.normalizeCharset(charset);
|
||||
if ( charset === undefined ) { return; }
|
||||
} else {
|
||||
charset = charsetFromMime(mime);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -923,7 +948,7 @@ const bodyFilterer = (( ) => {
|
|||
if ( otherValidMimes.has(mime) === false ) { return; }
|
||||
}
|
||||
|
||||
return new Session(fctxt, details, mime, charset);
|
||||
return new Session(fctxt, mime, charset);
|
||||
}
|
||||
};
|
||||
})();
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
<input type="search" placeholder="logFilterPrompt">
|
||||
<span id="filterExprButton" class="button fa-icon expanded" data-i18n-title="loggerRowFiltererBuiltinTip">angle-up</span>
|
||||
<div id="filterExprPicker">
|
||||
<div><span data-filtex="!" data-i18n="loggerRowFiltererBuiltinNot"></span><span data-filtex="\t--\t|\t<<\t|\t##" data-i18n="loggerRowFiltererBuiltinBlocked"></span><span data-filtex="\t\+\+\t|\t\*\*\t|\t#@#" data-i18n="loggerRowFiltererBuiltinAllowed"></span><span data-filtex="[$,](?:csp|permissions|removeparam|redirect-rule)=|\t\<\<\t" data-i18n="loggerRowFiltererBuiltinModified"></span></div>
|
||||
<div><span data-filtex="!" data-i18n="loggerRowFiltererBuiltinNot"></span><span data-filtex="\t--\t|\t<<\t|\t##" data-i18n="loggerRowFiltererBuiltinBlocked"></span><span data-filtex="\t\+\+\t|\t\*\*\t|\t#@#" data-i18n="loggerRowFiltererBuiltinAllowed"></span><span data-filtex="[$,](?:csp|permissions|removeparam|redirect-rule|replace|urltransform)=|\t\<\<\t" data-i18n="loggerRowFiltererBuiltinModified"></span></div>
|
||||
<div><span data-filtex="!" data-i18n="loggerRowFiltererBuiltinNot"></span>
|
||||
<span style="flex-direction: column;">
|
||||
<div><span data-filtex="\t(?:css|(?:inline-)?font)\t">css/font</span><span data-filtex="\timage\t">image</span><span data-filtex="\tmedia\t">media</span><span data-filtex="\t(?:inline-)?script(?:ing)?\t">script</span></div>
|
||||
|
|
Loading…
Reference in New Issue