Fix various code paths in bodyFilterer

Related to extracting charset of response body.
This commit is contained in:
Raymond Hill 2023-11-05 10:32:23 -05:00
parent 1cf634cce0
commit 5f78d83fea
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
2 changed files with 55 additions and 30 deletions

View File

@ -637,14 +637,11 @@ function textResponseFilterer(session, directives) {
));
applied.push(directive);
}
if ( applied && logger.enabled ) {
if ( applied.length === 0 ) { return; }
if ( logger.enabled !== true ) { return; }
session.setRealm('network')
.pushFilters(applied.map(a => a.logData()))
.toLogger();
}
return applied.length !== 0;
}
/******************************************************************************/
@ -660,11 +657,8 @@ function htmlResponseFilterer(session, selectors) {
session.mime
);
if ( selectors !== undefined ) {
if ( htmlFilteringEngine.apply(doc, session, selectors) !== true ) {
return false;
}
}
if ( selectors === undefined ) { return; }
if ( htmlFilteringEngine.apply(doc, session, selectors) !== true ) { return; }
// https://stackoverflow.com/questions/6088972/get-doctype-of-an-html-as-string-with-javascript/10162353#10162353
const doctypeStr = [
@ -674,8 +668,6 @@ function htmlResponseFilterer(session, selectors) {
doc.documentElement.outerHTML,
].join('\n');
session.setString(doctypeStr);
return true;
}
htmlResponseFilterer.domParser = null;
htmlResponseFilterer.xmlSerializer = null;
@ -692,7 +684,7 @@ htmlResponseFilterer.xmlSerializer = null;
const bodyFilterer = (( ) => {
const sessions = new Map();
const reContentTypeDocument = /^(?:text\/html|application\/xhtml\+xml)/i;
const reContentTypeDocument = /^([^;]+)(?:;|$)/i;
const reContentTypeCharset = /charset=['"]?([^'" ]+)/i;
const otherValidMimes = new Set([
'application/javascript',
@ -700,14 +692,27 @@ const bodyFilterer = (( ) => {
'application/xml',
'application/xhtml+xml',
]);
const NOT_TEXT_TYPES = fc.FONT | fc.IMAGE | fc.MEDIA | fc.WEBSOCKET;
const BINARY_TYPES = fc.FONT | fc.IMAGE | fc.MEDIA | fc.WEBSOCKET;
const MAX_BUFFER_LENGTH = 3 * 1024 * 1024;
let textDecoder, textEncoder;
const contentTypeFromDetails = details => {
switch ( details.type ) {
case 'script':
return 'text/javascript; charset=utf-8';
case 'stylesheet':
return 'text/css';
default:
break;
}
return '';
};
const mimeFromContentType = contentType => {
const match = reContentTypeDocument.exec(contentType);
if ( match === null ) { return; }
return match[0].toLowerCase();
return match[1].toLowerCase();
};
const charsetFromContentType = contentType => {
@ -716,6 +721,19 @@ const bodyFilterer = (( ) => {
return match[1].toLowerCase();
};
const charsetFromMime = mime => {
switch ( mime ) {
case 'application/xml':
case 'application/xhtml+xml':
case 'text/html':
case 'text/css':
return;
default:
break;
}
return 'utf-8';
};
const charsetFromStream = bytes => {
if ( bytes.length < 3 ) { return; }
if ( bytes[0] === 0xEF && bytes[1] === 0xBB && bytes[2] === 0xBF ) {
@ -796,6 +814,11 @@ const bodyFilterer = (( ) => {
buffer.set(session.buffer);
buffer.set(new Uint8Array(ev.data), session.buffer.byteLength);
session.buffer = buffer;
if ( session.buffer.length >= MAX_BUFFER_LENGTH ) {
sessions.delete(this);
this.write(session.buffer);
this.disconnect();
}
};
const onStreamStop = function() {
@ -816,12 +839,11 @@ const bodyFilterer = (( ) => {
session.charset = charsetUsed;
}
let modified = false;
while ( session.jobs.length !== 0 ) {
const job = session.jobs.shift();
modified = job.fn(session, ...job.args) || modified;
job.fn(session, ...job.args);
}
if ( modified !== true ) { return streamClose(session); }
if ( session.modified !== true ) { return streamClose(session); }
if ( textEncoder === undefined ) {
textEncoder = new TextEncoder();
@ -840,7 +862,7 @@ const bodyFilterer = (( ) => {
};
return class Session extends µb.FilteringContext {
constructor(fctxt, details, mime, charset) {
constructor(fctxt, mime, charset) {
super(fctxt);
this.entity = entityFromDomain(this.getDomain());
this.stream = null;
@ -848,6 +870,7 @@ const bodyFilterer = (( ) => {
this.mime = mime;
this.charset = charset;
this.str = null;
this.modified = false;
this.jobs = [];
}
getString() {
@ -865,6 +888,7 @@ const bodyFilterer = (( ) => {
}
setString(s) {
this.str = s;
this.modified = true;
}
addJob(job) {
this.jobs.push(job);
@ -881,7 +905,7 @@ const bodyFilterer = (( ) => {
static canFilter(fctxt, details) {
if ( µb.canFilterResponseData !== true ) { return; }
if ( (fctxt.itype & NOT_TEXT_TYPES) !== 0 ) { return; }
if ( (fctxt.itype & BINARY_TYPES) !== 0 ) { return; }
if ( fctxt.method !== fc.METHOD_GET ) {
if ( fctxt.method !== fc.METHOD_POST ) {
@ -891,9 +915,7 @@ const bodyFilterer = (( ) => {
// https://github.com/gorhill/uBlock/issues/3478
const statusCode = details.statusCode || 0;
if ( statusCode !== 0 && (statusCode < 200 || statusCode >= 300) ) {
return;
}
if ( statusCode === 0 ) { return; }
const hostname = fctxt.getHostname();
if ( hostname === '' ) { return; }
@ -907,8 +929,9 @@ const bodyFilterer = (( ) => {
}
}
const contentType = headerValueFromName('content-type', headers);
let mime = 'text/plain', charset;
let mime = 'text/plain', charset = 'utf-8';
const contentType = headerValueFromName('content-type', headers) ||
contentTypeFromDetails(details);
if ( contentType !== '' ) {
mime = mimeFromContentType(contentType);
if ( mime === undefined ) { return; }
@ -916,6 +939,8 @@ const bodyFilterer = (( ) => {
if ( charset !== undefined ) {
charset = textEncode.normalizeCharset(charset);
if ( charset === undefined ) { return; }
} else {
charset = charsetFromMime(mime);
}
}
@ -923,7 +948,7 @@ const bodyFilterer = (( ) => {
if ( otherValidMimes.has(mime) === false ) { return; }
}
return new Session(fctxt, details, mime, charset);
return new Session(fctxt, mime, charset);
}
};
})();

View File

@ -57,7 +57,7 @@
<input type="search" placeholder="logFilterPrompt">
<span id="filterExprButton" class="button fa-icon expanded" data-i18n-title="loggerRowFiltererBuiltinTip">angle-up</span>
<div id="filterExprPicker">
<div><span data-filtex="!" data-i18n="loggerRowFiltererBuiltinNot"></span><span data-filtex="\t--\t|\t<<\t|\t##" data-i18n="loggerRowFiltererBuiltinBlocked"></span><span data-filtex="\t\+\+\t|\t\*\*\t|\t#@#" data-i18n="loggerRowFiltererBuiltinAllowed"></span><span data-filtex="[$,](?:csp|permissions|removeparam|redirect-rule)=|\t\<\<\t" data-i18n="loggerRowFiltererBuiltinModified"></span></div>
<div><span data-filtex="!" data-i18n="loggerRowFiltererBuiltinNot"></span><span data-filtex="\t--\t|\t<<\t|\t##" data-i18n="loggerRowFiltererBuiltinBlocked"></span><span data-filtex="\t\+\+\t|\t\*\*\t|\t#@#" data-i18n="loggerRowFiltererBuiltinAllowed"></span><span data-filtex="[$,](?:csp|permissions|removeparam|redirect-rule|replace|urltransform)=|\t\<\<\t" data-i18n="loggerRowFiltererBuiltinModified"></span></div>
<div><span data-filtex="!" data-i18n="loggerRowFiltererBuiltinNot"></span>
<span style="flex-direction: column;">
<div><span data-filtex="\t(?:css|(?:inline-)?font)\t">css/font</span><span data-filtex="\timage\t">image</span><span data-filtex="\tmedia\t">media</span><span data-filtex="\t(?:inline-)?script(?:ing)?\t">script</span></div>