From 5f78d83fea09a54a611b00591459081e00a17c08 Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Sun, 5 Nov 2023 10:32:23 -0500 Subject: [PATCH] Fix various code paths in bodyFilterer Related to extracting charset of response body. --- src/js/traffic.js | 83 ++++++++++++++++++++++++++++++---------------- src/logger-ui.html | 2 +- 2 files changed, 55 insertions(+), 30 deletions(-) diff --git a/src/js/traffic.js b/src/js/traffic.js index 851f40a41..68f30a001 100644 --- a/src/js/traffic.js +++ b/src/js/traffic.js @@ -637,14 +637,11 @@ function textResponseFilterer(session, directives) { )); applied.push(directive); } - - if ( applied && logger.enabled ) { - session.setRealm('network') - .pushFilters(applied.map(a => a.logData())) - .toLogger(); - } - - return applied.length !== 0; + if ( applied.length === 0 ) { return; } + if ( logger.enabled !== true ) { return; } + session.setRealm('network') + .pushFilters(applied.map(a => a.logData())) + .toLogger(); } /******************************************************************************/ @@ -660,11 +657,8 @@ function htmlResponseFilterer(session, selectors) { session.mime ); - if ( selectors !== undefined ) { - if ( htmlFilteringEngine.apply(doc, session, selectors) !== true ) { - return false; - } - } + if ( selectors === undefined ) { return; } + if ( htmlFilteringEngine.apply(doc, session, selectors) !== true ) { return; } // https://stackoverflow.com/questions/6088972/get-doctype-of-an-html-as-string-with-javascript/10162353#10162353 const doctypeStr = [ @@ -674,8 +668,6 @@ function htmlResponseFilterer(session, selectors) { doc.documentElement.outerHTML, ].join('\n'); session.setString(doctypeStr); - - return true; } htmlResponseFilterer.domParser = null; htmlResponseFilterer.xmlSerializer = null; @@ -692,7 +684,7 @@ htmlResponseFilterer.xmlSerializer = null; const bodyFilterer = (( ) => { const sessions = new Map(); - const reContentTypeDocument = /^(?:text\/html|application\/xhtml\+xml)/i; + const reContentTypeDocument = /^([^;]+)(?:;|$)/i; const reContentTypeCharset = /charset=['"]?([^'" ]+)/i; const otherValidMimes = new Set([ 'application/javascript', @@ -700,14 +692,27 @@ const bodyFilterer = (( ) => { 'application/xml', 'application/xhtml+xml', ]); - const NOT_TEXT_TYPES = fc.FONT | fc.IMAGE | fc.MEDIA | fc.WEBSOCKET; + const BINARY_TYPES = fc.FONT | fc.IMAGE | fc.MEDIA | fc.WEBSOCKET; + const MAX_BUFFER_LENGTH = 3 * 1024 * 1024; let textDecoder, textEncoder; + const contentTypeFromDetails = details => { + switch ( details.type ) { + case 'script': + return 'text/javascript; charset=utf-8'; + case 'stylesheet': + return 'text/css'; + default: + break; + } + return ''; + }; + const mimeFromContentType = contentType => { const match = reContentTypeDocument.exec(contentType); if ( match === null ) { return; } - return match[0].toLowerCase(); + return match[1].toLowerCase(); }; const charsetFromContentType = contentType => { @@ -716,6 +721,19 @@ const bodyFilterer = (( ) => { return match[1].toLowerCase(); }; + const charsetFromMime = mime => { + switch ( mime ) { + case 'application/xml': + case 'application/xhtml+xml': + case 'text/html': + case 'text/css': + return; + default: + break; + } + return 'utf-8'; + }; + const charsetFromStream = bytes => { if ( bytes.length < 3 ) { return; } if ( bytes[0] === 0xEF && bytes[1] === 0xBB && bytes[2] === 0xBF ) { @@ -796,6 +814,11 @@ const bodyFilterer = (( ) => { buffer.set(session.buffer); buffer.set(new Uint8Array(ev.data), session.buffer.byteLength); session.buffer = buffer; + if ( session.buffer.length >= MAX_BUFFER_LENGTH ) { + sessions.delete(this); + this.write(session.buffer); + this.disconnect(); + } }; const onStreamStop = function() { @@ -816,12 +839,11 @@ const bodyFilterer = (( ) => { session.charset = charsetUsed; } - let modified = false; while ( session.jobs.length !== 0 ) { const job = session.jobs.shift(); - modified = job.fn(session, ...job.args) || modified; + job.fn(session, ...job.args); } - if ( modified !== true ) { return streamClose(session); } + if ( session.modified !== true ) { return streamClose(session); } if ( textEncoder === undefined ) { textEncoder = new TextEncoder(); @@ -840,7 +862,7 @@ const bodyFilterer = (( ) => { }; return class Session extends µb.FilteringContext { - constructor(fctxt, details, mime, charset) { + constructor(fctxt, mime, charset) { super(fctxt); this.entity = entityFromDomain(this.getDomain()); this.stream = null; @@ -848,6 +870,7 @@ const bodyFilterer = (( ) => { this.mime = mime; this.charset = charset; this.str = null; + this.modified = false; this.jobs = []; } getString() { @@ -865,6 +888,7 @@ const bodyFilterer = (( ) => { } setString(s) { this.str = s; + this.modified = true; } addJob(job) { this.jobs.push(job); @@ -881,7 +905,7 @@ const bodyFilterer = (( ) => { static canFilter(fctxt, details) { if ( µb.canFilterResponseData !== true ) { return; } - if ( (fctxt.itype & NOT_TEXT_TYPES) !== 0 ) { return; } + if ( (fctxt.itype & BINARY_TYPES) !== 0 ) { return; } if ( fctxt.method !== fc.METHOD_GET ) { if ( fctxt.method !== fc.METHOD_POST ) { @@ -891,9 +915,7 @@ const bodyFilterer = (( ) => { // https://github.com/gorhill/uBlock/issues/3478 const statusCode = details.statusCode || 0; - if ( statusCode !== 0 && (statusCode < 200 || statusCode >= 300) ) { - return; - } + if ( statusCode === 0 ) { return; } const hostname = fctxt.getHostname(); if ( hostname === '' ) { return; } @@ -907,8 +929,9 @@ const bodyFilterer = (( ) => { } } - const contentType = headerValueFromName('content-type', headers); - let mime = 'text/plain', charset; + let mime = 'text/plain', charset = 'utf-8'; + const contentType = headerValueFromName('content-type', headers) || + contentTypeFromDetails(details); if ( contentType !== '' ) { mime = mimeFromContentType(contentType); if ( mime === undefined ) { return; } @@ -916,6 +939,8 @@ const bodyFilterer = (( ) => { if ( charset !== undefined ) { charset = textEncode.normalizeCharset(charset); if ( charset === undefined ) { return; } + } else { + charset = charsetFromMime(mime); } } @@ -923,7 +948,7 @@ const bodyFilterer = (( ) => { if ( otherValidMimes.has(mime) === false ) { return; } } - return new Session(fctxt, details, mime, charset); + return new Session(fctxt, mime, charset); } }; })(); diff --git a/src/logger-ui.html b/src/logger-ui.html index fc4dae9e6..e8d625be1 100644 --- a/src/logger-ui.html +++ b/src/logger-ui.html @@ -57,7 +57,7 @@ angle-up
-
+
css/fontimagemediascript