From fd036a51ee2055216cab2be8e65deb5bfd72d5b2 Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Wed, 28 Jun 2023 19:35:22 -0400 Subject: [PATCH] Add compatibility with AdGuard's `#%#//scriptlet(...)` syntax Related issue: - https://github.com/AdguardTeam/Scriptlets/issues/332 Additionally, uBO's own scriptlet syntax now also accepts quoting the parameters with either `'` or `"`. This can be used to avoid having to escape commas when they are present in a parameter. --- assets/resources/scriptlets.js | 92 ++++++++--- src/js/background.js | 4 +- src/js/reverselookup-worker.js | 2 +- src/js/reverselookup.js | 13 +- src/js/scriptlet-filtering.js | 99 ++++-------- src/js/static-ext-filtering-db.js | 7 +- src/js/static-ext-filtering.js | 4 +- src/js/static-filtering-parser.js | 254 ++++++++++++++++++++---------- 8 files changed, 292 insertions(+), 183 deletions(-) diff --git a/assets/resources/scriptlets.js b/assets/resources/scriptlets.js index 78869046e..c0ea19a2c 100644 --- a/assets/resources/scriptlets.js +++ b/assets/resources/scriptlets.js @@ -763,7 +763,11 @@ function setCookieHelper( builtinScriptlets.push({ name: 'abort-current-script.js', - aliases: [ 'acs.js', 'abort-current-inline-script.js', 'acis.js' ], + aliases: [ + 'acs.js', + 'abort-current-inline-script.js', + 'acis.js', + ], fn: abortCurrentScript, dependencies: [ 'abort-current-script-core.fn', @@ -786,7 +790,9 @@ function abortCurrentScript( builtinScriptlets.push({ name: 'abort-on-property-read.js', - aliases: [ 'aopr.js' ], + aliases: [ + 'aopr.js', + ], fn: abortOnPropertyRead, dependencies: [ 'get-exception-token.fn', @@ -840,7 +846,9 @@ function abortOnPropertyRead( builtinScriptlets.push({ name: 'abort-on-property-write.js', - aliases: [ 'aopw.js' ], + aliases: [ + 'aopw.js', + ], fn: abortOnPropertyWrite, dependencies: [ 'get-exception-token.fn', @@ -872,7 +880,9 @@ function abortOnPropertyWrite( builtinScriptlets.push({ name: 'abort-on-stack-trace.js', - aliases: [ 'aost.js' ], + aliases: [ +
'aost.js', + ], fn: abortOnStackTrace, dependencies: [ 'get-exception-token.fn', @@ -978,7 +988,10 @@ function abortOnStackTrace( builtinScriptlets.push({ name: 'addEventListener-defuser.js', - aliases: [ 'aeld.js' ], + aliases: [ + 'aeld.js', + 'prevent-addEventListener.js', + ], fn: addEventListenerDefuser, dependencies: [ 'get-extra-args.fn', @@ -1106,7 +1119,9 @@ function evaldataPrune( builtinScriptlets.push({ name: 'nano-setInterval-booster.js', - aliases: [ 'nano-sib.js' ], + aliases: [ + 'nano-sib.js', + ], fn: nanoSetIntervalBooster, dependencies: [ 'pattern-to-regex.fn', @@ -1155,7 +1170,9 @@ function nanoSetIntervalBooster( builtinScriptlets.push({ name: 'nano-setTimeout-booster.js', - aliases: [ 'nano-stb.js' ], + aliases: [ + 'nano-stb.js', + ], fn: nanoSetTimeoutBooster, dependencies: [ 'pattern-to-regex.fn', @@ -1205,6 +1222,9 @@ function nanoSetTimeoutBooster( builtinScriptlets.push({ name: 'noeval-if.js', + aliases: [ + 'prevent-eval-if.js', + ], fn: noEvalIf, dependencies: [ 'pattern-to-regex.fn', @@ -1228,6 +1248,9 @@ function noEvalIf( builtinScriptlets.push({ name: 'no-fetch-if.js', + aliases: [ + 'prevent-fetch.js', + ], fn: noFetchIf, dependencies: [ 'pattern-to-regex.fn', @@ -1330,7 +1353,9 @@ function refreshDefuser( builtinScriptlets.push({ name: 'remove-attr.js', - aliases: [ 'ra.js' ], + aliases: [ + 'ra.js', + ], fn: removeAttr, dependencies: [ 'run-at.fn', @@ -1396,7 +1421,9 @@ function removeAttr( builtinScriptlets.push({ name: 'remove-class.js', - aliases: [ 'rc.js' ], + aliases: [ + 'rc.js', + ], fn: removeClass, dependencies: [ 'run-at.fn', @@ -1460,7 +1487,10 @@ function removeClass( builtinScriptlets.push({ name: 'no-requestAnimationFrame-if.js', - aliases: [ 'norafif.js' ], + aliases: [ + 'norafif.js', + 'prevent-requestAnimationFrame.js', + ], fn: noRequestAnimationFrameIf, dependencies: [ 'pattern-to-regex.fn', @@ -1495,7 +1525,9 @@ function noRequestAnimationFrameIf( builtinScriptlets.push({ name: 'set-constant.js', - 
aliases: [ 'set.js' ], + aliases: [ + 'set.js', + ], fn: setConstant, dependencies: [ 'set-constant-core.fn' @@ -1511,7 +1543,10 @@ function setConstant( builtinScriptlets.push({ name: 'no-setInterval-if.js', - aliases: [ 'nosiif.js' ], + aliases: [ + 'nosiif.js', + 'prevent-setInterval.js', + ], fn: noSetIntervalIf, dependencies: [ 'pattern-to-regex.fn', @@ -1568,7 +1603,11 @@ function noSetIntervalIf( builtinScriptlets.push({ name: 'no-setTimeout-if.js', - aliases: [ 'nostif.js', 'setTimeout-defuser.js' ], + aliases: [ + 'nostif.js', + 'prevent-setTimeout.js', + 'setTimeout-defuser.js', + ], fn: noSetTimeoutIf, dependencies: [ 'pattern-to-regex.fn', @@ -1692,6 +1731,9 @@ function webrtcIf( builtinScriptlets.push({ name: 'no-xhr-if.js', + aliases: [ + 'prevent-xhr.js', + ], fn: noXhrIf, dependencies: [ 'pattern-to-regex.fn', @@ -1765,7 +1807,9 @@ function noXhrIf( builtinScriptlets.push({ name: 'no-window-open-if.js', - aliases: [ 'nowoif.js' ], + aliases: [ + 'nowoif.js', + ], fn: noWindowOpenIf, dependencies: [ 'get-extra-args.fn', @@ -2699,7 +2743,9 @@ function spoofCSS( builtinScriptlets.push({ name: 'remove-node-text.js', - aliases: [ 'rmnt.js' ], + aliases: [ + 'rmnt.js', + ], fn: removeNodeText, world: 'ISOLATED', dependencies: [ @@ -2786,9 +2832,9 @@ function setLocalStorageItem( value = '' ) { if ( key === '' ) { return; } - if ( value === '' ) { return; } const validValues = [ + '', 'undefined', 'null', 'false', 'true', 'yes', 'no', @@ -2805,7 +2851,11 @@ function setLocalStorageItem( } try { - self.localStorage.setItem(key, `${actualValue}`); + if ( actualValue !== undefined ) { + self.localStorage.setItem(key, `${actualValue}`); + } else { + self.localStorage.removeItem(key); + } } catch(ex) { } } @@ -2849,7 +2899,9 @@ function setLocalStorageItem( builtinScriptlets.push({ name: 'replace-node-text.js', requiresTrust: true, - aliases: [ 'rpnt.js', 'sed.js' /* to be removed */ ], + aliases: [ + 'rpnt.js', + ], fn: replaceNodeText, world: 'ISOLATED', 
dependencies: [ @@ -2877,7 +2929,9 @@ function replaceNodeText( builtinScriptlets.push({ name: 'trusted-set-constant.js', requiresTrust: true, - aliases: [ 'trusted-set.js' ], + aliases: [ + 'trusted-set.js', + ], fn: trustedSetConstant, dependencies: [ 'set-constant-core.fn' diff --git a/src/js/background.js b/src/js/background.js index e25beedc9..0e645fa77 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -176,8 +176,8 @@ const µBlock = { // jshint ignore:line // Read-only systemSettings: { - compiledMagic: 55, // Increase when compiled format changes - selfieMagic: 55, // Increase when selfie format changes + compiledMagic: 56, // Increase when compiled format changes + selfieMagic: 56, // Increase when selfie format changes }, // https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501 diff --git a/src/js/reverselookup-worker.js b/src/js/reverselookup-worker.js index 516971a86..0b5ccf5ab 100644 --- a/src/js/reverselookup-worker.js +++ b/src/js/reverselookup-worker.js @@ -237,7 +237,7 @@ const fromExtendedFilter = function(details) { // Scriptlet injection case 32: if ( exception !== ((fargs[2] & 0b001) !== 0) ) { break; } - if ( fargs[3] !== selector ) { break; } + if ( fargs[3] !== details.compiled ) { break; } if ( hostnameMatches(fargs[1]) ) { found = fargs[1] + prefix + selector; } diff --git a/src/js/reverselookup.js b/src/js/reverselookup.js index fedd9cb3a..2d40b5cc1 100644 --- a/src/js/reverselookup.js +++ b/src/js/reverselookup.js @@ -167,6 +167,16 @@ const fromExtendedFilter = async function(details) { const id = messageId++; const hostname = hostnameFromURI(details.url); + const parser = new sfp.AstFilterParser({ + expertMode: true, + nativeCssHas: vAPI.webextFlavor.env.includes('native_css_has'), + }); + parser.parse(details.rawFilter); + let compiled; + if ( parser.isScriptletFilter() ) { + compiled = JSON.stringify(parser.getScripletArgs()); + } + worker.postMessage({ what: 'fromExtendedFilter', id, @@ -182,7 
+192,8 @@ const fromExtendedFilter = async function(details) { 'specifichide', details.url ) === 2, - rawFilter: details.rawFilter + rawFilter: details.rawFilter, + compiled, }); return new Promise(resolve => { diff --git a/src/js/scriptlet-filtering.js b/src/js/scriptlet-filtering.js index 267392e17..a16f4a560 100644 --- a/src/js/scriptlet-filtering.js +++ b/src/js/scriptlet-filtering.js @@ -27,7 +27,6 @@ import µb from './background.js'; import { redirectEngine as reng } from './redirect-engine.js'; import { sessionFirewall } from './filtering-engines.js'; import { StaticExtFilteringHostnameDB } from './static-ext-filtering-db.js'; -import * as sfp from './static-filtering-parser.js'; import { domainFromHostname, @@ -37,11 +36,13 @@ import { /******************************************************************************/ +// Increment when internal representation changes +const VERSION = 1; + const duplicates = new Set(); const scriptletCache = new µb.MRUCache(32); -const reEscapeScriptArg = /[\\'"]/g; -const scriptletDB = new StaticExtFilteringHostnameDB(1); +const scriptletDB = new StaticExtFilteringHostnameDB(1, VERSION); let acceptedCount = 0; let discardedCount = 0; @@ -156,24 +157,8 @@ const isolatedWorldInjector = (( ) => { }; })(); -// TODO: Probably should move this into StaticFilteringParser -// https://github.com/uBlockOrigin/uBlock-issues/issues/1031 -// Normalize scriptlet name to its canonical, unaliased name. 
const normalizeRawFilter = function(parser, sourceIsTrusted = false) { - const root = parser.getBranchFromType(sfp.NODE_TYPE_EXT_PATTERN_SCRIPTLET); - const walker = parser.getWalker(root); - const args = []; - for ( let node = walker.next(); node !== 0; node = walker.next() ) { - switch ( parser.getNodeType(node) ) { - case sfp.NODE_TYPE_EXT_PATTERN_SCRIPTLET_TOKEN: - case sfp.NODE_TYPE_EXT_PATTERN_SCRIPTLET_ARG: - args.push(parser.getNodeString(node)); - break; - default: - break; - } - } - walker.dispose(); + const args = parser.getScripletArgs(); if ( args.length !== 0 ) { let token = `${args[0]}.js`; if ( reng.aliases.has(token) ) { @@ -184,28 +169,17 @@ const normalizeRawFilter = function(parser, sourceIsTrusted = false) { } args[0] = token.slice(0, -3); } - return `+js(${args.join(', ')})`; + return JSON.stringify(args); }; const lookupScriptlet = function(rawToken, mainMap, isolatedMap) { if ( mainMap.has(rawToken) || isolatedMap.has(rawToken) ) { return; } - const pos = rawToken.indexOf(','); - let token, args = ''; - if ( pos === -1 ) { - token = rawToken; - } else { - token = rawToken.slice(0, pos).trim(); - args = rawToken.slice(pos + 1).trim(); - } - if ( reng.aliases.has(token) ) { - token = reng.aliases.get(token); - } else { - token = `${token}.js`; - } + const args = JSON.parse(rawToken); + const token = `${args[0]}.js`; const details = reng.contentFromName(token, 'text/javascript'); if ( details === undefined ) { return; } const targetWorldMap = details.world !== 'ISOLATED' ? mainMap : isolatedMap; - const content = patchScriptlet(details.js, args); + const content = patchScriptlet(details.js, args.slice(1)); const dependencies = details.dependencies || []; while ( dependencies.length !== 0 ) { const token = dependencies.shift(); @@ -227,43 +201,34 @@ const lookupScriptlet = function(rawToken, mainMap, isolatedMap) { }; // Fill-in scriptlet argument placeholders. 
-const patchScriptlet = function(content, args) { +const patchScriptlet = function(content, arglist) { if ( content.startsWith('function') && content.endsWith('}') ) { content = `(${content})({{args}});`; } - if ( args.startsWith('{') && args.endsWith('}') ) { - return content.replace('{{args}}', args); - } - if ( args === '' ) { + if ( arglist.length === 0 ) { return content.replace('{{args}}', ''); } - const arglist = []; - let s = args; - let len = s.length; - let beg = 0, pos = 0; - let i = 1; - while ( beg < len ) { - pos = s.indexOf(',', pos); - // Escaped comma? If so, skip. - if ( pos > 0 && s.charCodeAt(pos - 1) === 0x5C /* '\\' */ ) { - s = s.slice(0, pos - 1) + s.slice(pos); - len -= 1; - continue; + if ( arglist.length === 1 ) { + if ( arglist[0].startsWith('{') && arglist[0].endsWith('}') ) { + return content.replace('{{args}}', arglist[0]); } - if ( pos === -1 ) { pos = len; } - arglist.push(s.slice(beg, pos).trim().replace(reEscapeScriptArg, '\\$&')); - beg = pos = pos + 1; - i++; } for ( let i = 0; i < arglist.length; i++ ) { content = content.replace(`{{${i+1}}}`, arglist[i]); } - return content.replace( - '{{args}}', + return content.replace('{{args}}', arglist.map(a => `'${a}'`).join(', ').replace(/\$/g, '$$$') ); }; +const decompile = function(json) { + const args = JSON.parse(json).map(s => s.replace(/,/g, '\\,')); + if ( args.length === 0 ) { return '+js()'; } + return `+js(${args.join(', ')})`; +}; + +/******************************************************************************/ + scriptletFilteringEngine.logFilters = function(tabId, url, filters) { if ( typeof filters !== 'string' ) { return; } const fctxt = µb.filteringContext @@ -303,7 +268,7 @@ scriptletFilteringEngine.compile = function(parser, writer) { if ( normalized === undefined ) { return; } // Tokenless is meaningful only for exception filters. 
- if ( normalized === '+js()' && isException === false ) { return; } + if ( normalized === '[]' && isException === false ) { return; } if ( parser.hasOptions() === false ) { if ( isException ) { @@ -329,11 +294,6 @@ scriptletFilteringEngine.compile = function(parser, writer) { } }; -// 01234567890123456789 -// +js(token[, arg[, ...]]) -// ^ ^ -// 4 -1 - scriptletFilteringEngine.fromCompiledContent = function(reader) { reader.select('SCRIPTLET_FILTERS'); @@ -347,7 +307,7 @@ scriptletFilteringEngine.fromCompiledContent = function(reader) { duplicates.add(fingerprint); const args = reader.args(); if ( args.length < 4 ) { continue; } - scriptletDB.store(args[1], args[2], args[3].slice(4, -1)); + scriptletDB.store(args[1], args[2], args[3]); } }; @@ -387,7 +347,7 @@ scriptletFilteringEngine.retrieve = function(request) { if ( $scriptlets.size === 0 ) { return; } // Wholly disable scriptlet injection? - if ( $exceptions.has('') ) { + if ( $exceptions.has('[]') ) { return { filters: [ { tabId: request.tabId, url: request.url, filter: '#@#+js()' } @@ -417,8 +377,8 @@ scriptletFilteringEngine.retrieve = function(request) { mainWorld: mainWorldCode.join('\n\n'), isolatedWorld: isolatedWorldCode.join('\n\n'), filters: [ - ...Array.from($scriptlets).map(s => `##+js(${s})`), - ...Array.from($exceptions).map(s => `#@#+js(${s})`), + ...Array.from($scriptlets).map(s => `##${decompile(s)}`), + ...Array.from($exceptions).map(s => `#@#${decompile(s)}`), ].join('\n'), }; scriptletCache.add(hostname, cacheDetails); @@ -519,7 +479,10 @@ scriptletFilteringEngine.toSelfie = function() { }; scriptletFilteringEngine.fromSelfie = function(selfie) { + if ( selfie instanceof Object === false ) { return false; } + if ( selfie.version !== VERSION ) { return false; } scriptletDB.fromSelfie(selfie); + return true; }; /******************************************************************************/ diff --git a/src/js/static-ext-filtering-db.js b/src/js/static-ext-filtering-db.js index 
07638b96d..7f6cbd20e 100644 --- a/src/js/static-ext-filtering-db.js +++ b/src/js/static-ext-filtering-db.js @@ -24,7 +24,8 @@ /******************************************************************************/ const StaticExtFilteringHostnameDB = class { - constructor(nBits, selfie = undefined) { + constructor(nBits, version = 0) { + this.version = version; this.nBits = nBits; this.strToIdMap = new Map(); this.hostnameToSlotIdMap = new Map(); @@ -35,9 +36,6 @@ const StaticExtFilteringHostnameDB = class { // Array of strings (selectors and pseudo-selectors) this.strSlots = []; this.size = 0; - if ( selfie !== undefined ) { - this.fromSelfie(selfie); - } this.cleanupTimer = vAPI.defer.create(( ) => { this.strToIdMap.clear(); }); @@ -142,6 +140,7 @@ const StaticExtFilteringHostnameDB = class { toSelfie() { return { + version: this.version, hostnameToSlotIdMap: Array.from(this.hostnameToSlotIdMap), regexToSlotIdMap: Array.from(this.regexToSlotIdMap), hostnameSlots: this.hostnameSlots, diff --git a/src/js/static-ext-filtering.js b/src/js/static-ext-filtering.js index 03c13cb39..b435d0d82 100644 --- a/src/js/static-ext-filtering.js +++ b/src/js/static-ext-filtering.js @@ -168,9 +168,11 @@ staticExtFilteringEngine.fromSelfie = function(path) { } if ( selfie instanceof Object === false ) { return false; } cosmeticFilteringEngine.fromSelfie(selfie.cosmetic); - scriptletFilteringEngine.fromSelfie(selfie.scriptlets); httpheaderFilteringEngine.fromSelfie(selfie.httpHeaders); htmlFilteringEngine.fromSelfie(selfie.html); + if ( scriptletFilteringEngine.fromSelfie(selfie.scriptlets) === false ) { + return false; + } return true; }); }; diff --git a/src/js/static-filtering-parser.js b/src/js/static-filtering-parser.js index 16c05334e..7d3e0d529 100644 --- a/src/js/static-filtering-parser.js +++ b/src/js/static-filtering-parser.js @@ -85,6 +85,7 @@ export const AST_FLAG_HAS_ERROR = 1 << iota++; export const AST_FLAG_IS_EXCEPTION = 1 << iota++; export const AST_FLAG_EXT_STRONG = 1 << 
iota++; export const AST_FLAG_EXT_STYLE = 1 << iota++; +export const AST_FLAG_EXT_SCRIPTLET_ADG = 1 << iota++; export const AST_FLAG_NET_PATTERN_LEFT_HNANCHOR = 1 << iota++; export const AST_FLAG_NET_PATTERN_RIGHT_PATHANCHOR = 1 << iota++; export const AST_FLAG_NET_PATTERN_LEFT_ANCHOR = 1 << iota++; @@ -793,6 +794,10 @@ export class AstFilterParser { // TODO: mind maxTokenLength this.reGoodRegexToken = /[^\x01%0-9A-Za-z][%0-9A-Za-z]{7,}|[^\x01%0-9A-Za-z][%0-9A-Za-z]{1,6}[^\x01%0-9A-Za-z]/; this.reBadCSP = /(?:=|;)\s*report-(?:to|uri)\b/; + this.reOddTrailingEscape = /(?:^|[^\\])(?:\\\\)*\\$/; + this.reUnescapeCommas = /((?:^|[^\\])(?:\\\\)*)\\,/g; + this.reUnescapeSingleQuotes = /((?:^|[^\\])(?:\\\\)*)\\'/g; + this.reUnescapeDoubleQuotes = /((?:^|[^\\])(?:\\\\)*)\\"/g; } parse(raw) { @@ -2070,7 +2075,13 @@ export class AstFilterParser { parentEnd ); this.addNodeToRegister(NODE_TYPE_EXT_PATTERN_RAW, next); - this.linkDown(next, this.parseExtPattern(next)); + const down = this.parseExtPattern(next); + if ( down !== 0 ) { + this.linkDown(next, down); + } else { + this.addNodeFlags(next, NODE_FLAG_ERROR); + this.addFlags(AST_FLAG_HAS_ERROR); + } this.linkRight(prev, next); this.validateExt(); return this.throwHeadNode(head); @@ -2079,6 +2090,7 @@ export class AstFilterParser { extFlagsFromAnchor(anchorBeg) { let c = this.charCodeAt(anchorBeg+1) ; if ( c === 0x23 /* # */ ) { return 0; } + if ( c === 0x25 /* % */ ) { return AST_FLAG_EXT_SCRIPTLET_ADG; } if ( c === 0x3F /* ? */ ) { return AST_FLAG_EXT_STRONG; } if ( c === 0x24 /* $ */ ) { c = this.charCodeAt(anchorBeg+2); @@ -2123,15 +2135,24 @@ export class AstFilterParser { parseExtPattern(parent) { const c = this.charCodeAt(this.nodes[parent+NODE_BEG_INDEX]); // ##+js(...) 
- if ( c === 0x2B /* '+' */ ) { + if ( c === 0x2B /* + */ ) { const s = this.getNodeString(parent); if ( /^\+js\(.*\)$/.exec(s) !== null ) { this.astTypeFlavor = AST_TYPE_EXTENDED_SCRIPTLET; return this.parseExtPatternScriptlet(parent); } } + // #%#//scriptlet(...) + if ( this.getFlags(AST_FLAG_EXT_SCRIPTLET_ADG) ) { + const s = this.getNodeString(parent); + if ( /^\/\/scriptlet\(.*\)$/.exec(s) !== null ) { + this.astTypeFlavor = AST_TYPE_EXTENDED_SCRIPTLET; + return this.parseExtPatternScriptlet(parent); + } + return 0; + } // ##^... | ##^responseheader(...) - if ( c === 0x5E /* '^' */ ) { + if ( c === 0x5E /* ^ */ ) { const s = this.getNodeString(parent); if ( this.reResponseheaderPattern.test(s) ) { this.astTypeFlavor = AST_TYPE_EXTENDED_RESPONSEHEADER; @@ -2149,24 +2170,14 @@ export class AstFilterParser { const beg = this.nodes[parent+NODE_BEG_INDEX]; const end = this.nodes[parent+NODE_END_INDEX]; const s = this.getNodeString(parent); - const rawArg0 = beg + 4; + const rawArg0 = beg + (s.startsWith('+js') ? 
4 : 12); const rawArg1 = end - 1; const head = this.allocTypedNode(NODE_TYPE_EXT_DECORATION, beg, rawArg0); let prev = head, next = 0; - const trimmedArg0 = rawArg0 + this.leftWhitespaceCount(s); - const trimmedArg1 = rawArg1 - this.rightWhitespaceCount(s); - if ( trimmedArg0 !== rawArg0 ) { - next = this.allocTypedNode(NODE_TYPE_WHITESPACE, rawArg0, trimmedArg0); - prev = this.linkRight(prev, next); - } - next = this.allocTypedNode(NODE_TYPE_EXT_PATTERN_SCRIPTLET, trimmedArg0, trimmedArg1); + next = this.allocTypedNode(NODE_TYPE_EXT_PATTERN_SCRIPTLET, rawArg0, rawArg1); this.addNodeToRegister(NODE_TYPE_EXT_PATTERN_SCRIPTLET, next); this.linkDown(next, this.parseExtPatternScriptletArgs(next)); prev = this.linkRight(prev, next); - if ( trimmedArg1 !== rawArg1 ) { - next = this.allocTypedNode(NODE_TYPE_WHITESPACE, trimmedArg1, rawArg1); - prev = this.linkRight(prev, next); - } next = this.allocTypedNode(NODE_TYPE_EXT_DECORATION, rawArg1, end); this.linkRight(prev, next); return head; @@ -2181,65 +2192,51 @@ export class AstFilterParser { const s = this.getNodeString(parent); const argsEnd = s.length; // token - let argEnd = this.indexOfNextScriptletArgSeparator(s, 0); - let rawArg = s.slice(0, argEnd); - let argBodyBeg = this.leftWhitespaceCount(rawArg); - if ( argBodyBeg !== 0 ) { + const details = this.parseExtPatternScriptletArg(s, 0); + if ( details.argBeg > 0 ) { next = this.allocTypedNode( NODE_TYPE_EXT_DECORATION, parentBeg, - parentBeg + argBodyBeg + parentBeg + details.argBeg ); prev = this.linkRight(prev, next); } - let argBodyEnd = argEnd - this.rightWhitespaceCount(rawArg); - rawArg = s.slice(argBodyBeg, argBodyEnd); - const tokenEnd = rawArg.endsWith('.js') - ? argBodyEnd - 3 - : argBodyEnd; + const token = s.slice(details.argBeg, details.argEnd); + const tokenEnd = details.argEnd - (token.endsWith('.js') ? 
3 : 0); next = this.allocTypedNode( NODE_TYPE_EXT_PATTERN_SCRIPTLET_TOKEN, - parentBeg + argBodyBeg, + parentBeg + details.argBeg, parentBeg + tokenEnd ); + if ( details.failed ) { + this.addNodeFlags(next, NODE_FLAG_ERROR); + this.addFlags(AST_FLAG_HAS_ERROR); + } prev = this.linkRight(prev, next); - // ignore pointless `.js` - if ( tokenEnd !== argBodyEnd ) { + if ( tokenEnd < details.argEnd ) { next = this.allocTypedNode( NODE_TYPE_IGNORE, - parentBeg + argBodyEnd - 3, - parentBeg + argBodyEnd + parentBeg + tokenEnd, + parentBeg + details.argEnd + ); + prev = this.linkRight(prev, next); + } + if ( details.quoteEnd < argsEnd ) { + next = this.allocTypedNode( + NODE_TYPE_EXT_DECORATION, + parentBeg + details.argEnd, + parentBeg + details.separatorEnd ); prev = this.linkRight(prev, next); } // all args - argBodyBeg = argEnd + 1; - const rawArgs = s.slice(argBodyBeg, argsEnd); - argBodyBeg += this.leftWhitespaceCount(rawArgs); next = this.allocTypedNode( - NODE_TYPE_EXT_DECORATION, - parentBeg + argBodyEnd, - parentBeg + argBodyBeg + NODE_TYPE_EXT_PATTERN_SCRIPTLET_ARGS, + parentBeg + details.separatorEnd, + parentBeg + argsEnd ); + this.linkDown(next, this.parseExtPatternScriptletArglist(next)); prev = this.linkRight(prev, next); - argBodyEnd = argsEnd - this.rightWhitespaceCount(rawArgs); - if ( argBodyBeg !== argBodyEnd ) { - next = this.allocTypedNode( - NODE_TYPE_EXT_PATTERN_SCRIPTLET_ARGS, - parentBeg + argBodyBeg, - parentBeg + argBodyEnd - ); - this.linkDown(next, this.parseExtPatternScriptletArglist(next)); - prev = this.linkRight(prev, next); - } - if ( argBodyEnd !== argsEnd ) { - next = this.allocTypedNode( - NODE_TYPE_EXT_DECORATION, - parentBeg + argBodyEnd, - parentBeg + argsEnd - ); - prev = this.linkRight(prev, next); - } return this.throwHeadNode(head); } @@ -2257,7 +2254,7 @@ export class AstFilterParser { parentBeg, parentEnd ); - try { + try { void JSON.parse(s); } catch(ex) { this.addNodeFlags(next, NODE_FLAG_ERROR); @@ -2269,48 +2266,131 @@ 
export class AstFilterParser { const head = this.allocHeadNode(); const argsEnd = s.length; let prev = head; - let argBodyBeg = 0, argBodyEnd = 0, argEnd = 0; - let t = ''; - while ( argBodyBeg < argsEnd ) { - argEnd = this.indexOfNextScriptletArgSeparator(s, argBodyBeg); - t = s.slice(argBodyBeg, argEnd); - argBodyEnd = argEnd - this.rightWhitespaceCount(t); - next = this.allocTypedNode( - NODE_TYPE_EXT_PATTERN_SCRIPTLET_ARG, - parentBeg + argBodyBeg, - parentBeg + argBodyEnd - ); - prev = this.linkRight(prev, next); - if ( argEnd === argsEnd ) { break; } - t = s.slice(argEnd + 1); - argBodyBeg = argEnd + 1 + this.leftWhitespaceCount(t); - if ( argBodyEnd !== argBodyBeg ) { + let decorationBeg = 0; + let i = 0; + for (;;) { + const details = this.parseExtPatternScriptletArg(s, i); + if ( decorationBeg < details.argBeg ) { next = this.allocTypedNode( NODE_TYPE_EXT_DECORATION, - parentBeg + argBodyEnd, - parentBeg + argBodyBeg + parentBeg + decorationBeg, + parentBeg + details.argBeg ); prev = this.linkRight(prev, next); } + if ( i === argsEnd ) { break; } + next = this.allocTypedNode( + NODE_TYPE_EXT_PATTERN_SCRIPTLET_ARG, + parentBeg + details.argBeg, + parentBeg + details.argEnd + ); + if ( details.transform ) { + this.setNodeTransform(next, this.normalizeScriptletArg( + s.slice(details.argBeg, details.argEnd), + details.separatorCode + )); + } + prev = this.linkRight(prev, next); + if ( details.failed ) { + this.addNodeFlags(next, NODE_FLAG_ERROR); + this.addFlags(AST_FLAG_HAS_ERROR); + } + decorationBeg = details.argEnd; + i = details.separatorEnd; } return this.throwHeadNode(head); } - indexOfNextScriptletArgSeparator(pattern, beg = 0) { - const patternEnd = pattern.length; - if ( beg >= patternEnd ) { return patternEnd; } - const nextComma = pattern.indexOf(',', beg); - if ( nextComma === -1 ) { return patternEnd; } - // An odd number of backslashes immediately before the comma means - // it's being escaped - let backslashCount = 0; - for ( let i = nextComma; 
i > beg; i-- ) { - if ( pattern.charCodeAt(i-1) !== 0x5C /* \ */ ) { break; } - backslashCount += 1; + parseExtPatternScriptletArg(pattern, beg = 0) { + if ( this.parseExtPatternScriptletArg.details === undefined ) { + this.parseExtPatternScriptletArg.details = { + quoteBeg: 0, argBeg: 0, argEnd: 0, quoteEnd: 0, + separatorCode: 0, separatorBeg: 0, separatorEnd: 0, + transform: false, failed: false, + }; } - return (backslashCount & 1) === 0 - ? nextComma - : this.indexOfNextScriptletArgSeparator(pattern, nextComma + 1); + const details = this.parseExtPatternScriptletArg.details; + const len = pattern.length; + details.quoteBeg = beg + this.leftWhitespaceCount(pattern.slice(beg)); + details.failed = false; + const qc = pattern.charCodeAt(details.quoteBeg); + if ( qc === 0x22 /* " */ || qc === 0x27 /* ' */ ) { + details.separatorCode = qc; + details.argBeg = details.argEnd = details.quoteBeg + 1; + details.transform = false; + this.indexOfNextScriptletArgSeparator(pattern, details); + if ( details.argEnd !== len ) { + details.quoteEnd = details.argEnd + 1; + details.separatorBeg = details.separatorEnd = details.quoteEnd; + details.separatorEnd += this.leftWhitespaceCount(pattern.slice(details.quoteEnd)); + if ( details.separatorEnd === len ) { return details; } + if ( pattern.charCodeAt(details.separatorEnd) === 0x2C ) { + details.separatorEnd += 1; + return details; + } + } + } + details.separatorCode = 0x2C /* , */; + details.argBeg = details.argEnd = details.quoteBeg; + details.transform = false; + this.indexOfNextScriptletArgSeparator(pattern, details); + details.separatorBeg = details.separatorEnd = details.argEnd; + if ( details.separatorBeg < len ) { + details.separatorEnd += 1; + } + details.argEnd -= this.rightWhitespaceCount(pattern.slice(0, details.separatorBeg)); + details.quoteEnd = details.argEnd; + if ( this.getFlags(AST_FLAG_EXT_SCRIPTLET_ADG) ) { + details.failed = true; + } + return details; + } + + indexOfNextScriptletArgSeparator(pattern, 
details) { + const separatorChar = String.fromCharCode(details.separatorCode); + while ( details.argEnd < pattern.length ) { + const pos = pattern.indexOf(separatorChar, details.argEnd); + if ( pos === -1 ) { + return (details.argEnd = pattern.length); + } + if ( this.reOddTrailingEscape.test(pattern.slice(0, pos)) === false ) { + return (details.argEnd = pos); + } + details.transform = true; + details.argEnd = pos + 1; + } + } + + normalizeScriptletArg(arg, separatorCode) { + if ( separatorCode === 0x22 /* " */ ) { + if ( arg.includes('"') === false ) { return; } + return arg.replace(this.reUnescapeDoubleQuotes, '$1"'); + } + if ( separatorCode === 0x27 /* ' */ ) { + if ( arg.includes("'") === false ) { return; } + return arg.replace(this.reUnescapeSingleQuotes, "$1'"); + } + if ( arg.includes(',') === false ) { return; } + return arg.replace(this.reUnescapeCommas, '$1,'); + } + + getScripletArgs() { + const args = []; + if ( this.isScriptletFilter() === false ) { return args; } + const root = this.getBranchFromType(NODE_TYPE_EXT_PATTERN_SCRIPTLET); + const walker = this.getWalker(root); + for ( let node = walker.next(); node !== 0; node = walker.next() ) { + switch ( this.getNodeType(node) ) { + case NODE_TYPE_EXT_PATTERN_SCRIPTLET_TOKEN: + case NODE_TYPE_EXT_PATTERN_SCRIPTLET_ARG: + args.push(this.getNodeTransform(node)); + break; + default: + break; + } + } + walker.dispose(); + return args; } parseExtPatternResponseheader(parent) {