From 027c7a4fb5b74207b7dedcbc83f4c70527ea5566 Mon Sep 17 00:00:00 2001 From: Raymond Hill Date: Mon, 16 Oct 2023 17:38:57 -0400 Subject: [PATCH] Support quoting scriptlet parameters with backticks (In addition to in already supported single- and double-quote). The parsing of (optionally) quoted arguments from an argument list has been spinned off into a standalone helper in order to be reused in other parts of the parser eventually. --- src/js/static-filtering-parser.js | 192 +++++++++++++++++------------- 1 file changed, 109 insertions(+), 83 deletions(-) diff --git a/src/js/static-filtering-parser.js b/src/js/static-filtering-parser.js index 614125861..ad84da5d2 100644 --- a/src/js/static-filtering-parser.js +++ b/src/js/static-filtering-parser.js @@ -597,6 +597,106 @@ const exCharCodeAt = (s, i) => { /******************************************************************************/ +class argListParser { + constructor(separator = 0x2C /* , */, mustQuote = false) { + this.separatorCode = separator.charCodeAt(0); + this.mustQuote = mustQuote; + this.quoteBeg = 0; + this.argBeg = 0; + this.argEnd = 0; + this.quoteEnd = 0; + this.actualSeparatorCode = 0; + this.separatorBeg = 0; + this.separatorEnd = 0; + this.transform = false; + this.failed = false; + this.reWhitespaceStart = /^\s+/; + this.reWhitespaceEnd = /\s+$/; + this.reOddTrailingEscape = /(?:^|[^\\])(?:\\\\)*\\$/; + this.reUnescapeDoubleQuotes = /((?:^|[^\\])(?:\\\\)*)\\"/g; + this.reUnescapeSingleQuotes = /((?:^|[^\\])(?:\\\\)*)\\'/g; + this.reUnescapeBackticks = /((?:^|[^\\])(?:\\\\)*)\\`/g; + this.reUnescapeCommas = /((?:^|[^\\])(?:\\\\)*)\\,/g; + } + nextArg(pattern, beg = 0) { + const len = pattern.length; + this.quoteBeg = beg + this.leftWhitespaceCount(pattern.slice(beg)); + this.failed = false; + const qc = pattern.charCodeAt(this.quoteBeg); + if ( qc === 0x22 /* " */ || qc === 0x27 /* ' */ || qc === 0x60 /* ` */ ) { + this.actualSeparatorCode = qc; + this.argBeg = this.argEnd = this.quoteBeg + 1; + this.transform = false; + this.indexOfNextScriptletArgSeparator(pattern); + if ( this.argEnd !== len ) { + this.quoteEnd = this.argEnd + 1; + this.separatorBeg = this.separatorEnd = this.quoteEnd; + this.separatorEnd += this.leftWhitespaceCount(pattern.slice(this.quoteEnd)); + if ( this.separatorEnd === len ) { return this; } + if ( pattern.charCodeAt(this.separatorEnd) === this.separatorCode ) { + this.separatorEnd += 1; + return this; + } + } + } + this.actualSeparatorCode = this.separatorCode; + this.argBeg = this.argEnd = this.quoteBeg; + this.transform = false; + this.indexOfNextScriptletArgSeparator(pattern); + this.separatorBeg = this.separatorEnd = this.argEnd; + if ( this.separatorBeg < len ) { + this.separatorEnd += 1; + } + this.argEnd -= this.rightWhitespaceCount(pattern.slice(0, this.separatorBeg)); + this.quoteEnd = this.argEnd; + if ( this.mustQuote ) { + this.failed = true; + } + return this; + } + normalizeArg(s) { + switch ( this.actualSeparatorCode ) { + case 0x22 /* " */: + if ( s.includes('"') === false ) { return; } + return s.replace(this.reUnescapeDoubleQuotes, '$1"'); + case 0x27 /* ' */: + if ( s.includes("'") === false ) { return; } + return s.replace(this.reUnescapeSingleQuotes, "$1'"); + case 0x60 /* ` */: + if ( s.includes('`') === false ) { return; } + return s.replace(this.reUnescapeBackticks, "$1`"); + default: + break; + } + if ( s.includes(',') === false ) { return; } + return s.replace(this.reUnescapeCommas, '$1,'); + } + leftWhitespaceCount(s) { + const match = this.reWhitespaceStart.exec(s); + return match === null ? 0 : match[0].length; + } + rightWhitespaceCount(s) { + const match = this.reWhitespaceEnd.exec(s); + return match === null ? 0 : match[0].length; + } + indexOfNextScriptletArgSeparator(pattern) { + const separatorChar = String.fromCharCode(this.actualSeparatorCode); + while ( this.argEnd < pattern.length ) { + const pos = pattern.indexOf(separatorChar, this.argEnd); + if ( pos === -1 ) { + return (this.argEnd = pattern.length); + } + if ( this.reOddTrailingEscape.test(pattern.slice(0, pos)) === false ) { + return (this.argEnd = pos); + } + this.transform = true; + this.argEnd = pos + 1; + } + } +} + +/******************************************************************************/ + class AstWalker { constructor(parser, from = 0) { this.parser = parser; @@ -797,11 +897,8 @@ export class AstFilterParser { this.rePatternScriptletJsonArgs = /^\{.*\}$/; this.reGoodRegexToken = /[^\x01%0-9A-Za-z][%0-9A-Za-z]{7,}|[^\x01%0-9A-Za-z][%0-9A-Za-z]{1,6}[^\x01%0-9A-Za-z]/; this.reBadCSP = /(?:=|;)\s*report-(?:to|uri)\b/; - this.reOddTrailingEscape = /(?:^|[^\\])(?:\\\\)*\\$/; - this.reUnescapeCommas = /((?:^|[^\\])(?:\\\\)*)\\,/g; - this.reUnescapeSingleQuotes = /((?:^|[^\\])(?:\\\\)*)\\'/g; - this.reUnescapeDoubleQuotes = /((?:^|[^\\])(?:\\\\)*)\\"/g; this.reNoopOption = /^_+$/; + this.scriptletArgListParser = new argListParser(','); } parse(raw) { @@ -2224,7 +2321,9 @@ export class AstFilterParser { const s = this.getNodeString(parent); const argsEnd = s.length; // token - const details = this.parseExtPatternScriptletArg(s, 0); + this.scriptletArgListParser.mustQuote = + this.getFlags(AST_FLAG_EXT_SCRIPTLET_ADG) !== 0; + const details = this.scriptletArgListParser.nextArg(s, 0); if ( details.argBeg > 0 ) { next = this.allocTypedNode( NODE_TYPE_EXT_DECORATION, @@ -2283,7 +2382,7 @@ export class AstFilterParser { let decorationBeg = 0; let i = 0; for (;;) { - const details = this.parseExtPatternScriptletArg(s, i); + const details = this.scriptletArgListParser.nextArg(s, i); if ( decorationBeg < details.argBeg ) { next = this.allocTypedNode( NODE_TYPE_EXT_DECORATION, @@ -2299,10 +2398,10 @@ export class AstFilterParser { parentBeg + details.argEnd ); if ( details.transform ) { - this.setNodeTransform(next, this.normalizeScriptletArg( - s.slice(details.argBeg, details.argEnd), - details.separatorCode - )); + const arg = s.slice(details.argBeg, details.argEnd); + this.setNodeTransform(next, + this.scriptletArgListParser.normalizeArg(arg) + ); } prev = this.linkRight(prev, next); if ( details.failed ) { @@ -2315,79 +2414,6 @@ export class AstFilterParser { return this.throwHeadNode(head); } - parseExtPatternScriptletArg(pattern, beg = 0) { - if ( this.parseExtPatternScriptletArg.details === undefined ) { - this.parseExtPatternScriptletArg.details = { - quoteBeg: 0, argBeg: 0, argEnd: 0, quoteEnd: 0, - separatorCode: 0, separatorBeg: 0, separatorEnd: 0, - transform: false, failed: false, - }; - } - const details = this.parseExtPatternScriptletArg.details; - const len = pattern.length; - details.quoteBeg = beg + this.leftWhitespaceCount(pattern.slice(beg)); - details.failed = false; - const qc = pattern.charCodeAt(details.quoteBeg); - if ( qc === 0x22 /* " */ || qc === 0x27 /* ' */ ) { - details.separatorCode = qc; - details.argBeg = details.argEnd = details.quoteBeg + 1; - details.transform = false; - this.indexOfNextScriptletArgSeparator(pattern, details); - if ( details.argEnd !== len ) { - details.quoteEnd = details.argEnd + 1; - details.separatorBeg = details.separatorEnd = details.quoteEnd; - details.separatorEnd += this.leftWhitespaceCount(pattern.slice(details.quoteEnd)); - if ( details.separatorEnd === len ) { return details; } - if ( pattern.charCodeAt(details.separatorEnd) === 0x2C ) { - details.separatorEnd += 1; - return details; - } - } - } - details.separatorCode = 0x2C /* , */; - details.argBeg = details.argEnd = details.quoteBeg; - details.transform = false; - this.indexOfNextScriptletArgSeparator(pattern, details); - details.separatorBeg = details.separatorEnd = details.argEnd; - if ( details.separatorBeg < len ) { - details.separatorEnd += 1; - } - details.argEnd -= this.rightWhitespaceCount(pattern.slice(0, details.separatorBeg)); - details.quoteEnd = details.argEnd; - if ( this.getFlags(AST_FLAG_EXT_SCRIPTLET_ADG) ) { - details.failed = true; - } - return details; - } - - indexOfNextScriptletArgSeparator(pattern, details) { - const separatorChar = String.fromCharCode(details.separatorCode); - while ( details.argEnd < pattern.length ) { - const pos = pattern.indexOf(separatorChar, details.argEnd); - if ( pos === -1 ) { - return (details.argEnd = pattern.length); - } - if ( this.reOddTrailingEscape.test(pattern.slice(0, pos)) === false ) { - return (details.argEnd = pos); - } - details.transform = true; - details.argEnd = pos + 1; - } - } - - normalizeScriptletArg(arg, separatorCode) { - if ( separatorCode === 0x22 /* " */ ) { - if ( arg.includes('"') === false ) { return; } - return arg.replace(this.reUnescapeDoubleQuotes, '$1"'); - } - if ( separatorCode === 0x27 /* ' */ ) { - if ( arg.includes("'") === false ) { return; } - return arg.replace(this.reUnescapeSingleQuotes, "$1'"); - } - if ( arg.includes(',') === false ) { return; } - return arg.replace(this.reUnescapeCommas, '$1,'); - } - getScriptletArgs() { const args = []; if ( this.isScriptletFilter() === false ) { return args; }