Support quoting scriptlet parameters with backticks

(In addition to in already supported single- and double-quote).

The parsing of (optionally) quoted arguments from an argument
list has been spinned off into a standalone helper in order to
be reused in other parts of the parser eventually.
This commit is contained in:
Raymond Hill 2023-10-16 17:38:57 -04:00
parent 1e4818a6e3
commit 027c7a4fb5
No known key found for this signature in database
GPG Key ID: 25E1490B761470C2
1 changed files with 109 additions and 83 deletions

View File

@ -597,6 +597,106 @@ const exCharCodeAt = (s, i) => {
/******************************************************************************/
class argListParser {
constructor(separator = 0x2C /* , */, mustQuote = false) {
this.separatorCode = separator.charCodeAt(0);
this.mustQuote = mustQuote;
this.quoteBeg = 0;
this.argBeg = 0;
this.argEnd = 0;
this.quoteEnd = 0;
this.actualSeparatorCode = 0;
this.separatorBeg = 0;
this.separatorEnd = 0;
this.transform = false;
this.failed = false;
this.reWhitespaceStart = /^\s+/;
this.reWhitespaceEnd = /\s+$/;
this.reOddTrailingEscape = /(?:^|[^\\])(?:\\\\)*\\$/;
this.reUnescapeDoubleQuotes = /((?:^|[^\\])(?:\\\\)*)\\"/g;
this.reUnescapeSingleQuotes = /((?:^|[^\\])(?:\\\\)*)\\'/g;
this.reUnescapeBackticks = /((?:^|[^\\])(?:\\\\)*)\\`/g;
this.reUnescapeCommas = /((?:^|[^\\])(?:\\\\)*)\\,/g;
}
nextArg(pattern, beg = 0) {
const len = pattern.length;
this.quoteBeg = beg + this.leftWhitespaceCount(pattern.slice(beg));
this.failed = false;
const qc = pattern.charCodeAt(this.quoteBeg);
if ( qc === 0x22 /* " */ || qc === 0x27 /* ' */ || qc === 0x60 /* ` */ ) {
this.actualSeparatorCode = qc;
this.argBeg = this.argEnd = this.quoteBeg + 1;
this.transform = false;
this.indexOfNextScriptletArgSeparator(pattern);
if ( this.argEnd !== len ) {
this.quoteEnd = this.argEnd + 1;
this.separatorBeg = this.separatorEnd = this.quoteEnd;
this.separatorEnd += this.leftWhitespaceCount(pattern.slice(this.quoteEnd));
if ( this.separatorEnd === len ) { return this; }
if ( pattern.charCodeAt(this.separatorEnd) === this.separatorCode ) {
this.separatorEnd += 1;
return this;
}
}
}
this.actualSeparatorCode = this.separatorCode;
this.argBeg = this.argEnd = this.quoteBeg;
this.transform = false;
this.indexOfNextScriptletArgSeparator(pattern);
this.separatorBeg = this.separatorEnd = this.argEnd;
if ( this.separatorBeg < len ) {
this.separatorEnd += 1;
}
this.argEnd -= this.rightWhitespaceCount(pattern.slice(0, this.separatorBeg));
this.quoteEnd = this.argEnd;
if ( this.mustQuote ) {
this.failed = true;
}
return this;
}
normalizeArg(s) {
switch ( this.actualSeparatorCode ) {
case 0x22 /* " */:
if ( s.includes('"') === false ) { return; }
return s.replace(this.reUnescapeDoubleQuotes, '$1"');
case 0x27 /* ' */:
if ( s.includes("'") === false ) { return; }
return s.replace(this.reUnescapeSingleQuotes, "$1'");
case 0x60 /* ` */:
if ( s.includes('`') === false ) { return; }
return s.replace(this.reUnescapeBackticks, "$1`");
default:
break;
}
if ( s.includes(',') === false ) { return; }
return s.replace(this.reUnescapeCommas, '$1,');
}
leftWhitespaceCount(s) {
const match = this.reWhitespaceStart.exec(s);
return match === null ? 0 : match[0].length;
}
rightWhitespaceCount(s) {
const match = this.reWhitespaceEnd.exec(s);
return match === null ? 0 : match[0].length;
}
indexOfNextScriptletArgSeparator(pattern) {
const separatorChar = String.fromCharCode(this.actualSeparatorCode);
while ( this.argEnd < pattern.length ) {
const pos = pattern.indexOf(separatorChar, this.argEnd);
if ( pos === -1 ) {
return (this.argEnd = pattern.length);
}
if ( this.reOddTrailingEscape.test(pattern.slice(0, pos)) === false ) {
return (this.argEnd = pos);
}
this.transform = true;
this.argEnd = pos + 1;
}
}
}
/******************************************************************************/
class AstWalker {
constructor(parser, from = 0) {
this.parser = parser;
@ -797,11 +897,8 @@ export class AstFilterParser {
this.rePatternScriptletJsonArgs = /^\{.*\}$/;
this.reGoodRegexToken = /[^\x01%0-9A-Za-z][%0-9A-Za-z]{7,}|[^\x01%0-9A-Za-z][%0-9A-Za-z]{1,6}[^\x01%0-9A-Za-z]/;
this.reBadCSP = /(?:=|;)\s*report-(?:to|uri)\b/;
this.reOddTrailingEscape = /(?:^|[^\\])(?:\\\\)*\\$/;
this.reUnescapeCommas = /((?:^|[^\\])(?:\\\\)*)\\,/g;
this.reUnescapeSingleQuotes = /((?:^|[^\\])(?:\\\\)*)\\'/g;
this.reUnescapeDoubleQuotes = /((?:^|[^\\])(?:\\\\)*)\\"/g;
this.reNoopOption = /^_+$/;
this.scriptletArgListParser = new argListParser(',');
}
parse(raw) {
@ -2224,7 +2321,9 @@ export class AstFilterParser {
const s = this.getNodeString(parent);
const argsEnd = s.length;
// token
const details = this.parseExtPatternScriptletArg(s, 0);
this.scriptletArgListParser.mustQuote =
this.getFlags(AST_FLAG_EXT_SCRIPTLET_ADG) !== 0;
const details = this.scriptletArgListParser.nextArg(s, 0);
if ( details.argBeg > 0 ) {
next = this.allocTypedNode(
NODE_TYPE_EXT_DECORATION,
@ -2283,7 +2382,7 @@ export class AstFilterParser {
let decorationBeg = 0;
let i = 0;
for (;;) {
const details = this.parseExtPatternScriptletArg(s, i);
const details = this.scriptletArgListParser.nextArg(s, i);
if ( decorationBeg < details.argBeg ) {
next = this.allocTypedNode(
NODE_TYPE_EXT_DECORATION,
@ -2299,10 +2398,10 @@ export class AstFilterParser {
parentBeg + details.argEnd
);
if ( details.transform ) {
this.setNodeTransform(next, this.normalizeScriptletArg(
s.slice(details.argBeg, details.argEnd),
details.separatorCode
));
const arg = s.slice(details.argBeg, details.argEnd);
this.setNodeTransform(next,
this.scriptletArgListParser.normalizeArg(arg)
);
}
prev = this.linkRight(prev, next);
if ( details.failed ) {
@ -2315,79 +2414,6 @@ export class AstFilterParser {
return this.throwHeadNode(head);
}
parseExtPatternScriptletArg(pattern, beg = 0) {
if ( this.parseExtPatternScriptletArg.details === undefined ) {
this.parseExtPatternScriptletArg.details = {
quoteBeg: 0, argBeg: 0, argEnd: 0, quoteEnd: 0,
separatorCode: 0, separatorBeg: 0, separatorEnd: 0,
transform: false, failed: false,
};
}
const details = this.parseExtPatternScriptletArg.details;
const len = pattern.length;
details.quoteBeg = beg + this.leftWhitespaceCount(pattern.slice(beg));
details.failed = false;
const qc = pattern.charCodeAt(details.quoteBeg);
if ( qc === 0x22 /* " */ || qc === 0x27 /* ' */ ) {
details.separatorCode = qc;
details.argBeg = details.argEnd = details.quoteBeg + 1;
details.transform = false;
this.indexOfNextScriptletArgSeparator(pattern, details);
if ( details.argEnd !== len ) {
details.quoteEnd = details.argEnd + 1;
details.separatorBeg = details.separatorEnd = details.quoteEnd;
details.separatorEnd += this.leftWhitespaceCount(pattern.slice(details.quoteEnd));
if ( details.separatorEnd === len ) { return details; }
if ( pattern.charCodeAt(details.separatorEnd) === 0x2C ) {
details.separatorEnd += 1;
return details;
}
}
}
details.separatorCode = 0x2C /* , */;
details.argBeg = details.argEnd = details.quoteBeg;
details.transform = false;
this.indexOfNextScriptletArgSeparator(pattern, details);
details.separatorBeg = details.separatorEnd = details.argEnd;
if ( details.separatorBeg < len ) {
details.separatorEnd += 1;
}
details.argEnd -= this.rightWhitespaceCount(pattern.slice(0, details.separatorBeg));
details.quoteEnd = details.argEnd;
if ( this.getFlags(AST_FLAG_EXT_SCRIPTLET_ADG) ) {
details.failed = true;
}
return details;
}
indexOfNextScriptletArgSeparator(pattern, details) {
const separatorChar = String.fromCharCode(details.separatorCode);
while ( details.argEnd < pattern.length ) {
const pos = pattern.indexOf(separatorChar, details.argEnd);
if ( pos === -1 ) {
return (details.argEnd = pattern.length);
}
if ( this.reOddTrailingEscape.test(pattern.slice(0, pos)) === false ) {
return (details.argEnd = pos);
}
details.transform = true;
details.argEnd = pos + 1;
}
}
normalizeScriptletArg(arg, separatorCode) {
if ( separatorCode === 0x22 /* " */ ) {
if ( arg.includes('"') === false ) { return; }
return arg.replace(this.reUnescapeDoubleQuotes, '$1"');
}
if ( separatorCode === 0x27 /* ' */ ) {
if ( arg.includes("'") === false ) { return; }
return arg.replace(this.reUnescapeSingleQuotes, "$1'");
}
if ( arg.includes(',') === false ) { return; }
return arg.replace(this.reUnescapeCommas, '$1,');
}
getScriptletArgs() {
const args = [];
if ( this.isScriptletFilter() === false ) { return args; }