mirror of https://github.com/gorhill/uBlock.git
Import version 1.2.0 of https://github.com/foo123/RegexAnalyzer
Related issue: https://github.com/uBlockOrigin/uBlock-issues/issues/2374
This commit is contained in:
parent
f8d2bd0ebb
commit
d80ac4f292
|
@ -1,19 +1,37 @@
|
|||
/**
|
||||
*
|
||||
* Regex
|
||||
* @version: 1.1.0
|
||||
* @version: 1.2.0
|
||||
*
|
||||
* A simple & generic Regular Expression Analyzer & Composer for PHP, Python, Node.js / Browser / XPCOM Javascript
|
||||
* A simple & generic Regular Expression Analyzer & Composer for PHP, Python, Javascript
|
||||
* https://github.com/foo123/RegexAnalyzer
|
||||
*
|
||||
**/
|
||||
export default (function () {
|
||||
!function(root, name, factory) {
|
||||
"use strict";
|
||||
var __version__ = "1.1.0",
|
||||
if (('undefined'!==typeof Components)&&('object'===typeof Components.classes)&&('object'===typeof Components.classesByID)&&Components.utils&&('function'===typeof Components.utils['import'])) /* XPCOM */
|
||||
(root.$deps = root.$deps||{}) && (root.EXPORTED_SYMBOLS = [name]) && (root[name] = root.$deps[name] = factory.call(root));
|
||||
else if (('object'===typeof module)&&module.exports) /* CommonJS */
|
||||
(module.$deps = module.$deps||{}) && (module.exports = module.$deps[name] = factory.call(root));
|
||||
else if (('undefined'!==typeof System)&&('function'===typeof System.register)&&('function'===typeof System['import'])) /* ES6 module */
|
||||
System.register(name,[],function($__export){$__export(name, factory.call(root));});
|
||||
else if (('function'===typeof define)&&define.amd&&('function'===typeof require)&&('function'===typeof require.specified)&&require.specified(name) /*&& !require.defined(name)*/) /* AMD */
|
||||
define(name,['module'],function(module){factory.moduleUri = module.uri; return factory.call(root);});
|
||||
else if (!(name in root)) /* Browser/WebWorker/.. */
|
||||
(root[name] = factory.call(root)||1)&&('function'===typeof(define))&&define.amd&&define(function(){return root[name];} );
|
||||
}( /* current root */ 'undefined' !== typeof self ? self : this,
|
||||
/* module name */ "Regex",
|
||||
/* module factory */ function ModuleFactory__Regex(undef) {
|
||||
"use strict";
|
||||
var __version__ = "1.2.0",
|
||||
|
||||
PROTO = 'prototype', OP = Object[PROTO], AP = Array[PROTO],
|
||||
Keys = Object.keys, to_string = OP.toString, HAS = OP.hasOwnProperty,
|
||||
fromCharCode = String.fromCharCode, CHAR = 'charAt', CHARCODE = 'charCodeAt', toJSON = JSON.stringify,
|
||||
fromCharCode = String.fromCharCode,
|
||||
fromCodePoint = String.fromCodePoint || String.fromCharCode,
|
||||
CHAR = 'charAt', CHARCODE = 'charCodeAt',
|
||||
CODEPOINT = String.prototype.codePointAt ? 'codePointAt' : CHARCODE,
|
||||
toJSON = JSON.stringify,
|
||||
INF = Infinity, ESC = '\\',
|
||||
specialChars = {
|
||||
"." : "MatchAnyChar",
|
||||
|
@ -106,10 +124,12 @@ function clone( obj, cloned )
|
|||
for (var p in obj) if (HAS.call(obj,p)) cloned[p] = obj[p];
|
||||
return cloned;
|
||||
}
|
||||
function RE_OBJ( re )
|
||||
function RE_OBJ(re, flags, flavor)
|
||||
{
|
||||
var self = this;
|
||||
self.re = re;
|
||||
self.flags = flags;
|
||||
self.flavor = flavor;
|
||||
self.len = re.length;
|
||||
self.pos = 0;
|
||||
self.index = 0;
|
||||
|
@ -120,6 +140,8 @@ function RE_OBJ( re )
|
|||
RE_OBJ[PROTO] = {
|
||||
constructor: RE_OBJ
|
||||
,re: null
|
||||
,flags: null
|
||||
,flavor: ''
|
||||
,len: null
|
||||
,pos: null
|
||||
,index: null
|
||||
|
@ -129,6 +151,8 @@ RE_OBJ[PROTO] = {
|
|||
,dispose: function() {
|
||||
var self = this;
|
||||
self.re = null;
|
||||
self.flags = null;
|
||||
self.flavor = null;
|
||||
self.len = null;
|
||||
self.pos = null;
|
||||
self.index = null;
|
||||
|
@ -260,18 +284,18 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
while (ps.length < n) ps = z + ps;
|
||||
return ps;
|
||||
},
|
||||
char_code = function( c ) { return c[CHARCODE](0); },
|
||||
char_code_range = function( s ) { return [s[CHARCODE](0), s[CHARCODE](s.length-1)]; },
|
||||
char_code = function(c) {return c[CODEPOINT](0);},
|
||||
char_code_range = function(s) {return [s[CODEPOINT](0), s[CODEPOINT](s.length-1)];},
|
||||
//char_codes = function( s_or_a ) { return (s_or_a.substr ? s_or_a.split("") : s_or_a).map( char_code ); },
|
||||
// http://stackoverflow.com/questions/12376870/create-an-array-of-characters-from-specified-range
|
||||
character_range = function(first, last) {
|
||||
if (first && is_array(first)) {last = first[1]; first = first[0];}
|
||||
var ch, chars, start = first[CHARCODE](0), end = last[CHARCODE](0);
|
||||
var ch, chars, start = first[CODEPOINT](0), end = last[CODEPOINT](0);
|
||||
|
||||
if ( end === start ) return [ fromCharCode( start ) ];
|
||||
if (end === start) return [fromCodePoint(start)];
|
||||
|
||||
chars = [];
|
||||
for (ch = start; ch <= end; ++ch) chars.push( fromCharCode( ch ) );
|
||||
for (ch = start; ch <= end; ++ch) chars.push(fromCodePoint(ch));
|
||||
return chars;
|
||||
},
|
||||
concat = function(p1, p2) {
|
||||
|
@ -280,7 +304,7 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
var p, l;
|
||||
if (is_array(p2))
|
||||
{
|
||||
for (p=0,l=p2.length; p<l; p++) p1[p2[p]] = 1;
|
||||
for (p=0,l=p2.length; p<l; ++p) p1[p2[p]] = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -303,7 +327,7 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
var lp = pos, l = 0, sl = s.length, ch;
|
||||
while ((lp < sl) && (l <= maxlen) && -1 < CHARS.indexOf(ch=s[CHAR](lp)))
|
||||
{
|
||||
lp++; l++;
|
||||
++lp; ++l;
|
||||
}
|
||||
return l >= minlen ? l : false;
|
||||
},
|
||||
|
@ -314,7 +338,7 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
var lp = pos, l = 0, sl = s.length, ch;
|
||||
while ((lp < sl) && (l <= maxlen) && ((ch=s[CHARCODE](lp)) >= RANGE[0] && ch <= RANGE[1]))
|
||||
{
|
||||
lp++; l++;
|
||||
++lp; ++l;
|
||||
}
|
||||
return l >= minlen ? l : false;
|
||||
},
|
||||
|
@ -327,12 +351,12 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
while ((lp < sl) && (l <= maxlen) && found)
|
||||
{
|
||||
ch = s[CHARCODE](lp); found = false;
|
||||
for (i=0; i<Rl; i++)
|
||||
for (i=0; i<Rl; ++i)
|
||||
{
|
||||
RANGE = RANGES[i];
|
||||
if (ch >= RANGE[0] && ch <= RANGE[1])
|
||||
{
|
||||
lp++; l++; found = true;
|
||||
++lp; ++l; found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -418,7 +442,7 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
else if (null != r)
|
||||
{
|
||||
r = array(r);
|
||||
for(i=0,l=r?r.length:0; i<l; i++)
|
||||
for (i=0,l=r?r.length:0; i<l; ++i)
|
||||
{
|
||||
state.node = node;
|
||||
ret = walk(ret, r[i], state);
|
||||
|
@ -447,7 +471,7 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
if (T_ALTERNATION === type)
|
||||
{
|
||||
var r = [];
|
||||
for(var i=0,l=node.val.length-1; i<l; i++) r.push(node.val[i],'|');
|
||||
for (var i=0,l=node.val.length-1; i<l; ++i) r.push(node.val[i], '|');
|
||||
r.push(node.val[l]);
|
||||
return r;
|
||||
}
|
||||
|
@ -488,6 +512,10 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
{
|
||||
g = [].concat('(?<!').concat(array(node.val)).concat(')');
|
||||
}
|
||||
else if (node.flags.NamedGroup && !state.compatibility)
|
||||
{
|
||||
g = [].concat('(?<'+node.flags.GroupName+'>').concat(array(node.val)).concat(')');
|
||||
}
|
||||
else
|
||||
{
|
||||
g = [].concat('(').concat(array(node.val)).concat(')');
|
||||
|
@ -523,7 +551,7 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
if (numrepeats)
|
||||
{
|
||||
repeats = new Array(numrepeats);
|
||||
for(var i=0; i<numrepeats; i++) repeats[i] = node.val;
|
||||
for (var i=0; i<numrepeats; ++i) repeats[i] = node.val;
|
||||
return repeats;
|
||||
}
|
||||
else
|
||||
|
@ -549,7 +577,7 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
{
|
||||
var i, l = node.val.length, cur,
|
||||
min = l ? walk(0, node.val[0], state) : 0;
|
||||
for(i=1; i<l; i++)
|
||||
for (i=1; i<l; ++i)
|
||||
{
|
||||
cur = walk(0, node.val[i], state);
|
||||
if (cur < min) min = cur;
|
||||
|
@ -565,7 +593,7 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
{
|
||||
if (0 === node.flags.min) return null;
|
||||
var i, nrepeats = node.flags.min, repeats = new Array(nrepeats);
|
||||
for(i=0; i<nrepeats; i++) repeats[i] = node.val;
|
||||
for (i=0; i<nrepeats; ++i) repeats[i] = node.val;
|
||||
return repeats;
|
||||
}
|
||||
else if ((T_GROUP === type) && node.flags.GroupIndex)
|
||||
|
@ -587,7 +615,7 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
var i, l = node.val.length, cur, max = l ? walk(0, node.val[0], state) : 0;
|
||||
if (-1 !== max)
|
||||
{
|
||||
for(i=1; i<l; i++)
|
||||
for (i=1; i<l; ++i)
|
||||
{
|
||||
cur = walk(0, node.val[i], state);
|
||||
if (-1 === cur)
|
||||
|
@ -649,7 +677,7 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
if (T_SEQUENCE === type)
|
||||
{
|
||||
var seq=[], i=0, l=node.val.length, n;
|
||||
for(i=0; i<l; i++)
|
||||
for (i=0; i<l; ++i)
|
||||
{
|
||||
n = node.val[i];
|
||||
seq.push( n );
|
||||
|
@ -688,7 +716,8 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
}
|
||||
var type = node.type;
|
||||
|
||||
if ( (T_CHARS === type) || (T_CHARRANGE === type) ||
|
||||
if (
|
||||
(T_CHARS === type) || (T_CHARRANGE === type) ||
|
||||
(T_UNICODECHAR === type) || (T_HEXCHAR === type) ||
|
||||
((T_SPECIAL === type) && !node.flags.MatchStart && !node.flags.MatchEnd)
|
||||
)
|
||||
|
@ -742,6 +771,7 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
var part = node.val;
|
||||
if (node.flags.BackReference)
|
||||
{
|
||||
part = node.flags.GroupIndex;
|
||||
ret += HAS.call(state.group, part) ? state.group[part] : '';
|
||||
return ret;
|
||||
}
|
||||
|
@ -817,10 +847,10 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
var range = [node.val[0], node.val[1]];
|
||||
if (state.escaped)
|
||||
{
|
||||
if ( T_UNICODECHAR === range[0].type ) range[0] = ESC+'u'+pad(range[0].flags.Code,4);
|
||||
if (T_UNICODECHAR === range[0].type) range[0] = range[0].flags.UnicodePoint ? ESC+'u{'+range[0].flags.Code+'}' : ESC+'u'+pad(range[0].flags.Code,4);
|
||||
else if (T_HEXCHAR === range[0].type) range[0] = ESC+'x'+pad(range[0].flags.Code,2);
|
||||
else range[0] = esc_re(range[0], ESC, 1);
|
||||
if ( T_UNICODECHAR === range[1].type ) range[1] = ESC+'u'+pad(range[1].flags.Code,4);
|
||||
if (T_UNICODECHAR === range[1].type) range[1] = range[1].flags.UnicodePoint ? ESC+'u{'+range[1].flags.Code+'}' : ESC+'u'+pad(range[1].flags.Code,4);
|
||||
else if (T_HEXCHAR === range[1].type) range[1] = ESC+'x'+pad(range[1].flags.Code,2);
|
||||
else range[1] = esc_re(range[1], ESC, 1);
|
||||
}
|
||||
|
@ -833,7 +863,7 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
}
|
||||
else if (T_UNICODECHAR === type)
|
||||
{
|
||||
ret.src += state.escaped ? ESC+'u'+pad(node.flags.Code,4) : node.flags.Char;
|
||||
ret.src += node.flags.UnicodePoint ? ESC+'u{'+node.flags.Code+'}' : (state.escaped ? ESC+'u'+pad(node.flags.Code,4) : node.flags.Char);
|
||||
}
|
||||
else if (T_HEXCHAR === type)
|
||||
{
|
||||
|
@ -843,7 +873,14 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
{
|
||||
if (node.flags.BackReference)
|
||||
{
|
||||
ret.src += ESC+node.val;
|
||||
if (state.compatibility || (node.flags.GroupIndex === node.flags.GroupName))
|
||||
{
|
||||
ret.src += ESC+node.flags.GroupIndex;
|
||||
}
|
||||
else
|
||||
{
|
||||
ret.src += ESC+'k<'+node.flags.GroupName+'>';
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -929,11 +966,18 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
}
|
||||
return false;
|
||||
},
|
||||
match_unicode = function( s ) {
|
||||
var m = false;
|
||||
if ( (s.length > 4) && ('u' === s[CHAR](0)) )
|
||||
match_unicode = function(s, flags) {
|
||||
var m = false, l;
|
||||
if ((s.length > 3) && ('u' === s[CHAR](0)))
|
||||
{
|
||||
if ( match_char_ranges(HEXDIGITS_RANGES, s, 1, 4, 4) ) return [m=s.slice(0,5), m.slice(1)];
|
||||
if (flags.u && '{' === s[CHAR](1) && (l=match_char_ranges(HEXDIGITS_RANGES, s, 2, 1, 6)) && '}' === s[CHAR](l+2))
|
||||
{
|
||||
return [m=s.slice(0,l+3), m.slice(2, -1), 1];
|
||||
}
|
||||
else if (l=match_char_ranges(HEXDIGITS_RANGES, s, 1, 4, 4))
|
||||
{
|
||||
return [m=s.slice(0,l+1), m.slice(1), 0];
|
||||
}
|
||||
}
|
||||
return false;
|
||||
},
|
||||
|
@ -942,7 +986,7 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
if ((sl > 2) && ('{' === s[CHAR](pos)))
|
||||
{
|
||||
m = ['', '', null];
|
||||
pos++;
|
||||
++pos;
|
||||
if (l=match_chars(SPACES, s, pos)) pos += l;
|
||||
if (l=match_char_range(DIGITS_RANGE, s, pos))
|
||||
{
|
||||
|
@ -978,12 +1022,12 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
},
|
||||
chargroup = function chargroup(re_obj) {
|
||||
var sequence = [], chars = [], allchars = [], flags = {}, flag, ch, lre,
|
||||
prevch, range, isRange = false, m, isUnicode, isHex, escaped = false;
|
||||
prevch = null, range, isRange = false, m, isUnicode, isHex, isSpecial, escaped = false;
|
||||
|
||||
if ('^' === re_obj.re[CHAR](re_obj.pos))
|
||||
{
|
||||
flags["NegativeMatch"] = 1;
|
||||
re_obj.pos++;
|
||||
++re_obj.pos;
|
||||
}
|
||||
|
||||
lre = re_obj.len;
|
||||
|
@ -991,6 +1035,7 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
{
|
||||
isUnicode = false;
|
||||
isHex = false;
|
||||
isSpecial = false;
|
||||
m = null;
|
||||
prevch = ch;
|
||||
ch = re_obj.re[CHAR](re_obj.pos++);
|
||||
|
@ -1003,29 +1048,57 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
// unicode character
|
||||
if ('u' === ch)
|
||||
{
|
||||
m = match_unicode( re_obj.re.substr( re_obj.pos-1 ) );
|
||||
m = match_unicode(re_obj.re.substr(re_obj.pos-1), re_obj.flags);
|
||||
if (m)
|
||||
{
|
||||
re_obj.pos += m[0].length-1;
|
||||
ch = Node(T_UNICODECHAR, m[0], {"Char": fromCharCode(parseInt(m[1], 16)), "Code": m[1]});
|
||||
ch = Node(T_UNICODECHAR, m[0], {"Char": m[2] ? fromCodePoint(parseInt(m[1], 16)) : fromCharCode(parseInt(m[1], 16)), "Code": m[1], "UnicodePoint": !!m[2]});
|
||||
isUnicode = true; isHex = false;
|
||||
}
|
||||
}
|
||||
|
||||
// hex character
|
||||
else if ('x' === ch)
|
||||
{
|
||||
m = match_hex(re_obj.re.substr(re_obj.pos-1));
|
||||
if (m)
|
||||
{
|
||||
re_obj.pos += m[0].length-1;
|
||||
ch = Node(T_HEXCHAR, m[0], {"Char": fromCharCode(parseInt(m[1], 16)), "Code": m[1]});
|
||||
isUnicode = true; isHex = true;
|
||||
}
|
||||
}
|
||||
|
||||
if ( isRange && escaped === false && ']' === ch )
|
||||
// special character
|
||||
else if (HAS.call(specialCharsEscaped, ch) && ('/' !== ch))
|
||||
{
|
||||
isRange = false;
|
||||
chars.push(range[0], '-');
|
||||
isSpecial = true;
|
||||
flag = {};
|
||||
flag[specialCharsEscaped[ch]] = 1;
|
||||
ch = Node(T_SPECIAL, ch, flag);
|
||||
}
|
||||
}
|
||||
|
||||
if (isRange)
|
||||
{
|
||||
if (
|
||||
(ch instanceof Node) &&
|
||||
(ch.type === T_SPECIAL) &&
|
||||
(-1 !== ['s','S','d','D','w','W'].indexOf(ch.val))
|
||||
)
|
||||
{
|
||||
if (range[0] instanceof Node)
|
||||
{
|
||||
sequence.push(range[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
chars.push(range[0]);
|
||||
}
|
||||
chars.push('-');
|
||||
sequence.push(ch);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (chars.length)
|
||||
{
|
||||
|
@ -1033,9 +1106,10 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
chars = [];
|
||||
}
|
||||
range[1] = ch;
|
||||
isRange = false;
|
||||
sequence.push(Node(T_CHARRANGE, range));
|
||||
}
|
||||
isRange = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (escaped)
|
||||
|
@ -1050,16 +1124,14 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
sequence.push(ch);
|
||||
}
|
||||
|
||||
else if ( HAS.call(specialCharsEscaped,ch) && ('/' !== ch) )
|
||||
else if (isSpecial)
|
||||
{
|
||||
if (chars.length)
|
||||
{
|
||||
allchars = allchars.concat(chars);
|
||||
chars = [];
|
||||
}
|
||||
flag = {};
|
||||
flag[ specialCharsEscaped[ch] ] = 1;
|
||||
sequence.push( Node(T_SPECIAL, ch, flag) );
|
||||
sequence.push(ch);
|
||||
}
|
||||
|
||||
else
|
||||
|
@ -1084,11 +1156,28 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
}
|
||||
|
||||
else if ('-' === ch)
|
||||
{
|
||||
if (
|
||||
null == prevch ||
|
||||
']' === re_obj.re[CHAR](re_obj.pos) ||
|
||||
(
|
||||
(prevch instanceof Node) &&
|
||||
(prevch.type === T_SPECIAL) &&
|
||||
(-1 !== ['s','S','d','D','w','W'].indexOf(prevch.val))
|
||||
)
|
||||
)
|
||||
{
|
||||
// take it as literal
|
||||
// https://github.com/foo123/RegexAnalyzer/issues/5
|
||||
chars.push(ch);
|
||||
}
|
||||
else
|
||||
{
|
||||
range = [prevch, ''];
|
||||
if (prevch instanceof Node) sequence.pop(); else chars.pop();
|
||||
isRange = true;
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
|
@ -1131,7 +1220,7 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
flags["GroupName"] += ch;
|
||||
}
|
||||
flags["GroupIndex"] = HAS.call(re_obj.group, flags["GroupName"]) ? re_obj.group[flags["GroupName"]] : null;
|
||||
return Node(T_SPECIAL, String(flags[ "GroupIndex" ]), flags);
|
||||
return Node(T_SPECIAL, flags["GroupName"], flags);
|
||||
}
|
||||
|
||||
else if ("?#" === pre)
|
||||
|
@ -1186,6 +1275,7 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
|
||||
else if (("?<" === pre) || ("?P<" === pre3))
|
||||
{
|
||||
// https://github.com/foo123/RegexAnalyzer/issues/6
|
||||
flags["NamedGroup"] = 1;
|
||||
flags["GroupName"] = '';
|
||||
re_obj.pos += "?<" === pre ? 2 : 3;
|
||||
|
@ -1221,6 +1311,9 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
{
|
||||
// unicode character
|
||||
if ('u' === ch)
|
||||
{
|
||||
m = match_unicode(re_obj.re.substr(re_obj.pos-1), re_obj.flags);
|
||||
if (m)
|
||||
{
|
||||
if (wordlen)
|
||||
{
|
||||
|
@ -1228,13 +1321,21 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
word = '';
|
||||
wordlen = 0;
|
||||
}
|
||||
m = match_unicode( re_obj.re.substr( re_obj.pos-1 ) );
|
||||
re_obj.pos += m[0].length-1;
|
||||
sequence.push( Node(T_UNICODECHAR, m[0], {"Char": fromCharCode(parseInt(m[1], 16)), "Code": m[1]}) );
|
||||
sequence.push(Node(T_UNICODECHAR, m[0], {"Char": m[2] ? fromCodePoint(parseInt(m[1], 16)) : fromCharCode(parseInt(m[1], 16)), "Code": m[1], "UnicodePoint": !!m[2]}));
|
||||
}
|
||||
else
|
||||
{
|
||||
word += ch;
|
||||
wordlen += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// hex character
|
||||
else if ('x' === ch)
|
||||
{
|
||||
m = match_hex(re_obj.re.substr(re_obj.pos-1));
|
||||
if (m)
|
||||
{
|
||||
if (wordlen)
|
||||
{
|
||||
|
@ -1242,10 +1343,41 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
word = '';
|
||||
wordlen = 0;
|
||||
}
|
||||
m = match_hex( re_obj.re.substr( re_obj.pos-1 ) );
|
||||
re_obj.pos += m[0].length-1;
|
||||
sequence.push(Node(T_HEXCHAR, m[0], {"Char": fromCharCode(parseInt(m[1], 16)), "Code": m[1]}));
|
||||
}
|
||||
else
|
||||
{
|
||||
word += ch;
|
||||
wordlen += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// js back-reference
|
||||
else if ('k' === ch && '<' === re_obj.re[CHAR](re_obj.pos))
|
||||
{
|
||||
// https://github.com/foo123/RegexAnalyzer/issues/6
|
||||
if (wordlen)
|
||||
{
|
||||
sequence.push(Node(T_STRING, word));
|
||||
word = '';
|
||||
wordlen = 0;
|
||||
}
|
||||
re_obj.pos++;
|
||||
word = '';
|
||||
while (re_obj.pos < lre)
|
||||
{
|
||||
ch = re_obj.re[CHAR](re_obj.pos);
|
||||
if ('>' === ch) {re_obj.pos++; break;}
|
||||
else {word += ch; re_obj.pos++;}
|
||||
}
|
||||
flag = {};
|
||||
flag["BackReference"] = 1;
|
||||
flag["GroupName"] = word;
|
||||
flag["GroupIndex"] = HAS.call(re_obj.group, word) ? re_obj.group[word] : null;
|
||||
sequence.push(Node(T_SPECIAL, word, flag));
|
||||
word = '';
|
||||
}
|
||||
|
||||
else if (HAS.call(specialCharsEscaped, ch) && ('/' !== ch))
|
||||
{
|
||||
|
@ -1277,6 +1409,7 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
}
|
||||
flag = {};
|
||||
flag['BackReference'] = 1;
|
||||
flag['GroupName'] = word;
|
||||
flag['GroupIndex'] = parseInt(word, 10);
|
||||
sequence.push(Node(T_SPECIAL, word, flag));
|
||||
word = '';
|
||||
|
@ -1460,10 +1593,10 @@ var rnd = function( a, b ){ return Math.round((b-a)*Math.random()+a); },
|
|||
// https://docs.python.org/3/library/re.html
|
||||
// http://php.net/manual/en/reference.pcre.pattern.syntax.php
|
||||
// A simple regular expression analyzer
|
||||
// Analyzer constructor; works with or without `new`.
// When `re` is truthy it is handed to `input` together with `delim` and `flavor`.
function Analyzer(re, delim, flavor)
{
    var self = this;
    if (!(self instanceof Analyzer))
    {
        return new Analyzer(re, delim, flavor);
    }
    if (re)
    {
        self.input(re, delim, flavor);
    }
}
|
||||
Analyzer.VERSION = __version__;
|
||||
Analyzer[PROTO] = {
|
||||
|
@ -1471,6 +1604,7 @@ Analyzer[PROTO] = {
|
|||
constructor: Analyzer,
|
||||
|
||||
ast: null,
|
||||
flavor: '',
|
||||
re: null,
|
||||
fl: null,
|
||||
src: null,
|
||||
|
@ -1478,10 +1612,12 @@ Analyzer[PROTO] = {
|
|||
min: null,
|
||||
max: null,
|
||||
ch: null,
|
||||
bc: true,
|
||||
|
||||
dispose: function() {
|
||||
var self = this;
|
||||
self.ast = null;
|
||||
self.flavor = null;
|
||||
self.re = null;
|
||||
self.fl = null;
|
||||
self.src = null;
|
||||
|
@ -1503,7 +1639,12 @@ Analyzer[PROTO] = {
|
|||
return self;
|
||||
},
|
||||
|
||||
input: function( re, delim ) {
|
||||
backwardsCompatible: function(enable) {
|
||||
this.bc = !!enable;
|
||||
return this;
|
||||
},
|
||||
|
||||
input: function(re, delim, flavor) {
|
||||
var self = this;
|
||||
if (!arguments.length) return self.re;
|
||||
if (re)
|
||||
|
@ -1520,7 +1661,7 @@ Analyzer[PROTO] = {
|
|||
{
|
||||
ch = re[CHAR](l-1);
|
||||
if (delim === ch) break;
|
||||
else { fl[ ch ] = 1; l--; }
|
||||
else {fl[ch] = 1; --l;}
|
||||
}
|
||||
|
||||
if (0 < l)
|
||||
|
@ -1537,7 +1678,7 @@ Analyzer[PROTO] = {
|
|||
|
||||
// re is different, reset the ast, etc
|
||||
if (self.re !== re) self.reset();
|
||||
self.re = re; self.fl = fl;
|
||||
self.re = re; self.fl = fl; self.flavor = String(flavor || '');
|
||||
}
|
||||
return self;
|
||||
},
|
||||
|
@ -1546,7 +1687,7 @@ Analyzer[PROTO] = {
|
|||
var self = this;
|
||||
if ((null != self.re) && (null === self.ast))
|
||||
{
|
||||
var re = new RE_OBJ(self.re);
|
||||
var re = new RE_OBJ(self.re, self.fl, self.flavor);
|
||||
self.ast = analyze_re(re);
|
||||
re.dispose();
|
||||
}
|
||||
|
@ -1571,6 +1712,7 @@ Analyzer[PROTO] = {
|
|||
map : map_src,
|
||||
reduce : reduce_src,
|
||||
escaped : false !== escaped,
|
||||
compatibility : self.bc,
|
||||
group : {}
|
||||
};
|
||||
re = walk({src:'',group:{}}, self.ast, state);
|
||||
|
@ -1593,11 +1735,11 @@ Analyzer[PROTO] = {
|
|||
return true === raw ? sel.grp : clone(self.grp);
|
||||
},
|
||||
|
||||
compile: function( flags ) {
|
||||
compile: function(flags, notBackwardsCompatible) {
|
||||
var self = this;
|
||||
if (null == self.re) return null;
|
||||
flags = flags || self.fl || {};
|
||||
return new RegExp(self.source(), (flags.g||flags.G?'g':'')+(flags.i||flags.I?'i':'')+(flags.m||flags.M?'m':'')+(flags.y||flags.Y?'y':''));
|
||||
return new RegExp(self.source(), (flags.g||flags.G?'g':'')+(flags.i||flags.I?'i':'')+(flags.m||flags.M?'m':'')+(flags.y||flags.Y?'y':'')+(flags.u?'u':'')+(flags.d?'d':'')+(flags.s?'s':''));
|
||||
},
|
||||
|
||||
tree: function(flat) {
|
||||
|
@ -1626,7 +1768,7 @@ Analyzer[PROTO] = {
|
|||
if (1 < numsamples)
|
||||
{
|
||||
var samples = new Array(numsamples);
|
||||
for(var i=0; i<numsamples; i++) samples[i] = walk('', self.ast, state);
|
||||
for (var i=0; i<numsamples; ++i) samples[i] = walk('', self.ast, state);
|
||||
return samples;
|
||||
}
|
||||
return walk('', self.ast, state);
|
||||
|
@ -1902,9 +2044,9 @@ Composer[PROTO] = {
|
|||
return self;
|
||||
},
|
||||
|
||||
UNICODE: function( code ) {
|
||||
UNICODE: function(code, uni) {
|
||||
var self = this;
|
||||
self.ast[self.level].node.push(ESC+'u'+pad(code||0, 4));
|
||||
self.ast[self.level].node.push(true === uni ? ESC+'u{'+String(code||0)+'}' : ESC+'u'+pad(code||0, 4));
|
||||
return self;
|
||||
},
|
||||
|
||||
|
@ -1979,21 +2121,21 @@ Composer[PROTO] = {
|
|||
|
||||
zeroOrOne: function(greedy) {
|
||||
var self = this;
|
||||
if ( arguments.length < 3 ) greedy = true;
|
||||
if (arguments.length < 1) greedy = true;
|
||||
self.ast[self.level].node[self.ast[self.level].node.length-1] += (!greedy ? '??' : '?');
|
||||
return self;
|
||||
},
|
||||
|
||||
zeroOrMore: function(greedy) {
|
||||
var self = this;
|
||||
if ( arguments.length < 3 ) greedy = true;
|
||||
if (arguments.length < 1) greedy = true;
|
||||
self.ast[self.level].node[self.ast[self.level].node.length-1] += (!greedy ? '*?' : '*');
|
||||
return self;
|
||||
},
|
||||
|
||||
oneOrMore: function(greedy) {
|
||||
var self = this;
|
||||
if ( arguments.length < 3 ) greedy = true;
|
||||
if (arguments.length < 1) greedy = true;
|
||||
self.ast[self.level].node[self.ast[self.level].node.length-1] += (!greedy ? '+?' : '+');
|
||||
return self;
|
||||
},
|
||||
|
@ -2145,4 +2287,4 @@ var Regex = {
|
|||
};
|
||||
/* export the module */
|
||||
return Regex;
|
||||
})();
|
||||
});
|
||||
|
|
Loading…
Reference in New Issue