Merge pull request #2500 from matrix-org/dbkr/fix_word_boundary_mentions

Fix notif kws that start/end with non-word chars
This commit is contained in:
David Baker 2017-10-05 12:27:59 +01:00 committed by GitHub
commit 44f8e383f3
1 changed file with 14 additions and 2 deletions

View File

@ -183,7 +183,7 @@ def _glob_to_re(glob, word_boundary):
r,
)
if word_boundary:
r = r"\b%s\b" % (r,)
r = _re_word_boundary(r)
return re.compile(r, flags=re.IGNORECASE)
else:
@ -192,7 +192,7 @@ def _glob_to_re(glob, word_boundary):
return re.compile(r, flags=re.IGNORECASE)
elif word_boundary:
r = re.escape(glob)
r = r"\b%s\b" % (r,)
r = _re_word_boundary(r)
return re.compile(r, flags=re.IGNORECASE)
else:
@ -200,6 +200,18 @@ def _glob_to_re(glob, word_boundary):
return re.compile(r, flags=re.IGNORECASE)
def _re_word_boundary(r):
    """
    Wrap a regular-expression fragment so it only matches as a whole word.

    The fragment is surrounded by two groups: the leading one accepts
    either the start of the string or a single non-word character, and
    the trailing one accepts either a single non-word character or the
    end of the string. This keeps fragments that themselves begin or end
    with a non-word character matchable, which a plain ``\\b`` anchor
    would not.

    Note that a boundary character, when present, is consumed by the
    match, so the matched span may include one extra character on each
    side of the fragment.

    Args:
        r: the regular-expression source to wrap (already escaped or
            otherwise valid regex syntax).

    Returns:
        The wrapped regular-expression source, not yet compiled.
    """
    # \b is avoided because it was found to choke on unicode input; \W is
    # used instead as the "not a word character" class.
    leading = r"(^|\W)"
    trailing = r"(\W|$)"
    return "%s%s%s" % (leading, r, trailing)
def _flatten_dict(d, prefix=[], result=None):
if result is None:
result = {}