Cache glob to regex at a higher level for push

This commit is contained in:
Erik Johnston 2017-03-29 15:53:14 +01:00
parent 3ce8d59176
commit a3810136fe
1 changed files with 57 additions and 47 deletions

View File

@ -17,6 +17,7 @@ import logging
import re import re
from synapse.types import UserID from synapse.types import UserID
from synapse.util.caches import CACHE_SIZE_FACTOR, register_cache
from synapse.util.caches.lrucache import LruCache from synapse.util.caches.lrucache import LruCache
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -125,6 +126,11 @@ class PushRuleEvaluatorForEvent(object):
return self._value_cache.get(dotted_key, None) return self._value_cache.get(dotted_key, None)
# Caches (glob, word_boundary) -> regex for push. See _glob_matches
regex_cache = LruCache(50000 * CACHE_SIZE_FACTOR)
register_cache("regex_push_cache", regex_cache)
def _glob_matches(glob, value, word_boundary=False): def _glob_matches(glob, value, word_boundary=False):
"""Tests if value matches glob. """Tests if value matches glob.
@ -137,46 +143,63 @@ def _glob_matches(glob, value, word_boundary=False):
Returns: Returns:
bool bool
""" """
try: try:
if IS_GLOB.search(glob): r = regex_cache.get((glob, word_boundary), None)
r = re.escape(glob) if not r:
r = _glob_to_re(glob, word_boundary)
r = r.replace(r'\*', '.*?') regex_cache[(glob, word_boundary)] = r
r = r.replace(r'\?', '.') return r.search(value)
# handle [abc], [a-z] and [!a-z] style ranges.
r = GLOB_REGEX.sub(
lambda x: (
'[%s%s]' % (
x.group(1) and '^' or '',
x.group(2).replace(r'\\\-', '-')
)
),
r,
)
if word_boundary:
r = r"\b%s\b" % (r,)
r = _compile_regex(r)
return r.search(value)
else:
r = r + "$"
r = _compile_regex(r)
return r.match(value)
elif word_boundary:
r = re.escape(glob)
r = r"\b%s\b" % (r,)
r = _compile_regex(r)
return r.search(value)
else:
return value.lower() == glob.lower()
except re.error: except re.error:
logger.warn("Failed to parse glob to regex: %r", glob) logger.warn("Failed to parse glob to regex: %r", glob)
return False return False
def _glob_to_re(glob, word_boundary):
"""Generates regex for a given glob.
Args:
glob (string)
word_boundary (bool): Whether to match against word boundaries or entire
string. Defaults to False.
Returns:
regex object
"""
if IS_GLOB.search(glob):
r = re.escape(glob)
r = r.replace(r'\*', '.*?')
r = r.replace(r'\?', '.')
# handle [abc], [a-z] and [!a-z] style ranges.
r = GLOB_REGEX.sub(
lambda x: (
'[%s%s]' % (
x.group(1) and '^' or '',
x.group(2).replace(r'\\\-', '-')
)
),
r,
)
if word_boundary:
r = r"\b%s\b" % (r,)
return re.compile(r, flags=re.IGNORECASE)
else:
r = "^" + r + "$"
return re.compile(r, flags=re.IGNORECASE)
elif word_boundary:
r = re.escape(glob)
r = r"\b%s\b" % (r,)
return re.compile(r, flags=re.IGNORECASE)
else:
r = "^" + re.escape(glob) + "$"
return re.compile(r, flags=re.IGNORECASE)
def _flatten_dict(d, prefix=[], result={}): def _flatten_dict(d, prefix=[], result={}):
for key, value in d.items(): for key, value in d.items():
if isinstance(value, basestring): if isinstance(value, basestring):
@ -185,16 +208,3 @@ def _flatten_dict(d, prefix=[], result={}):
_flatten_dict(value, prefix=(prefix + [key]), result=result) _flatten_dict(value, prefix=(prefix + [key]), result=result)
return result return result
regex_cache = LruCache(5000)
def _compile_regex(regex_str):
r = regex_cache.get(regex_str, None)
if r:
return r
r = re.compile(regex_str, flags=re.IGNORECASE)
regex_cache[regex_str] = r
return r