Cache glob to regex at a higher level for push

2017-03-29 15:53:14 +01:00 · 2017-03-29 15:53:14 +01:00 · a3810136fe
parent 3ce8d59176
commit a3810136fe
1 changed files with 57 additions and 47 deletions
--- a/synapse/push/push_rule_evaluator.py
+++ b/synapse/push/push_rule_evaluator.py
@ -17,6 +17,7 @@ import logging
 import re
 from synapse.types import UserID
 from synapse.util.caches import CACHE_SIZE_FACTOR, register_cache
 from synapse.util.caches.lrucache import LruCache
 logger = logging.getLogger(__name__)
@ -125,6 +126,11 @@ class PushRuleEvaluatorForEvent(object):
        return self._value_cache.get(dotted_key, None)
 # Caches (glob, word_boundary) -> regex for push. See _glob_matches
 regex_cache = LruCache(50000 * CACHE_SIZE_FACTOR)
 register_cache("regex_push_cache", regex_cache)
 def _glob_matches(glob, value, word_boundary=False):
    """Tests if value matches glob.
@ -137,46 +143,63 @@ def _glob_matches(glob, value, word_boundary=False):
    Returns:
        bool
    """
    try:
-        if IS_GLOB.search(glob):
+        r = regex_cache.get((glob, word_boundary), None)
-            r = re.escape(glob)
+        if not r:
-
+            r = _glob_to_re(glob, word_boundary)
-            r = r.replace(r'\*', '.*?')
+            regex_cache[(glob, word_boundary)] = r
-            r = r.replace(r'\?', '.')
+        return r.search(value)
            # handle [abc], [a-z] and [!a-z] style ranges.
            r = GLOB_REGEX.sub(
                lambda x: (
                    '[%s%s]' % (
                        x.group(1) and '^' or '',
                        x.group(2).replace(r'\\\-', '-')
                    )
                ),
                r,
            )
            if word_boundary:
                r = r"\b%s\b" % (r,)
                r = _compile_regex(r)
                return r.search(value)
            else:
                r = r + "$"
                r = _compile_regex(r)
                return r.match(value)
        elif word_boundary:
            r = re.escape(glob)
            r = r"\b%s\b" % (r,)
            r = _compile_regex(r)
            return r.search(value)
        else:
            return value.lower() == glob.lower()
    except re.error:
        logger.warn("Failed to parse glob to regex: %r", glob)
        return False
 def _glob_to_re(glob, word_boundary):
    """Generates regex for a given glob.
    Args:
        glob (string)
        word_boundary (bool): Whether to match against word boundaries or entire
            string. Defaults to False.
    Returns:
        regex object
    """
    if IS_GLOB.search(glob):
        r = re.escape(glob)
        r = r.replace(r'\*', '.*?')
        r = r.replace(r'\?', '.')
        # handle [abc], [a-z] and [!a-z] style ranges.
        r = GLOB_REGEX.sub(
            lambda x: (
                '[%s%s]' % (
                    x.group(1) and '^' or '',
                    x.group(2).replace(r'\\\-', '-')
                )
            ),
            r,
        )
        if word_boundary:
            r = r"\b%s\b" % (r,)
            return re.compile(r, flags=re.IGNORECASE)
        else:
            r = "^" + r + "$"
            return re.compile(r, flags=re.IGNORECASE)
    elif word_boundary:
        r = re.escape(glob)
        r = r"\b%s\b" % (r,)
        return re.compile(r, flags=re.IGNORECASE)
    else:
        r = "^" + re.escape(glob) + "$"
        return re.compile(r, flags=re.IGNORECASE)
 def _flatten_dict(d, prefix=[], result={}):
    for key, value in d.items():
        if isinstance(value, basestring):
@ -185,16 +208,3 @@ def _flatten_dict(d, prefix=[], result={}):
            _flatten_dict(value, prefix=(prefix + [key]), result=result)
    return result
 regex_cache = LruCache(5000)
 def _compile_regex(regex_str):
    r = regex_cache.get(regex_str, None)
    if r:
        return r
    r = re.compile(regex_str, flags=re.IGNORECASE)
    regex_cache[regex_str] = r
    return r