Properly parse event_fields in filters (#15607)
The event_fields property in filters should use the proper escape rules, namely backslashes can be escaped with an additional backslash. This adds tests (adapted from matrix-js-sdk) and implements the logic to properly split the event_fields strings.
This commit is contained in:
parent
201597fc86
commit
c5d1e6d414
|
@ -0,0 +1 @@
|
||||||
|
Fix a long-standing bug where filters with multiple backslashes were rejected.
|
|
@ -128,20 +128,7 @@ USER_FILTER_SCHEMA = {
|
||||||
"account_data": {"$ref": "#/definitions/filter"},
|
"account_data": {"$ref": "#/definitions/filter"},
|
||||||
"room": {"$ref": "#/definitions/room_filter"},
|
"room": {"$ref": "#/definitions/room_filter"},
|
||||||
"event_format": {"type": "string", "enum": ["client", "federation"]},
|
"event_format": {"type": "string", "enum": ["client", "federation"]},
|
||||||
"event_fields": {
|
"event_fields": {"type": "array", "items": {"type": "string"}},
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string",
|
|
||||||
# Don't allow '\\' in event field filters. This makes matching
|
|
||||||
# events a lot easier as we can then use a negative lookbehind
|
|
||||||
# assertion to split '\.' If we allowed \\ then it would
|
|
||||||
# incorrectly split '\\.' See synapse.events.utils.serialize_event
|
|
||||||
#
|
|
||||||
# Note that because this is a regular expression, we have to escape
|
|
||||||
# each backslash in the pattern.
|
|
||||||
"pattern": r"^((?!\\\\).)*$",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
"additionalProperties": True, # Allow new fields for forward compatibility
|
"additionalProperties": True, # Allow new fields for forward compatibility
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,7 @@ from typing import (
|
||||||
Iterable,
|
Iterable,
|
||||||
List,
|
List,
|
||||||
Mapping,
|
Mapping,
|
||||||
|
Match,
|
||||||
MutableMapping,
|
MutableMapping,
|
||||||
Optional,
|
Optional,
|
||||||
Union,
|
Union,
|
||||||
|
@ -46,12 +47,10 @@ if TYPE_CHECKING:
|
||||||
from synapse.handlers.relations import BundledAggregations
|
from synapse.handlers.relations import BundledAggregations
|
||||||
|
|
||||||
|
|
||||||
# Split strings on "." but not "\." This uses a negative lookbehind assertion for '\'
|
# Split strings on "." but not "\." (or "\\\.").
|
||||||
# (?<!stuff) matches if the current position in the string is not preceded
|
SPLIT_FIELD_REGEX = re.compile(r"\\*\.")
|
||||||
# by a match for 'stuff'.
|
# Find escaped characters, e.g. those with a \ in front of them.
|
||||||
# TODO: This is fast, but fails to handle "foo\\.bar" which should be treated as
|
ESCAPE_SEQUENCE_PATTERN = re.compile(r"\\(.)")
|
||||||
# the literal fields "foo\" and "bar" but will instead be treated as "foo\\.bar"
|
|
||||||
SPLIT_FIELD_REGEX = re.compile(r"(?<!\\)\.")
|
|
||||||
|
|
||||||
CANONICALJSON_MAX_INT = (2**53) - 1
|
CANONICALJSON_MAX_INT = (2**53) - 1
|
||||||
CANONICALJSON_MIN_INT = -CANONICALJSON_MAX_INT
|
CANONICALJSON_MIN_INT = -CANONICALJSON_MAX_INT
|
||||||
|
@ -253,6 +252,57 @@ def _copy_field(src: JsonDict, dst: JsonDict, field: List[str]) -> None:
|
||||||
sub_out_dict[key_to_move] = sub_dict[key_to_move]
|
sub_out_dict[key_to_move] = sub_dict[key_to_move]
|
||||||
|
|
||||||
|
|
||||||
|
def _escape_slash(m: Match[str]) -> str:
|
||||||
|
"""
|
||||||
|
Replacement function; replace a backslash-backslash or backslash-dot with the
|
||||||
|
second character. Leaves any other string alone.
|
||||||
|
"""
|
||||||
|
if m.group(1) in ("\\", "."):
|
||||||
|
return m.group(1)
|
||||||
|
return m.group(0)
|
||||||
|
|
||||||
|
|
||||||
|
def _split_field(field: str) -> List[str]:
|
||||||
|
"""
|
||||||
|
Splits strings on unescaped dots and removes escaping.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
field: A string representing a path to a field.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A list of nested fields to traverse.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Convert the field and remove escaping:
|
||||||
|
#
|
||||||
|
# 1. "content.body.thing\.with\.dots"
|
||||||
|
# 2. ["content", "body", "thing\.with\.dots"]
|
||||||
|
# 3. ["content", "body", "thing.with.dots"]
|
||||||
|
|
||||||
|
# Find all dots (and their preceding backslashes). If the dot is unescaped
|
||||||
|
# then emit a new field part.
|
||||||
|
result = []
|
||||||
|
prev_start = 0
|
||||||
|
for match in SPLIT_FIELD_REGEX.finditer(field):
|
||||||
|
# If the match is an *even* number of characters than the dot was escaped.
|
||||||
|
if len(match.group()) % 2 == 0:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Add a new part (up to the dot, exclusive) after escaping.
|
||||||
|
result.append(
|
||||||
|
ESCAPE_SEQUENCE_PATTERN.sub(
|
||||||
|
_escape_slash, field[prev_start : match.end() - 1]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
prev_start = match.end()
|
||||||
|
|
||||||
|
# Add any part of the field after the last unescaped dot. (Note that if the
|
||||||
|
# character is a dot this correctly adds a blank string.)
|
||||||
|
result.append(re.sub(r"\\(.)", _escape_slash, field[prev_start:]))
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:
|
def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:
|
||||||
"""Return a new dict with only the fields in 'dictionary' which are present
|
"""Return a new dict with only the fields in 'dictionary' which are present
|
||||||
in 'fields'.
|
in 'fields'.
|
||||||
|
@ -260,7 +310,7 @@ def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:
|
||||||
If there are no event fields specified then all fields are included.
|
If there are no event fields specified then all fields are included.
|
||||||
The entries may include '.' characters to indicate sub-fields.
|
The entries may include '.' characters to indicate sub-fields.
|
||||||
So ['content.body'] will include the 'body' field of the 'content' object.
|
So ['content.body'] will include the 'body' field of the 'content' object.
|
||||||
A literal '.' character in a field name may be escaped using a '\'.
|
A literal '.' or '\' character in a field name may be escaped using a '\'.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
dictionary: The dictionary to read from.
|
dictionary: The dictionary to read from.
|
||||||
|
@ -275,13 +325,7 @@ def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:
|
||||||
|
|
||||||
# for each field, convert it:
|
# for each field, convert it:
|
||||||
# ["content.body.thing\.with\.dots"] => [["content", "body", "thing\.with\.dots"]]
|
# ["content.body.thing\.with\.dots"] => [["content", "body", "thing\.with\.dots"]]
|
||||||
split_fields = [SPLIT_FIELD_REGEX.split(f) for f in fields]
|
split_fields = [_split_field(f) for f in fields]
|
||||||
|
|
||||||
# for each element of the output array of arrays:
|
|
||||||
# remove escaping so we can use the right key names.
|
|
||||||
split_fields[:] = [
|
|
||||||
[f.replace(r"\.", r".") for f in field_array] for field_array in split_fields
|
|
||||||
]
|
|
||||||
|
|
||||||
output: JsonDict = {}
|
output: JsonDict = {}
|
||||||
for field_array in split_fields:
|
for field_array in split_fields:
|
||||||
|
|
|
@ -48,8 +48,6 @@ class FilteringTestCase(unittest.HomeserverTestCase):
|
||||||
invalid_filters: List[JsonDict] = [
|
invalid_filters: List[JsonDict] = [
|
||||||
# `account_data` must be a dictionary
|
# `account_data` must be a dictionary
|
||||||
{"account_data": "Hello World"},
|
{"account_data": "Hello World"},
|
||||||
# `event_fields` entries must not contain backslashes
|
|
||||||
{"event_fields": [r"\\foo"]},
|
|
||||||
# `event_format` must be "client" or "federation"
|
# `event_format` must be "client" or "federation"
|
||||||
{"event_format": "other"},
|
{"event_format": "other"},
|
||||||
# `not_rooms` must contain valid room IDs
|
# `not_rooms` must contain valid room IDs
|
||||||
|
@ -114,10 +112,6 @@ class FilteringTestCase(unittest.HomeserverTestCase):
|
||||||
"event_format": "client",
|
"event_format": "client",
|
||||||
"event_fields": ["type", "content", "sender"],
|
"event_fields": ["type", "content", "sender"],
|
||||||
},
|
},
|
||||||
# a single backslash should be permitted (though it is debatable whether
|
|
||||||
# it should be permitted before anything other than `.`, and what that
|
|
||||||
# actually means)
|
|
||||||
#
|
|
||||||
# (note that event_fields is implemented in
|
# (note that event_fields is implemented in
|
||||||
# synapse.events.utils.serialize_event, and so whether this actually works
|
# synapse.events.utils.serialize_event, and so whether this actually works
|
||||||
# is tested elsewhere. We just want to check that it is allowed through the
|
# is tested elsewhere. We just want to check that it is allowed through the
|
||||||
|
|
|
@ -16,6 +16,7 @@ import unittest as stdlib_unittest
|
||||||
from typing import Any, List, Mapping, Optional
|
from typing import Any, List, Mapping, Optional
|
||||||
|
|
||||||
import attr
|
import attr
|
||||||
|
from parameterized import parameterized
|
||||||
|
|
||||||
from synapse.api.constants import EventContentFields
|
from synapse.api.constants import EventContentFields
|
||||||
from synapse.api.room_versions import RoomVersions
|
from synapse.api.room_versions import RoomVersions
|
||||||
|
@ -23,6 +24,7 @@ from synapse.events import EventBase, make_event_from_dict
|
||||||
from synapse.events.utils import (
|
from synapse.events.utils import (
|
||||||
PowerLevelsContent,
|
PowerLevelsContent,
|
||||||
SerializeEventConfig,
|
SerializeEventConfig,
|
||||||
|
_split_field,
|
||||||
copy_and_fixup_power_levels_contents,
|
copy_and_fixup_power_levels_contents,
|
||||||
maybe_upsert_event_field,
|
maybe_upsert_event_field,
|
||||||
prune_event,
|
prune_event,
|
||||||
|
@ -794,3 +796,40 @@ class CopyPowerLevelsContentTestCase(stdlib_unittest.TestCase):
|
||||||
def test_invalid_nesting_raises_type_error(self) -> None:
|
def test_invalid_nesting_raises_type_error(self) -> None:
|
||||||
with self.assertRaises(TypeError):
|
with self.assertRaises(TypeError):
|
||||||
copy_and_fixup_power_levels_contents({"a": {"b": {"c": 1}}}) # type: ignore[dict-item]
|
copy_and_fixup_power_levels_contents({"a": {"b": {"c": 1}}}) # type: ignore[dict-item]
|
||||||
|
|
||||||
|
|
||||||
|
class SplitFieldTestCase(stdlib_unittest.TestCase):
|
||||||
|
@parameterized.expand(
|
||||||
|
[
|
||||||
|
# A field with no dots.
|
||||||
|
["m", ["m"]],
|
||||||
|
# Simple dotted fields.
|
||||||
|
["m.foo", ["m", "foo"]],
|
||||||
|
["m.foo.bar", ["m", "foo", "bar"]],
|
||||||
|
# Backslash is used as an escape character.
|
||||||
|
[r"m\.foo", ["m.foo"]],
|
||||||
|
[r"m\\.foo", ["m\\", "foo"]],
|
||||||
|
[r"m\\\.foo", [r"m\.foo"]],
|
||||||
|
[r"m\\\\.foo", ["m\\\\", "foo"]],
|
||||||
|
[r"m\foo", [r"m\foo"]],
|
||||||
|
[r"m\\foo", [r"m\foo"]],
|
||||||
|
[r"m\\\foo", [r"m\\foo"]],
|
||||||
|
[r"m\\\\foo", [r"m\\foo"]],
|
||||||
|
# Ensure that escapes at the end don't cause issues.
|
||||||
|
["m.foo\\", ["m", "foo\\"]],
|
||||||
|
["m.foo\\", ["m", "foo\\"]],
|
||||||
|
[r"m.foo\.", ["m", "foo."]],
|
||||||
|
[r"m.foo\\.", ["m", "foo\\", ""]],
|
||||||
|
[r"m.foo\\\.", ["m", r"foo\."]],
|
||||||
|
# Empty parts (corresponding to properties which are an empty string) are allowed.
|
||||||
|
[".m", ["", "m"]],
|
||||||
|
["..m", ["", "", "m"]],
|
||||||
|
["m.", ["m", ""]],
|
||||||
|
["m..", ["m", "", ""]],
|
||||||
|
["m..foo", ["m", "", "foo"]],
|
||||||
|
# Invalid escape sequences.
|
||||||
|
[r"\m", [r"\m"]],
|
||||||
|
]
|
||||||
|
)
|
||||||
|
def test_split_field(self, input: str, expected: str) -> None:
|
||||||
|
self.assertEqual(_split_field(input), expected)
|
||||||
|
|
Loading…
Reference in New Issue