Merge pull request #1783 from pik/filter-validation
JSONSchema Validation For Filters
This commit is contained in:
commit
6af0096f4f
|
@ -15,10 +15,172 @@
|
|||
from synapse.api.errors import SynapseError
|
||||
from synapse.storage.presence import UserPresenceState
|
||||
from synapse.types import UserID, RoomID
|
||||
|
||||
from twisted.internet import defer
|
||||
|
||||
import ujson as json
|
||||
import jsonschema
|
||||
from jsonschema import FormatChecker
|
||||
|
||||
FILTER_SCHEMA = {
|
||||
"additionalProperties": False,
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"limit": {
|
||||
"type": "number"
|
||||
},
|
||||
"senders": {
|
||||
"$ref": "#/definitions/user_id_array"
|
||||
},
|
||||
"not_senders": {
|
||||
"$ref": "#/definitions/user_id_array"
|
||||
},
|
||||
# TODO: We don't limit event type values but we probably should...
|
||||
# check types are valid event types
|
||||
"types": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"not_types": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ROOM_FILTER_SCHEMA = {
|
||||
"additionalProperties": False,
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"not_rooms": {
|
||||
"$ref": "#/definitions/room_id_array"
|
||||
},
|
||||
"rooms": {
|
||||
"$ref": "#/definitions/room_id_array"
|
||||
},
|
||||
"ephemeral": {
|
||||
"$ref": "#/definitions/room_event_filter"
|
||||
},
|
||||
"include_leave": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"state": {
|
||||
"$ref": "#/definitions/room_event_filter"
|
||||
},
|
||||
"timeline": {
|
||||
"$ref": "#/definitions/room_event_filter"
|
||||
},
|
||||
"account_data": {
|
||||
"$ref": "#/definitions/room_event_filter"
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
ROOM_EVENT_FILTER_SCHEMA = {
|
||||
"additionalProperties": False,
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"limit": {
|
||||
"type": "number"
|
||||
},
|
||||
"senders": {
|
||||
"$ref": "#/definitions/user_id_array"
|
||||
},
|
||||
"not_senders": {
|
||||
"$ref": "#/definitions/user_id_array"
|
||||
},
|
||||
"types": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"not_types": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"rooms": {
|
||||
"$ref": "#/definitions/room_id_array"
|
||||
},
|
||||
"not_rooms": {
|
||||
"$ref": "#/definitions/room_id_array"
|
||||
},
|
||||
"contains_url": {
|
||||
"type": "boolean"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
USER_ID_ARRAY_SCHEMA = {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"format": "matrix_user_id"
|
||||
}
|
||||
}
|
||||
|
||||
ROOM_ID_ARRAY_SCHEMA = {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"format": "matrix_room_id"
|
||||
}
|
||||
}
|
||||
|
||||
USER_FILTER_SCHEMA = {
|
||||
"$schema": "http://json-schema.org/draft-04/schema#",
|
||||
"description": "schema for a Sync filter",
|
||||
"type": "object",
|
||||
"definitions": {
|
||||
"room_id_array": ROOM_ID_ARRAY_SCHEMA,
|
||||
"user_id_array": USER_ID_ARRAY_SCHEMA,
|
||||
"filter": FILTER_SCHEMA,
|
||||
"room_filter": ROOM_FILTER_SCHEMA,
|
||||
"room_event_filter": ROOM_EVENT_FILTER_SCHEMA
|
||||
},
|
||||
"properties": {
|
||||
"presence": {
|
||||
"$ref": "#/definitions/filter"
|
||||
},
|
||||
"account_data": {
|
||||
"$ref": "#/definitions/filter"
|
||||
},
|
||||
"room": {
|
||||
"$ref": "#/definitions/room_filter"
|
||||
},
|
||||
"event_format": {
|
||||
"type": "string",
|
||||
"enum": ["client", "federation"]
|
||||
},
|
||||
"event_fields": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string",
|
||||
# Don't allow '\\' in event field filters. This makes matching
|
||||
# events a lot easier as we can then use a negative lookbehind
|
||||
# assertion to split '\.' If we allowed \\ then it would
|
||||
# incorrectly split '\\.' See synapse.events.utils.serialize_event
|
||||
"pattern": "^((?!\\\).)*$"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": False
|
||||
}
|
||||
|
||||
|
||||
@FormatChecker.cls_checks('matrix_room_id')
|
||||
def matrix_room_id_validator(room_id_str):
|
||||
return RoomID.from_string(room_id_str)
|
||||
|
||||
|
||||
@FormatChecker.cls_checks('matrix_user_id')
|
||||
def matrix_user_id_validator(user_id_str):
|
||||
return UserID.from_string(user_id_str)
|
||||
|
||||
|
||||
class Filtering(object):
|
||||
|
@ -53,98 +215,11 @@ class Filtering(object):
|
|||
# NB: Filters are the complete json blobs. "Definitions" are an
|
||||
# individual top-level key e.g. public_user_data. Filters are made of
|
||||
# many definitions.
|
||||
|
||||
top_level_definitions = [
|
||||
"presence", "account_data"
|
||||
]
|
||||
|
||||
room_level_definitions = [
|
||||
"state", "timeline", "ephemeral", "account_data"
|
||||
]
|
||||
|
||||
for key in top_level_definitions:
|
||||
if key in user_filter_json:
|
||||
self._check_definition(user_filter_json[key])
|
||||
|
||||
if "room" in user_filter_json:
|
||||
self._check_definition_room_lists(user_filter_json["room"])
|
||||
for key in room_level_definitions:
|
||||
if key in user_filter_json["room"]:
|
||||
self._check_definition(user_filter_json["room"][key])
|
||||
|
||||
if "event_fields" in user_filter_json:
|
||||
if type(user_filter_json["event_fields"]) != list:
|
||||
raise SynapseError(400, "event_fields must be a list of strings")
|
||||
for field in user_filter_json["event_fields"]:
|
||||
if not isinstance(field, basestring):
|
||||
raise SynapseError(400, "Event field must be a string")
|
||||
# Don't allow '\\' in event field filters. This makes matching
|
||||
# events a lot easier as we can then use a negative lookbehind
|
||||
# assertion to split '\.' If we allowed \\ then it would
|
||||
# incorrectly split '\\.' See synapse.events.utils.serialize_event
|
||||
if r'\\' in field:
|
||||
raise SynapseError(
|
||||
400, r'The escape character \ cannot itself be escaped'
|
||||
)
|
||||
|
||||
def _check_definition_room_lists(self, definition):
|
||||
"""Check that "rooms" and "not_rooms" are lists of room ids if they
|
||||
are present
|
||||
|
||||
Args:
|
||||
definition(dict): The filter definition
|
||||
Raises:
|
||||
SynapseError: If there was a problem with this definition.
|
||||
"""
|
||||
# check rooms are valid room IDs
|
||||
room_id_keys = ["rooms", "not_rooms"]
|
||||
for key in room_id_keys:
|
||||
if key in definition:
|
||||
if type(definition[key]) != list:
|
||||
raise SynapseError(400, "Expected %s to be a list." % key)
|
||||
for room_id in definition[key]:
|
||||
RoomID.from_string(room_id)
|
||||
|
||||
def _check_definition(self, definition):
|
||||
"""Check if the provided definition is valid.
|
||||
|
||||
This inspects not only the types but also the values to make sure they
|
||||
make sense.
|
||||
|
||||
Args:
|
||||
definition(dict): The filter definition
|
||||
Raises:
|
||||
SynapseError: If there was a problem with this definition.
|
||||
"""
|
||||
# NB: Filters are the complete json blobs. "Definitions" are an
|
||||
# individual top-level key e.g. public_user_data. Filters are made of
|
||||
# many definitions.
|
||||
if type(definition) != dict:
|
||||
raise SynapseError(
|
||||
400, "Expected JSON object, not %s" % (definition,)
|
||||
)
|
||||
|
||||
self._check_definition_room_lists(definition)
|
||||
|
||||
# check senders are valid user IDs
|
||||
user_id_keys = ["senders", "not_senders"]
|
||||
for key in user_id_keys:
|
||||
if key in definition:
|
||||
if type(definition[key]) != list:
|
||||
raise SynapseError(400, "Expected %s to be a list." % key)
|
||||
for user_id in definition[key]:
|
||||
UserID.from_string(user_id)
|
||||
|
||||
# TODO: We don't limit event type values but we probably should...
|
||||
# check types are valid event types
|
||||
event_keys = ["types", "not_types"]
|
||||
for key in event_keys:
|
||||
if key in definition:
|
||||
if type(definition[key]) != list:
|
||||
raise SynapseError(400, "Expected %s to be a list." % key)
|
||||
for event_type in definition[key]:
|
||||
if not isinstance(event_type, basestring):
|
||||
raise SynapseError(400, "Event type should be a string")
|
||||
try:
|
||||
jsonschema.validate(user_filter_json, USER_FILTER_SCHEMA,
|
||||
format_checker=FormatChecker())
|
||||
except jsonschema.ValidationError as e:
|
||||
raise SynapseError(400, e.message)
|
||||
|
||||
|
||||
class FilterCollection(object):
|
||||
|
|
|
@ -19,6 +19,7 @@ from distutils.version import LooseVersion
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
REQUIREMENTS = {
|
||||
"jsonschema>=2.5.1": ["jsonschema>=2.5.1"],
|
||||
"frozendict>=0.4": ["frozendict"],
|
||||
"unpaddedbase64>=1.1.0": ["unpaddedbase64>=1.1.0"],
|
||||
"canonicaljson>=1.0.0": ["canonicaljson>=1.0.0"],
|
||||
|
|
|
@ -23,6 +23,9 @@ from tests.utils import (
|
|||
|
||||
from synapse.api.filtering import Filter
|
||||
from synapse.events import FrozenEvent
|
||||
from synapse.api.errors import SynapseError
|
||||
|
||||
import jsonschema
|
||||
|
||||
user_localpart = "test_user"
|
||||
|
||||
|
@ -54,6 +57,70 @@ class FilteringTestCase(unittest.TestCase):
|
|||
|
||||
self.datastore = hs.get_datastore()
|
||||
|
||||
def test_errors_on_invalid_filters(self):
|
||||
invalid_filters = [
|
||||
{"boom": {}},
|
||||
{"account_data": "Hello World"},
|
||||
{"event_fields": ["\\foo"]},
|
||||
{"room": {"timeline": {"limit": 0}, "state": {"not_bars": ["*"]}}},
|
||||
{"event_format": "other"},
|
||||
{"room": {"not_rooms": ["#foo:pik-test"]}},
|
||||
{"presence": {"senders": ["@bar;pik.test.com"]}}
|
||||
]
|
||||
for filter in invalid_filters:
|
||||
with self.assertRaises(SynapseError) as check_filter_error:
|
||||
self.filtering.check_valid_filter(filter)
|
||||
self.assertIsInstance(check_filter_error.exception, SynapseError)
|
||||
|
||||
def test_valid_filters(self):
|
||||
valid_filters = [
|
||||
{
|
||||
"room": {
|
||||
"timeline": {"limit": 20},
|
||||
"state": {"not_types": ["m.room.member"]},
|
||||
"ephemeral": {"limit": 0, "not_types": ["*"]},
|
||||
"include_leave": False,
|
||||
"rooms": ["!dee:pik-test"],
|
||||
"not_rooms": ["!gee:pik-test"],
|
||||
"account_data": {"limit": 0, "types": ["*"]}
|
||||
}
|
||||
},
|
||||
{
|
||||
"room": {
|
||||
"state": {
|
||||
"types": ["m.room.*"],
|
||||
"not_rooms": ["!726s6s6q:example.com"]
|
||||
},
|
||||
"timeline": {
|
||||
"limit": 10,
|
||||
"types": ["m.room.message"],
|
||||
"not_rooms": ["!726s6s6q:example.com"],
|
||||
"not_senders": ["@spam:example.com"]
|
||||
},
|
||||
"ephemeral": {
|
||||
"types": ["m.receipt", "m.typing"],
|
||||
"not_rooms": ["!726s6s6q:example.com"],
|
||||
"not_senders": ["@spam:example.com"]
|
||||
}
|
||||
},
|
||||
"presence": {
|
||||
"types": ["m.presence"],
|
||||
"not_senders": ["@alice:example.com"]
|
||||
},
|
||||
"event_format": "client",
|
||||
"event_fields": ["type", "content", "sender"]
|
||||
}
|
||||
]
|
||||
for filter in valid_filters:
|
||||
try:
|
||||
self.filtering.check_valid_filter(filter)
|
||||
except jsonschema.ValidationError as e:
|
||||
self.fail(e)
|
||||
|
||||
def test_limits_are_applied(self):
|
||||
# TODO
|
||||
pass
|
||||
|
||||
def test_definition_types_works_with_literals(self):
|
||||
definition = {
|
||||
"types": ["m.room.message", "org.matrix.foo.bar"]
|
||||
|
|
|
@ -33,8 +33,8 @@ PATH_PREFIX = "/_matrix/client/v2_alpha"
|
|||
class FilterTestCase(unittest.TestCase):
|
||||
|
||||
USER_ID = "@apple:test"
|
||||
EXAMPLE_FILTER = {"type": ["m.*"]}
|
||||
EXAMPLE_FILTER_JSON = '{"type": ["m.*"]}'
|
||||
EXAMPLE_FILTER = {"room": {"timeline": {"types": ["m.room.message"]}}}
|
||||
EXAMPLE_FILTER_JSON = '{"room": {"timeline": {"types": ["m.room.message"]}}}'
|
||||
TO_REGISTER = [filter]
|
||||
|
||||
@defer.inlineCallbacks
|
||||
|
|
Loading…
Reference in New Issue