From e56c79c114db2332a25dbf4b95c351a4d2684771 Mon Sep 17 00:00:00 2001 From: pik Date: Thu, 23 Mar 2017 11:42:07 -0300 Subject: [PATCH 1/4] check_valid_filter using JSONSchema * add invalid filter tests Signed-off-by: pik --- synapse/api/filtering.py | 251 +++++++++++++++++++++++------------- tests/api/test_filtering.py | 18 ++- 2 files changed, 175 insertions(+), 94 deletions(-) diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 47f0cf0fa..6acf98635 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -15,10 +15,163 @@ from synapse.api.errors import SynapseError from synapse.storage.presence import UserPresenceState from synapse.types import UserID, RoomID - from twisted.internet import defer import ujson as json +import jsonschema + +FILTER_SCHEMA = { + "additionalProperties": False, + "type": "object", + "properties": { + "limit": { + "type": "number" + }, + "senders": { + "$ref": "#/definitions/user_id_array" + }, + "not_senders": { + "$ref": "#/definitions/user_id_array" + }, + # TODO: We don't limit event type values but we probably should... + # check types are valid event types + "types": { + "type": "array", + "items": { + "type": "string" + } + }, + "not_types": { + "type": "array", + "items": { + "type": "string" + } + } + } +} + +ROOM_FILTER_SCHEMA = { + "additionalProperties": False, + "type": "object", + "properties": { + "not_rooms": { + "type": "array", + "items": { + "type": "string" + } + }, + "rooms": { + "type": "array", + "items": { + "type": "string" + } + }, + "ephemeral": { + "$ref": "#/definitions/room_event_filter" + }, + "include_leave": { + "type": "boolean" + }, + "state": { + "$ref": "#/definitions/room_event_filter" + }, + "timeline": { + "$ref": "#/definitions/room_event_filter" + }, + "accpount_data": { + "$ref": "#/definitions/room_event_filter" + }, + } +} + +ROOM_EVENT_FILTER_SCHEMA = { + "additionalProperties": False, + "type": "object", + "properties": { + "limit": { + "type": "number" + }, + "senders": { + "$ref": "#/definitions/user_id_array" + }, + "not_senders": { + "$ref": "#/definitions/user_id_array" + }, + "types": { + "type": "array", + "items": { + "type": "string" + } + }, + "not_types": { + "type": "array", + "items": { + "type": "string" + } + }, + "rooms": { + "type": "array", + "items": { + "type": "string" + } + }, + "not_rooms": { + "type": "array", + "items": { + "type": "string" + } + }, + "contains_url": { + "type": "boolean" + } + } +} + +USER_ID_ARRAY_SCHEMA = { + "type": "array", + "items": { + "type": "string", + "pattern": "^[A-Za-z0-9_]+:[A-Za-z0-9_-\.]+$" + } +} + +USER_FILTER_SCHEMA = { + "$schema": "http://json-schema.org/draft-04/schema#", + "description": "schema for a Sync filter", + "type": "object", + "definitions": { + "user_id_array": USER_ID_ARRAY_SCHEMA, + "filter": FILTER_SCHEMA, + "room_filter": ROOM_FILTER_SCHEMA, + "room_event_filter": ROOM_EVENT_FILTER_SCHEMA + }, + "properties": { + "presence": { + "$ref": "#/definitions/filter" + }, + "account_data": { + "$ref": "#/definitions/filter" + }, + "room": { + "$ref": "#/definitions/room_filter" + }, + # "event_format": { + # "type": { "enum": [ "client", "federation" ] } + # }, + "event_fields": { + "type": "array", + "items": { + "type": "string", + # Don't allow '\\' in event field filters. This makes matching + # events a lot easier as we can then use a negative lookbehind + # assertion to split '\.' If we allowed \\ then it would + # incorrectly split '\\.' See synapse.events.utils.serialize_event + "pattern": "^((?!\\\).)*$" + } + } + }, + "additionalProperties": False +} class Filtering(object): @@ -53,98 +206,10 @@ class Filtering(object): # NB: Filters are the complete json blobs. "Definitions" are an # individual top-level key e.g. public_user_data. Filters are made of # many definitions. - - top_level_definitions = [ - "presence", "account_data" - ] - - room_level_definitions = [ - "state", "timeline", "ephemeral", "account_data" - ] - - for key in top_level_definitions: - if key in user_filter_json: - self._check_definition(user_filter_json[key]) - - if "room" in user_filter_json: - self._check_definition_room_lists(user_filter_json["room"]) - for key in room_level_definitions: - if key in user_filter_json["room"]: - self._check_definition(user_filter_json["room"][key]) - - if "event_fields" in user_filter_json: - if type(user_filter_json["event_fields"]) != list: - raise SynapseError(400, "event_fields must be a list of strings") - for field in user_filter_json["event_fields"]: - if not isinstance(field, basestring): - raise SynapseError(400, "Event field must be a string") - # Don't allow '\\' in event field filters. This makes matching - # events a lot easier as we can then use a negative lookbehind - # assertion to split '\.' If we allowed \\ then it would - # incorrectly split '\\.' See synapse.events.utils.serialize_event - if r'\\' in field: - raise SynapseError( - 400, r'The escape character \ cannot itself be escaped' - ) - - def _check_definition_room_lists(self, definition): - """Check that "rooms" and "not_rooms" are lists of room ids if they - are present - - Args: - definition(dict): The filter definition - Raises: - SynapseError: If there was a problem with this definition. - """ - # check rooms are valid room IDs - room_id_keys = ["rooms", "not_rooms"] - for key in room_id_keys: - if key in definition: - if type(definition[key]) != list: - raise SynapseError(400, "Expected %s to be a list." % key) - for room_id in definition[key]: - RoomID.from_string(room_id) - - def _check_definition(self, definition): - """Check if the provided definition is valid. - - This inspects not only the types but also the values to make sure they - make sense. - - Args: - definition(dict): The filter definition - Raises: - SynapseError: If there was a problem with this definition. - """ - # NB: Filters are the complete json blobs. "Definitions" are an - # individual top-level key e.g. public_user_data. Filters are made of - # many definitions. - if type(definition) != dict: - raise SynapseError( - 400, "Expected JSON object, not %s" % (definition,) - ) - - self._check_definition_room_lists(definition) - - # check senders are valid user IDs - user_id_keys = ["senders", "not_senders"] - for key in user_id_keys: - if key in definition: - if type(definition[key]) != list: - raise SynapseError(400, "Expected %s to be a list." % key) - for user_id in definition[key]: - UserID.from_string(user_id) - - # TODO: We don't limit event type values but we probably should... - # check types are valid event types - event_keys = ["types", "not_types"] - for key in event_keys: - if key in definition: - if type(definition[key]) != list: - raise SynapseError(400, "Expected %s to be a list." % key) - for event_type in definition[key]: - if not isinstance(event_type, basestring): - raise SynapseError(400, "Event type should be a string") + try: + jsonschema.validate(user_filter_json, USER_FILTER_SCHEMA) + except jsonschema.ValidationError as e: + raise SynapseError(400, e.message) class FilterCollection(object): diff --git a/tests/api/test_filtering.py b/tests/api/test_filtering.py index 50e8607c1..ce4116ff5 100644 --- a/tests/api/test_filtering.py +++ b/tests/api/test_filtering.py @@ -23,6 +23,7 @@ from tests.utils import ( from synapse.api.filtering import Filter from synapse.events import FrozenEvent +from synapse.api.errors import SynapseError user_localpart = "test_user" @@ -34,7 +35,6 @@ def MockEvent(**kwargs): kwargs["type"] = "fake_type" return FrozenEvent(kwargs) - class FilteringTestCase(unittest.TestCase): @defer.inlineCallbacks @@ -54,6 +54,22 @@ class FilteringTestCase(unittest.TestCase): self.datastore = hs.get_datastore() + def test_errors_on_invalid_filters(self): + invalid_filters = [ + { "boom": {} }, + { "account_data": "Hello World" }, + { "event_fields": ["\\foo"] }, + { "room": { "timeline" : { "limit" : 0 }, "state": { "not_bars": ["*"]} } }, + ] + for filter in invalid_filters: + with self.assertRaises(SynapseError) as check_filter_error: + self.filtering.check_valid_filter(filter) + self.assertIsInstance(check_filter_error.exception, SynapseError) + + def test_limits_are_applied(self): + #TODO + pass + def test_definition_types_works_with_literals(self): definition = { "types": ["m.room.message", "org.matrix.foo.bar"] From acafcf1c5b1c40fe14054a1d5bb8277a9f16a492 Mon Sep 17 00:00:00 2001 From: pik Date: Tue, 7 Mar 2017 20:54:02 -0300 Subject: [PATCH 2/4] Add valid filter tests, flake8, fix typo Signed-off-by: pik --- synapse/api/filtering.py | 11 ++++---- tests/api/test_filtering.py | 54 +++++++++++++++++++++++++++++++++---- 2 files changed, 55 insertions(+), 10 deletions(-) diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 6acf98635..3d078e622 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -78,7 +78,7 @@ ROOM_FILTER_SCHEMA = { "timeline": { "$ref": "#/definitions/room_event_filter" }, - "accpount_data": { + "account_data": { "$ref": "#/definitions/room_event_filter" }, } @@ -131,7 +131,7 @@ USER_ID_ARRAY_SCHEMA = { "type": "array", "items": { "type": "string", - "pattern": "^[A-Za-z0-9_]+:[A-Za-z0-9_-\.]+$" + "pattern": "^@[A-Za-z0-9_]+:[A-Za-z0-9_\-\.]+$" } } @@ -155,9 +155,10 @@ USER_FILTER_SCHEMA = { "room": { "$ref": "#/definitions/room_filter" }, - # "event_format": { - # "type": { "enum": [ "client", "federation" ] } - # }, + "event_format": { + "type": "string", + "enum": ["client", "federation"] + }, "event_fields": { "type": "array", "items": { diff --git a/tests/api/test_filtering.py b/tests/api/test_filtering.py index ce4116ff5..1ce1acb3c 100644 --- a/tests/api/test_filtering.py +++ b/tests/api/test_filtering.py @@ -35,6 +35,7 @@ def MockEvent(**kwargs): kwargs["type"] = "fake_type" return FrozenEvent(kwargs) + class FilteringTestCase(unittest.TestCase): @defer.inlineCallbacks @@ -56,18 +57,61 @@ class FilteringTestCase(unittest.TestCase): def test_errors_on_invalid_filters(self): invalid_filters = [ - { "boom": {} }, - { "account_data": "Hello World" }, - { "event_fields": ["\\foo"] }, - { "room": { "timeline" : { "limit" : 0 }, "state": { "not_bars": ["*"]} } }, + {"boom": {}}, + {"account_data": "Hello World"}, + {"event_fields": ["\\foo"]}, + {"room": {"timeline": {"limit": 0}, "state": {"not_bars": ["*"]}}}, + {"event_format": "other"} ] for filter in invalid_filters: with self.assertRaises(SynapseError) as check_filter_error: self.filtering.check_valid_filter(filter) self.assertIsInstance(check_filter_error.exception, SynapseError) + def test_valid_filters(self): + valid_filters = [ + { + "room": { + "timeline": {"limit": 20}, + "state": {"not_types": ["m.room.member"]}, + "ephemeral": {"limit": 0, "not_types": ["*"]}, + "include_leave": False, + "rooms": ["#dee:pik-test"], + "not_rooms": ["#gee:pik-test"], + "account_data": {"limit": 0, "types": ["*"]} + } + }, + { + "room": { + "state": { + "types": ["m.room.*"], + "not_rooms": ["!726s6s6q:example.com"] + }, + "timeline": { + "limit": 10, + "types": ["m.room.message"], + "not_rooms": ["!726s6s6q:example.com"], + "not_senders": ["@spam:example.com"] + }, + "ephemeral": { + "types": ["m.receipt", "m.typing"], + "not_rooms": ["!726s6s6q:example.com"], + "not_senders": ["@spam:example.com"] + } + }, + "presence": { + "types": ["m.presence"], + "not_senders": ["@alice:example.com"] + }, + "event_format": "client", + "event_fields": ["type", "content", "sender"] + } + ] + for filter in valid_filters: + self.filtering.check_valid_filter(filter) + def test_limits_are_applied(self): - #TODO + # TODO pass def test_definition_types_works_with_literals(self): From 566641a0b5f04e444383d5e3a5493b22e551ff03 Mon Sep 17 00:00:00 2001 From: pik Date: Thu, 23 Mar 2017 11:42:41 -0300 Subject: [PATCH 3/4] use jsonschema.FormatChecker for RoomID and UserID strings * use a valid filter in rest/client/v2_alpha test Signed-off-by: pik --- synapse/api/filtering.py | 45 ++++++++++++++--------- tests/api/test_filtering.py | 15 ++++++-- tests/rest/client/v2_alpha/test_filter.py | 4 +- 3 files changed, 40 insertions(+), 24 deletions(-) diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 3d078e622..83206348e 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -19,6 +19,7 @@ from twisted.internet import defer import ujson as json import jsonschema +from jsonschema import FormatChecker FILTER_SCHEMA = { "additionalProperties": False, @@ -55,16 +56,10 @@ ROOM_FILTER_SCHEMA = { "type": "object", "properties": { "not_rooms": { - "type": "array", - "items": { - "type": "string" - } + "$ref": "#/definitions/room_id_array" }, "rooms": { - "type": "array", - "items": { - "type": "string" - } + "$ref": "#/definitions/room_id_array" }, "ephemeral": { "$ref": "#/definitions/room_event_filter" @@ -110,16 +105,10 @@ ROOM_EVENT_FILTER_SCHEMA = { } }, "rooms": { - "type": "array", - "items": { - "type": "string" - } + "$ref": "#/definitions/room_id_array" }, "not_rooms": { - "type": "array", - "items": { - "type": "string" - } + "$ref": "#/definitions/room_id_array" }, "contains_url": { "type": "boolean" @@ -131,7 +120,15 @@ USER_ID_ARRAY_SCHEMA = { "type": "array", "items": { "type": "string", - "pattern": "^@[A-Za-z0-9_]+:[A-Za-z0-9_\-\.]+$" + "format": "matrix_user_id" + } +} + +ROOM_ID_ARRAY_SCHEMA = { + "type": "array", + "items": { + "type": "string", + "format": "matrix_room_id" } } @@ -140,6 +137,7 @@ USER_FILTER_SCHEMA = { "description": "schema for a Sync filter", "type": "object", "definitions": { + "room_id_array": ROOM_ID_ARRAY_SCHEMA, "user_id_array": USER_ID_ARRAY_SCHEMA, "filter": FILTER_SCHEMA, "room_filter": ROOM_FILTER_SCHEMA, @@ -175,6 +173,16 @@ USER_FILTER_SCHEMA = { } +@FormatChecker.cls_checks('matrix_room_id') +def matrix_room_id_validator(room_id_str): + return RoomID.from_string(room_id_str) + + +@FormatChecker.cls_checks('matrix_user_id') +def matrix_user_id_validator(user_id_str): + return UserID.from_string(user_id_str) + + class Filtering(object): def __init__(self, hs): @@ -208,7 +216,8 @@ class Filtering(object): # individual top-level key e.g. public_user_data. Filters are made of # many definitions. try: - jsonschema.validate(user_filter_json, USER_FILTER_SCHEMA) + jsonschema.validate(user_filter_json, USER_FILTER_SCHEMA, + format_checker=FormatChecker()) except jsonschema.ValidationError as e: raise SynapseError(400, e.message) diff --git a/tests/api/test_filtering.py b/tests/api/test_filtering.py index 1ce1acb3c..dcceca7f3 100644 --- a/tests/api/test_filtering.py +++ b/tests/api/test_filtering.py @@ -25,6 +25,8 @@ from synapse.api.filtering import Filter from synapse.events import FrozenEvent from synapse.api.errors import SynapseError +import jsonschema + user_localpart = "test_user" @@ -61,7 +63,9 @@ class FilteringTestCase(unittest.TestCase): {"account_data": "Hello World"}, {"event_fields": ["\\foo"]}, {"room": {"timeline": {"limit": 0}, "state": {"not_bars": ["*"]}}}, - {"event_format": "other"} + {"event_format": "other"}, + {"room": {"not_rooms": ["#foo:pik-test"]}}, + {"presence": {"senders": ["@bar;pik.test.com"]}} ] for filter in invalid_filters: with self.assertRaises(SynapseError) as check_filter_error: @@ -76,8 +80,8 @@ class FilteringTestCase(unittest.TestCase): "state": {"not_types": ["m.room.member"]}, "ephemeral": {"limit": 0, "not_types": ["*"]}, "include_leave": False, - "rooms": ["#dee:pik-test"], - "not_rooms": ["#gee:pik-test"], + "rooms": ["!dee:pik-test"], + "not_rooms": ["!gee:pik-test"], "account_data": {"limit": 0, "types": ["*"]} } }, @@ -108,7 +112,10 @@ class FilteringTestCase(unittest.TestCase): } ] for filter in valid_filters: - self.filtering.check_valid_filter(filter) + try: + self.filtering.check_valid_filter(filter) + except jsonschema.ValidationError as e: + self.fail(e) def test_limits_are_applied(self): # TODO diff --git a/tests/rest/client/v2_alpha/test_filter.py b/tests/rest/client/v2_alpha/test_filter.py index 3d27d03cb..76b833e11 100644 --- a/tests/rest/client/v2_alpha/test_filter.py +++ b/tests/rest/client/v2_alpha/test_filter.py @@ -33,8 +33,8 @@ PATH_PREFIX = "/_matrix/client/v2_alpha" class FilterTestCase(unittest.TestCase): USER_ID = "@apple:test" - EXAMPLE_FILTER = {"type": ["m.*"]} - EXAMPLE_FILTER_JSON = '{"type": ["m.*"]}' + EXAMPLE_FILTER = {"room": {"timeline": {"types": ["m.room.message"]}}} + EXAMPLE_FILTER_JSON = '{"room": {"timeline": {"types": ["m.room.message"]}}}' TO_REGISTER = [filter] @defer.inlineCallbacks From 250ce11ab9589b09f827e7f633ac340993eaac9a Mon Sep 17 00:00:00 2001 From: pik Date: Thu, 23 Mar 2017 11:42:47 -0300 Subject: [PATCH 4/4] Add jsonschema to python_dependencies.py Signed-off-by: pik --- synapse/python_dependencies.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index c4777b2a2..ed7f1c89a 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -19,6 +19,7 @@ from distutils.version import LooseVersion logger = logging.getLogger(__name__) REQUIREMENTS = { + "jsonschema>=2.5.1": ["jsonschema>=2.5.1"], "frozendict>=0.4": ["frozendict"], "unpaddedbase64>=1.1.0": ["unpaddedbase64>=1.1.0"], "canonicaljson>=1.0.0": ["canonicaljson>=1.0.0"],