From 96e0cdbc5af0563ee805ec4e588e1df14899af66 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 3 May 2022 21:27:52 +0100 Subject: [PATCH] Add a consistency check on events read from the database (#12620) I've seen a few errors which can only plausibly be explained by the calculated event id for an event being different from the ID of the event in the database. It should be cheap to check this, so let's do so and raise an exception. --- changelog.d/12620.misc | 1 + .../storage/databases/main/events_worker.py | 12 ++++ .../databases/main/test_events_worker.py | 59 ++++++++++++------- 3 files changed, 50 insertions(+), 22 deletions(-) create mode 100644 changelog.d/12620.misc diff --git a/changelog.d/12620.misc b/changelog.d/12620.misc new file mode 100644 index 000000000..63f8e540c --- /dev/null +++ b/changelog.d/12620.misc @@ -0,0 +1 @@ +Add a consistency check on events which we read from the database. diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index c31fc00ea..0a48e5d29 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -1094,6 +1094,18 @@ class EventsWorkerStore(SQLBaseStore): original_ev.internal_metadata.stream_ordering = row.stream_ordering original_ev.internal_metadata.outlier = row.outlier + # Consistency check: if the content of the event has been modified in the + # database, then the calculated event ID will not match the event id in the + # database. + if original_ev.event_id != event_id: + # it's difficult to see what to do here. Pretty much all bets are off + # if Synapse cannot rely on the consistency of its database. + raise RuntimeError( + f"Database corruption: Event {event_id} in room {d['room_id']} " + f"from the database appears to have been modified (calculated " + f"event id {original_ev.event_id})" + ) + event_map[event_id] = original_ev # finally, we can decide whether each one needs redacting, and build diff --git a/tests/storage/databases/main/test_events_worker.py b/tests/storage/databases/main/test_events_worker.py index bf6374f93..c237a8c7e 100644 --- a/tests/storage/databases/main/test_events_worker.py +++ b/tests/storage/databases/main/test_events_worker.py @@ -13,7 +13,7 @@ # limitations under the License. import json from contextlib import contextmanager -from typing import Generator, Tuple +from typing import Generator, List, Tuple from unittest import mock from twisted.enterprise.adbapi import ConnectionPool @@ -21,6 +21,7 @@ from twisted.internet.defer import CancelledError, Deferred, ensureDeferred from twisted.test.proto_helpers import MemoryReactor from synapse.api.room_versions import EventFormatVersions, RoomVersions +from synapse.events import make_event_from_dict from synapse.logging.context import LoggingContext from synapse.rest import admin from synapse.rest.client import login, room @@ -49,23 +50,28 @@ class HaveSeenEventsTestCase(unittest.HomeserverTestCase): ) ) - for idx, (rid, eid) in enumerate( + self.event_ids: List[str] = [] + for idx, rid in enumerate( ( - ("room1", "event10"), - ("room1", "event11"), - ("room1", "event12"), - ("room2", "event20"), + "room1", + "room1", + "room1", + "room2", ) ): + event_json = {"type": f"test {idx}", "room_id": rid} + event = make_event_from_dict(event_json, room_version=RoomVersions.V4) + event_id = event.event_id + self.get_success( self.store.db_pool.simple_insert( "events", { - "event_id": eid, + "event_id": event_id, "room_id": rid, "topological_ordering": idx, "stream_ordering": idx, - "type": "test", + "type": event.type, "processed": True, "outlier": False, }, @@ -75,21 +81,22 @@ class HaveSeenEventsTestCase(unittest.HomeserverTestCase): self.store.db_pool.simple_insert( "event_json", { - "event_id": eid, + "event_id": event_id, "room_id": rid, - "json": json.dumps({"type": "test", "room_id": rid}), + "json": json.dumps(event_json), "internal_metadata": "{}", "format_version": 3, }, ) ) + self.event_ids.append(event_id) def test_simple(self): with LoggingContext(name="test") as ctx: res = self.get_success( - self.store.have_seen_events("room1", ["event10", "event19"]) + self.store.have_seen_events("room1", [self.event_ids[0], "event19"]) ) - self.assertEqual(res, {"event10"}) + self.assertEqual(res, {self.event_ids[0]}) # that should result in a single db query self.assertEqual(ctx.get_resource_usage().db_txn_count, 1) @@ -97,19 +104,21 @@ class HaveSeenEventsTestCase(unittest.HomeserverTestCase): # a second lookup of the same events should cause no queries with LoggingContext(name="test") as ctx: res = self.get_success( - self.store.have_seen_events("room1", ["event10", "event19"]) + self.store.have_seen_events("room1", [self.event_ids[0], "event19"]) ) - self.assertEqual(res, {"event10"}) + self.assertEqual(res, {self.event_ids[0]}) self.assertEqual(ctx.get_resource_usage().db_txn_count, 0) def test_query_via_event_cache(self): # fetch an event into the event cache - self.get_success(self.store.get_event("event10")) + self.get_success(self.store.get_event(self.event_ids[0])) # looking it up should now cause no db hits with LoggingContext(name="test") as ctx: - res = self.get_success(self.store.have_seen_events("room1", ["event10"])) - self.assertEqual(res, {"event10"}) + res = self.get_success( + self.store.have_seen_events("room1", [self.event_ids[0]]) + ) + self.assertEqual(res, {self.event_ids[0]}) self.assertEqual(ctx.get_resource_usage().db_txn_count, 0) @@ -167,7 +176,6 @@ class DatabaseOutageTestCase(unittest.HomeserverTestCase): self.store: EventsWorkerStore = hs.get_datastores().main self.room_id = f"!room:{hs.hostname}" - self.event_ids = [f"event{i}" for i in range(20)] self._populate_events() @@ -190,8 +198,14 @@ class DatabaseOutageTestCase(unittest.HomeserverTestCase): ) ) - self.event_ids = [f"event{i}" for i in range(20)] - for idx, event_id in enumerate(self.event_ids): + self.event_ids: List[str] = [] + for idx in range(20): + event_json = { + "type": f"test {idx}", + "room_id": self.room_id, + } + event = make_event_from_dict(event_json, room_version=RoomVersions.V4) + event_id = event.event_id self.get_success( self.store.db_pool.simple_upsert( "events", @@ -201,7 +215,7 @@ class DatabaseOutageTestCase(unittest.HomeserverTestCase): "room_id": self.room_id, "topological_ordering": idx, "stream_ordering": idx, - "type": "test", + "type": event.type, "processed": True, "outlier": False, }, @@ -213,12 +227,13 @@ class DatabaseOutageTestCase(unittest.HomeserverTestCase): {"event_id": event_id}, { "room_id": self.room_id, - "json": json.dumps({"type": "test", "room_id": self.room_id}), + "json": json.dumps(event_json), "internal_metadata": "{}", "format_version": EventFormatVersions.V3, }, ) ) + self.event_ids.append(event_id) @contextmanager def _outage(self) -> Generator[None, None, None]: