From c955f22e2c88676944124a4a3c80112b35231035 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 11 Jun 2021 10:27:12 +0100 Subject: [PATCH 1/5] Fix bug when running presence off master (#10149) Hopefully fixes #10027. --- changelog.d/10149.bugfix | 1 + synapse/storage/databases/main/presence.py | 2 +- synapse/storage/util/id_generators.py | 15 +++++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 changelog.d/10149.bugfix diff --git a/changelog.d/10149.bugfix b/changelog.d/10149.bugfix new file mode 100644 index 000000000..cb2d2eedb --- /dev/null +++ b/changelog.d/10149.bugfix @@ -0,0 +1 @@ +Fix a bug which caused presence updates to stop working some time after restart, when using a presence writer worker. diff --git a/synapse/storage/databases/main/presence.py b/synapse/storage/databases/main/presence.py index 6a2baa784..1388771c4 100644 --- a/synapse/storage/databases/main/presence.py +++ b/synapse/storage/databases/main/presence.py @@ -50,7 +50,7 @@ class PresenceStore(SQLBaseStore): instance_name=self._instance_name, tables=[("presence_stream", "instance_name", "stream_id")], sequence_name="presence_stream_sequence", - writers=hs.config.worker.writers.to_device, + writers=hs.config.worker.writers.presence, ) else: self._presence_id_gen = StreamIdGenerator( diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index b1bd3a52d..f1e62f9e8 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -397,6 +397,11 @@ class MultiWriterIdGenerator: # ... persist event ... """ + # If we have a list of instances that are allowed to write to this + # stream, make sure we're in it. + if self._writers and self._instance_name not in self._writers: + raise Exception("Tried to allocate stream ID on non-writer") + return _MultiWriterCtxManager(self) def get_next_mult(self, n: int): @@ -406,6 +411,11 @@ class MultiWriterIdGenerator: # ... persist events ... """ + # If we have a list of instances that are allowed to write to this + # stream, make sure we're in it. + if self._writers and self._instance_name not in self._writers: + raise Exception("Tried to allocate stream ID on non-writer") + return _MultiWriterCtxManager(self, n) def get_next_txn(self, txn: LoggingTransaction): @@ -416,6 +426,11 @@ class MultiWriterIdGenerator: # ... persist event ... """ + # If we have a list of instances that are allowed to write to this + # stream, make sure we're in it. + if self._writers and self._instance_name not in self._writers: + raise Exception("Tried to allocate stream ID on non-writer") + next_id = self._load_next_id_txn(txn) with self._lock: From 5e0b4719ea6650596470f2d3bff91a19096067b8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 11 Jun 2021 13:08:30 +0100 Subject: [PATCH 2/5] Fix sending presence over federation when using workers (#10163) When using a federation sender we'd send out all local presence updates over federation even when they shouldn't be. Fixes #10153. --- changelog.d/10163.bugfix | 1 + synapse/handlers/presence.py | 25 +++++++++++++++++++------ 2 files changed, 20 insertions(+), 6 deletions(-) create mode 100644 changelog.d/10163.bugfix diff --git a/changelog.d/10163.bugfix b/changelog.d/10163.bugfix new file mode 100644 index 000000000..7ccde6674 --- /dev/null +++ b/changelog.d/10163.bugfix @@ -0,0 +1 @@ +Fix a bug when using federation sender worker where it would send out more presence updates than necessary, leading to high resource usage. Broke in v1.33.0. diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index f5a049d75..79508580a 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -495,9 +495,6 @@ class WorkerPresenceHandler(BasePresenceHandler): users=users_to_states.keys(), ) - # If this is a federation sender, notify about presence updates. - await self.maybe_send_presence_to_interested_destinations(states) - async def process_replication_rows( self, stream_name: str, instance_name: str, token: int, rows: list ): @@ -519,11 +516,27 @@ class WorkerPresenceHandler(BasePresenceHandler): for row in rows ] - for state in states: - self.user_to_current_state[state.user_id] = state + # The list of states to notify sync streams and remote servers about. + # This is calculated by comparing the old and new states for each user + # using `should_notify(..)`. + # + # Note that this is necessary as the presence writer will periodically + # flush presence state changes that should not be notified about to the + # DB, and so will be sent over the replication stream. + state_to_notify = [] + + for new_state in states: + old_state = self.user_to_current_state.get(new_state.user_id) + self.user_to_current_state[new_state.user_id] = new_state + + if not old_state or should_notify(old_state, new_state): + state_to_notify.append(new_state) stream_id = token - await self.notify_from_replication(states, stream_id) + await self.notify_from_replication(state_to_notify, stream_id) + + # If this is a federation sender, notify about presence updates. + await self.maybe_send_presence_to_interested_destinations(state_to_notify) def get_currently_syncing_users_for_replication(self) -> Iterable[str]: return [ From cdd985c64facb15b36fdc3bf479d25d6572f29a7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 11 Jun 2021 15:19:42 +0100 Subject: [PATCH 3/5] Only send a presence state to a destination once (#10165) It turns out that we were sending the same presence state to a remote potentially multiple times. --- changelog.d/10165.bugfix | 1 + synapse/handlers/presence.py | 25 ++++++++++++------------- 2 files changed, 13 insertions(+), 13 deletions(-) create mode 100644 changelog.d/10165.bugfix diff --git a/changelog.d/10165.bugfix b/changelog.d/10165.bugfix new file mode 100644 index 000000000..8b1eeff35 --- /dev/null +++ b/changelog.d/10165.bugfix @@ -0,0 +1 @@ +Fix a bug where Synapse could send the same presence update to a remote twice. diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 79508580a..44ed7a071 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -299,14 +299,14 @@ class BasePresenceHandler(abc.ABC): if not states: return - hosts_and_states = await get_interested_remotes( + hosts_to_states = await get_interested_remotes( self.store, self.presence_router, states, ) - for destinations, states in hosts_and_states: - self._federation.send_presence_to_destinations(states, destinations) + for destination, host_states in hosts_to_states.items(): + self._federation.send_presence_to_destinations(host_states, [destination]) async def send_full_presence_to_users(self, user_ids: Collection[str]): """ @@ -842,15 +842,15 @@ class PresenceHandler(BasePresenceHandler): if to_federation_ping: federation_presence_out_counter.inc(len(to_federation_ping)) - hosts_and_states = await get_interested_remotes( + hosts_to_states = await get_interested_remotes( self.store, self.presence_router, list(to_federation_ping.values()), ) - for destinations, states in hosts_and_states: + for destination, states in hosts_to_states.items(): self._federation_queue.send_presence_to_destinations( - states, destinations + states, [destination] ) async def _handle_timeouts(self) -> None: @@ -1975,7 +1975,7 @@ async def get_interested_remotes( store: DataStore, presence_router: PresenceRouter, states: List[UserPresenceState], -) -> List[Tuple[Collection[str], List[UserPresenceState]]]: +) -> Dict[str, Set[UserPresenceState]]: """Given a list of presence states figure out which remote servers should be sent which. @@ -1987,11 +1987,9 @@ async def get_interested_remotes( states: A list of incoming user presence updates. Returns: - A list of 2-tuples of destinations and states, where for - each tuple the list of UserPresenceState should be sent to each - destination + A map from destinations to presence states to send to that destination. """ - hosts_and_states = [] # type: List[Tuple[Collection[str], List[UserPresenceState]]] + hosts_and_states: Dict[str, Set[UserPresenceState]] = {} # First we look up the rooms each user is in (as well as any explicit # subscriptions), then for each distinct room we look up the remote @@ -2003,11 +2001,12 @@ async def get_interested_remotes( for room_id, states in room_ids_to_states.items(): user_ids = await store.get_users_in_room(room_id) hosts = {get_domain_from_id(user_id) for user_id in user_ids} - hosts_and_states.append((hosts, states)) + for host in hosts: + hosts_and_states.setdefault(host, set()).update(states) for user_id, states in users_to_states.items(): host = get_domain_from_id(user_id) - hosts_and_states.append(([host], states)) + hosts_and_states.setdefault(host, set()).update(states) return hosts_and_states From fb10a73e85ff4a5c090226d046b6b7ede7e57d6e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 11 Jun 2021 15:21:34 +0100 Subject: [PATCH 4/5] 1.36.0rc2 --- CHANGES.md | 11 +++++++++++ changelog.d/10149.bugfix | 1 - changelog.d/10163.bugfix | 1 - changelog.d/10165.bugfix | 1 - synapse/__init__.py | 2 +- 5 files changed, 12 insertions(+), 4 deletions(-) delete mode 100644 changelog.d/10149.bugfix delete mode 100644 changelog.d/10163.bugfix delete mode 100644 changelog.d/10165.bugfix diff --git a/CHANGES.md b/CHANGES.md index 48e9b55c8..cafb79124 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,14 @@ +Synapse 1.36.0rc2 (2021-06-11) +============================== + +Bugfixes +-------- + +- Fix a bug which caused presence updates to stop working some time after restart, when using a presence writer worker. ([\#10149](https://github.com/matrix-org/synapse/issues/10149)) +- Fix a bug when using federation sender worker where it would send out more presence updates than necessary, leading to high resource usage. Broke in v1.33.0. ([\#10163](https://github.com/matrix-org/synapse/issues/10163)) +- Fix a bug where Synapse could send the same presence update to a remote twice. ([\#10165](https://github.com/matrix-org/synapse/issues/10165)) + + Synapse 1.36.0rc1 (2021-06-08) ============================== diff --git a/changelog.d/10149.bugfix b/changelog.d/10149.bugfix deleted file mode 100644 index cb2d2eedb..000000000 --- a/changelog.d/10149.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug which caused presence updates to stop working some time after restart, when using a presence writer worker. diff --git a/changelog.d/10163.bugfix b/changelog.d/10163.bugfix deleted file mode 100644 index 7ccde6674..000000000 --- a/changelog.d/10163.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug when using federation sender worker where it would send out more presence updates than necessary, leading to high resource usage. Broke in v1.33.0. diff --git a/changelog.d/10165.bugfix b/changelog.d/10165.bugfix deleted file mode 100644 index 8b1eeff35..000000000 --- a/changelog.d/10165.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug where Synapse could send the same presence update to a remote twice. diff --git a/synapse/__init__.py b/synapse/__init__.py index 58261d04e..407ba14a7 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -47,7 +47,7 @@ try: except ImportError: pass -__version__ = "1.36.0rc1" +__version__ = "1.36.0rc2" if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): # We import here so that we don't have to install a bunch of deps when From cbf350db63f74b9eb3922a8ebe0284f71e248a3c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 11 Jun 2021 15:30:42 +0100 Subject: [PATCH 5/5] Fixup changelog --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index cafb79124..aeec4fa5f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,7 +4,7 @@ Synapse 1.36.0rc2 (2021-06-11) Bugfixes -------- -- Fix a bug which caused presence updates to stop working some time after restart, when using a presence writer worker. ([\#10149](https://github.com/matrix-org/synapse/issues/10149)) +- Fix a bug which caused presence updates to stop working some time after a restart, when using a presence writer worker. Broke in v1.33.0. ([\#10149](https://github.com/matrix-org/synapse/issues/10149)) - Fix a bug when using federation sender worker where it would send out more presence updates than necessary, leading to high resource usage. Broke in v1.33.0. ([\#10163](https://github.com/matrix-org/synapse/issues/10163)) - Fix a bug where Synapse could send the same presence update to a remote twice. ([\#10165](https://github.com/matrix-org/synapse/issues/10165))