From 3a569fb2000e972efe2e145d57ffd9441ee41665 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 8 Apr 2021 17:30:01 +0100
Subject: [PATCH] Fix sharded federation sender sometimes using 100% CPU.

We pull all destinations requiring catchup from the DB in batches.
However, if all those destinations get filtered out (due to the
federation sender being sharded), then the `last_processed` destination
doesn't get updated, and we keep requesting the same set repeatedly.
---
 changelog.d/9770.bugfix               |  1 +
 synapse/federation/sender/__init__.py | 11 ++++++++---
 2 files changed, 9 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/9770.bugfix

diff --git a/changelog.d/9770.bugfix b/changelog.d/9770.bugfix
new file mode 100644
index 000000000..baf93138d
--- /dev/null
+++ b/changelog.d/9770.bugfix
@@ -0,0 +1 @@
+Fix bug where sharded federation senders could get stuck repeatedly querying the DB in a loop, using lots of CPU.
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index 98bfce22f..d821dcbf6 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -734,16 +734,21 @@ class FederationSender(AbstractFederationSender):
                 self._catchup_after_startup_timer = None
                 break
 
+            # Remember the last destination of the *unfiltered* batch: if every
+            # destination below is filtered out (handled by another shard) we
+            # must still move on, or the same batch is requested repeatedly.
+            last_processed = destinations_to_wake[-1]
+
             destinations_to_wake = [
                 d
                 for d in destinations_to_wake
                 if self._federation_shard_config.should_handle(self._instance_name, d)
             ]
 
-            for last_processed in destinations_to_wake:
+            for destination in destinations_to_wake:
                 logger.info(
                     "Destination %s has outstanding catch-up, waking up.",
-                    last_processed,
+                    destination,
                 )
-                self.wake_destination(last_processed)
+                self.wake_destination(destination)
                 await self.clock.sleep(CATCH_UP_STARTUP_INTERVAL_SEC)