From ff0e8946569a2627e3b0e06aae7e7612fcc726c1 Mon Sep 17 00:00:00 2001
From: reivilibre <38398653+reivilibre@users.noreply.github.com>
Date: Thu, 13 Aug 2020 12:35:04 +0100
Subject: [PATCH] Drop federation transmission queues during a significant
 remote outage. (#7864)

* Empty federation transmission queues when we are backing off.

Fixes #7828.

Signed-off-by: Olivier Wilkinson (reivilibre) <olivier@librepush.net>

* Address feedback

Signed-off-by: Olivier Wilkinson (reivilibre) <olivier@librepush.net>

* Reword newsfile
---
 changelog.d/7864.bugfix                       |  1 +
 .../sender/per_destination_queue.py           | 22 +++++++++++++++++++
 2 files changed, 23 insertions(+)
 create mode 100644 changelog.d/7864.bugfix

diff --git a/changelog.d/7864.bugfix b/changelog.d/7864.bugfix
new file mode 100644
index 000000000..8623355fe
--- /dev/null
+++ b/changelog.d/7864.bugfix
@@ -0,0 +1 @@
+Fix a memory leak by limiting the length of time that messages will be queued for a remote server that has been unreachable.
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index dd150f89a..8cbc23d90 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -337,6 +337,28 @@ class PerDestinationQueue(object):
                     (e.retry_last_ts + e.retry_interval) / 1000.0
                 ),
             )
+
+            if e.retry_interval > 60 * 60 * 1000:
+                # we won't retry for another hour!
+                # (this suggests a significant outage)
+                # We drop pending PDUs and EDUs because otherwise they will
+                # rack up indefinitely.
+                # Note that:
+                # - the EDUs that are being dropped here are those that we can
+                #   afford to drop (specifically, only typing notifications,
+                #   read receipts and presence updates are being dropped here)
+                # - Other EDUs such as to_device messages are queued with a
+                #   different mechanism
+                # - this is all volatile state that would be lost if the
+                #   federation sender restarted anyway
+
+                # dropping read receipts is a bit sad but should be solved
+                # through another mechanism, because this is all volatile!
+                self._pending_pdus = []
+                self._pending_edus = []
+                self._pending_edus_keyed = {}
+                self._pending_presence = {}
+                self._pending_rrs = {}
         except FederationDeniedError as e:
             logger.info(e)
         except HttpResponseException as e: