mirror of
https://mau.dev/maunium/synapse.git
synced 2024-12-14 02:43:45 +01:00
Avoid deep recursion in appservice recovery (#5885)
Hopefully, this will fix a stack overflow when recovering an appservice. The recursion here leads to a huge chain of deferred callbacks, which then overflows the stack when the chain completes. `inlineCallbacks` does a better job of this if we use iteration instead. Clean up the code a bit too, while we're there.
This commit is contained in:
parent
c886f976e0
commit
baa3f4a80d
2 changed files with 25 additions and 17 deletions
1
changelog.d/5885.bugfix
Normal file
1
changelog.d/5885.bugfix
Normal file
|
@ -0,0 +1 @@
|
|||
Fix stack overflow when recovering an appservice which had an outage.
|
|
@ -224,7 +224,9 @@ class _Recoverer(object):
|
|||
"as-recoverer-%s" % (self.service.id,), self.retry
|
||||
)
|
||||
|
||||
self.clock.call_later((2 ** self.backoff_counter), _retry)
|
||||
delay = 2 ** self.backoff_counter
|
||||
logger.info("Scheduling retries on %s in %fs", self.service.id, delay)
|
||||
self.clock.call_later(delay, _retry)
|
||||
|
||||
def _backoff(self):
|
||||
# cap the backoff to be around 8.5min => (2^9) = 512 secs
|
||||
|
@ -234,25 +236,30 @@ class _Recoverer(object):
|
|||
|
||||
@defer.inlineCallbacks
|
||||
def retry(self):
|
||||
logger.info("Starting retries on %s", self.service.id)
|
||||
try:
|
||||
txn = yield self.store.get_oldest_unsent_txn(self.service)
|
||||
if txn:
|
||||
while True:
|
||||
txn = yield self.store.get_oldest_unsent_txn(self.service)
|
||||
if not txn:
|
||||
# nothing left: we're done!
|
||||
self.callback(self)
|
||||
return
|
||||
|
||||
logger.info(
|
||||
"Retrying transaction %s for AS ID %s", txn.id, txn.service.id
|
||||
)
|
||||
sent = yield txn.send(self.as_api)
|
||||
if sent:
|
||||
yield txn.complete(self.store)
|
||||
# reset the backoff counter and retry immediately
|
||||
self.backoff_counter = 1
|
||||
yield self.retry()
|
||||
else:
|
||||
self._backoff()
|
||||
else:
|
||||
self._set_service_recovered()
|
||||
except Exception as e:
|
||||
logger.exception(e)
|
||||
self._backoff()
|
||||
if not sent:
|
||||
break
|
||||
|
||||
def _set_service_recovered(self):
|
||||
self.callback(self)
|
||||
yield txn.complete(self.store)
|
||||
|
||||
# reset the backoff counter and then process the next transaction
|
||||
self.backoff_counter = 1
|
||||
|
||||
except Exception:
|
||||
logger.exception("Unexpected error running retries")
|
||||
|
||||
# we didn't manage to send all of the transactions before we got an error of
|
||||
# some flavour: reschedule the next retry.
|
||||
self._backoff()
|
||||
|
|
Loading…
Reference in a new issue