mirror of
https://mau.dev/maunium/synapse.git
synced 2024-12-14 19:53:51 +01:00
Avoid deep recursion in appservice recovery (#5885)
Hopefully, this will fix a stack overflow when recovering an appservice. The recursion here leads to a huge chain of deferred callbacks, which then overflows the stack when the chain completes. `inlineCallbacks` handles this better if we use iteration instead of recursion. Clean up the code a bit too, while we're there.
This commit is contained in:
parent
c886f976e0
commit
baa3f4a80d
2 changed files with 25 additions and 17 deletions
1
changelog.d/5885.bugfix
Normal file
1
changelog.d/5885.bugfix
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Fix stack overflow when recovering an appservice which had an outage.
|
|
@ -224,7 +224,9 @@ class _Recoverer(object):
|
||||||
"as-recoverer-%s" % (self.service.id,), self.retry
|
"as-recoverer-%s" % (self.service.id,), self.retry
|
||||||
)
|
)
|
||||||
|
|
||||||
self.clock.call_later((2 ** self.backoff_counter), _retry)
|
delay = 2 ** self.backoff_counter
|
||||||
|
logger.info("Scheduling retries on %s in %fs", self.service.id, delay)
|
||||||
|
self.clock.call_later(delay, _retry)
|
||||||
|
|
||||||
def _backoff(self):
|
def _backoff(self):
|
||||||
# cap the backoff to be around 8.5min => (2^9) = 512 secs
|
# cap the backoff to be around 8.5min => (2^9) = 512 secs
|
||||||
|
@ -234,25 +236,30 @@ class _Recoverer(object):
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def retry(self):
|
def retry(self):
|
||||||
|
logger.info("Starting retries on %s", self.service.id)
|
||||||
try:
|
try:
|
||||||
|
while True:
|
||||||
txn = yield self.store.get_oldest_unsent_txn(self.service)
|
txn = yield self.store.get_oldest_unsent_txn(self.service)
|
||||||
if txn:
|
if not txn:
|
||||||
|
# nothing left: we're done!
|
||||||
|
self.callback(self)
|
||||||
|
return
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"Retrying transaction %s for AS ID %s", txn.id, txn.service.id
|
"Retrying transaction %s for AS ID %s", txn.id, txn.service.id
|
||||||
)
|
)
|
||||||
sent = yield txn.send(self.as_api)
|
sent = yield txn.send(self.as_api)
|
||||||
if sent:
|
if not sent:
|
||||||
yield txn.complete(self.store)
|
break
|
||||||
# reset the backoff counter and retry immediately
|
|
||||||
self.backoff_counter = 1
|
|
||||||
yield self.retry()
|
|
||||||
else:
|
|
||||||
self._backoff()
|
|
||||||
else:
|
|
||||||
self._set_service_recovered()
|
|
||||||
except Exception as e:
|
|
||||||
logger.exception(e)
|
|
||||||
self._backoff()
|
|
||||||
|
|
||||||
def _set_service_recovered(self):
|
yield txn.complete(self.store)
|
||||||
self.callback(self)
|
|
||||||
|
# reset the backoff counter and then process the next transaction
|
||||||
|
self.backoff_counter = 1
|
||||||
|
|
||||||
|
except Exception:
|
||||||
|
logger.exception("Unexpected error running retries")
|
||||||
|
|
||||||
|
# we didn't manage to send all of the transactions before we got an error of
|
||||||
|
# some flavour: reschedule the next retry.
|
||||||
|
self._backoff()
|
||||||
|
|
Loading…
Reference in a new issue