From ae7a132f38404e9f654ab1b7c5dd84ba6a3efda6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 31 Jan 2017 13:40:09 +0000 Subject: [PATCH 1/4] Better handle 404 response for federation /send/ --- synapse/federation/transaction_queue.py | 1 + synapse/util/retryutils.py | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index d18f6b6cf..cb106c6a1 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -319,6 +319,7 @@ class TransactionQueue(object): destination, self.clock, self.store, + backoff_on_404=True, # If we get a 404 the other side has gone ) device_message_edus, device_stream_id, dev_list_id = ( diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index e2de7fce9..cc88a0b53 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -88,7 +88,7 @@ class RetryDestinationLimiter(object): def __init__(self, destination, clock, store, retry_interval, min_retry_interval=10 * 60 * 1000, max_retry_interval=24 * 60 * 60 * 1000, - multiplier_retry_interval=5,): + multiplier_retry_interval=5, backoff_on_404=False): """Marks the destination as "down" if an exception is thrown in the context, except for CodeMessageException with code < 500. @@ -107,6 +107,7 @@ class RetryDestinationLimiter(object): a failed request, in milliseconds. multiplier_retry_interval (int): The multiplier to use to increase the retry interval after a failed request. 
+ backoff_on_404 (bool): Back off if we get a 404 """ self.clock = clock self.store = store @@ -116,6 +117,7 @@ class RetryDestinationLimiter(object): self.min_retry_interval = min_retry_interval self.max_retry_interval = max_retry_interval self.multiplier_retry_interval = multiplier_retry_interval + self.backoff_on_404 = backoff_on_404 def __enter__(self): pass @@ -123,7 +125,16 @@ class RetryDestinationLimiter(object): def __exit__(self, exc_type, exc_val, exc_tb): valid_err_code = False if exc_type is not None and issubclass(exc_type, CodeMessageException): - valid_err_code = exc_val.code != 429 and 0 <= exc_val.code < 500 + if exc_val.code < 400: + valid_err_code = True + elif exc_val.code == 404 and self.backoff_on_404: + valid_err_code = False + elif exc_val.code == 429: + valid_err_code = False + elif exc_val.code < 500: + valid_err_code = True + else: + valid_err_code = False if exc_type is None or valid_err_code: # We connected successfully. From 85c590105f87a6cd138f1509f70087aa0881cf2d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 31 Jan 2017 13:46:38 +0000 Subject: [PATCH 2/4] Comment --- synapse/util/retryutils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index cc88a0b53..0961dd5b2 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -125,6 +125,10 @@ class RetryDestinationLimiter(object): def __exit__(self, exc_type, exc_val, exc_tb): valid_err_code = False if exc_type is not None and issubclass(exc_type, CodeMessageException): + # Some error codes are perfectly fine for some APIs, whereas other + # APIs may expect to never received e.g. a 404. It's important to + # handle 404 as some remote servers will return a 404 when the HS + # has been decommissioned. 
 if exc_val.code < 400: valid_err_code = True elif exc_val.code == 404 and self.backoff_on_404: From 4c0ec15bdcb8fbecf5e4f6cdd3017c9c53076972 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 31 Jan 2017 13:53:46 +0000 Subject: [PATCH 3/4] Comment --- synapse/util/retryutils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index 0961dd5b2..5c7fc1afb 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -134,6 +134,8 @@ class RetryDestinationLimiter(object): elif exc_val.code == 404 and self.backoff_on_404: valid_err_code = False elif exc_val.code == 429: + # 429 is us being aggressively rate limited, so lets rate limit + # ourselves. valid_err_code = False elif exc_val.code < 500: valid_err_code = True From fe08db2713cb35e1424034d58d750ebdc52cedbc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 31 Jan 2017 15:21:32 +0000 Subject: [PATCH 4/4] Remove explicit < 400 check as apparently this is confusing --- synapse/util/retryutils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index 5c7fc1afb..b94ae369c 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -129,9 +129,7 @@ class RetryDestinationLimiter(object): # APIs may expect to never received e.g. a 404. It's important to # handle 404 as some remote servers will return a 404 when the HS # has been decommissioned. - if exc_val.code < 400: - valid_err_code = True - elif exc_val.code == 404 and self.backoff_on_404: + if exc_val.code == 404 and self.backoff_on_404: valid_err_code = False elif exc_val.code == 429: # 429 is us being aggressively rate limited, so lets rate limit