forked from MirrorHub/synapse
Don't hammer the database for destination retry timings every ~5mins (#10036)
This commit is contained in:
parent
e8ac9ac8ca
commit
3e831f24ff
10 changed files with 62 additions and 76 deletions
1
changelog.d/10036.misc
Normal file
1
changelog.d/10036.misc
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Properly invalidate caches for destination retry timings every (instead of expiring entries every 5 minutes).
|
|
@ -61,7 +61,6 @@ from synapse.replication.slave.storage.pushers import SlavedPusherStore
|
||||||
from synapse.replication.slave.storage.receipts import SlavedReceiptsStore
|
from synapse.replication.slave.storage.receipts import SlavedReceiptsStore
|
||||||
from synapse.replication.slave.storage.registration import SlavedRegistrationStore
|
from synapse.replication.slave.storage.registration import SlavedRegistrationStore
|
||||||
from synapse.replication.slave.storage.room import RoomStore
|
from synapse.replication.slave.storage.room import RoomStore
|
||||||
from synapse.replication.slave.storage.transactions import SlavedTransactionStore
|
|
||||||
from synapse.rest.admin import register_servlets_for_media_repo
|
from synapse.rest.admin import register_servlets_for_media_repo
|
||||||
from synapse.rest.client.v1 import events, login, presence, room
|
from synapse.rest.client.v1 import events, login, presence, room
|
||||||
from synapse.rest.client.v1.initial_sync import InitialSyncRestServlet
|
from synapse.rest.client.v1.initial_sync import InitialSyncRestServlet
|
||||||
|
@ -237,7 +236,6 @@ class GenericWorkerSlavedStore(
|
||||||
DirectoryStore,
|
DirectoryStore,
|
||||||
SlavedApplicationServiceStore,
|
SlavedApplicationServiceStore,
|
||||||
SlavedRegistrationStore,
|
SlavedRegistrationStore,
|
||||||
SlavedTransactionStore,
|
|
||||||
SlavedProfileStore,
|
SlavedProfileStore,
|
||||||
SlavedClientIpStore,
|
SlavedClientIpStore,
|
||||||
SlavedFilteringStore,
|
SlavedFilteringStore,
|
||||||
|
|
|
@ -160,7 +160,7 @@ class Authenticator:
|
||||||
# If we get a valid signed request from the other side, its probably
|
# If we get a valid signed request from the other side, its probably
|
||||||
# alive
|
# alive
|
||||||
retry_timings = await self.store.get_destination_retry_timings(origin)
|
retry_timings = await self.store.get_destination_retry_timings(origin)
|
||||||
if retry_timings and retry_timings["retry_last_ts"]:
|
if retry_timings and retry_timings.retry_last_ts:
|
||||||
run_in_background(self._reset_retry_timings, origin)
|
run_in_background(self._reset_retry_timings, origin)
|
||||||
|
|
||||||
return origin
|
return origin
|
||||||
|
|
|
@ -1,21 +0,0 @@
|
||||||
# Copyright 2015, 2016 OpenMarket Ltd
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
from synapse.storage.databases.main.transactions import TransactionStore
|
|
||||||
|
|
||||||
from ._base import BaseSlavedStore
|
|
||||||
|
|
||||||
|
|
||||||
class SlavedTransactionStore(TransactionStore, BaseSlavedStore):
|
|
||||||
pass
|
|
|
@ -67,7 +67,7 @@ from .state import StateStore
|
||||||
from .stats import StatsStore
|
from .stats import StatsStore
|
||||||
from .stream import StreamStore
|
from .stream import StreamStore
|
||||||
from .tags import TagsStore
|
from .tags import TagsStore
|
||||||
from .transactions import TransactionStore
|
from .transactions import TransactionWorkerStore
|
||||||
from .ui_auth import UIAuthStore
|
from .ui_auth import UIAuthStore
|
||||||
from .user_directory import UserDirectoryStore
|
from .user_directory import UserDirectoryStore
|
||||||
from .user_erasure_store import UserErasureStore
|
from .user_erasure_store import UserErasureStore
|
||||||
|
@ -83,7 +83,7 @@ class DataStore(
|
||||||
StreamStore,
|
StreamStore,
|
||||||
ProfileStore,
|
ProfileStore,
|
||||||
PresenceStore,
|
PresenceStore,
|
||||||
TransactionStore,
|
TransactionWorkerStore,
|
||||||
DirectoryStore,
|
DirectoryStore,
|
||||||
KeyStore,
|
KeyStore,
|
||||||
StateStore,
|
StateStore,
|
||||||
|
|
|
@ -16,13 +16,15 @@ import logging
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from typing import Iterable, List, Optional, Tuple
|
from typing import Iterable, List, Optional, Tuple
|
||||||
|
|
||||||
|
import attr
|
||||||
from canonicaljson import encode_canonical_json
|
from canonicaljson import encode_canonical_json
|
||||||
|
|
||||||
from synapse.metrics.background_process_metrics import wrap_as_background_process
|
from synapse.metrics.background_process_metrics import wrap_as_background_process
|
||||||
from synapse.storage._base import SQLBaseStore, db_to_json
|
from synapse.storage._base import db_to_json
|
||||||
from synapse.storage.database import DatabasePool, LoggingTransaction
|
from synapse.storage.database import DatabasePool, LoggingTransaction
|
||||||
|
from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore
|
||||||
from synapse.types import JsonDict
|
from synapse.types import JsonDict
|
||||||
from synapse.util.caches.expiringcache import ExpiringCache
|
from synapse.util.caches.descriptors import cached
|
||||||
|
|
||||||
db_binary_type = memoryview
|
db_binary_type = memoryview
|
||||||
|
|
||||||
|
@ -38,10 +40,23 @@ _UpdateTransactionRow = namedtuple(
|
||||||
"_TransactionRow", ("response_code", "response_json")
|
"_TransactionRow", ("response_code", "response_json")
|
||||||
)
|
)
|
||||||
|
|
||||||
SENTINEL = object()
|
|
||||||
|
@attr.s(slots=True, frozen=True, auto_attribs=True)
|
||||||
|
class DestinationRetryTimings:
|
||||||
|
"""The current destination retry timing info for a remote server."""
|
||||||
|
|
||||||
|
# The first time we tried and failed to reach the remote server, in ms.
|
||||||
|
failure_ts: int
|
||||||
|
|
||||||
|
# The last time we tried and failed to reach the remote server, in ms.
|
||||||
|
retry_last_ts: int
|
||||||
|
|
||||||
|
# How long since the last time we tried to reach the remote server before
|
||||||
|
# trying again, in ms.
|
||||||
|
retry_interval: int
|
||||||
|
|
||||||
|
|
||||||
class TransactionWorkerStore(SQLBaseStore):
|
class TransactionWorkerStore(CacheInvalidationWorkerStore):
|
||||||
def __init__(self, database: DatabasePool, db_conn, hs):
|
def __init__(self, database: DatabasePool, db_conn, hs):
|
||||||
super().__init__(database, db_conn, hs)
|
super().__init__(database, db_conn, hs)
|
||||||
|
|
||||||
|
@ -60,19 +75,6 @@ class TransactionWorkerStore(SQLBaseStore):
|
||||||
"_cleanup_transactions", _cleanup_transactions_txn
|
"_cleanup_transactions", _cleanup_transactions_txn
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TransactionStore(TransactionWorkerStore):
|
|
||||||
"""A collection of queries for handling PDUs."""
|
|
||||||
|
|
||||||
def __init__(self, database: DatabasePool, db_conn, hs):
|
|
||||||
super().__init__(database, db_conn, hs)
|
|
||||||
|
|
||||||
self._destination_retry_cache = ExpiringCache(
|
|
||||||
cache_name="get_destination_retry_timings",
|
|
||||||
clock=self._clock,
|
|
||||||
expiry_ms=5 * 60 * 1000,
|
|
||||||
)
|
|
||||||
|
|
||||||
async def get_received_txn_response(
|
async def get_received_txn_response(
|
||||||
self, transaction_id: str, origin: str
|
self, transaction_id: str, origin: str
|
||||||
) -> Optional[Tuple[int, JsonDict]]:
|
) -> Optional[Tuple[int, JsonDict]]:
|
||||||
|
@ -145,7 +147,11 @@ class TransactionStore(TransactionWorkerStore):
|
||||||
desc="set_received_txn_response",
|
desc="set_received_txn_response",
|
||||||
)
|
)
|
||||||
|
|
||||||
async def get_destination_retry_timings(self, destination):
|
@cached(max_entries=10000)
|
||||||
|
async def get_destination_retry_timings(
|
||||||
|
self,
|
||||||
|
destination: str,
|
||||||
|
) -> Optional[DestinationRetryTimings]:
|
||||||
"""Gets the current retry timings (if any) for a given destination.
|
"""Gets the current retry timings (if any) for a given destination.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -156,34 +162,29 @@ class TransactionStore(TransactionWorkerStore):
|
||||||
Otherwise a dict for the retry scheme
|
Otherwise a dict for the retry scheme
|
||||||
"""
|
"""
|
||||||
|
|
||||||
result = self._destination_retry_cache.get(destination, SENTINEL)
|
|
||||||
if result is not SENTINEL:
|
|
||||||
return result
|
|
||||||
|
|
||||||
result = await self.db_pool.runInteraction(
|
result = await self.db_pool.runInteraction(
|
||||||
"get_destination_retry_timings",
|
"get_destination_retry_timings",
|
||||||
self._get_destination_retry_timings,
|
self._get_destination_retry_timings,
|
||||||
destination,
|
destination,
|
||||||
)
|
)
|
||||||
|
|
||||||
# We don't hugely care about race conditions between getting and
|
|
||||||
# invalidating the cache, since we time out fairly quickly anyway.
|
|
||||||
self._destination_retry_cache[destination] = result
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def _get_destination_retry_timings(self, txn, destination):
|
def _get_destination_retry_timings(
|
||||||
|
self, txn, destination: str
|
||||||
|
) -> Optional[DestinationRetryTimings]:
|
||||||
result = self.db_pool.simple_select_one_txn(
|
result = self.db_pool.simple_select_one_txn(
|
||||||
txn,
|
txn,
|
||||||
table="destinations",
|
table="destinations",
|
||||||
keyvalues={"destination": destination},
|
keyvalues={"destination": destination},
|
||||||
retcols=("destination", "failure_ts", "retry_last_ts", "retry_interval"),
|
retcols=("failure_ts", "retry_last_ts", "retry_interval"),
|
||||||
allow_none=True,
|
allow_none=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
# check we have a row and retry_last_ts is not null or zero
|
# check we have a row and retry_last_ts is not null or zero
|
||||||
# (retry_last_ts can't be negative)
|
# (retry_last_ts can't be negative)
|
||||||
if result and result["retry_last_ts"]:
|
if result and result["retry_last_ts"]:
|
||||||
return result
|
return DestinationRetryTimings(**result)
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@ -204,7 +205,6 @@ class TransactionStore(TransactionWorkerStore):
|
||||||
retry_interval: how long until next retry in ms
|
retry_interval: how long until next retry in ms
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self._destination_retry_cache.pop(destination, None)
|
|
||||||
if self.database_engine.can_native_upsert:
|
if self.database_engine.can_native_upsert:
|
||||||
return await self.db_pool.runInteraction(
|
return await self.db_pool.runInteraction(
|
||||||
"set_destination_retry_timings",
|
"set_destination_retry_timings",
|
||||||
|
@ -252,6 +252,10 @@ class TransactionStore(TransactionWorkerStore):
|
||||||
|
|
||||||
txn.execute(sql, (destination, failure_ts, retry_last_ts, retry_interval))
|
txn.execute(sql, (destination, failure_ts, retry_last_ts, retry_interval))
|
||||||
|
|
||||||
|
self._invalidate_cache_and_stream(
|
||||||
|
txn, self.get_destination_retry_timings, (destination,)
|
||||||
|
)
|
||||||
|
|
||||||
def _set_destination_retry_timings_emulated(
|
def _set_destination_retry_timings_emulated(
|
||||||
self, txn, destination, failure_ts, retry_last_ts, retry_interval
|
self, txn, destination, failure_ts, retry_last_ts, retry_interval
|
||||||
):
|
):
|
||||||
|
@ -295,6 +299,10 @@ class TransactionStore(TransactionWorkerStore):
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self._invalidate_cache_and_stream(
|
||||||
|
txn, self.get_destination_retry_timings, (destination,)
|
||||||
|
)
|
||||||
|
|
||||||
async def store_destination_rooms_entries(
|
async def store_destination_rooms_entries(
|
||||||
self,
|
self,
|
||||||
destinations: Iterable[str],
|
destinations: Iterable[str],
|
||||||
|
|
|
@ -82,11 +82,9 @@ async def get_retry_limiter(destination, clock, store, ignore_backoff=False, **k
|
||||||
retry_timings = await store.get_destination_retry_timings(destination)
|
retry_timings = await store.get_destination_retry_timings(destination)
|
||||||
|
|
||||||
if retry_timings:
|
if retry_timings:
|
||||||
failure_ts = retry_timings["failure_ts"]
|
failure_ts = retry_timings.failure_ts
|
||||||
retry_last_ts, retry_interval = (
|
retry_last_ts = retry_timings.retry_last_ts
|
||||||
retry_timings["retry_last_ts"],
|
retry_interval = retry_timings.retry_interval
|
||||||
retry_timings["retry_interval"],
|
|
||||||
)
|
|
||||||
|
|
||||||
now = int(clock.time_msec())
|
now = int(clock.time_msec())
|
||||||
|
|
||||||
|
|
|
@ -89,14 +89,8 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase):
|
||||||
self.event_source = hs.get_event_sources().sources["typing"]
|
self.event_source = hs.get_event_sources().sources["typing"]
|
||||||
|
|
||||||
self.datastore = hs.get_datastore()
|
self.datastore = hs.get_datastore()
|
||||||
retry_timings_res = {
|
|
||||||
"destination": "",
|
|
||||||
"retry_last_ts": 0,
|
|
||||||
"retry_interval": 0,
|
|
||||||
"failure_ts": None,
|
|
||||||
}
|
|
||||||
self.datastore.get_destination_retry_timings = Mock(
|
self.datastore.get_destination_retry_timings = Mock(
|
||||||
return_value=defer.succeed(retry_timings_res)
|
return_value=defer.succeed(None)
|
||||||
)
|
)
|
||||||
|
|
||||||
self.datastore.get_device_updates_by_remote = Mock(
|
self.datastore.get_device_updates_by_remote = Mock(
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
from synapse.storage.databases.main.transactions import DestinationRetryTimings
|
||||||
from synapse.util.retryutils import MAX_RETRY_INTERVAL
|
from synapse.util.retryutils import MAX_RETRY_INTERVAL
|
||||||
|
|
||||||
from tests.unittest import HomeserverTestCase
|
from tests.unittest import HomeserverTestCase
|
||||||
|
@ -36,8 +37,11 @@ class TransactionStoreTestCase(HomeserverTestCase):
|
||||||
d = self.store.get_destination_retry_timings("example.com")
|
d = self.store.get_destination_retry_timings("example.com")
|
||||||
r = self.get_success(d)
|
r = self.get_success(d)
|
||||||
|
|
||||||
self.assert_dict(
|
self.assertEqual(
|
||||||
{"retry_last_ts": 50, "retry_interval": 100, "failure_ts": 1000}, r
|
DestinationRetryTimings(
|
||||||
|
retry_last_ts=50, retry_interval=100, failure_ts=1000
|
||||||
|
),
|
||||||
|
r,
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_initial_set_transactions(self):
|
def test_initial_set_transactions(self):
|
||||||
|
|
|
@ -51,10 +51,12 @@ class RetryLimiterTestCase(HomeserverTestCase):
|
||||||
except AssertionError:
|
except AssertionError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
self.pump()
|
||||||
|
|
||||||
new_timings = self.get_success(store.get_destination_retry_timings("test_dest"))
|
new_timings = self.get_success(store.get_destination_retry_timings("test_dest"))
|
||||||
self.assertEqual(new_timings["failure_ts"], failure_ts)
|
self.assertEqual(new_timings.failure_ts, failure_ts)
|
||||||
self.assertEqual(new_timings["retry_last_ts"], failure_ts)
|
self.assertEqual(new_timings.retry_last_ts, failure_ts)
|
||||||
self.assertEqual(new_timings["retry_interval"], MIN_RETRY_INTERVAL)
|
self.assertEqual(new_timings.retry_interval, MIN_RETRY_INTERVAL)
|
||||||
|
|
||||||
# now if we try again we should get a failure
|
# now if we try again we should get a failure
|
||||||
self.get_failure(
|
self.get_failure(
|
||||||
|
@ -77,14 +79,16 @@ class RetryLimiterTestCase(HomeserverTestCase):
|
||||||
except AssertionError:
|
except AssertionError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
self.pump()
|
||||||
|
|
||||||
new_timings = self.get_success(store.get_destination_retry_timings("test_dest"))
|
new_timings = self.get_success(store.get_destination_retry_timings("test_dest"))
|
||||||
self.assertEqual(new_timings["failure_ts"], failure_ts)
|
self.assertEqual(new_timings.failure_ts, failure_ts)
|
||||||
self.assertEqual(new_timings["retry_last_ts"], retry_ts)
|
self.assertEqual(new_timings.retry_last_ts, retry_ts)
|
||||||
self.assertGreaterEqual(
|
self.assertGreaterEqual(
|
||||||
new_timings["retry_interval"], MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 0.5
|
new_timings.retry_interval, MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 0.5
|
||||||
)
|
)
|
||||||
self.assertLessEqual(
|
self.assertLessEqual(
|
||||||
new_timings["retry_interval"], MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0
|
new_timings.retry_interval, MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0
|
||||||
)
|
)
|
||||||
|
|
||||||
#
|
#
|
||||||
|
|
Loading…
Reference in a new issue