forked from MirrorHub/synapse
Revert the deletion of stale devices due to performance issues. (#14662)
This commit is contained in:
parent
527366f962
commit
74b89c2761
7 changed files with 5 additions and 152 deletions
|
@ -1 +0,0 @@
|
||||||
Prune user's old devices on login if they have too many.
|
|
|
@ -1 +0,0 @@
|
||||||
Prune user's old devices on login if they have too many.
|
|
1
changelog.d/14662.removal
Normal file
1
changelog.d/14662.removal
Normal file
|
@ -0,0 +1 @@
|
||||||
|
(remove from changelog: unreleased) Revert the deletion of stale devices due to performance issues.
|
|
@ -52,7 +52,6 @@ from synapse.util import stringutils
|
||||||
from synapse.util.async_helpers import Linearizer
|
from synapse.util.async_helpers import Linearizer
|
||||||
from synapse.util.caches.expiringcache import ExpiringCache
|
from synapse.util.caches.expiringcache import ExpiringCache
|
||||||
from synapse.util.cancellation import cancellable
|
from synapse.util.cancellation import cancellable
|
||||||
from synapse.util.iterutils import batch_iter
|
|
||||||
from synapse.util.metrics import measure_func
|
from synapse.util.metrics import measure_func
|
||||||
from synapse.util.retryutils import NotRetryingDestination
|
from synapse.util.retryutils import NotRetryingDestination
|
||||||
|
|
||||||
|
@ -422,9 +421,6 @@ class DeviceHandler(DeviceWorkerHandler):
|
||||||
|
|
||||||
self._check_device_name_length(initial_device_display_name)
|
self._check_device_name_length(initial_device_display_name)
|
||||||
|
|
||||||
# Prune the user's device list if they already have a lot of devices.
|
|
||||||
await self._prune_too_many_devices(user_id)
|
|
||||||
|
|
||||||
if device_id is not None:
|
if device_id is not None:
|
||||||
new_device = await self.store.store_device(
|
new_device = await self.store.store_device(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
|
@ -456,33 +452,6 @@ class DeviceHandler(DeviceWorkerHandler):
|
||||||
|
|
||||||
raise errors.StoreError(500, "Couldn't generate a device ID.")
|
raise errors.StoreError(500, "Couldn't generate a device ID.")
|
||||||
|
|
||||||
async def _prune_too_many_devices(self, user_id: str) -> None:
|
|
||||||
"""Delete any excess old devices this user may have."""
|
|
||||||
device_ids = await self.store.check_too_many_devices_for_user(user_id, 100)
|
|
||||||
if not device_ids:
|
|
||||||
return
|
|
||||||
|
|
||||||
logger.info("Pruning %d old devices for user %s", len(device_ids), user_id)
|
|
||||||
|
|
||||||
# We don't want to block and try and delete tonnes of devices at once,
|
|
||||||
# so we cap the number of devices we delete synchronously.
|
|
||||||
first_batch, remaining_device_ids = device_ids[:10], device_ids[10:]
|
|
||||||
await self.delete_devices(user_id, first_batch)
|
|
||||||
|
|
||||||
if not remaining_device_ids:
|
|
||||||
return
|
|
||||||
|
|
||||||
# Now spawn a background loop that deletes the rest.
|
|
||||||
async def _prune_too_many_devices_loop() -> None:
|
|
||||||
for batch in batch_iter(remaining_device_ids, 10):
|
|
||||||
await self.delete_devices(user_id, batch)
|
|
||||||
|
|
||||||
await self.clock.sleep(1)
|
|
||||||
|
|
||||||
run_as_background_process(
|
|
||||||
"_prune_too_many_devices_loop", _prune_too_many_devices_loop
|
|
||||||
)
|
|
||||||
|
|
||||||
async def _delete_stale_devices(self) -> None:
|
async def _delete_stale_devices(self) -> None:
|
||||||
"""Background task that deletes devices which haven't been accessed for more than
|
"""Background task that deletes devices which haven't been accessed for more than
|
||||||
a configured time period.
|
a configured time period.
|
||||||
|
@ -512,7 +481,7 @@ class DeviceHandler(DeviceWorkerHandler):
|
||||||
device_ids = [d for d in device_ids if d != except_device_id]
|
device_ids = [d for d in device_ids if d != except_device_id]
|
||||||
await self.delete_devices(user_id, device_ids)
|
await self.delete_devices(user_id, device_ids)
|
||||||
|
|
||||||
async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None:
|
async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
|
||||||
"""Delete several devices
|
"""Delete several devices
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
|
|
@ -1569,77 +1569,6 @@ class DeviceBackgroundUpdateStore(SQLBaseStore):
|
||||||
|
|
||||||
return rows
|
return rows
|
||||||
|
|
||||||
async def check_too_many_devices_for_user(
|
|
||||||
self, user_id: str, limit: int
|
|
||||||
) -> List[str]:
|
|
||||||
"""Check if the user has a lot of devices, and if so return the set of
|
|
||||||
devices we can prune.
|
|
||||||
|
|
||||||
This does *not* return hidden devices or devices with E2E keys.
|
|
||||||
|
|
||||||
Returns at most `limit` number of devices, ordered by last seen.
|
|
||||||
"""
|
|
||||||
|
|
||||||
num_devices = await self.db_pool.simple_select_one_onecol(
|
|
||||||
table="devices",
|
|
||||||
keyvalues={"user_id": user_id, "hidden": False},
|
|
||||||
retcol="COALESCE(COUNT(*), 0)",
|
|
||||||
desc="count_devices",
|
|
||||||
)
|
|
||||||
|
|
||||||
# We let users have up to ten devices without pruning.
|
|
||||||
if num_devices <= 10:
|
|
||||||
return []
|
|
||||||
|
|
||||||
# We prune everything older than N days.
|
|
||||||
max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000
|
|
||||||
|
|
||||||
if num_devices > 50:
|
|
||||||
# If the user has more than 50 devices, then we chose a last seen
|
|
||||||
# that ensures we keep at most 50 devices.
|
|
||||||
sql = """
|
|
||||||
SELECT last_seen FROM devices
|
|
||||||
LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
|
|
||||||
WHERE
|
|
||||||
user_id = ?
|
|
||||||
AND NOT hidden
|
|
||||||
AND last_seen IS NOT NULL
|
|
||||||
AND key_json IS NULL
|
|
||||||
ORDER BY last_seen DESC
|
|
||||||
LIMIT 1
|
|
||||||
OFFSET 50
|
|
||||||
"""
|
|
||||||
|
|
||||||
rows = await self.db_pool.execute(
|
|
||||||
"check_too_many_devices_for_user_last_seen", None, sql, (user_id,)
|
|
||||||
)
|
|
||||||
if rows:
|
|
||||||
max_last_seen = max(rows[0][0], max_last_seen)
|
|
||||||
|
|
||||||
# Now fetch the devices to delete.
|
|
||||||
sql = """
|
|
||||||
SELECT device_id FROM devices
|
|
||||||
LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
|
|
||||||
WHERE
|
|
||||||
user_id = ?
|
|
||||||
AND NOT hidden
|
|
||||||
AND last_seen < ?
|
|
||||||
AND key_json IS NULL
|
|
||||||
ORDER BY last_seen
|
|
||||||
LIMIT ?
|
|
||||||
"""
|
|
||||||
|
|
||||||
def check_too_many_devices_for_user_txn(
|
|
||||||
txn: LoggingTransaction,
|
|
||||||
) -> List[str]:
|
|
||||||
txn.execute(sql, (user_id, max_last_seen, limit))
|
|
||||||
return [device_id for device_id, in txn]
|
|
||||||
|
|
||||||
return await self.db_pool.runInteraction(
|
|
||||||
"check_too_many_devices_for_user",
|
|
||||||
check_too_many_devices_for_user_txn,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
|
class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
|
||||||
# Because we have write access, this will be a StreamIdGenerator
|
# Because we have write access, this will be a StreamIdGenerator
|
||||||
|
@ -1698,7 +1627,6 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
|
||||||
values={},
|
values={},
|
||||||
insertion_values={
|
insertion_values={
|
||||||
"display_name": initial_device_display_name,
|
"display_name": initial_device_display_name,
|
||||||
"last_seen": self._clock.time_msec(),
|
|
||||||
"hidden": False,
|
"hidden": False,
|
||||||
},
|
},
|
||||||
desc="store_device",
|
desc="store_device",
|
||||||
|
@ -1744,15 +1672,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
|
||||||
)
|
)
|
||||||
raise StoreError(500, "Problem storing device.")
|
raise StoreError(500, "Problem storing device.")
|
||||||
|
|
||||||
@cached(max_entries=0)
|
async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
|
||||||
async def delete_device(self, user_id: str, device_id: str) -> None:
|
|
||||||
raise NotImplementedError()
|
|
||||||
|
|
||||||
# Note: sometimes deleting rows out of `device_inbox` can take a long time,
|
|
||||||
# so we use a cache so that we deduplicate in flight requests to delete
|
|
||||||
# devices.
|
|
||||||
@cachedList(cached_method_name="delete_device", list_name="device_ids")
|
|
||||||
async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> dict:
|
|
||||||
"""Deletes several devices.
|
"""Deletes several devices.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -1789,8 +1709,6 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
|
||||||
for device_id in device_ids:
|
for device_id in device_ids:
|
||||||
self.device_id_exists_cache.invalidate((user_id, device_id))
|
self.device_id_exists_cache.invalidate((user_id, device_id))
|
||||||
|
|
||||||
return {}
|
|
||||||
|
|
||||||
async def update_device(
|
async def update_device(
|
||||||
self, user_id: str, device_id: str, new_display_name: Optional[str] = None
|
self, user_id: str, device_id: str, new_display_name: Optional[str] = None
|
||||||
) -> None:
|
) -> None:
|
||||||
|
|
|
@ -20,8 +20,6 @@ from twisted.test.proto_helpers import MemoryReactor
|
||||||
|
|
||||||
from synapse.api.errors import NotFoundError, SynapseError
|
from synapse.api.errors import NotFoundError, SynapseError
|
||||||
from synapse.handlers.device import MAX_DEVICE_DISPLAY_NAME_LEN, DeviceHandler
|
from synapse.handlers.device import MAX_DEVICE_DISPLAY_NAME_LEN, DeviceHandler
|
||||||
from synapse.rest import admin
|
|
||||||
from synapse.rest.client import account, login
|
|
||||||
from synapse.server import HomeServer
|
from synapse.server import HomeServer
|
||||||
from synapse.util import Clock
|
from synapse.util import Clock
|
||||||
|
|
||||||
|
@ -32,12 +30,6 @@ user2 = "@theresa:bbb"
|
||||||
|
|
||||||
|
|
||||||
class DeviceTestCase(unittest.HomeserverTestCase):
|
class DeviceTestCase(unittest.HomeserverTestCase):
|
||||||
servlets = [
|
|
||||||
login.register_servlets,
|
|
||||||
admin.register_servlets,
|
|
||||||
account.register_servlets,
|
|
||||||
]
|
|
||||||
|
|
||||||
def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
|
def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
|
||||||
hs = self.setup_test_homeserver("server", federation_http_client=None)
|
hs = self.setup_test_homeserver("server", federation_http_client=None)
|
||||||
handler = hs.get_device_handler()
|
handler = hs.get_device_handler()
|
||||||
|
@ -123,7 +115,7 @@ class DeviceTestCase(unittest.HomeserverTestCase):
|
||||||
"device_id": "xyz",
|
"device_id": "xyz",
|
||||||
"display_name": "display 0",
|
"display_name": "display 0",
|
||||||
"last_seen_ip": None,
|
"last_seen_ip": None,
|
||||||
"last_seen_ts": 1000000,
|
"last_seen_ts": None,
|
||||||
},
|
},
|
||||||
device_map["xyz"],
|
device_map["xyz"],
|
||||||
)
|
)
|
||||||
|
@ -237,29 +229,6 @@ class DeviceTestCase(unittest.HomeserverTestCase):
|
||||||
NotFoundError,
|
NotFoundError,
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_login_delete_old_devices(self) -> None:
|
|
||||||
"""Delete old devices if the user already has too many."""
|
|
||||||
|
|
||||||
user_id = self.register_user("user", "pass")
|
|
||||||
|
|
||||||
# Create a bunch of devices
|
|
||||||
for _ in range(50):
|
|
||||||
self.login("user", "pass")
|
|
||||||
self.reactor.advance(1)
|
|
||||||
|
|
||||||
# Advance the clock for ages (as we only delete old devices)
|
|
||||||
self.reactor.advance(60 * 60 * 24 * 300)
|
|
||||||
|
|
||||||
# Log in again to start the pruning
|
|
||||||
self.login("user", "pass")
|
|
||||||
|
|
||||||
# Give the background job time to do its thing
|
|
||||||
self.reactor.pump([1.0] * 100)
|
|
||||||
|
|
||||||
# We should now only have the most recent device.
|
|
||||||
devices = self.get_success(self.handler.get_devices_by_user(user_id))
|
|
||||||
self.assertEqual(len(devices), 1)
|
|
||||||
|
|
||||||
def _record_users(self) -> None:
|
def _record_users(self) -> None:
|
||||||
# check this works for both devices which have a recorded client_ip,
|
# check this works for both devices which have a recorded client_ip,
|
||||||
# and those which don't.
|
# and those which don't.
|
||||||
|
|
|
@ -170,8 +170,6 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
last_seen = self.clock.time_msec()
|
|
||||||
|
|
||||||
if after_persisting:
|
if after_persisting:
|
||||||
# Trigger the storage loop
|
# Trigger the storage loop
|
||||||
self.reactor.advance(10)
|
self.reactor.advance(10)
|
||||||
|
@ -192,7 +190,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
|
||||||
"device_id": device_id,
|
"device_id": device_id,
|
||||||
"ip": None,
|
"ip": None,
|
||||||
"user_agent": None,
|
"user_agent": None,
|
||||||
"last_seen": last_seen,
|
"last_seen": None,
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in a new issue