forked from MirrorHub/synapse
Prevent local quarantined media from being claimed by media retention (#12972)
This commit is contained in:
parent
f7baffd8ec
commit
a47636c570
6 changed files with 185 additions and 29 deletions
1
changelog.d/12972.feature
Normal file
1
changelog.d/12972.feature
Normal file
|
@ -0,0 +1 @@
|
|||
Add new `media_retention` options to the homeserver config for routinely cleaning up non-recently accessed media.
|
|
@ -1583,6 +1583,12 @@ been accessed, the media's creation time is used instead. Both thumbnails
|
|||
and the original media will be removed. If either of these options is unset,
|
||||
then media of that type will not be purged.
|
||||
|
||||
Local or cached remote media that has been
|
||||
[quarantined](../../admin_api/media_admin_api.md#quarantining-media-in-a-room)
|
||||
will not be deleted. Similarly, local media that has been marked as
|
||||
[protected from quarantine](../../admin_api/media_admin_api.md#protecting-media-from-being-quarantined)
|
||||
will not be deleted.
|
||||
|
||||
Example configuration:
|
||||
```yaml
|
||||
media_retention:
|
||||
|
|
|
@ -83,7 +83,7 @@ class QuarantineMediaByUser(RestServlet):
|
|||
requester = await self.auth.get_user_by_req(request)
|
||||
await assert_user_is_admin(self.auth, requester.user)
|
||||
|
||||
logging.info("Quarantining local media by user: %s", user_id)
|
||||
logging.info("Quarantining media by user: %s", user_id)
|
||||
|
||||
# Quarantine all media this user has uploaded
|
||||
num_quarantined = await self.store.quarantine_media_ids_by_user(
|
||||
|
@ -112,7 +112,7 @@ class QuarantineMediaByID(RestServlet):
|
|||
requester = await self.auth.get_user_by_req(request)
|
||||
await assert_user_is_admin(self.auth, requester.user)
|
||||
|
||||
logging.info("Quarantining local media by ID: %s/%s", server_name, media_id)
|
||||
logging.info("Quarantining media by ID: %s/%s", server_name, media_id)
|
||||
|
||||
# Quarantine this media id
|
||||
await self.store.quarantine_media_by_id(
|
||||
|
@ -140,9 +140,7 @@ class UnquarantineMediaByID(RestServlet):
|
|||
) -> Tuple[int, JsonDict]:
|
||||
await assert_requester_is_admin(self.auth, request)
|
||||
|
||||
logging.info(
|
||||
"Remove from quarantine local media by ID: %s/%s", server_name, media_id
|
||||
)
|
||||
logging.info("Remove from quarantine media by ID: %s/%s", server_name, media_id)
|
||||
|
||||
# Remove from quarantine this media id
|
||||
await self.store.quarantine_media_by_id(server_name, media_id, None)
|
||||
|
|
|
@ -919,10 +919,14 @@ class MediaRepository:
|
|||
await self.delete_old_local_media(
|
||||
before_ts=local_media_threshold_timestamp_ms,
|
||||
keep_profiles=True,
|
||||
delete_quarantined_media=False,
|
||||
delete_protected_media=False,
|
||||
)
|
||||
|
||||
async def delete_old_remote_media(self, before_ts: int) -> Dict[str, int]:
|
||||
old_media = await self.store.get_remote_media_before(before_ts)
|
||||
old_media = await self.store.get_remote_media_ids(
|
||||
before_ts, include_quarantined_media=False
|
||||
)
|
||||
|
||||
deleted = 0
|
||||
|
||||
|
@ -975,6 +979,8 @@ class MediaRepository:
|
|||
before_ts: int,
|
||||
size_gt: int = 0,
|
||||
keep_profiles: bool = True,
|
||||
delete_quarantined_media: bool = False,
|
||||
delete_protected_media: bool = False,
|
||||
) -> Tuple[List[str], int]:
|
||||
"""
|
||||
Delete local or remote media from this server by size and timestamp. Removes
|
||||
|
@ -982,18 +988,22 @@ class MediaRepository:
|
|||
|
||||
Args:
|
||||
before_ts: Unix timestamp in ms.
|
||||
Files that were last used before this timestamp will be deleted
|
||||
size_gt: Size of the media in bytes. Files that are larger will be deleted
|
||||
Files that were last used before this timestamp will be deleted.
|
||||
size_gt: Size of the media in bytes. Files that are larger will be deleted.
|
||||
keep_profiles: Switch to delete also files that are still used in image data
|
||||
(e.g user profile, room avatar)
|
||||
If false these files will be deleted
|
||||
(e.g. user profile, room avatar). If False, these files will be deleted.
|
||||
delete_quarantined_media: If True, media marked as quarantined will be deleted.
|
||||
delete_protected_media: If True, media marked as protected will be deleted.
|
||||
|
||||
Returns:
|
||||
A tuple of (list of deleted media IDs, total deleted media IDs).
|
||||
"""
|
||||
old_media = await self.store.get_local_media_before(
|
||||
old_media = await self.store.get_local_media_ids(
|
||||
before_ts,
|
||||
size_gt,
|
||||
keep_profiles,
|
||||
include_quarantined_media=delete_quarantined_media,
|
||||
include_protected_media=delete_protected_media,
|
||||
)
|
||||
return await self._remove_local_media_from_disk(old_media)
|
||||
|
||||
|
|
|
@ -251,12 +251,36 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
|
|||
"get_local_media_by_user_paginate_txn", get_local_media_by_user_paginate_txn
|
||||
)
|
||||
|
||||
async def get_local_media_before(
|
||||
async def get_local_media_ids(
|
||||
self,
|
||||
before_ts: int,
|
||||
size_gt: int,
|
||||
keep_profiles: bool,
|
||||
include_quarantined_media: bool,
|
||||
include_protected_media: bool,
|
||||
) -> List[str]:
|
||||
"""
|
||||
Retrieve a list of media IDs from the local media store.
|
||||
|
||||
Args:
|
||||
before_ts: Only retrieve IDs from media that was either last accessed
|
||||
(or if never accessed, created) before the given UNIX timestamp in ms.
|
||||
size_gt: Only retrieve IDs from media that has a size (in bytes) greater than
|
||||
the given integer.
|
||||
keep_profiles: If True, exclude media IDs from the results that are used in the
|
||||
following situations:
|
||||
* global profile user avatar
|
||||
* per-room profile user avatar
|
||||
* room avatar
|
||||
* a user's avatar in the user directory
|
||||
include_quarantined_media: If False, exclude media IDs from the results that have
|
||||
been marked as quarantined.
|
||||
include_protected_media: If False, exclude media IDs from the results that have
|
||||
been marked as protected from quarantine.
|
||||
|
||||
Returns:
|
||||
A list of local media IDs.
|
||||
"""
|
||||
|
||||
# to find files that have never been accessed (last_access_ts IS NULL)
|
||||
# compare with `created_ts`
|
||||
|
@ -294,12 +318,24 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
|
|||
)
|
||||
sql += sql_keep
|
||||
|
||||
def _get_local_media_before_txn(txn: LoggingTransaction) -> List[str]:
|
||||
if include_quarantined_media is False:
|
||||
# Do not include media that has been quarantined
|
||||
sql += """
|
||||
AND quarantined_by IS NULL
|
||||
"""
|
||||
|
||||
if include_protected_media is False:
|
||||
# Do not include media that has been protected from quarantine
|
||||
sql += """
|
||||
AND safe_from_quarantine = false
|
||||
"""
|
||||
|
||||
def _get_local_media_ids_txn(txn: LoggingTransaction) -> List[str]:
|
||||
txn.execute(sql, (before_ts, before_ts, size_gt))
|
||||
return [row[0] for row in txn]
|
||||
|
||||
return await self.db_pool.runInteraction(
|
||||
"get_local_media_before", _get_local_media_before_txn
|
||||
"get_local_media_ids", _get_local_media_ids_txn
|
||||
)
|
||||
|
||||
async def store_local_media(
|
||||
|
@ -599,15 +635,37 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
|
|||
desc="store_remote_media_thumbnail",
|
||||
)
|
||||
|
||||
async def get_remote_media_before(self, before_ts: int) -> List[Dict[str, str]]:
|
||||
async def get_remote_media_ids(
|
||||
self, before_ts: int, include_quarantined_media: bool
|
||||
) -> List[Dict[str, str]]:
|
||||
"""
|
||||
Retrieve entries (origin server name, media ID and filesystem ID) from the remote media cache.
|
||||
|
||||
Args:
|
||||
before_ts: Only retrieve IDs from media that was either last accessed
|
||||
(or if never accessed, created) before the given UNIX timestamp in ms.
|
||||
include_quarantined_media: If False, exclude media IDs from the results that have
|
||||
been marked as quarantined.
|
||||
|
||||
Returns:
|
||||
A list of dicts, one per remote media entry, containing:
|
||||
* The server name of homeserver where the media originates from,
|
||||
* The ID of the media.
|
||||
"""
|
||||
sql = (
|
||||
"SELECT media_origin, media_id, filesystem_id"
|
||||
" FROM remote_media_cache"
|
||||
" WHERE last_access_ts < ?"
|
||||
)
|
||||
|
||||
if include_quarantined_media is False:
|
||||
# Only include media that has not been quarantined
|
||||
sql += """
|
||||
AND quarantined_by IS NULL
|
||||
"""
|
||||
|
||||
return await self.db_pool.execute(
|
||||
"get_remote_media_before", self.db_pool.cursor_to_dict, sql, before_ts
|
||||
"get_remote_media_ids", self.db_pool.cursor_to_dict, sql, before_ts
|
||||
)
|
||||
|
||||
async def delete_remote_media(self, media_origin: str, media_id: str) -> None:
|
||||
|
|
|
@ -53,13 +53,16 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase):
|
|||
# Create a user to upload media with
|
||||
test_user_id = self.register_user("alice", "password")
|
||||
|
||||
# Inject media (3 images each; recently accessed, old access, never accessed)
|
||||
# into both the local store and the remote cache
|
||||
# Inject media (recently accessed, old access, never accessed, old access
|
||||
# quarantined media) into both the local store and the remote cache, plus
|
||||
# one additional local media that is marked as protected from quarantine.
|
||||
media_repository = hs.get_media_repository()
|
||||
test_media_content = b"example string"
|
||||
|
||||
def _create_media_and_set_last_accessed(
|
||||
def _create_media_and_set_attributes(
|
||||
last_accessed_ms: Optional[int],
|
||||
is_quarantined: Optional[bool] = False,
|
||||
is_protected: Optional[bool] = False,
|
||||
) -> str:
|
||||
# "Upload" some media to the local media store
|
||||
mxc_uri = self.get_success(
|
||||
|
@ -84,10 +87,31 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase):
|
|||
)
|
||||
)
|
||||
|
||||
if is_quarantined:
|
||||
# Mark this media as quarantined
|
||||
self.get_success(
|
||||
self.store.quarantine_media_by_id(
|
||||
server_name=self.hs.config.server.server_name,
|
||||
media_id=media_id,
|
||||
quarantined_by="@theadmin:test",
|
||||
)
|
||||
)
|
||||
|
||||
if is_protected:
|
||||
# Mark this media as protected from quarantine
|
||||
self.get_success(
|
||||
self.store.mark_local_media_as_safe(
|
||||
media_id=media_id,
|
||||
safe=True,
|
||||
)
|
||||
)
|
||||
|
||||
return media_id
|
||||
|
||||
def _cache_remote_media_and_set_last_accessed(
|
||||
media_id: str, last_accessed_ms: Optional[int]
|
||||
def _cache_remote_media_and_set_attributes(
|
||||
media_id: str,
|
||||
last_accessed_ms: Optional[int],
|
||||
is_quarantined: Optional[bool] = False,
|
||||
) -> str:
|
||||
# Pretend to cache some remote media
|
||||
self.get_success(
|
||||
|
@ -112,23 +136,58 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase):
|
|||
)
|
||||
)
|
||||
|
||||
if is_quarantined:
|
||||
# Mark this media as quarantined
|
||||
self.get_success(
|
||||
self.store.quarantine_media_by_id(
|
||||
server_name=self.remote_server_name,
|
||||
media_id=media_id,
|
||||
quarantined_by="@theadmin:test",
|
||||
)
|
||||
)
|
||||
|
||||
return media_id
|
||||
|
||||
# Start with the local media store
|
||||
self.local_recently_accessed_media = _create_media_and_set_last_accessed(
|
||||
self.THIRTY_DAYS_IN_MS
|
||||
self.local_recently_accessed_media = _create_media_and_set_attributes(
|
||||
last_accessed_ms=self.THIRTY_DAYS_IN_MS,
|
||||
)
|
||||
self.local_not_recently_accessed_media = _create_media_and_set_last_accessed(
|
||||
self.ONE_DAY_IN_MS
|
||||
self.local_not_recently_accessed_media = _create_media_and_set_attributes(
|
||||
last_accessed_ms=self.ONE_DAY_IN_MS,
|
||||
)
|
||||
self.local_not_recently_accessed_quarantined_media = (
|
||||
_create_media_and_set_attributes(
|
||||
last_accessed_ms=self.ONE_DAY_IN_MS,
|
||||
is_quarantined=True,
|
||||
)
|
||||
)
|
||||
self.local_not_recently_accessed_protected_media = (
|
||||
_create_media_and_set_attributes(
|
||||
last_accessed_ms=self.ONE_DAY_IN_MS,
|
||||
is_protected=True,
|
||||
)
|
||||
)
|
||||
self.local_never_accessed_media = _create_media_and_set_attributes(
|
||||
last_accessed_ms=None,
|
||||
)
|
||||
self.local_never_accessed_media = _create_media_and_set_last_accessed(None)
|
||||
|
||||
# And now the remote media store
|
||||
self.remote_recently_accessed_media = _cache_remote_media_and_set_last_accessed(
|
||||
"a", self.THIRTY_DAYS_IN_MS
|
||||
self.remote_recently_accessed_media = _cache_remote_media_and_set_attributes(
|
||||
media_id="a",
|
||||
last_accessed_ms=self.THIRTY_DAYS_IN_MS,
|
||||
)
|
||||
self.remote_not_recently_accessed_media = (
|
||||
_cache_remote_media_and_set_last_accessed("b", self.ONE_DAY_IN_MS)
|
||||
_cache_remote_media_and_set_attributes(
|
||||
media_id="b",
|
||||
last_accessed_ms=self.ONE_DAY_IN_MS,
|
||||
)
|
||||
)
|
||||
self.remote_not_recently_accessed_quarantined_media = (
|
||||
_cache_remote_media_and_set_attributes(
|
||||
media_id="c",
|
||||
last_accessed_ms=self.ONE_DAY_IN_MS,
|
||||
is_quarantined=True,
|
||||
)
|
||||
)
|
||||
# Remote media will always have a "last accessed" attribute, as it would not
|
||||
# be fetched from the remote homeserver unless instigated by a user.
|
||||
|
@ -163,8 +222,20 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase):
|
|||
],
|
||||
not_purged=[
|
||||
(self.hs.config.server.server_name, self.local_recently_accessed_media),
|
||||
(
|
||||
self.hs.config.server.server_name,
|
||||
self.local_not_recently_accessed_quarantined_media,
|
||||
),
|
||||
(
|
||||
self.hs.config.server.server_name,
|
||||
self.local_not_recently_accessed_protected_media,
|
||||
),
|
||||
(self.remote_server_name, self.remote_recently_accessed_media),
|
||||
(self.remote_server_name, self.remote_not_recently_accessed_media),
|
||||
(
|
||||
self.remote_server_name,
|
||||
self.remote_not_recently_accessed_quarantined_media,
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -199,6 +270,18 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase):
|
|||
self.hs.config.server.server_name,
|
||||
self.local_not_recently_accessed_media,
|
||||
),
|
||||
(
|
||||
self.hs.config.server.server_name,
|
||||
self.local_not_recently_accessed_quarantined_media,
|
||||
),
|
||||
(
|
||||
self.hs.config.server.server_name,
|
||||
self.local_not_recently_accessed_protected_media,
|
||||
),
|
||||
(
|
||||
self.remote_server_name,
|
||||
self.remote_not_recently_accessed_quarantined_media,
|
||||
),
|
||||
(self.hs.config.server.server_name, self.local_never_accessed_media),
|
||||
],
|
||||
)
|
||||
|
|
Loading…
Reference in a new issue