forked from MirrorHub/synapse
Prevent local quarantined media from being claimed by media retention (#12972)
This commit is contained in:
parent
f7baffd8ec
commit
a47636c570
6 changed files with 185 additions and 29 deletions
1
changelog.d/12972.feature
Normal file
1
changelog.d/12972.feature
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Add new `media_retention` options to the homeserver config for routinely cleaning up non-recently accessed media.
|
|
@ -1583,6 +1583,12 @@ been accessed, the media's creation time is used instead. Both thumbnails
|
||||||
and the original media will be removed. If either of these options are unset,
|
and the original media will be removed. If either of these options are unset,
|
||||||
then media of that type will not be purged.
|
then media of that type will not be purged.
|
||||||
|
|
||||||
|
Local or cached remote media that has been
|
||||||
|
[quarantined](../../admin_api/media_admin_api.md#quarantining-media-in-a-room)
|
||||||
|
will not be deleted. Similarly, local media that has been marked as
|
||||||
|
[protected from quarantine](../../admin_api/media_admin_api.md#protecting-media-from-being-quarantined)
|
||||||
|
will not be deleted.
|
||||||
|
|
||||||
Example configuration:
|
Example configuration:
|
||||||
```yaml
|
```yaml
|
||||||
media_retention:
|
media_retention:
|
||||||
|
|
|
@ -83,7 +83,7 @@ class QuarantineMediaByUser(RestServlet):
|
||||||
requester = await self.auth.get_user_by_req(request)
|
requester = await self.auth.get_user_by_req(request)
|
||||||
await assert_user_is_admin(self.auth, requester.user)
|
await assert_user_is_admin(self.auth, requester.user)
|
||||||
|
|
||||||
logging.info("Quarantining local media by user: %s", user_id)
|
logging.info("Quarantining media by user: %s", user_id)
|
||||||
|
|
||||||
# Quarantine all media this user has uploaded
|
# Quarantine all media this user has uploaded
|
||||||
num_quarantined = await self.store.quarantine_media_ids_by_user(
|
num_quarantined = await self.store.quarantine_media_ids_by_user(
|
||||||
|
@ -112,7 +112,7 @@ class QuarantineMediaByID(RestServlet):
|
||||||
requester = await self.auth.get_user_by_req(request)
|
requester = await self.auth.get_user_by_req(request)
|
||||||
await assert_user_is_admin(self.auth, requester.user)
|
await assert_user_is_admin(self.auth, requester.user)
|
||||||
|
|
||||||
logging.info("Quarantining local media by ID: %s/%s", server_name, media_id)
|
logging.info("Quarantining media by ID: %s/%s", server_name, media_id)
|
||||||
|
|
||||||
# Quarantine this media id
|
# Quarantine this media id
|
||||||
await self.store.quarantine_media_by_id(
|
await self.store.quarantine_media_by_id(
|
||||||
|
@ -140,9 +140,7 @@ class UnquarantineMediaByID(RestServlet):
|
||||||
) -> Tuple[int, JsonDict]:
|
) -> Tuple[int, JsonDict]:
|
||||||
await assert_requester_is_admin(self.auth, request)
|
await assert_requester_is_admin(self.auth, request)
|
||||||
|
|
||||||
logging.info(
|
logging.info("Remove from quarantine media by ID: %s/%s", server_name, media_id)
|
||||||
"Remove from quarantine local media by ID: %s/%s", server_name, media_id
|
|
||||||
)
|
|
||||||
|
|
||||||
# Remove from quarantine this media id
|
# Remove from quarantine this media id
|
||||||
await self.store.quarantine_media_by_id(server_name, media_id, None)
|
await self.store.quarantine_media_by_id(server_name, media_id, None)
|
||||||
|
|
|
@ -919,10 +919,14 @@ class MediaRepository:
|
||||||
await self.delete_old_local_media(
|
await self.delete_old_local_media(
|
||||||
before_ts=local_media_threshold_timestamp_ms,
|
before_ts=local_media_threshold_timestamp_ms,
|
||||||
keep_profiles=True,
|
keep_profiles=True,
|
||||||
|
delete_quarantined_media=False,
|
||||||
|
delete_protected_media=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def delete_old_remote_media(self, before_ts: int) -> Dict[str, int]:
|
async def delete_old_remote_media(self, before_ts: int) -> Dict[str, int]:
|
||||||
old_media = await self.store.get_remote_media_before(before_ts)
|
old_media = await self.store.get_remote_media_ids(
|
||||||
|
before_ts, include_quarantined_media=False
|
||||||
|
)
|
||||||
|
|
||||||
deleted = 0
|
deleted = 0
|
||||||
|
|
||||||
|
@ -975,6 +979,8 @@ class MediaRepository:
|
||||||
before_ts: int,
|
before_ts: int,
|
||||||
size_gt: int = 0,
|
size_gt: int = 0,
|
||||||
keep_profiles: bool = True,
|
keep_profiles: bool = True,
|
||||||
|
delete_quarantined_media: bool = False,
|
||||||
|
delete_protected_media: bool = False,
|
||||||
) -> Tuple[List[str], int]:
|
) -> Tuple[List[str], int]:
|
||||||
"""
|
"""
|
||||||
Delete local or remote media from this server by size and timestamp. Removes
|
Delete local or remote media from this server by size and timestamp. Removes
|
||||||
|
@ -982,18 +988,22 @@ class MediaRepository:
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
before_ts: Unix timestamp in ms.
|
before_ts: Unix timestamp in ms.
|
||||||
Files that were last used before this timestamp will be deleted
|
Files that were last used before this timestamp will be deleted.
|
||||||
size_gt: Size of the media in bytes. Files that are larger will be deleted
|
size_gt: Size of the media in bytes. Files that are larger will be deleted.
|
||||||
keep_profiles: Switch to delete also files that are still used in image data
|
keep_profiles: Switch to delete also files that are still used in image data
|
||||||
(e.g user profile, room avatar)
|
(e.g. user profile, room avatar). If False, these files will be deleted.
|
||||||
If false these files will be deleted
|
delete_quarantined_media: If True, media marked as quarantined will be deleted.
|
||||||
|
delete_protected_media: If True, media marked as protected will be deleted.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A tuple of (list of deleted media IDs, total deleted media IDs).
|
A tuple of (list of deleted media IDs, total deleted media IDs).
|
||||||
"""
|
"""
|
||||||
old_media = await self.store.get_local_media_before(
|
old_media = await self.store.get_local_media_ids(
|
||||||
before_ts,
|
before_ts,
|
||||||
size_gt,
|
size_gt,
|
||||||
keep_profiles,
|
keep_profiles,
|
||||||
|
include_quarantined_media=delete_quarantined_media,
|
||||||
|
include_protected_media=delete_protected_media,
|
||||||
)
|
)
|
||||||
return await self._remove_local_media_from_disk(old_media)
|
return await self._remove_local_media_from_disk(old_media)
|
||||||
|
|
||||||
|
|
|
@ -251,12 +251,36 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
|
||||||
"get_local_media_by_user_paginate_txn", get_local_media_by_user_paginate_txn
|
"get_local_media_by_user_paginate_txn", get_local_media_by_user_paginate_txn
|
||||||
)
|
)
|
||||||
|
|
||||||
async def get_local_media_before(
|
async def get_local_media_ids(
|
||||||
self,
|
self,
|
||||||
before_ts: int,
|
before_ts: int,
|
||||||
size_gt: int,
|
size_gt: int,
|
||||||
keep_profiles: bool,
|
keep_profiles: bool,
|
||||||
|
include_quarantined_media: bool,
|
||||||
|
include_protected_media: bool,
|
||||||
) -> List[str]:
|
) -> List[str]:
|
||||||
|
"""
|
||||||
|
Retrieve a list of media IDs from the local media store.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
before_ts: Only retrieve IDs from media that was either last accessed
|
||||||
|
(or if never accessed, created) before the given UNIX timestamp in ms.
|
||||||
|
size_gt: Only retrieve IDs from media that has a size (in bytes) greater than
|
||||||
|
the given integer.
|
||||||
|
keep_profiles: If True, exclude media IDs from the results that are used in the
|
||||||
|
following situations:
|
||||||
|
* global profile user avatar
|
||||||
|
* per-room profile user avatar
|
||||||
|
* room avatar
|
||||||
|
* a user's avatar in the user directory
|
||||||
|
include_quarantined_media: If False, exclude media IDs from the results that have
|
||||||
|
been marked as quarantined.
|
||||||
|
include_protected_media: If False, exclude media IDs from the results that have
|
||||||
|
been marked as protected from quarantine.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A list of local media IDs.
|
||||||
|
"""
|
||||||
|
|
||||||
# to find files that have never been accessed (last_access_ts IS NULL)
|
# to find files that have never been accessed (last_access_ts IS NULL)
|
||||||
# compare with `created_ts`
|
# compare with `created_ts`
|
||||||
|
@ -294,12 +318,24 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
|
||||||
)
|
)
|
||||||
sql += sql_keep
|
sql += sql_keep
|
||||||
|
|
||||||
def _get_local_media_before_txn(txn: LoggingTransaction) -> List[str]:
|
if include_quarantined_media is False:
|
||||||
|
# Do not include media that has been quarantined
|
||||||
|
sql += """
|
||||||
|
AND quarantined_by IS NULL
|
||||||
|
"""
|
||||||
|
|
||||||
|
if include_protected_media is False:
|
||||||
|
# Do not include media that has been protected from quarantine
|
||||||
|
sql += """
|
||||||
|
AND safe_from_quarantine = false
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _get_local_media_ids_txn(txn: LoggingTransaction) -> List[str]:
|
||||||
txn.execute(sql, (before_ts, before_ts, size_gt))
|
txn.execute(sql, (before_ts, before_ts, size_gt))
|
||||||
return [row[0] for row in txn]
|
return [row[0] for row in txn]
|
||||||
|
|
||||||
return await self.db_pool.runInteraction(
|
return await self.db_pool.runInteraction(
|
||||||
"get_local_media_before", _get_local_media_before_txn
|
"get_local_media_ids", _get_local_media_ids_txn
|
||||||
)
|
)
|
||||||
|
|
||||||
async def store_local_media(
|
async def store_local_media(
|
||||||
|
@ -599,15 +635,37 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
|
||||||
desc="store_remote_media_thumbnail",
|
desc="store_remote_media_thumbnail",
|
||||||
)
|
)
|
||||||
|
|
||||||
async def get_remote_media_before(self, before_ts: int) -> List[Dict[str, str]]:
|
async def get_remote_media_ids(
|
||||||
|
self, before_ts: int, include_quarantined_media: bool
|
||||||
|
) -> List[Dict[str, str]]:
|
||||||
|
"""
|
||||||
|
Retrieve a list of dicts (origin server name, media ID, filesystem ID) from the remote media cache.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
before_ts: Only retrieve IDs from media that was last accessed before the given
|
||||||
|
UNIX timestamp in ms (the query filters on `last_access_ts` only).
|
||||||
|
include_quarantined_media: If False, exclude media IDs from the results that have
|
||||||
|
been marked as quarantined.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
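A list of dicts, one per matching media, with the keys:
|
||||||
|
* `media_origin`: the server name of the homeserver the media originates from,
|
||||||
|
* `media_id`: the ID of the media (the selected `filesystem_id` column is returned as well).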
|
||||||
|
"""
|
||||||
sql = (
|
sql = (
|
||||||
"SELECT media_origin, media_id, filesystem_id"
|
"SELECT media_origin, media_id, filesystem_id"
|
||||||
" FROM remote_media_cache"
|
" FROM remote_media_cache"
|
||||||
" WHERE last_access_ts < ?"
|
" WHERE last_access_ts < ?"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if include_quarantined_media is False:
|
||||||
|
# Only include media that has not been quarantined
|
||||||
|
sql += """
|
||||||
|
AND quarantined_by IS NULL
|
||||||
|
"""
|
||||||
|
|
||||||
return await self.db_pool.execute(
|
return await self.db_pool.execute(
|
||||||
"get_remote_media_before", self.db_pool.cursor_to_dict, sql, before_ts
|
"get_remote_media_ids", self.db_pool.cursor_to_dict, sql, before_ts
|
||||||
)
|
)
|
||||||
|
|
||||||
async def delete_remote_media(self, media_origin: str, media_id: str) -> None:
|
async def delete_remote_media(self, media_origin: str, media_id: str) -> None:
|
||||||
|
|
|
@ -53,13 +53,16 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase):
|
||||||
# Create a user to upload media with
|
# Create a user to upload media with
|
||||||
test_user_id = self.register_user("alice", "password")
|
test_user_id = self.register_user("alice", "password")
|
||||||
|
|
||||||
# Inject media (3 images each; recently accessed, old access, never accessed)
|
# Inject media (recently accessed, old access, never accessed, old access
|
||||||
# into both the local store and the remote cache
|
# quarantined media) into both the local store and the remote cache, plus
|
||||||
|
# one additional local media that is marked as protected from quarantine.
|
||||||
media_repository = hs.get_media_repository()
|
media_repository = hs.get_media_repository()
|
||||||
test_media_content = b"example string"
|
test_media_content = b"example string"
|
||||||
|
|
||||||
def _create_media_and_set_last_accessed(
|
def _create_media_and_set_attributes(
|
||||||
last_accessed_ms: Optional[int],
|
last_accessed_ms: Optional[int],
|
||||||
|
is_quarantined: Optional[bool] = False,
|
||||||
|
is_protected: Optional[bool] = False,
|
||||||
) -> str:
|
) -> str:
|
||||||
# "Upload" some media to the local media store
|
# "Upload" some media to the local media store
|
||||||
mxc_uri = self.get_success(
|
mxc_uri = self.get_success(
|
||||||
|
@ -84,10 +87,31 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase):
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if is_quarantined:
|
||||||
|
# Mark this media as quarantined
|
||||||
|
self.get_success(
|
||||||
|
self.store.quarantine_media_by_id(
|
||||||
|
server_name=self.hs.config.server.server_name,
|
||||||
|
media_id=media_id,
|
||||||
|
quarantined_by="@theadmin:test",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
if is_protected:
|
||||||
|
# Mark this media as protected from quarantine
|
||||||
|
self.get_success(
|
||||||
|
self.store.mark_local_media_as_safe(
|
||||||
|
media_id=media_id,
|
||||||
|
safe=True,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
return media_id
|
return media_id
|
||||||
|
|
||||||
def _cache_remote_media_and_set_last_accessed(
|
def _cache_remote_media_and_set_attributes(
|
||||||
media_id: str, last_accessed_ms: Optional[int]
|
media_id: str,
|
||||||
|
last_accessed_ms: Optional[int],
|
||||||
|
is_quarantined: Optional[bool] = False,
|
||||||
) -> str:
|
) -> str:
|
||||||
# Pretend to cache some remote media
|
# Pretend to cache some remote media
|
||||||
self.get_success(
|
self.get_success(
|
||||||
|
@ -112,23 +136,58 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase):
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if is_quarantined:
|
||||||
|
# Mark this media as quarantined
|
||||||
|
self.get_success(
|
||||||
|
self.store.quarantine_media_by_id(
|
||||||
|
server_name=self.remote_server_name,
|
||||||
|
media_id=media_id,
|
||||||
|
quarantined_by="@theadmin:test",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
return media_id
|
return media_id
|
||||||
|
|
||||||
# Start with the local media store
|
# Start with the local media store
|
||||||
self.local_recently_accessed_media = _create_media_and_set_last_accessed(
|
self.local_recently_accessed_media = _create_media_and_set_attributes(
|
||||||
self.THIRTY_DAYS_IN_MS
|
last_accessed_ms=self.THIRTY_DAYS_IN_MS,
|
||||||
)
|
)
|
||||||
self.local_not_recently_accessed_media = _create_media_and_set_last_accessed(
|
self.local_not_recently_accessed_media = _create_media_and_set_attributes(
|
||||||
self.ONE_DAY_IN_MS
|
last_accessed_ms=self.ONE_DAY_IN_MS,
|
||||||
|
)
|
||||||
|
self.local_not_recently_accessed_quarantined_media = (
|
||||||
|
_create_media_and_set_attributes(
|
||||||
|
last_accessed_ms=self.ONE_DAY_IN_MS,
|
||||||
|
is_quarantined=True,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
self.local_not_recently_accessed_protected_media = (
|
||||||
|
_create_media_and_set_attributes(
|
||||||
|
last_accessed_ms=self.ONE_DAY_IN_MS,
|
||||||
|
is_protected=True,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
self.local_never_accessed_media = _create_media_and_set_attributes(
|
||||||
|
last_accessed_ms=None,
|
||||||
)
|
)
|
||||||
self.local_never_accessed_media = _create_media_and_set_last_accessed(None)
|
|
||||||
|
|
||||||
# And now the remote media store
|
# And now the remote media store
|
||||||
self.remote_recently_accessed_media = _cache_remote_media_and_set_last_accessed(
|
self.remote_recently_accessed_media = _cache_remote_media_and_set_attributes(
|
||||||
"a", self.THIRTY_DAYS_IN_MS
|
media_id="a",
|
||||||
|
last_accessed_ms=self.THIRTY_DAYS_IN_MS,
|
||||||
)
|
)
|
||||||
self.remote_not_recently_accessed_media = (
|
self.remote_not_recently_accessed_media = (
|
||||||
_cache_remote_media_and_set_last_accessed("b", self.ONE_DAY_IN_MS)
|
_cache_remote_media_and_set_attributes(
|
||||||
|
media_id="b",
|
||||||
|
last_accessed_ms=self.ONE_DAY_IN_MS,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
self.remote_not_recently_accessed_quarantined_media = (
|
||||||
|
_cache_remote_media_and_set_attributes(
|
||||||
|
media_id="c",
|
||||||
|
last_accessed_ms=self.ONE_DAY_IN_MS,
|
||||||
|
is_quarantined=True,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
# Remote media will always have a "last accessed" attribute, as it would not
|
# Remote media will always have a "last accessed" attribute, as it would not
|
||||||
# be fetched from the remote homeserver unless instigated by a user.
|
# be fetched from the remote homeserver unless instigated by a user.
|
||||||
|
@ -163,8 +222,20 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase):
|
||||||
],
|
],
|
||||||
not_purged=[
|
not_purged=[
|
||||||
(self.hs.config.server.server_name, self.local_recently_accessed_media),
|
(self.hs.config.server.server_name, self.local_recently_accessed_media),
|
||||||
|
(
|
||||||
|
self.hs.config.server.server_name,
|
||||||
|
self.local_not_recently_accessed_quarantined_media,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
self.hs.config.server.server_name,
|
||||||
|
self.local_not_recently_accessed_protected_media,
|
||||||
|
),
|
||||||
(self.remote_server_name, self.remote_recently_accessed_media),
|
(self.remote_server_name, self.remote_recently_accessed_media),
|
||||||
(self.remote_server_name, self.remote_not_recently_accessed_media),
|
(self.remote_server_name, self.remote_not_recently_accessed_media),
|
||||||
|
(
|
||||||
|
self.remote_server_name,
|
||||||
|
self.remote_not_recently_accessed_quarantined_media,
|
||||||
|
),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -199,6 +270,18 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase):
|
||||||
self.hs.config.server.server_name,
|
self.hs.config.server.server_name,
|
||||||
self.local_not_recently_accessed_media,
|
self.local_not_recently_accessed_media,
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
self.hs.config.server.server_name,
|
||||||
|
self.local_not_recently_accessed_quarantined_media,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
self.hs.config.server.server_name,
|
||||||
|
self.local_not_recently_accessed_protected_media,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
self.remote_server_name,
|
||||||
|
self.remote_not_recently_accessed_quarantined_media,
|
||||||
|
),
|
||||||
(self.hs.config.server.server_name, self.local_never_accessed_media),
|
(self.hs.config.server.server_name, self.local_never_accessed_media),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in a new issue