mirror of
https://mau.dev/maunium/synapse.git
synced 2024-12-14 16:23:52 +01:00
Merge pull request #4699 from matrix-org/erikj/stop_fed_not_in_room
Stop backpaginating when events not visible
This commit is contained in:
commit
b050a10871
4 changed files with 127 additions and 29 deletions
1
changelog.d/4699.bugfix
Normal file
1
changelog.d/4699.bugfix
Normal file
|
@ -0,0 +1 @@
|
|||
Fix attempting to paginate in rooms where server cannot see any events, to avoid unnecessarily pulling in lots of redacted events.
|
|
@ -858,6 +858,52 @@ class FederationHandler(BaseHandler):
|
|||
logger.debug("Not backfilling as no extremeties found.")
|
||||
return
|
||||
|
||||
# We only want to paginate if we can actually see the events we'll get,
|
||||
# as otherwise we'll just spend a lot of resources to get redacted
|
||||
# events.
|
||||
#
|
||||
# We do this by filtering all the backwards extremities and seeing if
|
||||
# any remain. Given we don't have the extremity events themselves, we
|
||||
# need to actually check the events that reference them.
|
||||
#
|
||||
# *Note*: the spec wants us to keep backfilling until we reach the start
|
||||
# of the room in case we are allowed to see some of the history. However
|
||||
# in practice that causes more issues than it's worth, as a) it's
|
||||
# relatively rare for there to be any visible history and b) even when
|
||||
# there is it's often sufficiently long ago that clients would stop
|
||||
# attempting to paginate before backfill reached the visible history.
|
||||
#
|
||||
# TODO: If we do do a backfill then we should filter the backwards
|
||||
# extremities to only include those that point to visible portions of
|
||||
# history.
|
||||
#
|
||||
# TODO: Correctly handle the case where we are allowed to see the
|
||||
# forward event but not the backward extremity, e.g. in the case of
|
||||
# initial join of the server where we are allowed to see the join
|
||||
# event but not anything before it. This would require looking at the
|
||||
# state *before* the event, ignoring the special casing certain event
|
||||
# types have.
|
||||
|
||||
forward_events = yield self.store.get_successor_events(
|
||||
list(extremities),
|
||||
)
|
||||
|
||||
extremities_events = yield self.store.get_events(
|
||||
forward_events,
|
||||
check_redacted=False,
|
||||
get_prev_content=False,
|
||||
)
|
||||
|
||||
# We set `check_history_visibility_only` as we might otherwise get false
|
||||
# positives from users having been erased.
|
||||
filtered_extremities = yield filter_events_for_server(
|
||||
self.store, self.server_name, list(extremities_events.values()),
|
||||
redact=False, check_history_visibility_only=True,
|
||||
)
|
||||
|
||||
if not filtered_extremities:
|
||||
defer.returnValue(False)
|
||||
|
||||
# Check if we reached a point where we should start backfilling.
|
||||
sorted_extremeties_tuple = sorted(
|
||||
extremities.items(),
|
||||
|
|
|
@ -442,6 +442,28 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore,
|
|||
event_results.reverse()
|
||||
return event_results
|
||||
|
||||
@defer.inlineCallbacks
def get_successor_events(self, event_ids):
    """Look up the events that list any of the given events as a prev event.

    Args:
        event_ids (iterable[str]): the event IDs to match against the
            `prev_event_id` column of the `event_edges` table.

    Returns:
        Deferred[list[str]]: the `event_id` value of every row that the
        batched select returned.
    """
    edge_rows = yield self._simple_select_many_batch(
        table="event_edges",
        column="prev_event_id",
        iterable=event_ids,
        retcols=("event_id",),
        desc="get_successor_events",
    )

    # Only the successor's own ID is of interest; strip away the row dicts.
    successor_ids = [edge["event_id"] for edge in edge_rows]
    defer.returnValue(successor_ids)
|
||||
|
||||
|
||||
class EventFederationStore(EventFederationWorkerStore):
|
||||
""" Responsible for storing and serving up the various graphs associated
|
||||
|
|
|
@ -216,28 +216,36 @@ def filter_events_for_client(store, user_id, events, is_peeking=False,
|
|||
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def filter_events_for_server(store, server_name, events):
|
||||
# Whatever else we do, we need to check for senders which have requested
|
||||
# erasure of their data.
|
||||
erased_senders = yield store.are_users_erased(
|
||||
(e.sender for e in events),
|
||||
)
|
||||
def filter_events_for_server(store, server_name, events, redact=True,
|
||||
check_history_visibility_only=False):
|
||||
"""Filter a list of events based on whether given server is allowed to
|
||||
see them.
|
||||
|
||||
def redact_disallowed(event, state):
|
||||
# if the sender has been gdpr17ed, always return a redacted
|
||||
# copy of the event.
|
||||
if erased_senders[event.sender]:
|
||||
Args:
|
||||
store (DataStore)
|
||||
server_name (str)
|
||||
events (iterable[FrozenEvent])
|
||||
redact (bool): Whether to return a redacted version of the event, or
|
||||
to filter them out entirely.
|
||||
check_history_visibility_only (bool): Whether to only check the
|
||||
history visibility, rather than things like if the sender has been
|
||||
erased. This is used e.g. during pagination to decide whether to
|
||||
backfill or not.
|
||||
|
||||
Returns:
|
||||
Deferred[list[FrozenEvent]]
|
||||
"""
|
||||
|
||||
def is_sender_erased(event, erased_senders):
|
||||
if erased_senders and erased_senders[event.sender]:
|
||||
logger.info(
|
||||
"Sender of %s has been erased, redacting",
|
||||
event.event_id,
|
||||
)
|
||||
return prune_event(event)
|
||||
|
||||
# state will be None if we decided we didn't need to filter by
|
||||
# room membership.
|
||||
if not state:
|
||||
return event
|
||||
return True
|
||||
return False
|
||||
|
||||
def check_event_is_visible(event, state):
|
||||
history = state.get((EventTypes.RoomHistoryVisibility, ''), None)
|
||||
if history:
|
||||
visibility = history.content.get("history_visibility", "shared")
|
||||
|
@ -259,17 +267,17 @@ def filter_events_for_server(store, server_name, events):
|
|||
|
||||
memtype = ev.membership
|
||||
if memtype == Membership.JOIN:
|
||||
return event
|
||||
return True
|
||||
elif memtype == Membership.INVITE:
|
||||
if visibility == "invited":
|
||||
return event
|
||||
return True
|
||||
else:
|
||||
# server has no users in the room: redact
|
||||
return prune_event(event)
|
||||
return False
|
||||
|
||||
return event
|
||||
return True
|
||||
|
||||
# Next lets check to see if all the events have a history visibility
|
||||
# Let's check to see if all the events have a history visibility
|
||||
# of "shared" or "world_readable". If that's the case then we don't
|
||||
# need to check membership (as we know the server is in the room).
|
||||
event_to_state_ids = yield store.get_state_ids_for_events(
|
||||
|
@ -296,16 +304,31 @@ def filter_events_for_server(store, server_name, events):
|
|||
for e in itervalues(event_map)
|
||||
)
|
||||
|
||||
if not check_history_visibility_only:
|
||||
erased_senders = yield store.are_users_erased(
|
||||
(e.sender for e in events),
|
||||
)
|
||||
else:
|
||||
# We don't want to check whether users are erased, which is equivalent
|
||||
# to no users having been erased.
|
||||
erased_senders = {}
|
||||
|
||||
if all_open:
|
||||
# all the history_visibility state affecting these events is open, so
|
||||
# we don't need to filter by membership state. We *do* need to check
|
||||
# for user erasure, though.
|
||||
if erased_senders:
|
||||
events = [
|
||||
redact_disallowed(e, None)
|
||||
for e in events
|
||||
]
|
||||
to_return = []
|
||||
for e in events:
|
||||
if not is_sender_erased(e, erased_senders):
|
||||
to_return.append(e)
|
||||
elif redact:
|
||||
to_return.append(prune_event(e))
|
||||
|
||||
defer.returnValue(to_return)
|
||||
|
||||
# If there are no erased users then we can just return the given list
|
||||
# of events without having to copy it.
|
||||
defer.returnValue(events)
|
||||
|
||||
# Ok, so we're dealing with events that have non-trivial visibility
|
||||
|
@ -361,7 +384,13 @@ def filter_events_for_server(store, server_name, events):
|
|||
for e_id, key_to_eid in iteritems(event_to_state_ids)
|
||||
}
|
||||
|
||||
defer.returnValue([
|
||||
redact_disallowed(e, event_to_state[e.event_id])
|
||||
for e in events
|
||||
])
|
||||
to_return = []
|
||||
for e in events:
|
||||
erased = is_sender_erased(e, erased_senders)
|
||||
visible = check_event_is_visible(e, event_to_state[e.event_id])
|
||||
if visible and not erased:
|
||||
to_return.append(e)
|
||||
elif redact:
|
||||
to_return.append(prune_event(e))
|
||||
|
||||
defer.returnValue(to_return)
|
||||
|
|
Loading…
Reference in a new issue