Prevent redacted events from appearing in message search (#6377)

This commit is contained in:
Andrew Morgan 2019-12-11 13:39:47 +00:00 committed by GitHub
parent 6676ee9c4a
commit fc316a4894
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 78 additions and 43 deletions

1
changelog.d/6377.bugfix Normal file
View file

@ -0,0 +1 @@
Prevent redacted events from being returned during message search.

View file

@ -63,6 +63,7 @@ from synapse.replication.http.federation import (
) )
from synapse.replication.http.membership import ReplicationUserJoinedLeftRoomRestServlet from synapse.replication.http.membership import ReplicationUserJoinedLeftRoomRestServlet
from synapse.state import StateResolutionStore, resolve_events_with_store from synapse.state import StateResolutionStore, resolve_events_with_store
from synapse.storage.data_stores.main.events_worker import EventRedactBehaviour
from synapse.types import UserID, get_domain_from_id from synapse.types import UserID, get_domain_from_id
from synapse.util import batch_iter, unwrapFirstError from synapse.util import batch_iter, unwrapFirstError
from synapse.util.async_helpers import Linearizer from synapse.util.async_helpers import Linearizer
@ -423,7 +424,7 @@ class FederationHandler(BaseHandler):
evs = yield self.store.get_events( evs = yield self.store.get_events(
list(state_map.values()), list(state_map.values()),
get_prev_content=False, get_prev_content=False,
check_redacted=False, redact_behaviour=EventRedactBehaviour.AS_IS,
) )
event_map.update(evs) event_map.update(evs)
@ -1000,7 +1001,9 @@ class FederationHandler(BaseHandler):
forward_events = yield self.store.get_successor_events(list(extremities)) forward_events = yield self.store.get_successor_events(list(extremities))
extremities_events = yield self.store.get_events( extremities_events = yield self.store.get_events(
forward_events, check_redacted=False, get_prev_content=False forward_events,
redact_behaviour=EventRedactBehaviour.AS_IS,
get_prev_content=False,
) )
# We set `check_history_visibility_only` as we might otherwise get false # We set `check_history_visibility_only` as we might otherwise get false

View file

@ -46,6 +46,7 @@ from synapse.events.validator import EventValidator
from synapse.logging.context import run_in_background from synapse.logging.context import run_in_background
from synapse.metrics.background_process_metrics import run_as_background_process from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.replication.http.send_event import ReplicationSendEventRestServlet from synapse.replication.http.send_event import ReplicationSendEventRestServlet
from synapse.storage.data_stores.main.events_worker import EventRedactBehaviour
from synapse.storage.state import StateFilter from synapse.storage.state import StateFilter
from synapse.types import RoomAlias, UserID, create_requester from synapse.types import RoomAlias, UserID, create_requester
from synapse.util.async_helpers import Linearizer from synapse.util.async_helpers import Linearizer
@ -875,7 +876,7 @@ class EventCreationHandler(object):
if event.type == EventTypes.Redaction: if event.type == EventTypes.Redaction:
original_event = yield self.store.get_event( original_event = yield self.store.get_event(
event.redacts, event.redacts,
check_redacted=False, redact_behaviour=EventRedactBehaviour.AS_IS,
get_prev_content=False, get_prev_content=False,
allow_rejected=False, allow_rejected=False,
allow_none=True, allow_none=True,
@ -952,7 +953,7 @@ class EventCreationHandler(object):
if event.type == EventTypes.Redaction: if event.type == EventTypes.Redaction:
original_event = yield self.store.get_event( original_event = yield self.store.get_event(
event.redacts, event.redacts,
check_redacted=False, redact_behaviour=EventRedactBehaviour.AS_IS,
get_prev_content=False, get_prev_content=False,
allow_rejected=False, allow_rejected=False,
allow_none=True, allow_none=True,

View file

@ -32,6 +32,7 @@ from synapse.events import EventBase
from synapse.events.snapshot import EventContext from synapse.events.snapshot import EventContext
from synapse.logging.utils import log_function from synapse.logging.utils import log_function
from synapse.state import v1, v2 from synapse.state import v1, v2
from synapse.storage.data_stores.main.events_worker import EventRedactBehaviour
from synapse.util.async_helpers import Linearizer from synapse.util.async_helpers import Linearizer
from synapse.util.caches import get_cache_factor_for from synapse.util.caches import get_cache_factor_for
from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.caches.expiringcache import ExpiringCache
@ -645,7 +646,7 @@ class StateResolutionStore(object):
return self.store.get_events( return self.store.get_events(
event_ids, event_ids,
check_redacted=False, redact_behaviour=EventRedactBehaviour.AS_IS,
get_prev_content=False, get_prev_content=False,
allow_rejected=allow_rejected, allow_rejected=allow_rejected,
) )

View file

@ -19,8 +19,10 @@ import itertools
import logging import logging
import threading import threading
from collections import namedtuple from collections import namedtuple
from typing import List, Optional
from canonicaljson import json from canonicaljson import json
from constantly import NamedConstant, Names
from twisted.internet import defer from twisted.internet import defer
@ -55,6 +57,16 @@ EVENT_QUEUE_TIMEOUT_S = 0.1 # Timeout when waiting for requests for events
_EventCacheEntry = namedtuple("_EventCacheEntry", ("event", "redacted_event")) _EventCacheEntry = namedtuple("_EventCacheEntry", ("event", "redacted_event"))
class EventRedactBehaviour(Names):
    """
    What to do when retrieving a redacted event from the database.
    """

    # Return the full event body, without applying the redaction.
    AS_IS = NamedConstant()
    # Return the event, but with its redacted body.
    REDACT = NamedConstant()
    # Do not return redacted events at all; they are skipped.
    BLOCK = NamedConstant()
class EventsWorkerStore(SQLBaseStore): class EventsWorkerStore(SQLBaseStore):
def __init__(self, database: Database, db_conn, hs): def __init__(self, database: Database, db_conn, hs):
super(EventsWorkerStore, self).__init__(database, db_conn, hs) super(EventsWorkerStore, self).__init__(database, db_conn, hs)
@ -125,25 +137,27 @@ class EventsWorkerStore(SQLBaseStore):
@defer.inlineCallbacks @defer.inlineCallbacks
def get_event( def get_event(
self, self,
event_id, event_id: List[str],
check_redacted=True, redact_behaviour: EventRedactBehaviour = EventRedactBehaviour.REDACT,
get_prev_content=False, get_prev_content: bool = False,
allow_rejected=False, allow_rejected: bool = False,
allow_none=False, allow_none: bool = False,
check_room_id=None, check_room_id: Optional[str] = None,
): ):
"""Get an event from the database by event_id. """Get an event from the database by event_id.
Args: Args:
event_id (str): The event_id of the event to fetch event_id: The event_id of the event to fetch
check_redacted (bool): If True, check if event has been redacted redact_behaviour: Determine what to do with a redacted event. Possible values:
and redact it. * AS_IS - Return the full event body with no redacted content
get_prev_content (bool): If True and event is a state event, * REDACT - Return the event but with a redacted body
* BLOCK - Do not return redacted events
get_prev_content: If True and event is a state event,
include the previous states content in the unsigned field. include the previous states content in the unsigned field.
allow_rejected (bool): If True return rejected events. allow_rejected: If True return rejected events.
allow_none (bool): If True, return None if no event found, if allow_none: If True, return None if no event found, if
False throw a NotFoundError False throw a NotFoundError
check_room_id (str|None): if not None, check the room of the found event. check_room_id: if not None, check the room of the found event.
If there is a mismatch, behave as per allow_none. If there is a mismatch, behave as per allow_none.
Returns: Returns:
@ -154,7 +168,7 @@ class EventsWorkerStore(SQLBaseStore):
events = yield self.get_events_as_list( events = yield self.get_events_as_list(
[event_id], [event_id],
check_redacted=check_redacted, redact_behaviour=redact_behaviour,
get_prev_content=get_prev_content, get_prev_content=get_prev_content,
allow_rejected=allow_rejected, allow_rejected=allow_rejected,
) )
@ -173,27 +187,30 @@ class EventsWorkerStore(SQLBaseStore):
@defer.inlineCallbacks @defer.inlineCallbacks
def get_events( def get_events(
self, self,
event_ids, event_ids: List[str],
check_redacted=True, redact_behaviour: EventRedactBehaviour = EventRedactBehaviour.REDACT,
get_prev_content=False, get_prev_content: bool = False,
allow_rejected=False, allow_rejected: bool = False,
): ):
"""Get events from the database """Get events from the database
Args: Args:
event_ids (list): The event_ids of the events to fetch event_ids: The event_ids of the events to fetch
check_redacted (bool): If True, check if event has been redacted redact_behaviour: Determine what to do with a redacted event. Possible
and redact it. values:
get_prev_content (bool): If True and event is a state event, * AS_IS - Return the full event body with no redacted content
* REDACT - Return the event but with a redacted body
* BLOCK - Do not return redacted events
get_prev_content: If True and event is a state event,
include the previous states content in the unsigned field. include the previous states content in the unsigned field.
allow_rejected (bool): If True return rejected events. allow_rejected: If True return rejected events.
Returns: Returns:
Deferred : Dict from event_id to event. Deferred : Dict from event_id to event.
""" """
events = yield self.get_events_as_list( events = yield self.get_events_as_list(
event_ids, event_ids,
check_redacted=check_redacted, redact_behaviour=redact_behaviour,
get_prev_content=get_prev_content, get_prev_content=get_prev_content,
allow_rejected=allow_rejected, allow_rejected=allow_rejected,
) )
@ -203,21 +220,23 @@ class EventsWorkerStore(SQLBaseStore):
@defer.inlineCallbacks @defer.inlineCallbacks
def get_events_as_list( def get_events_as_list(
self, self,
event_ids, event_ids: List[str],
check_redacted=True, redact_behaviour: EventRedactBehaviour = EventRedactBehaviour.REDACT,
get_prev_content=False, get_prev_content: bool = False,
allow_rejected=False, allow_rejected: bool = False,
): ):
"""Get events from the database and return in a list in the same order """Get events from the database and return in a list in the same order
as given by `event_ids` arg. as given by `event_ids` arg.
Args: Args:
event_ids (list): The event_ids of the events to fetch event_ids: The event_ids of the events to fetch
check_redacted (bool): If True, check if event has been redacted redact_behaviour: Determine what to do with a redacted event. Possible values:
and redact it. * AS_IS - Return the full event body with no redacted content
get_prev_content (bool): If True and event is a state event, * REDACT - Return the event but with a redacted body
* BLOCK - Do not return redacted events
get_prev_content: If True and event is a state event,
include the previous states content in the unsigned field. include the previous states content in the unsigned field.
allow_rejected (bool): If True return rejected events. allow_rejected: If True, return rejected events.
Returns: Returns:
Deferred[list[EventBase]]: List of events fetched from the database. The Deferred[list[EventBase]]: List of events fetched from the database. The
@ -319,11 +338,15 @@ class EventsWorkerStore(SQLBaseStore):
# Update the cache to save doing the checks again. # Update the cache to save doing the checks again.
entry.event.internal_metadata.recheck_redaction = False entry.event.internal_metadata.recheck_redaction = False
if check_redacted and entry.redacted_event:
event = entry.redacted_event
else:
event = entry.event event = entry.event
if entry.redacted_event:
if redact_behaviour == EventRedactBehaviour.BLOCK:
# Skip this event
continue
elif redact_behaviour == EventRedactBehaviour.REDACT:
event = entry.redacted_event
events.append(event) events.append(event)
if get_prev_content: if get_prev_content:

View file

@ -25,6 +25,7 @@ from twisted.internet import defer
from synapse.api.errors import SynapseError from synapse.api.errors import SynapseError
from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause
from synapse.storage.data_stores.main.events_worker import EventRedactBehaviour
from synapse.storage.database import Database from synapse.storage.database import Database
from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.storage.engines import PostgresEngine, Sqlite3Engine
@ -453,7 +454,12 @@ class SearchStore(SearchBackgroundUpdateStore):
results = list(filter(lambda row: row["room_id"] in room_ids, results)) results = list(filter(lambda row: row["room_id"] in room_ids, results))
events = yield self.get_events_as_list([r["event_id"] for r in results]) # We set redact_behaviour to BLOCK here to prevent redacted events being returned in
# search results (which is a data leak)
events = yield self.get_events_as_list(
[r["event_id"] for r in results],
redact_behaviour=EventRedactBehaviour.BLOCK,
)
event_map = {ev.event_id: ev for ev in events} event_map = {ev.event_id: ev for ev in events}