Merge pull request #3221 from matrix-org/erikj/purge_token

Make purge_history operate on tokens
commit fa30ac38cc
Erik Johnston, 2018-05-18 10:35:23 +01:00 (committed by GitHub)
3 changed files with 69 additions and 37 deletions
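
Note for context (not part of the diff): where the old code passed around a bare topological ordering (an int), the new code passes an opaque topological stream token string, parsed via synapse.types.RoomStreamToken. A minimal sketch of the token shape the code below assumes; this is a simplified stand-in, not the real class, which also handles other token forms:

from collections import namedtuple

class RoomStreamToken(namedtuple("RoomStreamToken", ("topological", "stream"))):
    """Simplified stand-in for synapse.types.RoomStreamToken."""
    @classmethod
    def parse(cls, string):
        # Topological tokens look like "t<topological>-<stream>",
        # e.g. "t123-456" -> RoomStreamToken(topological=123, stream=456).
        if not string.startswith("t"):
            raise ValueError("Not a topological token: %r" % (string,))
        topo, _, stream = string[1:].partition("-")
        return cls(int(topo), int(stream))

print(RoomStreamToken.parse("t123-456").topological)  # 123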

synapse/handlers/message.py

@@ -86,14 +86,14 @@ class MessageHandler(BaseHandler):
         # map from purge id to PurgeStatus
         self._purges_by_id = {}
 
-    def start_purge_history(self, room_id, topological_ordering,
+    def start_purge_history(self, room_id, token,
                             delete_local_events=False):
         """Start off a history purge on a room.
 
         Args:
             room_id (str): The room to purge from
-            topological_ordering (int): minimum topo ordering to preserve
+            token (str): topological token to delete events before
             delete_local_events (bool): True to delete local events as well as
                 remote ones
@@ -115,19 +115,19 @@ class MessageHandler(BaseHandler):
         self._purges_by_id[purge_id] = PurgeStatus()
         run_in_background(
             self._purge_history,
-            purge_id, room_id, topological_ordering, delete_local_events,
+            purge_id, room_id, token, delete_local_events,
         )
         return purge_id
 
     @defer.inlineCallbacks
-    def _purge_history(self, purge_id, room_id, topological_ordering,
+    def _purge_history(self, purge_id, room_id, token,
                        delete_local_events):
         """Carry out a history purge on a room.
 
         Args:
             purge_id (str): The id for this purge
             room_id (str): The room to purge from
-            topological_ordering (int): minimum topo ordering to preserve
+            token (str): topological token to delete events before
             delete_local_events (bool): True to delete local events as well as
                 remote ones
@@ -138,7 +138,7 @@ class MessageHandler(BaseHandler):
         try:
             with (yield self.pagination_lock.write(room_id)):
                 yield self.store.purge_history(
-                    room_id, topological_ordering, delete_local_events,
+                    room_id, token, delete_local_events,
                 )
             logger.info("[purge] complete")
             self._purges_by_id[purge_id].status = PurgeStatus.STATUS_COMPLETE
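
The handler kicks the purge off in the background and tracks it by purge_id. A self-contained sketch of that pattern (toy code, not Synapse's: threading stands in for Twisted's run_in_background, and the status strings are illustrative):

import random
import string
import threading

PURGES = {}  # purge_id -> status; plays the role of self._purges_by_id

def start_purge_history(room_id, token, delete_local_events=False):
    purge_id = "".join(random.choice(string.ascii_letters) for _ in range(16))
    PURGES[purge_id] = "active"

    def run():
        # ... delete events in room_id older than `token` here ...
        PURGES[purge_id] = "complete"

    threading.Thread(target=run).start()  # stands in for run_in_background()
    return purge_id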

synapse/rest/client/v1/admin.py

@@ -151,10 +151,11 @@ class PurgeHistoryRestServlet(ClientV1RestServlet):
             if event.room_id != room_id:
                 raise SynapseError(400, "Event is for wrong room.")
 
-            depth = event.depth
+            token = yield self.store.get_topological_token_for_event(event_id)
+
             logger.info(
-                "[purge] purging up to depth %i (event_id %s)",
-                depth, event_id,
+                "[purge] purging up to token %s (event_id %s)",
+                token, event_id,
             )
         elif 'purge_up_to_ts' in body:
             ts = body['purge_up_to_ts']
@@ -174,7 +175,9 @@ class PurgeHistoryRestServlet(ClientV1RestServlet):
                 )
             )
             if room_event_after_stream_ordering:
-                (_, depth, _) = room_event_after_stream_ordering
+                token = yield self.store.get_topological_token_for_event(
+                    room_event_after_stream_ordering,
+                )
             else:
                 logger.warn(
                     "[purge] purging events not possible: No event found "
@@ -187,9 +190,9 @@ class PurgeHistoryRestServlet(ClientV1RestServlet):
                     errcode=Codes.NOT_FOUND,
                 )
 
             logger.info(
-                "[purge] purging up to depth %i (received_ts %i => "
+                "[purge] purging up to token %d (received_ts %i => "
                 "stream_ordering %i)",
-                depth, ts, stream_ordering,
+                token, ts, stream_ordering,
             )
         else:
             raise SynapseError(
@@ -199,7 +202,7 @@ class PurgeHistoryRestServlet(ClientV1RestServlet):
             )
 
         purge_id = yield self.handlers.message_handler.start_purge_history(
-            room_id, depth,
+            room_id, token,
            delete_local_events=delete_local_events,
        )
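
For context, this servlet backs the purge-history admin API. A hedged example of exercising the event-id form (host, room and event IDs, and the access token are placeholders; the endpoint path is the v1 admin path Synapse used at the time):

import requests  # third-party HTTP client, used here purely for illustration

resp = requests.post(
    "http://localhost:8008/_matrix/client/r0/admin/purge_history"
    "/!room:example.com/$event:example.com",
    params={"access_token": "<admin access token>"},
    json={"delete_local_events": False},
)
print(resp.json())  # expected: {"purge_id": "..."} to poll for status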

synapse/storage/events.py

@@ -33,7 +33,7 @@ from synapse.util.metrics import Measure
 from synapse.api.constants import EventTypes
 from synapse.api.errors import SynapseError
 from synapse.util.caches.descriptors import cached, cachedInlineCallbacks
-from synapse.types import get_domain_from_id
+from synapse.types import get_domain_from_id, RoomStreamToken
 
 import synapse.metrics
 
 # these are only included to make the type annotations work
@@ -1803,15 +1803,14 @@ class EventsStore(EventsWorkerStore):
         return self.runInteraction("get_all_new_events", get_all_new_events_txn)
 
     def purge_history(
-        self, room_id, topological_ordering, delete_local_events,
+        self, room_id, token, delete_local_events,
     ):
         """Deletes room history before a certain point
 
         Args:
             room_id (str):
 
-            topological_ordering (int):
-                minimum topo ordering to preserve
+            token (str): A topological token to delete events before
 
             delete_local_events (bool):
                 if True, we will delete local events as well as remote ones
@@ -1821,13 +1820,15 @@ class EventsStore(EventsWorkerStore):
 
         return self.runInteraction(
             "purge_history",
-            self._purge_history_txn, room_id, topological_ordering,
+            self._purge_history_txn, room_id, token,
             delete_local_events,
         )
 
     def _purge_history_txn(
-        self, txn, room_id, topological_ordering, delete_local_events,
+        self, txn, room_id, token_str, delete_local_events,
     ):
+        token = RoomStreamToken.parse(token_str)
+
         # Tables that should be pruned:
         #     event_auth
         #     event_backward_extremities
@@ -1872,6 +1873,13 @@ class EventsStore(EventsWorkerStore):
             " ON events_to_purge(should_delete)",
         )
 
+        # We do joins against events_to_purge for e.g. calculating state
+        # groups to purge, etc., so lets make an index.
+        txn.execute(
+            "CREATE INDEX events_to_purge_id"
+            " ON events_to_purge(event_id)",
+        )
+
         # First ensure that we're not about to delete all the forward extremeties
         txn.execute(
             "SELECT e.event_id, e.depth FROM events as e "
@@ -1884,7 +1892,7 @@ class EventsStore(EventsWorkerStore):
         rows = txn.fetchall()
         max_depth = max(row[0] for row in rows)
 
-        if max_depth <= topological_ordering:
+        if max_depth <= token.topological:
             # We need to ensure we don't delete all the events from the datanase
             # otherwise we wouldn't be able to send any events (due to not
             # having any backwards extremeties)
@@ -1900,7 +1908,7 @@ class EventsStore(EventsWorkerStore):
             should_delete_expr += " AND event_id NOT LIKE ?"
             should_delete_params += ("%:" + self.hs.hostname, )
 
-        should_delete_params += (room_id, topological_ordering)
+        should_delete_params += (room_id, token.topological)
 
         txn.execute(
             "INSERT INTO events_to_purge"
@@ -1923,13 +1931,13 @@ class EventsStore(EventsWorkerStore):
         logger.info("[purge] Finding new backward extremities")
 
         # We calculate the new entries for the backward extremeties by finding
-        # all events that point to events that are to be purged
+        # events to be purged that are pointed to by events we're not going to
+        # purge.
         txn.execute(
             "SELECT DISTINCT e.event_id FROM events_to_purge AS e"
             " INNER JOIN event_edges AS ed ON e.event_id = ed.prev_event_id"
-            " INNER JOIN events AS e2 ON e2.event_id = ed.event_id"
-            " WHERE e2.topological_ordering >= ?",
-            (topological_ordering, )
+            " LEFT JOIN events_to_purge AS ep2 ON ed.event_id = ep2.event_id"
+            " WHERE ep2.event_id IS NULL",
         )
 
         new_backwards_extrems = txn.fetchall()
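
The rewritten query is a standard LEFT JOIN ... IS NULL anti-join: select purged events that are pointed to by at least one event which is not itself being purged. A toy sqlite3 run of exactly that query shape (schema cut down to the columns involved; event IDs illustrative):

import sqlite3

txn = sqlite3.connect(":memory:").cursor()
txn.execute("CREATE TABLE events_to_purge (event_id TEXT)")
txn.execute("CREATE TABLE event_edges (event_id TEXT, prev_event_id TEXT)")
# $a and $b are purged; surviving event $c points at $a, and purged $b does too.
txn.executemany("INSERT INTO events_to_purge VALUES (?)", [("$a",), ("$b",)])
txn.executemany("INSERT INTO event_edges VALUES (?, ?)",
                [("$c", "$a"), ("$b", "$a")])
txn.execute(
    "SELECT DISTINCT e.event_id FROM events_to_purge AS e"
    " INNER JOIN event_edges AS ed ON e.event_id = ed.prev_event_id"
    " LEFT JOIN events_to_purge AS ep2 ON ed.event_id = ep2.event_id"
    " WHERE ep2.event_id IS NULL"
)
print(txn.fetchall())  # [('$a',)] -- $a becomes a new backward extremity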
@@ -1953,16 +1961,22 @@ class EventsStore(EventsWorkerStore):
 
         # Get all state groups that are only referenced by events that are
         # to be deleted.
-        txn.execute(
-            "SELECT state_group FROM event_to_state_groups"
-            " INNER JOIN events USING (event_id)"
-            " WHERE state_group IN ("
-            "   SELECT DISTINCT state_group FROM events_to_purge"
-            "   INNER JOIN event_to_state_groups USING (event_id)"
-            " )"
-            " GROUP BY state_group HAVING MAX(topological_ordering) < ?",
-            (topological_ordering, )
-        )
+        # This works by first getting state groups that we may want to delete,
+        # joining against event_to_state_groups to get events that use that
+        # state group, then left joining against events_to_purge again. Any
+        # state group where the left join produce *no nulls* are referenced
+        # only by events that are going to be purged.
+        txn.execute("""
+            SELECT state_group FROM
+            (
+                SELECT DISTINCT state_group FROM events_to_purge
+                INNER JOIN event_to_state_groups USING (event_id)
+            ) AS sp
+            INNER JOIN event_to_state_groups USING (state_group)
+            LEFT JOIN events_to_purge AS ep USING (event_id)
+            GROUP BY state_group
+            HAVING SUM(CASE WHEN ep.event_id IS NULL THEN 1 ELSE 0 END) = 0
+        """)
 
         state_rows = txn.fetchall()
         logger.info("[purge] found %i redundant state groups", len(state_rows))
@@ -2109,10 +2123,25 @@ class EventsStore(EventsWorkerStore):
         #
         # So, let's stick it at the end so that we don't block event
         # persistence.
-        logger.info("[purge] updating room_depth")
+        #
+        # We do this by calculating the minimum depth of the backwards
+        # extremities. However, the events in event_backward_extremities
+        # are ones we don't have yet so we need to look at the events that
+        # point to it via event_edges table.
+        txn.execute("""
+            SELECT COALESCE(MIN(depth), 0)
+            FROM event_backward_extremities AS eb
+            INNER JOIN event_edges AS eg ON eg.prev_event_id = eb.event_id
+            INNER JOIN events AS e ON e.event_id = eg.event_id
+            WHERE eb.room_id = ?
+        """, (room_id,))
+        min_depth, = txn.fetchone()
+
+        logger.info("[purge] updating room_depth to %d", min_depth)
+
         txn.execute(
             "UPDATE room_depth SET min_depth = ? WHERE room_id = ?",
-            (topological_ordering, room_id,)
+            (min_depth, room_id,)
         )
 
         # finally, drop the temp table. this will commit the txn in sqlite,
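
Finally, the room_depth update above no longer trusts the purge token's topological position: it recomputes min_depth from the surviving events that point at the remaining backward extremities, with COALESCE(..., 0) covering the no-extremities case. A toy sqlite3 run of that query (schema trimmed to the columns used; IDs and depths illustrative):

import sqlite3

txn = sqlite3.connect(":memory:").cursor()
txn.execute("CREATE TABLE event_backward_extremities (event_id TEXT, room_id TEXT)")
txn.execute("CREATE TABLE event_edges (event_id TEXT, prev_event_id TEXT)")
txn.execute("CREATE TABLE events (event_id TEXT, depth INT)")
# The extremity $gone is an event we don't have; $x and $y point at it.
txn.execute("INSERT INTO event_backward_extremities VALUES ('$gone', '!r')")
txn.executemany("INSERT INTO event_edges VALUES (?, ?)",
                [("$x", "$gone"), ("$y", "$gone")])
txn.executemany("INSERT INTO events VALUES (?, ?)", [("$x", 7), ("$y", 9)])
txn.execute("""
    SELECT COALESCE(MIN(depth), 0)
    FROM event_backward_extremities AS eb
    INNER JOIN event_edges AS eg ON eg.prev_event_id = eb.event_id
    INNER JOIN events AS e ON e.event_id = eg.event_id
    WHERE eb.room_id = ?
""", ("!r",))
print(txn.fetchone())  # (7,) -- the new min_depth for the room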