Refactor store.have_events

It turns out that most of the time we were calling have_events, we were only
using half of the result. Replace have_events with have_seen_events and
get_rejection_reasons, so that we can see what's going on a bit more clearly.
This commit is contained in:
Richard van der Hoff 2018-04-17 18:30:53 +01:00
parent 512633ef44
commit b1dfbc3c40
3 changed files with 55 additions and 27 deletions

View file

@ -394,7 +394,7 @@ class FederationClient(FederationBase):
seen_events = yield self.store.get_events(event_ids, allow_rejected=True)
signed_events = seen_events.values()
else:
seen_events = yield self.store.have_events(event_ids)
seen_events = yield self.store.have_seen_events(event_ids)
signed_events = []
failed_to_fetch = set()

View file

@ -149,10 +149,6 @@ class FederationHandler(BaseHandler):
auth_chain = []
have_seen = yield self.store.have_events(
[ev for ev, _ in pdu.prev_events]
)
fetch_state = False
# Get missing pdus if necessary.
@ -168,7 +164,7 @@ class FederationHandler(BaseHandler):
)
prevs = {e_id for e_id, _ in pdu.prev_events}
seen = set(have_seen.keys())
seen = yield self.store.have_seen_events(prevs)
if min_depth and pdu.depth < min_depth:
# This is so that we don't notify the user about this
@ -196,8 +192,7 @@ class FederationHandler(BaseHandler):
# Update the set of things we've seen after trying to
# fetch the missing stuff
have_seen = yield self.store.have_events(prevs)
seen = set(have_seen.iterkeys())
seen = yield self.store.have_seen_events(prevs)
if not prevs - seen:
logger.info(
@ -248,8 +243,7 @@ class FederationHandler(BaseHandler):
min_depth (int): Minimum depth of events to return.
"""
# We recalculate seen, since it may have changed.
have_seen = yield self.store.have_events(prevs)
seen = set(have_seen.keys())
seen = yield self.store.have_seen_events(prevs)
if not prevs - seen:
return
@ -361,9 +355,7 @@ class FederationHandler(BaseHandler):
if auth_chain:
event_ids |= {e.event_id for e in auth_chain}
seen_ids = set(
(yield self.store.have_events(event_ids)).keys()
)
seen_ids = yield self.store.have_seen_events(event_ids)
if state and auth_chain is not None:
# If we have any state or auth_chain given to us by the replication
@ -633,7 +625,7 @@ class FederationHandler(BaseHandler):
failed_to_fetch = missing_auth - set(auth_events)
seen_events = yield self.store.have_events(
seen_events = yield self.store.have_seen_events(
set(auth_events.keys()) | set(state_events.keys())
)
@ -1736,7 +1728,8 @@ class FederationHandler(BaseHandler):
event_key = None
if event_auth_events - current_state:
have_events = yield self.store.have_events(
# TODO: can we use store.have_seen_events here instead?
have_events = yield self.store.get_seen_events_with_rejections(
event_auth_events - current_state
)
else:
@ -1759,12 +1752,12 @@ class FederationHandler(BaseHandler):
origin, event.room_id, event.event_id
)
seen_remotes = yield self.store.have_events(
seen_remotes = yield self.store.have_seen_events(
[e.event_id for e in remote_auth_chain]
)
for e in remote_auth_chain:
if e.event_id in seen_remotes.keys():
if e.event_id in seen_remotes:
continue
if e.event_id == event.event_id:
@ -1791,7 +1784,7 @@ class FederationHandler(BaseHandler):
except AuthError:
pass
have_events = yield self.store.have_events(
have_events = yield self.store.get_seen_events_with_rejections(
[e_id for e_id, _ in event.auth_events]
)
seen_events = set(have_events.keys())
@ -1876,13 +1869,13 @@ class FederationHandler(BaseHandler):
local_auth_chain,
)
seen_remotes = yield self.store.have_events(
seen_remotes = yield self.store.have_seen_events(
[e.event_id for e in result["auth_chain"]]
)
# 3. Process any remote auth chain events we haven't seen.
for ev in result["auth_chain"]:
if ev.event_id in seen_remotes.keys():
if ev.event_id in seen_remotes:
continue
if ev.event_id == event.event_id:

View file

@ -16,6 +16,7 @@
from collections import OrderedDict, deque, namedtuple
from functools import wraps
import itertools
import logging
import simplejson as json
@ -1320,13 +1321,49 @@ class EventsStore(EventsWorkerStore):
defer.returnValue(set(r["event_id"] for r in rows))
def have_events(self, event_ids):
@defer.inlineCallbacks
def have_seen_events(self, event_ids):
"""Given a list of event ids, check if we have already processed them.
Args:
event_ids (iterable[str]):
Returns:
dict: Has an entry for each event id we already have seen. Maps to
the rejected reason string if we rejected the event, else maps to
None.
Deferred[set[str]]: The events we have already seen.
"""
results = set()
def have_seen_events_txn(txn, chunk):
sql = (
"SELECT event_id FROM events as e WHERE e.event_id IN (%s)"
% (",".join("?" * len(chunk)), )
)
txn.execute(sql, chunk)
for (event_id, ) in txn:
results.add(event_id)
# break the input up into chunks of 100
input_iterator = iter(event_ids)
for chunk in iter(lambda: list(itertools.islice(input_iterator, 100)),
[]):
yield self.runInteraction(
"have_seen_events",
have_seen_events_txn,
chunk,
)
defer.returnValue(results)
def get_seen_events_with_rejections(self, event_ids):
"""Given a list of event ids, check if we rejected them.
Args:
event_ids (list[str])
Returns:
Deferred[dict[str, str|None):
Has an entry for each event id we already have seen. Maps to
the rejected reason string if we rejected the event, else maps
to None.
"""
if not event_ids:
return defer.succeed({})
@ -1348,9 +1385,7 @@ class EventsStore(EventsWorkerStore):
return res
return self.runInteraction(
"have_events", f,
)
return self.runInteraction("get_rejection_reasons", f)
@defer.inlineCallbacks
def count_daily_messages(self):