From 598a1d8ff953c70f9f54564225d693a1bcf42144 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Aug 2014 14:19:48 +0100 Subject: [PATCH] Change the way pagination works to support out of order events. --- synapse/api/streams/__init__.py | 19 +- synapse/api/streams/event.py | 90 +++++---- synapse/handlers/presence.py | 2 +- synapse/handlers/room.py | 3 +- synapse/storage/schema/im.sql | 2 +- synapse/storage/stream.py | 186 +++++++++++++++--- .../components/matrix/event-stream-service.js | 1 - webclient/components/matrix/matrix-service.js | 4 +- 8 files changed, 226 insertions(+), 81 deletions(-) diff --git a/synapse/api/streams/__init__.py b/synapse/api/streams/__init__.py index 989e63f9e..44f4cc607 100644 --- a/synapse/api/streams/__init__.py +++ b/synapse/api/streams/__init__.py @@ -20,23 +20,23 @@ class PaginationConfig(object): """A configuration object which stores pagination parameters.""" - def __init__(self, from_tok=None, to_tok=None, limit=0): + def __init__(self, from_tok=None, to_tok=None, direction='f', limit=0): self.from_tok = from_tok self.to_tok = to_tok + self.direction = direction self.limit = limit @classmethod def from_request(cls, request, raise_invalid_params=True): params = { - "from_tok": PaginationStream.TOK_START, - "to_tok": PaginationStream.TOK_END, - "limit": 0 + "direction": 'f', } query_param_mappings = [ # 3-tuple of qp_key, attribute, rules ("from", "from_tok", lambda x: type(x) == str), ("to", "to_tok", lambda x: type(x) == str), - ("limit", "limit", lambda x: x.isdigit()) + ("limit", "limit", lambda x: x.isdigit()), + ("dir", "direction", lambda x: x == 'f' or x == 'b'), ] for qp, attr, is_valid in query_param_mappings: @@ -48,12 +48,17 @@ class PaginationConfig(object): return PaginationConfig(**params) + def __str__(self): + return ( + "" + ) % (self.from_tok, self.to_tok, self.direction, self.limit) + class PaginationStream(object): """ An interface for streaming data as chunks. """ - TOK_START = "START" TOK_END = "END" def get_chunk(self, config=None): @@ -76,7 +81,7 @@ class StreamData(object): self.hs = hs self.store = hs.get_datastore() - def get_rows(self, user_id, from_pkey, to_pkey, limit): + def get_rows(self, user_id, from_pkey, to_pkey, limit, direction): """ Get event stream data between the specified pkeys. Args: diff --git a/synapse/api/streams/event.py b/synapse/api/streams/event.py index 414b05be3..a5c8b2b31 100644 --- a/synapse/api/streams/event.py +++ b/synapse/api/streams/event.py @@ -38,8 +38,8 @@ class EventsStreamData(StreamData): self.with_feedback = feedback @defer.inlineCallbacks - def get_rows(self, user_id, from_key, to_key, limit): - data, latest_ver = yield self.store.get_room_events_stream( + def get_rows(self, user_id, from_key, to_key, limit, direction): + data, latest_ver = yield self.store.get_room_events( user_id=user_id, from_key=from_key, to_key=to_key, @@ -70,6 +70,15 @@ class EventStream(PaginationStream): pagination_config.from_tok) pagination_config.to_tok = yield self.fix_token( pagination_config.to_tok) + + if ( + not pagination_config.to_tok + and pagination_config.direction == 'f' + ): + pagination_config.to_tok = yield self.get_current_max_token() + + logger.debug("pagination_config: %s", pagination_config) + defer.returnValue(pagination_config) @defer.inlineCallbacks @@ -81,39 +90,42 @@ class EventStream(PaginationStream): Returns: The fixed-up token, which may == token. """ - # replace TOK_START and TOK_END with 0_0_0 or -1_-1_-1 depending. - replacements = [ - (PaginationStream.TOK_START, "0"), - (PaginationStream.TOK_END, "-1") - ] - for magic_token, key in replacements: - if magic_token == token: - token = EventStream.SEPARATOR.join( - [key] * len(self.stream_data) - ) + if token == PaginationStream.TOK_END: + new_token = yield self.get_current_max_token() - # replace -1 values with an actual pkey - token_segments = self._split_token(token) - for i, tok in enumerate(token_segments): - if tok == -1: - # add 1 to the max token because results are EXCLUSIVE from the - # latest version. - token_segments[i] = 1 + (yield self.stream_data[i].max_token()) - defer.returnValue(EventStream.SEPARATOR.join( - str(x) for x in token_segments - )) + logger.debug("fix_token: From %s to %s", token, new_token) + + token = new_token + + defer.returnValue(token) @defer.inlineCallbacks - def get_chunk(self, config=None): + def get_current_max_token(self): + new_token_parts = [] + for s in self.stream_data: + mx = yield s.max_token() + new_token_parts.append(str(mx)) + + new_token = EventStream.SEPARATOR.join(new_token_parts) + + logger.debug("get_current_max_token: %s", new_token) + + defer.returnValue(new_token) + + @defer.inlineCallbacks + def get_chunk(self, config): # no support for limit on >1 streams, makes no sense. if config.limit and len(self.stream_data) > 1: raise EventStreamError( 400, "Limit not supported on multiplexed streams." ) - (chunk_data, next_tok) = yield self._get_chunk_data(config.from_tok, - config.to_tok, - config.limit) + chunk_data, next_tok = yield self._get_chunk_data( + config.from_tok, + config.to_tok, + config.limit, + config.direction, + ) defer.returnValue({ "chunk": chunk_data, @@ -122,7 +134,7 @@ class EventStream(PaginationStream): }) @defer.inlineCallbacks - def _get_chunk_data(self, from_tok, to_tok, limit): + def _get_chunk_data(self, from_tok, to_tok, limit, direction): """ Get event data between the two tokens. Tokens are SEPARATOR separated values representing pkey values of @@ -140,11 +152,12 @@ class EventStream(PaginationStream): EventStreamError if something went wrong. """ # sanity check - if (from_tok.count(EventStream.SEPARATOR) != - to_tok.count(EventStream.SEPARATOR) or - (from_tok.count(EventStream.SEPARATOR) + 1) != - len(self.stream_data)): - raise EventStreamError(400, "Token lengths don't match.") + if to_tok is not None: + if (from_tok.count(EventStream.SEPARATOR) != + to_tok.count(EventStream.SEPARATOR) or + (from_tok.count(EventStream.SEPARATOR) + 1) != + len(self.stream_data)): + raise EventStreamError(400, "Token lengths don't match.") chunk = [] next_ver = [] @@ -158,7 +171,7 @@ class EventStream(PaginationStream): continue (event_chunk, max_pkey) = yield self.stream_data[i].get_rows( - self.user_id, from_pkey, to_pkey, limit + self.user_id, from_pkey, to_pkey, limit, direction, ) chunk.extend([ @@ -177,9 +190,8 @@ class EventStream(PaginationStream): Returns: A list of ints. """ - segments = token.split(EventStream.SEPARATOR) - try: - int_segments = [int(x) for x in segments] - except ValueError: - raise EventStreamError(400, "Bad token: %s" % token) - return int_segments + if token: + segments = token.split(EventStream.SEPARATOR) + else: + segments = [None] * len(self.stream_data) + return segments diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index e8cb83edd..f140dc527 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -684,7 +684,7 @@ class PresenceStreamData(StreamData): super(PresenceStreamData, self).__init__(hs) self.presence = hs.get_handlers().presence_handler - def get_rows(self, user_id, from_key, to_key, limit): + def get_rows(self, user_id, from_key, to_key, limit, direction): cachemap = self.presence._user_cachemap # TODO(paul): limit, and filter by visibility diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index e5dd2e245..40867ae2e 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -466,7 +466,7 @@ class RoomMemberHandler(BaseHandler): for entry in member_list ] chunk_data = { - "start": "START", + "start": "START", # FIXME (erikj): START is no longer a valid value "end": "END", "chunk": event_list } @@ -811,4 +811,5 @@ class RoomListHandler(BaseHandler): @defer.inlineCallbacks def get_public_room_list(self): chunk = yield self.store.get_rooms(is_public=True) + # FIXME (erikj): START is no longer a valid value defer.returnValue({"start": "START", "end": "END", "chunk": chunk}) diff --git a/synapse/storage/schema/im.sql b/synapse/storage/schema/im.sql index 0fb3dbee5..ea04261ff 100644 --- a/synapse/storage/schema/im.sql +++ b/synapse/storage/schema/im.sql @@ -14,7 +14,7 @@ */ CREATE TABLE IF NOT EXISTS events( - token_ordering INTEGER PRIMARY KEY AUTOINCREMENT, + stream_ordering INTEGER PRIMARY KEY AUTOINCREMENT, topological_ordering INTEGER NOT NULL, event_id TEXT NOT NULL, type TEXT NOT NULL, diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index c03c983e1..87fc97881 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -16,8 +16,9 @@ from twisted.internet import defer from ._base import SQLBaseStore - +from synapse.api.errors import SynapseError from synapse.api.constants import Membership +from synapse.util.logutils import log_function import json import logging @@ -29,9 +30,96 @@ logger = logging.getLogger(__name__) MAX_STREAM_SIZE = 1000 +_STREAM_TOKEN = "stream" +_TOPOLOGICAL_TOKEN = "topological" + + +def _parse_stream_token(string): + try: + if string[0] != 's': + raise + return int(string[1:]) + except: + logger.debug("Not stream token: %s", string) + raise SynapseError(400, "Invalid token") + + +def _parse_topological_token(string): + try: + if string[0] != 't': + raise + parts = string[1:].split('-', 1) + return (int(parts[0]), int(parts[1])) + except: + logger.debug("Not topological token: %s", string) + raise SynapseError(400, "Invalid token") + + +def is_stream_token(string): + try: + _parse_stream_token(string) + return True + except: + return False + + +def is_topological_token(string): + try: + _parse_topological_token(string) + return True + except: + return False + + +def _get_token_bound(token, comparison): + try: + s = _parse_stream_token(token) + return "%s %s %d" % ("stream_ordering", comparison, s) + except: + pass + + try: + top, stream = _parse_topological_token(token) + return "%s %s %d AND %s %s %d" % ( + "topological_ordering", comparison, top, + "stream_ordering", comparison, stream, + ) + except: + pass + + raise SynapseError(400, "Invalid token") + + class StreamStore(SQLBaseStore): + @log_function + def get_room_events(self, user_id, from_key, to_key, room_id, limit=0, + direction='f', with_feedback=False): + is_events = ( + direction == 'f' + and is_stream_token(from_key) + and to_key and is_stream_token(to_key) + ) + + if is_events: + return self.get_room_events_stream( + user_id=user_id, + from_key=from_key, + to_key=to_key, + room_id=room_id, + limit=limit, + with_feedback=with_feedback, + ) + else: + return self.paginate_room_events( + from_key=from_key, + to_key=to_key, + room_id=room_id, + limit=limit, + with_feedback=with_feedback, + ) @defer.inlineCallbacks + @log_function def get_room_events_stream(self, user_id, from_key, to_key, room_id, limit=0, with_feedback=False): # TODO (erikj): Handle compressed feedback @@ -54,8 +142,8 @@ class StreamStore(SQLBaseStore): limit = MAX_STREAM_SIZE # From and to keys should be integers from ordering. - from_key = int(from_key) - to_key = int(to_key) + from_id = _parse_stream_token(from_key) + to_id = _parse_stream_token(to_key) if from_key == to_key: defer.returnValue(([], to_key)) @@ -65,41 +153,78 @@ class StreamStore(SQLBaseStore): "SELECT * FROM events as e WHERE " "((room_id IN (%(current)s)) OR " "(event_id IN (%(invites)s))) " + "AND e.stream_ordering > ? AND e.stream_ordering < ? " + "ORDER BY stream_ordering ASC LIMIT %(limit)d " ) % { "current": current_room_membership_sql, "invites": invites_sql, + "limit": limit } - # Constraints and ordering depend on direction. - if from_key < to_key: - sql += ( - "AND e.token_ordering > ? AND e.token_ordering < ? " - "ORDER BY token_ordering ASC LIMIT %(limit)d " - ) % {"limit": limit} - else: - sql += ( - "AND e.token_ordering < ? " - "AND e.token_ordering > ? " - "ORDER BY e.token_ordering DESC LIMIT %(limit)d " - ) % {"limit": int(limit)} - rows = yield self._execute_and_decode( sql, - user_id, user_id, Membership.INVITE, from_key, to_key + user_id, user_id, Membership.INVITE, from_id, to_id ) ret = [self._parse_event_from_row(r) for r in rows] if rows: - if from_key < to_key: - key = max([r["token_ordering"] for r in rows]) - else: - key = min([r["token_ordering"] for r in rows]) + key = "s%d" % max([r["stream_ordering"] for r in rows]) else: + # Assume we didn't get anything because there was nothing to get. key = to_key defer.returnValue((ret, key)) + @defer.inlineCallbacks + @log_function + def paginate_room_events(self, room_id, from_key, to_key=None, + direction='b', limit=-1, + with_feedback=False): + # TODO (erikj): Handle compressed feedback + + from_comp = '<' if direction =='b' else '>' + to_comp = '>' if direction =='b' else '<' + order = "DESC" if direction == 'b' else "ASC" + + args = [room_id] + + bounds = _get_token_bound(from_key, from_comp) + if to_key: + bounds = "%s AND %s" % (bounds, _get_token_bound(to_key, to_comp)) + + if int(limit) > 0: + args.append(int(limit)) + limit_str = " LIMIT ?" + else: + limit_str = "" + + sql = ( + "SELECT * FROM events " + "WHERE room_id = ? AND %(bounds)s " + "ORDER BY topological_ordering %(order)s, stream_ordering %(order)s %(limit)s " + ) % {"bounds": bounds, "order": order, "limit": limit_str} + + rows = yield self._execute_and_decode( + sql, + *args + ) + + if rows: + topo = rows[-1]["topological_ordering"] + toke = rows[-1]["stream_ordering"] + next_token = "t%s-%s" % (topo, toke) + else: + # TODO (erikj): We should work out what to do here instead. + next_token = to_key if to_key else from_key + + defer.returnValue( + ( + [self._parse_event_from_row(r) for r in rows], + next_token + ) + ) + @defer.inlineCallbacks def get_recent_events_for_room(self, room_id, limit, with_feedback=False): # TODO (erikj): Handle compressed feedback @@ -108,8 +233,8 @@ class StreamStore(SQLBaseStore): sql = ( "SELECT * FROM events " - "WHERE room_id = ? AND token_ordering <= ? " - "ORDER BY topological_ordering, token_ordering DESC LIMIT ? " + "WHERE room_id = ? AND stream_ordering <= ? " + "ORDER BY topological_ordering, stream_ordering DESC LIMIT ? " ) rows = yield self._execute_and_decode( @@ -121,12 +246,12 @@ class StreamStore(SQLBaseStore): if rows: topo = rows[0]["topological_ordering"] - toke = rows[0]["token_ordering"] + toke = rows[0]["stream_ordering"] start_token = "p%s-%s" % (topo, toke) token = (start_token, end_token) else: - token = ("START", end_token) + token = (end_token, end_token) defer.returnValue( ( @@ -138,11 +263,14 @@ class StreamStore(SQLBaseStore): @defer.inlineCallbacks def get_room_events_max_id(self): res = yield self._execute_and_decode( - "SELECT MAX(token_ordering) as m FROM events" + "SELECT MAX(stream_ordering) as m FROM events" ) - if not res: - defer.returnValue(0) + logger.debug("get_room_events_max_id: %s", res) + + if not res or not res[0] or not res[0]["m"]: + defer.returnValue("s1") return - defer.returnValue(res[0]["m"]) + key = res[0]["m"] + 1 + defer.returnValue("s%d" % (key,)) diff --git a/webclient/components/matrix/event-stream-service.js b/webclient/components/matrix/event-stream-service.js index c94cf0fe7..a446fad5d 100644 --- a/webclient/components/matrix/event-stream-service.js +++ b/webclient/components/matrix/event-stream-service.js @@ -25,7 +25,6 @@ the eventHandlerService. angular.module('eventStreamService', []) .factory('eventStreamService', ['$q', '$timeout', 'matrixService', 'eventHandlerService', function($q, $timeout, matrixService, eventHandlerService) { var END = "END"; - var START = "START"; var TIMEOUT_MS = 30000; var ERR_TIMEOUT_MS = 5000; diff --git a/webclient/components/matrix/matrix-service.js b/webclient/components/matrix/matrix-service.js index c52c94c31..3cd0aa674 100644 --- a/webclient/components/matrix/matrix-service.js +++ b/webclient/components/matrix/matrix-service.js @@ -230,8 +230,8 @@ angular.module('matrixService', []) path = path.replace("$room_id", room_id); var params = { from: from_token, - to: "START", - limit: limit + limit: limit, + dir: 'b' }; return doRequest("GET", path, params); },