Stream ordering and out of order insertions.

Handle the fact that events can be persisted out of order, and so to get
the "current max" stream token becomes non trivial - as we need to make
sure that *all* stream tokens less than the current max have also
successfully been persisted.
This commit is contained in:
Erik Johnston 2015-04-09 11:41:36 +01:00
parent 22d7a59306
commit 8ad0f4912e
No known key found for this signature in database
GPG key ID: 3ADA06EDC753D11E
5 changed files with 153 additions and 65 deletions

View file

@ -22,6 +22,8 @@ from synapse.util.logcontext import PreserveLoggingContext, LoggingContext
from synapse.util.lrucache import LruCache
import synapse.metrics
from util.id_generators import IdGenerator, StreamIdGenerator
from twisted.internet import defer
from collections import namedtuple, OrderedDict
@ -29,7 +31,6 @@ import functools
import simplejson as json
import sys
import time
import threading
logger = logging.getLogger(__name__)
@ -232,46 +233,6 @@ class PerformanceCounters(object):
return top_n_counters
class IdGenerator(object):
def __init__(self, table, column, store):
self.table = table
self.column = column
self.store = store
self._lock = threading.Lock()
self._next_id = None
@defer.inlineCallbacks
def get_next(self):
with self._lock:
if not self._next_id:
res = yield self.store._execute_and_decode(
"IdGenerator_%s" % (self.table,),
"SELECT MAX(%s) as mx FROM %s" % (self.column, self.table,)
)
self._next_id = (res and res[0] and res[0]["mx"]) or 1
i = self._next_id
self._next_id += 1
defer.returnValue(i)
def get_next_txn(self, txn):
with self._lock:
if self._next_id:
i = self._next_id
self._next_id += 1
return i
else:
txn.execute(
"SELECT MAX(%s) FROM %s" % (self.column, self.table,)
)
val, = txn.fetchone()
self._next_id = val or 2
return 1
class SQLBaseStore(object):
_TXN_ID = 0
@ -297,7 +258,7 @@ class SQLBaseStore(object):
# Pretend the getEventCache is just another named cache
caches_by_name["*getEvent*"] = self._get_event_cache
self._stream_id_gen = IdGenerator("events", "stream_ordering", self)
self._stream_id_gen = StreamIdGenerator()
self._transaction_id_gen = IdGenerator("sent_transactions", "id", self)
self._state_groups_id_gen = IdGenerator("state_groups", "id", self)
self._access_tokens_id_gen = IdGenerator("access_tokens", "id", self)
@ -363,7 +324,6 @@ class SQLBaseStore(object):
*args, **kwargs
)
except self.database_engine.module.DatabaseError as e:
logger.warn("[TXN DEADLOCK] {%s} %r, %r", name, e.errno, e.sqlstate)
if self.database_engine.is_deadlock(e):
logger.warn("[TXN DEADLOCK] {%s} %d/%d", name, i, N)
if i < N:

View file

@ -52,7 +52,6 @@ class EventsStore(SQLBaseStore):
is_new_state=is_new_state,
current_state=current_state,
)
self.get_room_events_max_id.invalidate()
except _RollbackButIsFineException:
pass
@ -97,7 +96,13 @@ class EventsStore(SQLBaseStore):
self._get_event_cache.pop(event.event_id)
if stream_ordering is None:
stream_ordering = self._stream_id_gen.get_next_txn(txn)
with self._stream_id_gen.get_next_txn(txn) as stream_ordering:
return self._persist_event_txn(
txn, event, context, backfilled,
stream_ordering=stream_ordering,
is_new_state=is_new_state,
current_state=current_state,
)
# We purposefully do this first since if we include a `current_state`
# key, we *want* to update the `current_state_events` table

View file

@ -413,12 +413,10 @@ class StreamStore(SQLBaseStore):
"get_recent_events_for_room", get_recent_events_for_room_txn
)
@cached(num_args=0)
@defer.inlineCallbacks
def get_room_events_max_id(self):
return self.runInteraction(
"get_room_events_max_id",
self._get_room_events_max_id_txn
)
token = yield self._stream_id_gen.get_max_token(self)
defer.returnValue("s%d" % (token,))
@defer.inlineCallbacks
def _get_min_token(self):
@ -433,21 +431,6 @@ class StreamStore(SQLBaseStore):
defer.returnValue(self.min_token)
def _get_room_events_max_id_txn(self, txn):
txn.execute(
"SELECT MAX(stream_ordering) as m FROM events"
)
res = self.cursor_to_dict(txn)
logger.debug("get_room_events_max_id: %s", res)
if not res or not res[0] or not res[0]["m"]:
return "s0"
key = res[0]["m"]
return "s%d" % (key,)
@staticmethod
def _set_before_and_after(events, rows):
for event, row in zip(events, rows):

View file

@ -0,0 +1,14 @@
# -*- coding: utf-8 -*-
# Copyright 2014, 2015 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View file

@ -0,0 +1,126 @@
# -*- coding: utf-8 -*-
# Copyright 2014, 2015 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from twisted.internet import defer
from collections import deque
import contextlib
import threading
class IdGenerator(object):
def __init__(self, table, column, store):
self.table = table
self.column = column
self.store = store
self._lock = threading.Lock()
self._next_id = None
@defer.inlineCallbacks
def get_next(self):
with self._lock:
if not self._next_id:
res = yield self.store._execute_and_decode(
"IdGenerator_%s" % (self.table,),
"SELECT MAX(%s) as mx FROM %s" % (self.column, self.table,)
)
self._next_id = (res and res[0] and res[0]["mx"]) or 1
i = self._next_id
self._next_id += 1
defer.returnValue(i)
def get_next_txn(self, txn):
with self._lock:
if self._next_id:
i = self._next_id
self._next_id += 1
return i
else:
txn.execute(
"SELECT MAX(%s) FROM %s" % (self.column, self.table,)
)
val, = txn.fetchone()
self._next_id = val or 2
return 1
class StreamIdGenerator(object):
"""Used to generate new stream ids when persisting events while keeping
track of which transactions have been completed.
This allows us to get the "current" stream id, i.e. the stream id such that
all ids less than or equal to it have completed. This handles the fact that
persistence of events can complete out of order.
Usage:
with stream_id_gen.get_next_txn(txn) as stream_id:
# ... persist event ...
"""
def __init__(self):
self._lock = threading.Lock()
self._current_max = None
self._unfinished_ids = deque()
def get_next_txn(self, txn):
"""
Usage:
with stream_id_gen.get_next_txn(txn) as stream_id:
# ... persist event ...
"""
with self._lock:
if not self._current_max:
self._compute_current_max(txn)
self._current_max += 1
next_id = self._current_max
self._unfinished_ids.append(next_id)
@contextlib.contextmanager
def manager():
yield next_id
with self._lock:
self._unfinished_ids.remove(next_id)
return manager()
def get_max_token(self, store):
"""Returns the maximum stream id such that all stream ids less than or
equal to it have been successfully persisted.
"""
with self._lock:
if self._unfinished_ids:
return self._unfinished_ids[0] - 1
if not self._current_max:
return store.runInteraction(
"_compute_current_max",
self._compute_current_max,
)
return self._current_max
def _compute_current_max(self, txn):
txn.execute("SELECT MAX(stream_ordering) FROM events")
val, = txn.fetchone()
self._current_max = int(val) if val else 1
return self._current_max