forked from MirrorHub/synapse
Stream ordering and out of order insertions.
Handle the fact that events can be persisted out of order, and so to get the "current max" stream token becomes non trivial - as we need to make sure that *all* stream tokens less than the current max have also successfully been persisted.
This commit is contained in:
parent
22d7a59306
commit
8ad0f4912e
5 changed files with 153 additions and 65 deletions
|
@ -22,6 +22,8 @@ from synapse.util.logcontext import PreserveLoggingContext, LoggingContext
|
|||
from synapse.util.lrucache import LruCache
|
||||
import synapse.metrics
|
||||
|
||||
from util.id_generators import IdGenerator, StreamIdGenerator
|
||||
|
||||
from twisted.internet import defer
|
||||
|
||||
from collections import namedtuple, OrderedDict
|
||||
|
@ -29,7 +31,6 @@ import functools
|
|||
import simplejson as json
|
||||
import sys
|
||||
import time
|
||||
import threading
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -232,46 +233,6 @@ class PerformanceCounters(object):
|
|||
return top_n_counters
|
||||
|
||||
|
||||
class IdGenerator(object):
|
||||
def __init__(self, table, column, store):
|
||||
self.table = table
|
||||
self.column = column
|
||||
self.store = store
|
||||
self._lock = threading.Lock()
|
||||
self._next_id = None
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def get_next(self):
|
||||
with self._lock:
|
||||
if not self._next_id:
|
||||
res = yield self.store._execute_and_decode(
|
||||
"IdGenerator_%s" % (self.table,),
|
||||
"SELECT MAX(%s) as mx FROM %s" % (self.column, self.table,)
|
||||
)
|
||||
|
||||
self._next_id = (res and res[0] and res[0]["mx"]) or 1
|
||||
|
||||
i = self._next_id
|
||||
self._next_id += 1
|
||||
defer.returnValue(i)
|
||||
|
||||
def get_next_txn(self, txn):
|
||||
with self._lock:
|
||||
if self._next_id:
|
||||
i = self._next_id
|
||||
self._next_id += 1
|
||||
return i
|
||||
else:
|
||||
txn.execute(
|
||||
"SELECT MAX(%s) FROM %s" % (self.column, self.table,)
|
||||
)
|
||||
|
||||
val, = txn.fetchone()
|
||||
self._next_id = val or 2
|
||||
|
||||
return 1
|
||||
|
||||
|
||||
class SQLBaseStore(object):
|
||||
_TXN_ID = 0
|
||||
|
||||
|
@ -297,7 +258,7 @@ class SQLBaseStore(object):
|
|||
# Pretend the getEventCache is just another named cache
|
||||
caches_by_name["*getEvent*"] = self._get_event_cache
|
||||
|
||||
self._stream_id_gen = IdGenerator("events", "stream_ordering", self)
|
||||
self._stream_id_gen = StreamIdGenerator()
|
||||
self._transaction_id_gen = IdGenerator("sent_transactions", "id", self)
|
||||
self._state_groups_id_gen = IdGenerator("state_groups", "id", self)
|
||||
self._access_tokens_id_gen = IdGenerator("access_tokens", "id", self)
|
||||
|
@ -363,7 +324,6 @@ class SQLBaseStore(object):
|
|||
*args, **kwargs
|
||||
)
|
||||
except self.database_engine.module.DatabaseError as e:
|
||||
logger.warn("[TXN DEADLOCK] {%s} %r, %r", name, e.errno, e.sqlstate)
|
||||
if self.database_engine.is_deadlock(e):
|
||||
logger.warn("[TXN DEADLOCK] {%s} %d/%d", name, i, N)
|
||||
if i < N:
|
||||
|
|
|
@ -52,7 +52,6 @@ class EventsStore(SQLBaseStore):
|
|||
is_new_state=is_new_state,
|
||||
current_state=current_state,
|
||||
)
|
||||
self.get_room_events_max_id.invalidate()
|
||||
except _RollbackButIsFineException:
|
||||
pass
|
||||
|
||||
|
@ -97,7 +96,13 @@ class EventsStore(SQLBaseStore):
|
|||
self._get_event_cache.pop(event.event_id)
|
||||
|
||||
if stream_ordering is None:
|
||||
stream_ordering = self._stream_id_gen.get_next_txn(txn)
|
||||
with self._stream_id_gen.get_next_txn(txn) as stream_ordering:
|
||||
return self._persist_event_txn(
|
||||
txn, event, context, backfilled,
|
||||
stream_ordering=stream_ordering,
|
||||
is_new_state=is_new_state,
|
||||
current_state=current_state,
|
||||
)
|
||||
|
||||
# We purposefully do this first since if we include a `current_state`
|
||||
# key, we *want* to update the `current_state_events` table
|
||||
|
|
|
@ -413,12 +413,10 @@ class StreamStore(SQLBaseStore):
|
|||
"get_recent_events_for_room", get_recent_events_for_room_txn
|
||||
)
|
||||
|
||||
@cached(num_args=0)
|
||||
@defer.inlineCallbacks
|
||||
def get_room_events_max_id(self):
|
||||
return self.runInteraction(
|
||||
"get_room_events_max_id",
|
||||
self._get_room_events_max_id_txn
|
||||
)
|
||||
token = yield self._stream_id_gen.get_max_token(self)
|
||||
defer.returnValue("s%d" % (token,))
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def _get_min_token(self):
|
||||
|
@ -433,21 +431,6 @@ class StreamStore(SQLBaseStore):
|
|||
|
||||
defer.returnValue(self.min_token)
|
||||
|
||||
def _get_room_events_max_id_txn(self, txn):
|
||||
txn.execute(
|
||||
"SELECT MAX(stream_ordering) as m FROM events"
|
||||
)
|
||||
|
||||
res = self.cursor_to_dict(txn)
|
||||
|
||||
logger.debug("get_room_events_max_id: %s", res)
|
||||
|
||||
if not res or not res[0] or not res[0]["m"]:
|
||||
return "s0"
|
||||
|
||||
key = res[0]["m"]
|
||||
return "s%d" % (key,)
|
||||
|
||||
@staticmethod
|
||||
def _set_before_and_after(events, rows):
|
||||
for event, row in zip(events, rows):
|
||||
|
|
14
synapse/storage/util/__init__.py
Normal file
14
synapse/storage/util/__init__.py
Normal file
|
@ -0,0 +1,14 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2014, 2015 OpenMarket Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
126
synapse/storage/util/id_generators.py
Normal file
126
synapse/storage/util/id_generators.py
Normal file
|
@ -0,0 +1,126 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2014, 2015 OpenMarket Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from twisted.internet import defer
|
||||
|
||||
from collections import deque
|
||||
import contextlib
|
||||
import threading
|
||||
|
||||
|
||||
class IdGenerator(object):
|
||||
def __init__(self, table, column, store):
|
||||
self.table = table
|
||||
self.column = column
|
||||
self.store = store
|
||||
self._lock = threading.Lock()
|
||||
self._next_id = None
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def get_next(self):
|
||||
with self._lock:
|
||||
if not self._next_id:
|
||||
res = yield self.store._execute_and_decode(
|
||||
"IdGenerator_%s" % (self.table,),
|
||||
"SELECT MAX(%s) as mx FROM %s" % (self.column, self.table,)
|
||||
)
|
||||
|
||||
self._next_id = (res and res[0] and res[0]["mx"]) or 1
|
||||
|
||||
i = self._next_id
|
||||
self._next_id += 1
|
||||
defer.returnValue(i)
|
||||
|
||||
def get_next_txn(self, txn):
|
||||
with self._lock:
|
||||
if self._next_id:
|
||||
i = self._next_id
|
||||
self._next_id += 1
|
||||
return i
|
||||
else:
|
||||
txn.execute(
|
||||
"SELECT MAX(%s) FROM %s" % (self.column, self.table,)
|
||||
)
|
||||
|
||||
val, = txn.fetchone()
|
||||
self._next_id = val or 2
|
||||
|
||||
return 1
|
||||
|
||||
|
||||
class StreamIdGenerator(object):
|
||||
"""Used to generate new stream ids when persisting events while keeping
|
||||
track of which transactions have been completed.
|
||||
|
||||
This allows us to get the "current" stream id, i.e. the stream id such that
|
||||
all ids less than or equal to it have completed. This handles the fact that
|
||||
persistence of events can complete out of order.
|
||||
|
||||
Usage:
|
||||
with stream_id_gen.get_next_txn(txn) as stream_id:
|
||||
# ... persist event ...
|
||||
"""
|
||||
def __init__(self):
|
||||
self._lock = threading.Lock()
|
||||
|
||||
self._current_max = None
|
||||
self._unfinished_ids = deque()
|
||||
|
||||
def get_next_txn(self, txn):
|
||||
"""
|
||||
Usage:
|
||||
with stream_id_gen.get_next_txn(txn) as stream_id:
|
||||
# ... persist event ...
|
||||
"""
|
||||
with self._lock:
|
||||
if not self._current_max:
|
||||
self._compute_current_max(txn)
|
||||
|
||||
self._current_max += 1
|
||||
next_id = self._current_max
|
||||
|
||||
self._unfinished_ids.append(next_id)
|
||||
|
||||
@contextlib.contextmanager
|
||||
def manager():
|
||||
yield next_id
|
||||
with self._lock:
|
||||
self._unfinished_ids.remove(next_id)
|
||||
|
||||
return manager()
|
||||
|
||||
def get_max_token(self, store):
|
||||
"""Returns the maximum stream id such that all stream ids less than or
|
||||
equal to it have been successfully persisted.
|
||||
"""
|
||||
with self._lock:
|
||||
if self._unfinished_ids:
|
||||
return self._unfinished_ids[0] - 1
|
||||
|
||||
if not self._current_max:
|
||||
return store.runInteraction(
|
||||
"_compute_current_max",
|
||||
self._compute_current_max,
|
||||
)
|
||||
|
||||
return self._current_max
|
||||
|
||||
def _compute_current_max(self, txn):
|
||||
txn.execute("SELECT MAX(stream_ordering) FROM events")
|
||||
val, = txn.fetchone()
|
||||
|
||||
self._current_max = int(val) if val else 1
|
||||
|
||||
return self._current_max
|
Loading…
Reference in a new issue