Run the background updates when starting synapse.

This commit is contained in:
Mark Haines 2015-11-10 15:50:58 +00:00
parent 2ede7aa8a1
commit a412b9a465
4 changed files with 67 additions and 10 deletions

View file

@ -439,6 +439,7 @@ def setup(config_options):
hs.get_pusherpool().start() hs.get_pusherpool().start()
hs.get_state_handler().start_caching() hs.get_state_handler().start_caching()
hs.get_datastore().start_profiling() hs.get_datastore().start_profiling()
hs.get_datastore().start_doing_background_updates()
hs.get_replication_layer().start_get_pdu_cache() hs.get_replication_layer().start_get_pdu_cache()
return hs return hs

View file

@ -43,7 +43,7 @@ class BackgroundUpdatePerformance(object):
self.avg_item_count += 0.1 * (item_count - self.avg_item_count) self.avg_item_count += 0.1 * (item_count - self.avg_item_count)
self.avg_duration_ms += 0.1 * (duration_ms - self.avg_duration_ms) self.avg_duration_ms += 0.1 * (duration_ms - self.avg_duration_ms)
def average_duration_ms_per_item(self): def average_items_per_ms(self):
"""An estimate of how long it takes to do a single update. """An estimate of how long it takes to do a single update.
Returns: Returns:
A duration in ms as a float A duration in ms as a float
@ -53,7 +53,17 @@ class BackgroundUpdatePerformance(object):
else: else:
# Use the exponential moving average so that we can adapt to # Use the exponential moving average so that we can adapt to
# changes in how long the update process takes. # changes in how long the update process takes.
return float(self.avg_duration_ms) / float(self.avg_item_count) return float(self.avg_item_count) / float(self.avg_duration_ms)
def total_items_per_ms(self):
"""An estimate of how long it takes to do a single update.
Returns:
A duration in ms as a float
"""
if self.total_item_count == 0:
return None
else:
return float(self.total_item_count) / float(self.total_duration_ms)
class BackgroundUpdateStore(SQLBaseStore): class BackgroundUpdateStore(SQLBaseStore):
@ -65,12 +75,41 @@ class BackgroundUpdateStore(SQLBaseStore):
MINIMUM_BACKGROUND_BATCH_SIZE = 100 MINIMUM_BACKGROUND_BATCH_SIZE = 100
DEFAULT_BACKGROUND_BATCH_SIZE = 100 DEFAULT_BACKGROUND_BATCH_SIZE = 100
BACKGROUND_UPDATE_INTERVAL_MS = 1000
BACKGROUND_UPDATE_DURATION_MS = 100
def __init__(self, hs): def __init__(self, hs):
super(BackgroundUpdateStore, self).__init__(hs) super(BackgroundUpdateStore, self).__init__(hs)
self._background_update_performance = {} self._background_update_performance = {}
self._background_update_queue = [] self._background_update_queue = []
self._background_update_handlers = {} self._background_update_handlers = {}
self._background_update_timer = None
@defer.inlineCallbacks
def start_doing_background_updates(self):
while True:
if self._background_update_timer is not None:
return
sleep = defer.Deferred()
self._background_update_timer = self._clock.call_later(
self.BACKGROUND_UPDATE_INTERVAL_MS / 1000., sleep.callback
)
try:
yield sleep
finally:
self._background_update_timer = None
result = yield self.do_background_update(
self.BACKGROUND_UPDATE_DURATION_MS
)
if result is None:
logger.info(
"No more background updates to do."
" Unscheduling background update task."
)
return
@defer.inlineCallbacks @defer.inlineCallbacks
def do_background_update(self, desired_duration_ms): def do_background_update(self, desired_duration_ms):
@ -106,10 +145,10 @@ class BackgroundUpdateStore(SQLBaseStore):
performance = BackgroundUpdatePerformance(update_name) performance = BackgroundUpdatePerformance(update_name)
self._background_update_performance[update_name] = performance self._background_update_performance[update_name] = performance
duration_ms_per_item = performance.average_duration_ms_per_item() items_per_ms = performance.average_items_per_ms()
if duration_ms_per_item is not None: if items_per_ms is not None:
batch_size = int(desired_duration_ms / duration_ms_per_item) batch_size = int(desired_duration_ms * items_per_ms)
# Clamp the batch size so that we always make progress # Clamp the batch size so that we always make progress
batch_size = max(batch_size, self.MINIMUM_BACKGROUND_BATCH_SIZE) batch_size = max(batch_size, self.MINIMUM_BACKGROUND_BATCH_SIZE)
else: else:
@ -130,8 +169,12 @@ class BackgroundUpdateStore(SQLBaseStore):
duration_ms = time_stop - time_start duration_ms = time_stop - time_start
logger.info( logger.info(
"Updating %. Updated %r items in %rms", "Updating %. Updated %r items in %rms."
update_name, items_updated, duration_ms " (total_rate=%r/ms, current_rate=%r/ms, total_updated=%r)",
update_name, items_updated, duration_ms,
performance.total_items_per_ms(),
performance.average_items_per_ms(),
performance.total_item_count,
) )
performance.update(items_updated, duration_ms) performance.update(items_updated, duration_ms)

View file

@ -29,6 +29,11 @@ class SearchStore(BackgroundUpdateStore):
EVENT_SEARCH_UPDATE_NAME = "event_search" EVENT_SEARCH_UPDATE_NAME = "event_search"
def __init__(self, hs):
super(SearchStore, self).__init__(hs)
self.register_background_update_handler(
self.EVENT_SEARCH_UPDATE_NAME, self._background_reindex_search
)
@defer.inlineCallbacks @defer.inlineCallbacks
def _background_reindex_search(self, progress, batch_size): def _background_reindex_search(self, progress, batch_size):
@ -74,7 +79,7 @@ class SearchStore(BackgroundUpdateStore):
elif event.type == "m.room.name": elif event.type == "m.room.name":
key = "content.name" key = "content.name"
value = content["name"] value = content["name"]
except Exception: except (KeyError, AttributeError):
# If the event is missing a necessary field then # If the event is missing a necessary field then
# skip over it. # skip over it.
continue continue
@ -96,7 +101,7 @@ class SearchStore(BackgroundUpdateStore):
raise Exception("Unrecognized database engine") raise Exception("Unrecognized database engine")
for index in range(0, len(event_search_rows), INSERT_CLUMP_SIZE): for index in range(0, len(event_search_rows), INSERT_CLUMP_SIZE):
clump = event_search_rows[index : index + INSERT_CLUMP_SIZE) clump = event_search_rows[index:index + INSERT_CLUMP_SIZE]
txn.execute_many(sql, clump) txn.execute_many(sql, clump)
progress = { progress = {
@ -116,7 +121,7 @@ class SearchStore(BackgroundUpdateStore):
) )
if result is None: if result is None:
yield _end_background_update(self.EVENT_SEARCH_UPDATE_NAME) yield self._end_background_update(self.EVENT_SEARCH_UPDATE_NAME)
defer.returnValue(result) defer.returnValue(result)

View file

@ -53,6 +53,14 @@ class Clock(object):
loop.stop() loop.stop()
def call_later(self, delay, callback, *args, **kwargs): def call_later(self, delay, callback, *args, **kwargs):
"""Call something later
Args:
delay(float): How long to wait in seconds.
callback(function): Function to call
*args: Postional arguments to pass to function.
**kwargs: Key arguments to pass to function.
"""
current_context = LoggingContext.current_context() current_context = LoggingContext.current_context()
def wrapped_callback(*args, **kwargs): def wrapped_callback(*args, **kwargs):