mirror of
https://mau.dev/maunium/synapse.git
synced 2024-12-15 15:53:51 +01:00
Merge branch 'develop' into markjh/direct_to_device_federation
This commit is contained in:
commit
fa9d36e050
13 changed files with 547 additions and 137 deletions
|
@ -15,9 +15,30 @@
|
||||||
|
|
||||||
|
|
||||||
class EventContext(object):
|
class EventContext(object):
|
||||||
|
__slots__ = [
|
||||||
|
"current_state_ids",
|
||||||
|
"prev_state_ids",
|
||||||
|
"state_group",
|
||||||
|
"rejected",
|
||||||
|
"push_actions",
|
||||||
|
"prev_group",
|
||||||
|
"delta_ids",
|
||||||
|
"prev_state_events",
|
||||||
|
]
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
# The current state including the current event
|
||||||
self.current_state_ids = None
|
self.current_state_ids = None
|
||||||
|
# The current state excluding the current event
|
||||||
self.prev_state_ids = None
|
self.prev_state_ids = None
|
||||||
self.state_group = None
|
self.state_group = None
|
||||||
|
|
||||||
self.rejected = False
|
self.rejected = False
|
||||||
self.push_actions = []
|
self.push_actions = []
|
||||||
|
|
||||||
|
# A previously persisted state group and a delta between that
|
||||||
|
# and this state.
|
||||||
|
self.prev_group = None
|
||||||
|
self.delta_ids = None
|
||||||
|
|
||||||
|
self.prev_state_events = None
|
||||||
|
|
|
@ -832,11 +832,13 @@ class FederationHandler(BaseHandler):
|
||||||
|
|
||||||
new_pdu = event
|
new_pdu = event
|
||||||
|
|
||||||
message_handler = self.hs.get_handlers().message_handler
|
users_in_room = yield self.store.get_joined_users_from_context(event, context)
|
||||||
destinations = yield message_handler.get_joined_hosts_for_room_from_state(
|
|
||||||
context
|
destinations = set(
|
||||||
|
get_domain_from_id(user_id) for user_id in users_in_room
|
||||||
|
if not self.hs.is_mine_id(user_id)
|
||||||
)
|
)
|
||||||
destinations = set(destinations)
|
|
||||||
destinations.discard(origin)
|
destinations.discard(origin)
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
|
@ -1055,11 +1057,12 @@ class FederationHandler(BaseHandler):
|
||||||
|
|
||||||
new_pdu = event
|
new_pdu = event
|
||||||
|
|
||||||
message_handler = self.hs.get_handlers().message_handler
|
users_in_room = yield self.store.get_joined_users_from_context(event, context)
|
||||||
destinations = yield message_handler.get_joined_hosts_for_room_from_state(
|
|
||||||
context
|
destinations = set(
|
||||||
|
get_domain_from_id(user_id) for user_id in users_in_room
|
||||||
|
if not self.hs.is_mine_id(user_id)
|
||||||
)
|
)
|
||||||
destinations = set(destinations)
|
|
||||||
destinations.discard(origin)
|
destinations.discard(origin)
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
|
|
|
@ -30,7 +30,6 @@ from synapse.util.async import concurrently_execute, run_on_reactor, ReadWriteLo
|
||||||
from synapse.util.caches.snapshot_cache import SnapshotCache
|
from synapse.util.caches.snapshot_cache import SnapshotCache
|
||||||
from synapse.util.logcontext import preserve_fn, preserve_context_over_deferred
|
from synapse.util.logcontext import preserve_fn, preserve_context_over_deferred
|
||||||
from synapse.util.metrics import measure_func
|
from synapse.util.metrics import measure_func
|
||||||
from synapse.util.caches.descriptors import cachedInlineCallbacks
|
|
||||||
from synapse.visibility import filter_events_for_client
|
from synapse.visibility import filter_events_for_client
|
||||||
|
|
||||||
from ._base import BaseHandler
|
from ._base import BaseHandler
|
||||||
|
@ -945,7 +944,12 @@ class MessageHandler(BaseHandler):
|
||||||
event_stream_id, max_stream_id
|
event_stream_id, max_stream_id
|
||||||
)
|
)
|
||||||
|
|
||||||
destinations = yield self.get_joined_hosts_for_room_from_state(context)
|
users_in_room = yield self.store.get_joined_users_from_context(event, context)
|
||||||
|
|
||||||
|
destinations = [
|
||||||
|
get_domain_from_id(user_id) for user_id in users_in_room
|
||||||
|
if not self.hs.is_mine_id(user_id)
|
||||||
|
]
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def _notify():
|
def _notify():
|
||||||
|
@ -963,39 +967,3 @@ class MessageHandler(BaseHandler):
|
||||||
preserve_fn(federation_handler.handle_new_event)(
|
preserve_fn(federation_handler.handle_new_event)(
|
||||||
event, destinations=destinations,
|
event, destinations=destinations,
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_joined_hosts_for_room_from_state(self, context):
|
|
||||||
state_group = context.state_group
|
|
||||||
if not state_group:
|
|
||||||
# If state_group is None it means it has yet to be assigned a
|
|
||||||
# state group, i.e. we need to make sure that calls with a state_group
|
|
||||||
# of None don't hit previous cached calls with a None state_group.
|
|
||||||
# To do this we set the state_group to a new object as object() != object()
|
|
||||||
state_group = object()
|
|
||||||
|
|
||||||
return self._get_joined_hosts_for_room_from_state(
|
|
||||||
state_group, context.current_state_ids
|
|
||||||
)
|
|
||||||
|
|
||||||
@cachedInlineCallbacks(num_args=1, cache_context=True)
|
|
||||||
def _get_joined_hosts_for_room_from_state(self, state_group, current_state_ids,
|
|
||||||
cache_context):
|
|
||||||
|
|
||||||
# Don't bother getting state for people on the same HS
|
|
||||||
current_state = yield self.store.get_events([
|
|
||||||
e_id for key, e_id in current_state_ids.items()
|
|
||||||
if key[0] == EventTypes.Member and not self.hs.is_mine_id(key[1])
|
|
||||||
])
|
|
||||||
|
|
||||||
destinations = set()
|
|
||||||
for e in current_state.itervalues():
|
|
||||||
try:
|
|
||||||
if e.type == EventTypes.Member:
|
|
||||||
if e.content["membership"] == Membership.JOIN:
|
|
||||||
destinations.add(get_domain_from_id(e.state_key))
|
|
||||||
except SynapseError:
|
|
||||||
logger.warn(
|
|
||||||
"Failed to get destination from event %s", e.event_id
|
|
||||||
)
|
|
||||||
|
|
||||||
defer.returnValue(destinations)
|
|
||||||
|
|
|
@ -52,6 +52,11 @@ bump_active_time_counter = metrics.register_counter("bump_active_time")
|
||||||
|
|
||||||
get_updates_counter = metrics.register_counter("get_updates", labels=["type"])
|
get_updates_counter = metrics.register_counter("get_updates", labels=["type"])
|
||||||
|
|
||||||
|
notify_reason_counter = metrics.register_counter("notify_reason", labels=["reason"])
|
||||||
|
state_transition_counter = metrics.register_counter(
|
||||||
|
"state_transition", labels=["from", "to"]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# If a user was last active in the last LAST_ACTIVE_GRANULARITY, consider them
|
# If a user was last active in the last LAST_ACTIVE_GRANULARITY, consider them
|
||||||
# "currently_active"
|
# "currently_active"
|
||||||
|
@ -939,27 +944,32 @@ class PresenceHandler(object):
|
||||||
def should_notify(old_state, new_state):
|
def should_notify(old_state, new_state):
|
||||||
"""Decides if a presence state change should be sent to interested parties.
|
"""Decides if a presence state change should be sent to interested parties.
|
||||||
"""
|
"""
|
||||||
|
if old_state == new_state:
|
||||||
|
return False
|
||||||
|
|
||||||
if old_state.status_msg != new_state.status_msg:
|
if old_state.status_msg != new_state.status_msg:
|
||||||
|
notify_reason_counter.inc("status_msg_change")
|
||||||
|
return True
|
||||||
|
|
||||||
|
if old_state.state != new_state.state:
|
||||||
|
notify_reason_counter.inc("state_change")
|
||||||
|
state_transition_counter.inc(old_state.state, new_state.state)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
if old_state.state == PresenceState.ONLINE:
|
if old_state.state == PresenceState.ONLINE:
|
||||||
if new_state.state != PresenceState.ONLINE:
|
|
||||||
# Always notify for online -> anything
|
|
||||||
return True
|
|
||||||
|
|
||||||
if new_state.currently_active != old_state.currently_active:
|
if new_state.currently_active != old_state.currently_active:
|
||||||
|
notify_reason_counter.inc("current_active_change")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
if new_state.last_active_ts - old_state.last_active_ts > LAST_ACTIVE_GRANULARITY:
|
if new_state.last_active_ts - old_state.last_active_ts > LAST_ACTIVE_GRANULARITY:
|
||||||
# Only notify about last active bumps if we're not currently active
|
# Only notify about last active bumps if we're not currently active
|
||||||
if not (old_state.currently_active and new_state.currently_active):
|
if not new_state.currently_active:
|
||||||
|
notify_reason_counter.inc("last_active_change_online")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
elif new_state.last_active_ts - old_state.last_active_ts > LAST_ACTIVE_GRANULARITY:
|
elif new_state.last_active_ts - old_state.last_active_ts > LAST_ACTIVE_GRANULARITY:
|
||||||
# Always notify for a transition where last active gets bumped.
|
# Always notify for a transition where last active gets bumped.
|
||||||
return True
|
notify_reason_counter.inc("last_active_change_not_online")
|
||||||
|
|
||||||
if old_state.state != new_state.state:
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
|
@ -86,6 +86,9 @@ class SlavedEventStore(BaseSlavedStore):
|
||||||
_get_state_groups_from_groups = (
|
_get_state_groups_from_groups = (
|
||||||
StateStore.__dict__["_get_state_groups_from_groups"]
|
StateStore.__dict__["_get_state_groups_from_groups"]
|
||||||
)
|
)
|
||||||
|
_get_state_groups_from_groups_txn = (
|
||||||
|
DataStore._get_state_groups_from_groups_txn.__func__
|
||||||
|
)
|
||||||
_get_state_group_from_group = (
|
_get_state_group_from_group = (
|
||||||
StateStore.__dict__["_get_state_group_from_group"]
|
StateStore.__dict__["_get_state_group_from_group"]
|
||||||
)
|
)
|
||||||
|
|
|
@ -45,7 +45,14 @@ class DownloadResource(Resource):
|
||||||
@request_handler()
|
@request_handler()
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def _async_render_GET(self, request):
|
def _async_render_GET(self, request):
|
||||||
request.setHeader("Content-Security-Policy", "sandbox")
|
request.setHeader(
|
||||||
|
"Content-Security-Policy",
|
||||||
|
"default-src 'none';"
|
||||||
|
" script-src 'none';"
|
||||||
|
" plugin-types application/pdf;"
|
||||||
|
" style-src 'unsafe-inline';"
|
||||||
|
" object-src 'self';"
|
||||||
|
)
|
||||||
server_name, media_id, name = parse_media_id(request)
|
server_name, media_id, name = parse_media_id(request)
|
||||||
if server_name == self.server_name:
|
if server_name == self.server_name:
|
||||||
yield self._respond_local_file(request, media_id, name)
|
yield self._respond_local_file(request, media_id, name)
|
||||||
|
|
|
@ -55,12 +55,15 @@ def _gen_state_id():
|
||||||
|
|
||||||
|
|
||||||
class _StateCacheEntry(object):
|
class _StateCacheEntry(object):
|
||||||
__slots__ = ["state", "state_group", "state_id"]
|
__slots__ = ["state", "state_group", "state_id", "prev_group", "delta_ids"]
|
||||||
|
|
||||||
def __init__(self, state, state_group):
|
def __init__(self, state, state_group, prev_group=None, delta_ids=None):
|
||||||
self.state = state
|
self.state = state
|
||||||
self.state_group = state_group
|
self.state_group = state_group
|
||||||
|
|
||||||
|
self.prev_group = prev_group
|
||||||
|
self.delta_ids = delta_ids
|
||||||
|
|
||||||
# The `state_id` is a unique ID we generate that can be used as ID for
|
# The `state_id` is a unique ID we generate that can be used as ID for
|
||||||
# this collection of state. Usually this would be the same as the
|
# this collection of state. Usually this would be the same as the
|
||||||
# state group, but on worker instances we can't generate a new state
|
# state group, but on worker instances we can't generate a new state
|
||||||
|
@ -245,11 +248,20 @@ class StateHandler(object):
|
||||||
if key in context.prev_state_ids:
|
if key in context.prev_state_ids:
|
||||||
replaces = context.prev_state_ids[key]
|
replaces = context.prev_state_ids[key]
|
||||||
event.unsigned["replaces_state"] = replaces
|
event.unsigned["replaces_state"] = replaces
|
||||||
|
|
||||||
context.current_state_ids = dict(context.prev_state_ids)
|
context.current_state_ids = dict(context.prev_state_ids)
|
||||||
context.current_state_ids[key] = event.event_id
|
context.current_state_ids[key] = event.event_id
|
||||||
|
|
||||||
|
context.prev_group = entry.prev_group
|
||||||
|
context.delta_ids = entry.delta_ids
|
||||||
|
if context.delta_ids is not None:
|
||||||
|
context.delta_ids[key] = event.event_id
|
||||||
else:
|
else:
|
||||||
context.current_state_ids = context.prev_state_ids
|
context.current_state_ids = context.prev_state_ids
|
||||||
|
|
||||||
|
context.prev_group = entry.prev_group
|
||||||
|
context.delta_ids = entry.delta_ids
|
||||||
|
|
||||||
context.prev_state_events = []
|
context.prev_state_events = []
|
||||||
defer.returnValue(context)
|
defer.returnValue(context)
|
||||||
|
|
||||||
|
@ -283,6 +295,8 @@ class StateHandler(object):
|
||||||
defer.returnValue(_StateCacheEntry(
|
defer.returnValue(_StateCacheEntry(
|
||||||
state=state_list,
|
state=state_list,
|
||||||
state_group=name,
|
state_group=name,
|
||||||
|
prev_group=name,
|
||||||
|
delta_ids={},
|
||||||
))
|
))
|
||||||
|
|
||||||
with (yield self.resolve_linearizer.queue(group_names)):
|
with (yield self.resolve_linearizer.queue(group_names)):
|
||||||
|
@ -340,9 +354,24 @@ class StateHandler(object):
|
||||||
if hasattr(self.store, "get_next_state_group"):
|
if hasattr(self.store, "get_next_state_group"):
|
||||||
state_group = self.store.get_next_state_group()
|
state_group = self.store.get_next_state_group()
|
||||||
|
|
||||||
|
prev_group = None
|
||||||
|
delta_ids = None
|
||||||
|
for old_group, old_ids in state_groups_ids.items():
|
||||||
|
if not set(new_state.iterkeys()) - set(old_ids.iterkeys()):
|
||||||
|
n_delta_ids = {
|
||||||
|
k: v
|
||||||
|
for k, v in new_state.items()
|
||||||
|
if old_ids.get(k) != v
|
||||||
|
}
|
||||||
|
if not delta_ids or len(n_delta_ids) < len(delta_ids):
|
||||||
|
prev_group = old_group
|
||||||
|
delta_ids = n_delta_ids
|
||||||
|
|
||||||
cache = _StateCacheEntry(
|
cache = _StateCacheEntry(
|
||||||
state=new_state,
|
state=new_state,
|
||||||
state_group=state_group,
|
state_group=state_group,
|
||||||
|
prev_group=prev_group,
|
||||||
|
delta_ids=delta_ids,
|
||||||
)
|
)
|
||||||
|
|
||||||
if self._state_cache is not None:
|
if self._state_cache is not None:
|
||||||
|
|
|
@ -54,8 +54,12 @@ class DeviceStore(SQLBaseStore):
|
||||||
or_ignore=ignore_if_known,
|
or_ignore=ignore_if_known,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("store_device with device_id=%s failed: %s",
|
logger.error("store_device with device_id=%s(%r) user_id=%s(%r)"
|
||||||
device_id, e)
|
" display_name=%s(%r) failed: %s",
|
||||||
|
type(device_id).__name__, device_id,
|
||||||
|
type(user_id).__name__, user_id,
|
||||||
|
type(initial_device_display_name).__name__,
|
||||||
|
initial_device_display_name, e)
|
||||||
raise StoreError(500, "Problem storing device.")
|
raise StoreError(500, "Problem storing device.")
|
||||||
|
|
||||||
def get_device(self, user_id, device_id):
|
def get_device(self, user_id, device_id):
|
||||||
|
|
|
@ -497,7 +497,11 @@ class EventsStore(SQLBaseStore):
|
||||||
|
|
||||||
# insert into the state_group, state_groups_state and
|
# insert into the state_group, state_groups_state and
|
||||||
# event_to_state_groups tables.
|
# event_to_state_groups tables.
|
||||||
self._store_mult_state_groups_txn(txn, ((event, context),))
|
try:
|
||||||
|
self._store_mult_state_groups_txn(txn, ((event, context),))
|
||||||
|
except Exception:
|
||||||
|
logger.exception("")
|
||||||
|
raise
|
||||||
|
|
||||||
metadata_json = encode_json(
|
metadata_json = encode_json(
|
||||||
event.internal_metadata.get_dict()
|
event.internal_metadata.get_dict()
|
||||||
|
@ -1543,6 +1547,9 @@ class EventsStore(SQLBaseStore):
|
||||||
)
|
)
|
||||||
event_rows = txn.fetchall()
|
event_rows = txn.fetchall()
|
||||||
|
|
||||||
|
for event_id, state_key in event_rows:
|
||||||
|
txn.call_after(self._get_state_group_for_event.invalidate, (event_id,))
|
||||||
|
|
||||||
# We calculate the new entries for the backward extremities by finding
|
# We calculate the new entries for the backward extremities by finding
|
||||||
# all events that point to events that are to be purged
|
# all events that point to events that are to be purged
|
||||||
txn.execute(
|
txn.execute(
|
||||||
|
@ -1582,7 +1589,66 @@ class EventsStore(SQLBaseStore):
|
||||||
" GROUP BY state_group HAVING MAX(topological_ordering) < ?",
|
" GROUP BY state_group HAVING MAX(topological_ordering) < ?",
|
||||||
(room_id, topological_ordering, topological_ordering)
|
(room_id, topological_ordering, topological_ordering)
|
||||||
)
|
)
|
||||||
|
|
||||||
state_rows = txn.fetchall()
|
state_rows = txn.fetchall()
|
||||||
|
state_groups_to_delete = [sg for sg, in state_rows]
|
||||||
|
|
||||||
|
# Now we get all the state groups that rely on these state groups
|
||||||
|
new_state_edges = []
|
||||||
|
chunks = [
|
||||||
|
state_groups_to_delete[i:i + 100]
|
||||||
|
for i in xrange(0, len(state_groups_to_delete), 100)
|
||||||
|
]
|
||||||
|
for chunk in chunks:
|
||||||
|
rows = self._simple_select_many_txn(
|
||||||
|
txn,
|
||||||
|
table="state_group_edges",
|
||||||
|
column="prev_state_group",
|
||||||
|
iterable=chunk,
|
||||||
|
retcols=["state_group"],
|
||||||
|
keyvalues={},
|
||||||
|
)
|
||||||
|
new_state_edges.extend(row["state_group"] for row in rows)
|
||||||
|
|
||||||
|
# Now we turn the state groups that reference to-be-deleted state groups
|
||||||
|
# to non delta versions.
|
||||||
|
for new_state_edge in new_state_edges:
|
||||||
|
curr_state = self._get_state_groups_from_groups_txn(
|
||||||
|
txn, [new_state_edge], types=None
|
||||||
|
)
|
||||||
|
curr_state = curr_state[new_state_edge]
|
||||||
|
|
||||||
|
self._simple_delete_txn(
|
||||||
|
txn,
|
||||||
|
table="state_groups_state",
|
||||||
|
keyvalues={
|
||||||
|
"state_group": new_state_edge,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
self._simple_delete_txn(
|
||||||
|
txn,
|
||||||
|
table="state_group_edges",
|
||||||
|
keyvalues={
|
||||||
|
"state_group": new_state_edge,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
self._simple_insert_many_txn(
|
||||||
|
txn,
|
||||||
|
table="state_groups_state",
|
||||||
|
values=[
|
||||||
|
{
|
||||||
|
"state_group": new_state_edge,
|
||||||
|
"room_id": room_id,
|
||||||
|
"type": key[0],
|
||||||
|
"state_key": key[1],
|
||||||
|
"event_id": state_id,
|
||||||
|
}
|
||||||
|
for key, state_id in curr_state.items()
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
txn.executemany(
|
txn.executemany(
|
||||||
"DELETE FROM state_groups_state WHERE state_group = ?",
|
"DELETE FROM state_groups_state WHERE state_group = ?",
|
||||||
state_rows
|
state_rows
|
||||||
|
|
|
@ -25,7 +25,7 @@ logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Remember to update this number every time a change is made to database
|
# Remember to update this number every time a change is made to database
|
||||||
# schema files, so the users will be informed on server restarts.
|
# schema files, so the users will be informed on server restarts.
|
||||||
SCHEMA_VERSION = 34
|
SCHEMA_VERSION = 35
|
||||||
|
|
||||||
dir_path = os.path.abspath(os.path.dirname(__file__))
|
dir_path = os.path.abspath(os.path.dirname(__file__))
|
||||||
|
|
||||||
|
@ -242,7 +242,7 @@ def _upgrade_existing_database(cur, current_version, applied_delta_files,
|
||||||
module = imp.load_source(
|
module = imp.load_source(
|
||||||
module_name, absolute_path, python_file
|
module_name, absolute_path, python_file
|
||||||
)
|
)
|
||||||
logger.debug("Running script %s", relative_path)
|
logger.info("Running script %s", relative_path)
|
||||||
module.run_create(cur, database_engine)
|
module.run_create(cur, database_engine)
|
||||||
if not is_empty:
|
if not is_empty:
|
||||||
module.run_upgrade(cur, database_engine, config=config)
|
module.run_upgrade(cur, database_engine, config=config)
|
||||||
|
@ -253,7 +253,7 @@ def _upgrade_existing_database(cur, current_version, applied_delta_files,
|
||||||
pass
|
pass
|
||||||
elif ext == ".sql":
|
elif ext == ".sql":
|
||||||
# A plain old .sql file, just read and execute it
|
# A plain old .sql file, just read and execute it
|
||||||
logger.debug("Applying schema %s", relative_path)
|
logger.info("Applying schema %s", relative_path)
|
||||||
executescript(cur, absolute_path)
|
executescript(cur, absolute_path)
|
||||||
else:
|
else:
|
||||||
# Not a valid delta file.
|
# Not a valid delta file.
|
||||||
|
|
22
synapse/storage/schema/delta/35/state.sql
Normal file
22
synapse/storage/schema/delta/35/state.sql
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
/* Copyright 2016 OpenMarket Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
CREATE TABLE state_group_edges(
|
||||||
|
state_group BIGINT NOT NULL,
|
||||||
|
prev_state_group BIGINT NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX state_group_edges_idx ON state_group_edges(state_group);
|
||||||
|
CREATE INDEX state_group_edges_prev_idx ON state_group_edges(prev_state_group);
|
17
synapse/storage/schema/delta/35/state_dedupe.sql
Normal file
17
synapse/storage/schema/delta/35/state_dedupe.sql
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
/* Copyright 2016 OpenMarket Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
INSERT into background_updates (update_name, progress_json)
|
||||||
|
VALUES ('state_group_state_deduplication', '{}');
|
|
@ -16,6 +16,7 @@
|
||||||
from ._base import SQLBaseStore
|
from ._base import SQLBaseStore
|
||||||
from synapse.util.caches.descriptors import cached, cachedList
|
from synapse.util.caches.descriptors import cached, cachedList
|
||||||
from synapse.util.caches import intern_string
|
from synapse.util.caches import intern_string
|
||||||
|
from synapse.storage.engines import PostgresEngine
|
||||||
|
|
||||||
from twisted.internet import defer
|
from twisted.internet import defer
|
||||||
|
|
||||||
|
@ -24,6 +25,9 @@ import logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
MAX_STATE_DELTA_HOPS = 100
|
||||||
|
|
||||||
|
|
||||||
class StateStore(SQLBaseStore):
|
class StateStore(SQLBaseStore):
|
||||||
""" Keeps track of the state at a given event.
|
""" Keeps track of the state at a given event.
|
||||||
|
|
||||||
|
@ -43,6 +47,15 @@ class StateStore(SQLBaseStore):
|
||||||
* `state_groups_state`: Maps state group to state events.
|
* `state_groups_state`: Maps state group to state events.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication"
|
||||||
|
|
||||||
|
def __init__(self, hs):
|
||||||
|
super(StateStore, self).__init__(hs)
|
||||||
|
self.register_background_update_handler(
|
||||||
|
self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME,
|
||||||
|
self._background_deduplicate_state,
|
||||||
|
)
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def get_state_groups_ids(self, room_id, event_ids):
|
def get_state_groups_ids(self, room_id, event_ids):
|
||||||
if not event_ids:
|
if not event_ids:
|
||||||
|
@ -103,11 +116,8 @@ class StateStore(SQLBaseStore):
|
||||||
state_groups[event.event_id] = context.state_group
|
state_groups[event.event_id] = context.state_group
|
||||||
|
|
||||||
if self._have_persisted_state_group_txn(txn, context.state_group):
|
if self._have_persisted_state_group_txn(txn, context.state_group):
|
||||||
logger.info("Already persisted state_group: %r", context.state_group)
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
state_event_ids = dict(context.current_state_ids)
|
|
||||||
|
|
||||||
self._simple_insert_txn(
|
self._simple_insert_txn(
|
||||||
txn,
|
txn,
|
||||||
table="state_groups",
|
table="state_groups",
|
||||||
|
@ -118,20 +128,51 @@ class StateStore(SQLBaseStore):
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
self._simple_insert_many_txn(
|
# We persist as a delta if we can, while also ensuring the chain
|
||||||
txn,
|
# of deltas isn't too long, as otherwise read performance degrades.
|
||||||
table="state_groups_state",
|
if context.prev_group:
|
||||||
values=[
|
potential_hops = self._count_state_group_hops_txn(
|
||||||
{
|
txn, context.prev_group
|
||||||
|
)
|
||||||
|
if context.prev_group and potential_hops < MAX_STATE_DELTA_HOPS:
|
||||||
|
self._simple_insert_txn(
|
||||||
|
txn,
|
||||||
|
table="state_group_edges",
|
||||||
|
values={
|
||||||
"state_group": context.state_group,
|
"state_group": context.state_group,
|
||||||
"room_id": event.room_id,
|
"prev_state_group": context.prev_group,
|
||||||
"type": key[0],
|
},
|
||||||
"state_key": key[1],
|
)
|
||||||
"event_id": state_id,
|
|
||||||
}
|
self._simple_insert_many_txn(
|
||||||
for key, state_id in state_event_ids.items()
|
txn,
|
||||||
],
|
table="state_groups_state",
|
||||||
)
|
values=[
|
||||||
|
{
|
||||||
|
"state_group": context.state_group,
|
||||||
|
"room_id": event.room_id,
|
||||||
|
"type": key[0],
|
||||||
|
"state_key": key[1],
|
||||||
|
"event_id": state_id,
|
||||||
|
}
|
||||||
|
for key, state_id in context.delta_ids.items()
|
||||||
|
],
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self._simple_insert_many_txn(
|
||||||
|
txn,
|
||||||
|
table="state_groups_state",
|
||||||
|
values=[
|
||||||
|
{
|
||||||
|
"state_group": context.state_group,
|
||||||
|
"room_id": event.room_id,
|
||||||
|
"type": key[0],
|
||||||
|
"state_key": key[1],
|
||||||
|
"event_id": state_id,
|
||||||
|
}
|
||||||
|
for key, state_id in context.current_state_ids.items()
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
self._simple_insert_many_txn(
|
self._simple_insert_many_txn(
|
||||||
txn,
|
txn,
|
||||||
|
@ -145,6 +186,47 @@ class StateStore(SQLBaseStore):
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _count_state_group_hops_txn(self, txn, state_group):
|
||||||
|
"""Given a state group, count how many hops there are in the tree.
|
||||||
|
|
||||||
|
This is used to ensure the delta chains don't get too long.
|
||||||
|
"""
|
||||||
|
if isinstance(self.database_engine, PostgresEngine):
|
||||||
|
sql = ("""
|
||||||
|
WITH RECURSIVE state(state_group) AS (
|
||||||
|
VALUES(?::bigint)
|
||||||
|
UNION ALL
|
||||||
|
SELECT prev_state_group FROM state_group_edges e, state s
|
||||||
|
WHERE s.state_group = e.state_group
|
||||||
|
)
|
||||||
|
SELECT count(*) FROM state;
|
||||||
|
""")
|
||||||
|
|
||||||
|
txn.execute(sql, (state_group,))
|
||||||
|
row = txn.fetchone()
|
||||||
|
if row and row[0]:
|
||||||
|
return row[0]
|
||||||
|
else:
|
||||||
|
return 0
|
||||||
|
else:
|
||||||
|
# We don't use WITH RECURSIVE on sqlite3 as there are distributions
|
||||||
|
# that ship with an sqlite3 version that doesn't support it (e.g. wheezy)
|
||||||
|
next_group = state_group
|
||||||
|
count = 0
|
||||||
|
|
||||||
|
while next_group:
|
||||||
|
next_group = self._simple_select_one_onecol_txn(
|
||||||
|
txn,
|
||||||
|
table="state_group_edges",
|
||||||
|
keyvalues={"state_group": next_group},
|
||||||
|
retcol="prev_state_group",
|
||||||
|
allow_none=True,
|
||||||
|
)
|
||||||
|
if next_group:
|
||||||
|
count += 1
|
||||||
|
|
||||||
|
return count
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def get_current_state(self, room_id, event_type=None, state_key=""):
|
def get_current_state(self, room_id, event_type=None, state_key=""):
|
||||||
if event_type and state_key is not None:
|
if event_type and state_key is not None:
|
||||||
|
@ -206,48 +288,108 @@ class StateStore(SQLBaseStore):
|
||||||
def _get_state_groups_from_groups(self, groups, types):
|
def _get_state_groups_from_groups(self, groups, types):
|
||||||
"""Returns dictionary state_group -> (dict of (type, state_key) -> event id)
|
"""Returns dictionary state_group -> (dict of (type, state_key) -> event id)
|
||||||
"""
|
"""
|
||||||
def f(txn, groups):
|
|
||||||
if types is not None:
|
|
||||||
where_clause = "AND (%s)" % (
|
|
||||||
" OR ".join(["(type = ? AND state_key = ?)"] * len(types)),
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
where_clause = ""
|
|
||||||
|
|
||||||
sql = (
|
|
||||||
"SELECT state_group, event_id, type, state_key"
|
|
||||||
" FROM state_groups_state WHERE"
|
|
||||||
" state_group IN (%s) %s" % (
|
|
||||||
",".join("?" for _ in groups),
|
|
||||||
where_clause,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
args = list(groups)
|
|
||||||
if types is not None:
|
|
||||||
args.extend([i for typ in types for i in typ])
|
|
||||||
|
|
||||||
txn.execute(sql, args)
|
|
||||||
rows = self.cursor_to_dict(txn)
|
|
||||||
|
|
||||||
results = {group: {} for group in groups}
|
|
||||||
for row in rows:
|
|
||||||
key = (row["type"], row["state_key"])
|
|
||||||
results[row["state_group"]][key] = row["event_id"]
|
|
||||||
return results
|
|
||||||
|
|
||||||
results = {}
|
results = {}
|
||||||
|
|
||||||
chunks = [groups[i:i + 100] for i in xrange(0, len(groups), 100)]
|
chunks = [groups[i:i + 100] for i in xrange(0, len(groups), 100)]
|
||||||
for chunk in chunks:
|
for chunk in chunks:
|
||||||
res = yield self.runInteraction(
|
res = yield self.runInteraction(
|
||||||
"_get_state_groups_from_groups",
|
"_get_state_groups_from_groups",
|
||||||
f, chunk
|
self._get_state_groups_from_groups_txn, chunk, types,
|
||||||
)
|
)
|
||||||
results.update(res)
|
results.update(res)
|
||||||
|
|
||||||
defer.returnValue(results)
|
defer.returnValue(results)
|
||||||
|
|
||||||
|
def _get_state_groups_from_groups_txn(self, txn, groups, types=None):
    """Resolve the state of each given state group inside a transaction.

    For every group, the chain of previous groups recorded in
    `state_group_edges` is followed, and for each (type, state_key) the
    event_id stored against the numerically greatest state group in that
    chain is used.

    Args:
        txn: the transaction to execute queries on.
        groups: iterable of state group ids to resolve.
        types: optional sized collection of (type, state_key) pairs used
            to filter the rows fetched; None applies no filter.

    Returns:
        dict of state_group -> (dict of (type, state_key) -> event_id),
        with an entry (possibly empty) for every requested group.
    """
    if types is None:
        where_clause = ""
        type_args = []
    else:
        where_clause = "AND (%s)" % (
            " OR ".join(["(type = ? AND state_key = ?)"] * len(types)),
        )
        # Flattened (type, state_key, type, state_key, ...) bind values
        # matching the placeholders in where_clause.
        type_args = [i for typ in types for i in typ]

    results = {group: {} for group in groups}

    if isinstance(self.database_engine, PostgresEngine):
        # Temporarily disable sequential scans in this transaction. This is
        # a temporary hack until we can add the right indices in
        txn.execute("SET LOCAL enable_seqscan=off")

        # The query below walks the state_group tree so that the "state"
        # table holds every state_group in the tree, then joins against
        # `state_groups_state` to fetch the latest state.
        # It assumes that previous state groups are always numerically
        # lesser.
        # The PARTITION picks the event_id from the greatest state group
        # for each (type, state_key). Several rows may come back for the
        # same (type, state_key), but last_value should be the same.
        sql = ("""
            WITH RECURSIVE state(state_group) AS (
                VALUES(?::bigint)
                UNION ALL
                SELECT prev_state_group FROM state_group_edges e, state s
                WHERE s.state_group = e.state_group
            )
            SELECT type, state_key, last_value(event_id) OVER (
                PARTITION BY type, state_key ORDER BY state_group ASC
                ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
            ) AS event_id FROM state_groups_state
            WHERE state_group IN (
                SELECT state_group FROM state
            )
            %s;
        """) % (where_clause,)

        for group in groups:
            txn.execute(sql, [group] + type_args)
            group_state = results[group]
            for row in self.cursor_to_dict(txn):
                group_state[(row["type"], row["state_key"])] = row["event_id"]
    else:
        # We don't use WITH RECURSIVE on sqlite3 as there are distributions
        # that ship with an sqlite3 version that doesn't support it (e.g. wheezy)
        for group in groups:
            # Follow the edges by hand, one lookup per hop, until a group
            # with no recorded predecessor is reached.
            chain = [group]
            current = group
            while current:
                current = self._simple_select_one_onecol_txn(
                    txn,
                    table="state_group_edges",
                    keyvalues={"state_group": current},
                    retcol="prev_state_group",
                    allow_none=True,
                )
                if current:
                    chain.append(current)

            sql = ("""
                SELECT type, state_key, event_id FROM state_groups_state
                INNER JOIN (
                    SELECT type, state_key, max(state_group) as state_group
                    FROM state_groups_state
                    WHERE state_group IN (%s) %s
                    GROUP BY type, state_key
                ) USING (type, state_key, state_group);
            """) % (",".join(["?"] * len(chain)), where_clause,)

            txn.execute(sql, chain + type_args)
            group_state = results[group]
            for row in self.cursor_to_dict(txn):
                group_state[(row["type"], row["state_key"])] = row["event_id"]

    return results
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def get_state_for_events(self, event_ids, types):
|
def get_state_for_events(self, event_ids, types):
|
||||||
"""Given a list of event_ids and type tuples, return a list of state
|
"""Given a list of event_ids and type tuples, return a list of state
|
||||||
|
@ -504,32 +646,150 @@ class StateStore(SQLBaseStore):
|
||||||
|
|
||||||
defer.returnValue(results)
|
defer.returnValue(results)
|
||||||
|
|
||||||
def get_all_new_state_groups(self, last_id, current_id, limit):
    """Fetch state groups with last_id < id <= current_id and their state.

    Args:
        last_id: exclusive lower bound on the state group id.
        current_id: inclusive upper bound on the state group id.
        limit: maximum number of state group rows to fetch.

    Returns:
        The result of `self.runInteraction`, which resolves to a tuple
        `(groups, state_group_state)`: the `state_groups` rows
        (id, room_id, event_id) ordered by id, and every
        `state_groups_state` row (state_group, type, state_key, event_id)
        whose state_group lies between the first and last fetched ids.
        Both are empty lists when no group matches.
    """
    groups_sql = (
        "SELECT id, room_id, event_id FROM state_groups"
        " WHERE ? < id AND id <= ? ORDER BY id LIMIT ?"
    )
    state_sql = (
        "SELECT state_group, type, state_key, event_id"
        " FROM state_groups_state"
        " WHERE ? <= state_group AND state_group <= ?"
    )

    def get_all_new_state_groups_txn(txn):
        txn.execute(groups_sql, (last_id, current_id, limit))
        group_rows = txn.fetchall()

        if not group_rows:
            return ([], [])

        # Rows are ordered by id, so the first and last rows bound the range.
        first_id = group_rows[0][0]
        final_id = group_rows[-1][0]

        txn.execute(state_sql, (first_id, final_id))
        state_rows = txn.fetchall()
        return (group_rows, state_rows)

    return self.runInteraction(
        "get_all_new_state_groups", get_all_new_state_groups_txn
    )
|
|
||||||
|
|
||||||
def get_next_state_group(self):
    """Return the value produced by `self._state_groups_id_gen.get_next()`.

    Presumably this is the next unused state group id; confirm against
    the id generator's implementation.
    """
    id_gen = self._state_groups_id_gen
    return id_gen.get_next()
|
||||||
|
|
||||||
|
@defer.inlineCallbacks
def _background_deduplicate_state(self, progress, batch_size):
    """This background update will slowly deduplicate state by re-encoding
    state groups as deltas against an earlier state group of the same room.

    Args:
        progress: dict of saved progress; the keys read are
            "last_state_group", "rows_inserted" and "max_group".
        batch_size: requested batch size. It is divided by
            BATCH_SIZE_SCALE_FACTOR (minimum 1) to get the number of
            state groups examined in this run.

    Returns:
        Deferred resolving to the number of state groups handled in this
        run multiplied by BATCH_SIZE_SCALE_FACTOR.
    """
    last_state_group = progress.get("last_state_group", 0)
    rows_inserted = progress.get("rows_inserted", 0)
    max_group = progress.get("max_group", None)

    BATCH_SIZE_SCALE_FACTOR = 100

    batch_size = max(1, int(batch_size / BATCH_SIZE_SCALE_FACTOR))

    if max_group is None:
        # First run: remember the highest existing group so that the update
        # only ever covers groups that existed when it started.
        rows = yield self._execute(
            "_background_deduplicate_state", None,
            "SELECT coalesce(max(id), 0) FROM state_groups",
        )
        max_group = rows[0][0]

    def reindex_txn(txn):
        # Returns (finished, number_of_groups_handled).
        cursor_group = last_state_group

        for count in xrange(batch_size):
            txn.execute(
                "SELECT id, room_id FROM state_groups"
                " WHERE ? < id AND id <= ?"
                " ORDER BY id ASC"
                " LIMIT 1",
                (cursor_group, max_group,)
            )
            row = txn.fetchone()
            if not row:
                return True, count

            state_group, room_id = row
            if not state_group:
                return True, count

            txn.execute(
                "SELECT state_group FROM state_group_edges"
                " WHERE state_group = ?",
                (state_group,)
            )

            # If we reach a point where we've already started inserting
            # edges we should stop.
            if txn.fetchall():
                return True, count

            txn.execute(
                "SELECT coalesce(max(id), 0) FROM state_groups"
                " WHERE id < ? AND room_id = ?",
                (state_group, room_id,)
            )
            prev_group, = txn.fetchone()
            cursor_group = state_group

            if not prev_group:
                # No earlier group in this room to diff against.
                continue

            potential_hops = self._count_state_group_hops_txn(
                txn, prev_group
            )
            if potential_hops >= MAX_STATE_DELTA_HOPS:
                # We want to ensure chains are at most this long,
                # otherwise read performance degrades.
                continue

            prev_state = self._get_state_groups_from_groups_txn(
                txn, [prev_group], types=None
            )[prev_group]

            curr_state = self._get_state_groups_from_groups_txn(
                txn, [state_group], types=None
            )[state_group]

            if set(prev_state.keys()) - set(curr_state.keys()):
                # A delta can only add or replace entries, so it is only
                # usable when every key of the previous group is still
                # present in the current one.
                continue

            delta_state = {
                key: value for key, value in curr_state.items()
                if prev_state.get(key, None) != value
            }

            self._simple_delete_txn(
                txn,
                table="state_group_edges",
                keyvalues={
                    "state_group": state_group,
                }
            )

            self._simple_insert_txn(
                txn,
                table="state_group_edges",
                values={
                    "state_group": state_group,
                    "prev_state_group": prev_group,
                }
            )

            # Replace the group's full state rows with just the delta.
            self._simple_delete_txn(
                txn,
                table="state_groups_state",
                keyvalues={
                    "state_group": state_group,
                }
            )

            self._simple_insert_many_txn(
                txn,
                table="state_groups_state",
                values=[
                    {
                        "state_group": state_group,
                        "room_id": room_id,
                        "type": key[0],
                        "state_key": key[1],
                        "event_id": state_id,
                    }
                    for key, state_id in delta_state.items()
                ],
            )

        # Every iteration that did not return recorded its group in
        # cursor_group, so this is the last group examined.
        new_progress = {
            "last_state_group": cursor_group,
            "rows_inserted": rows_inserted + batch_size,
            "max_group": max_group,
        }

        self._background_update_progress_txn(
            txn, self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME, new_progress
        )

        return False, batch_size

    finished, result = yield self.runInteraction(
        self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME, reindex_txn
    )

    if finished:
        yield self._end_background_update(self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME)

    defer.returnValue(result * BATCH_SIZE_SCALE_FACTOR)
|
||||||
|
|
Loading…
Reference in a new issue