Add user_directory to database

This commit is contained in:
Erik Johnston 2017-05-31 11:51:01 +01:00
parent 5dbaa520a5
commit eeb2f9e546
6 changed files with 444 additions and 1 deletions

View file

@ -0,0 +1,218 @@
# -*- coding: utf-8 -*-
# Copyright 2017 Vector Creations Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from twisted.internet import defer
from synapse.api.constants import EventTypes, JoinRules, Membership
from synapse.storage.roommember import ProfileInfo
from synapse.util.metrics import Measure
logger = logging.getLogger(__name__)
class UserDirectoyHandler(object):
def __init__(self, hs):
self.store = hs.get_datastore()
self.state = hs.get_state_handler()
self.server_name = hs.hostname
self.clock = hs.get_clock()
self.initially_handled_users = set()
self.pos = None
self._is_processing = False
@defer.inlineCallbacks
def notify_new_event(self):
if self._is_processing:
return
self._is_processing = True
try:
yield self._unsafe_process()
finally:
self._is_processing = False
@defer.inlineCallbacks
def _unsafe_process(self):
if self.pos is None:
self.pos = yield self.store.get_user_directory_stream_pos()
if self.pos is None:
yield self._do_initial_spam()
self.pos = yield self.store.get_user_directory_stream_pos()
while True:
with Measure(self.clock, "user_dir_delta"):
deltas = yield self.store.get_current_state_deltas(self.pos)
if not deltas:
return
yield self._handle_deltas(deltas)
max_stream_id = deltas[-1]["stream_id"]
yield self.store.update_user_directory_stream_pos(max_stream_id)
@defer.inlineCallbacks
def _handle_room(self, room_id):
# TODO: Check we're still joined to room
is_public = yield self.store.is_room_world_readable_or_publicly_joinable(room_id)
if not is_public:
return
users_with_profile = yield self.state.get_current_user_in_room(room_id)
unhandled_users = set(users_with_profile) - self.initially_handled_users
yield self.store.add_profiles_to_user_dir(
room_id, {
user_id: users_with_profile[user_id] for user_id in unhandled_users
}
)
self.initially_handled_users |= unhandled_users
@defer.inlineCallbacks
def _do_initial_spam(self):
yield self.store.delete_all_from_user_dir()
room_ids = yield self.store.get_all_rooms()
for room_id in room_ids:
yield self._handle_room(room_id)
self.initially_handled_users = None
yield self.store.update_user_directory_stream_pos(-1)
@defer.inlineCallbacks
def _handle_new_user(self, room_id, user_id, profile):
row = yield self.store.get_user_in_directory(user_id)
if row:
return
yield self.store.add_profiles_to_user_dir(room_id, {user_id: profile})
def _handle_remove_user(self, room_id, user_id):
row = yield self.store.get_user_in_directory(user_id)
if not row or row["room_id"] != room_id:
return
# TODO: Make this faster?
rooms = yield self.store.get_rooms_for_user(user_id)
for room_id in rooms:
is_public = yield self.store.is_room_world_readable_or_publicly_joinable(
room_id
)
if is_public:
return
yield self.store.remove_from_user_dir(user_id)
@defer.inlineCallbacks
def _handle_deltas(self, deltas):
for delta in deltas:
typ = delta["type"]
state_key = delta["state_key"]
room_id = delta["room_id"]
event_id = delta["event_id"]
prev_event_id = delta["prev_event_id"]
if typ == EventTypes.RoomHistoryVisibility:
change = yield self._get_key_change(
prev_event_id, event_id,
key_name="history_visibility",
public_value="world_readable",
)
if change is None:
continue
users_with_profile = yield self.state.get_current_user_in_room(room_id)
for user_id, profile in users_with_profile.iteritems():
if change:
yield self._handle_new_user(room_id, user_id, profile)
else:
yield self._handle_remove_user(room_id, user_id)
elif typ == EventTypes.JoinRules:
change = yield self._get_key_change(
prev_event_id, event_id,
key_name="join_rules",
public_value=JoinRules.PUBLIC,
)
if change is None:
continue
users_with_profile = yield self.state.get_current_user_in_room(room_id)
for user_id, profile in users_with_profile.iteritems():
if change:
yield self._handle_new_user(room_id, user_id, profile)
else:
yield self._handle_remove_user(room_id, user_id)
elif typ == EventTypes.Member:
change = yield self._get_key_change(
prev_event_id, event_id,
key_name="membership",
public_value=Membership.JOIN,
)
if change is None:
continue
if change:
event = yield self.store.get_event(event_id)
profile = ProfileInfo(
avatar_url=event.content.get("avatar_url"),
display_name=event.content.get("displayname"),
)
yield self._handle_new_user(room_id, state_key, profile)
else:
yield self._handle_remove_user(room_id, state_key)
@defer.inlineCallbacks
def _get_key_change(self, prev_event_id, event_id, key_name, public_value):
prev_event = None
event = None
if prev_event_id:
prev_event = yield self.store.get_event(prev_event_id, allow_none=True)
if event_id:
event = yield self.store.get_event(event_id, allow_none=True)
if not event and not prev_event:
defer.returnValue(None)
prev_hist_vis = None
hist_vis = None
if prev_event:
prev_hist_vis = prev_event.content.get(key_name, None)
if event:
hist_vis = event.content.get(key_name, None)
logger.info("prev: %r, new: %r", prev_hist_vis, hist_vis)
if hist_vis == public_value and prev_hist_vis != public_value:
defer.returnValue(True)
elif hist_vis != public_value and prev_hist_vis == public_value:
defer.returnValue(False)
else:
defer.returnValue(None)

View file

@ -167,6 +167,7 @@ class Notifier(object):
self.clock = hs.get_clock() self.clock = hs.get_clock()
self.appservice_handler = hs.get_application_service_handler() self.appservice_handler = hs.get_application_service_handler()
self.user_directory_handler = hs.get_user_directory_handler()
if hs.should_send_federation(): if hs.should_send_federation():
self.federation_sender = hs.get_federation_sender() self.federation_sender = hs.get_federation_sender()
@ -251,7 +252,10 @@ class Notifier(object):
"""Notify any user streams that are interested in this room event""" """Notify any user streams that are interested in this room event"""
# poke any interested application service. # poke any interested application service.
preserve_fn(self.appservice_handler.notify_interested_services)( preserve_fn(self.appservice_handler.notify_interested_services)(
room_stream_id) room_stream_id
)
preserve_fn(self.user_directory_handler.notify_new_event)()
if self.federation_sender: if self.federation_sender:
preserve_fn(self.federation_sender.notify_new_events)( preserve_fn(self.federation_sender.notify_new_events)(

View file

@ -49,6 +49,7 @@ from synapse.handlers.events import EventHandler, EventStreamHandler
from synapse.handlers.initial_sync import InitialSyncHandler from synapse.handlers.initial_sync import InitialSyncHandler
from synapse.handlers.receipts import ReceiptsHandler from synapse.handlers.receipts import ReceiptsHandler
from synapse.handlers.read_marker import ReadMarkerHandler from synapse.handlers.read_marker import ReadMarkerHandler
from synapse.handlers.user_directory import UserDirectoyHandler
from synapse.http.client import SimpleHttpClient, InsecureInterceptableContextFactory from synapse.http.client import SimpleHttpClient, InsecureInterceptableContextFactory
from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.http.matrixfederationclient import MatrixFederationHttpClient
from synapse.notifier import Notifier from synapse.notifier import Notifier
@ -137,6 +138,7 @@ class HomeServer(object):
'tcp_replication', 'tcp_replication',
'read_marker_handler', 'read_marker_handler',
'action_generator', 'action_generator',
'user_directory_handler',
] ]
def __init__(self, hostname, **kwargs): def __init__(self, hostname, **kwargs):
@ -304,6 +306,9 @@ class HomeServer(object):
def build_action_generator(self): def build_action_generator(self):
return ActionGenerator(self) return ActionGenerator(self)
def build_user_directory_handler(self):
return UserDirectoyHandler(self)
def remove_pusher(self, app_id, push_key, user_id): def remove_pusher(self, app_id, push_key, user_id):
return self.get_pusherpool().remove_pusher(app_id, push_key, user_id) return self.get_pusherpool().remove_pusher(app_id, push_key, user_id)

View file

@ -49,6 +49,7 @@ from .tags import TagsStore
from .account_data import AccountDataStore from .account_data import AccountDataStore
from .openid import OpenIdStore from .openid import OpenIdStore
from .client_ips import ClientIpStore from .client_ips import ClientIpStore
from .user_directory import UserDirectoryStore
from .util.id_generators import IdGenerator, StreamIdGenerator, ChainedIdGenerator from .util.id_generators import IdGenerator, StreamIdGenerator, ChainedIdGenerator
from .engines import PostgresEngine from .engines import PostgresEngine
@ -86,6 +87,7 @@ class DataStore(RoomMemberStore, RoomStore,
ClientIpStore, ClientIpStore,
DeviceStore, DeviceStore,
DeviceInboxStore, DeviceInboxStore,
UserDirectoryStore,
): ):
def __init__(self, db_conn, hs): def __init__(self, db_conn, hs):

View file

@ -0,0 +1,69 @@
# Copyright 2017 Vector Creations Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from synapse.storage.prepare_database import get_statements
from synapse.storage.engines import PostgresEngine, Sqlite3Engine
logger = logging.getLogger(__name__)
BOTH_TABLES = """
CREATE TABLE user_directory_stream_pos (
Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row.
stream_id BIGINT,
CHECK (Lock='X')
);
INSERT INTO user_directory_stream_pos (stream_id) VALUES (null);
"""
POSTGRES_TABLE = """
CREATE TABLE user_directory (
user_id TEXT NOT NULL,
room_id TEXT NOT NULL,
display_name TEXT,
avatar_url TEXT,
vector tsvector
);
CREATE INDEX user_directory_fts_idx ON user_directory USING gin(vector);
CREATE INDEX user_directory_user_idx ON user_directory(user_id);
"""
SQLITE_TABLE = """
CREATE VIRTUAL TABLE user_directory
USING fts4 ( user_id, room_id, display_name, avatar_url, value );
"""
def run_create(cur, database_engine, *args, **kwargs):
for statement in get_statements(BOTH_TABLES.splitlines()):
cur.execute(statement)
if isinstance(database_engine, PostgresEngine):
for statement in get_statements(POSTGRES_TABLE.splitlines()):
cur.execute(statement)
elif isinstance(database_engine, Sqlite3Engine):
for statement in get_statements(SQLITE_TABLE.splitlines()):
cur.execute(statement)
else:
raise Exception("Unrecognized database engine")
def run_upgrade(*args, **kwargs):
pass

View file

@ -0,0 +1,145 @@
# -*- coding: utf-8 -*-
# Copyright 2017 Vector Creations Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from twisted.internet import defer
from ._base import SQLBaseStore
from synapse.util.caches.descriptors import cached, cachedInlineCallbacks
from synapse.api.constants import EventTypes, JoinRules
from synapse.storage.engines import PostgresEngine, Sqlite3Engine
class UserDirectoryStore(SQLBaseStore):
@cachedInlineCallbacks(cache_context=True)
def is_room_world_readable_or_publicly_joinable(self, room_id, cache_context):
current_state_ids = yield self.get_current_state_ids(
room_id, on_invalidate=cache_context.invalidate
)
join_rules_id = current_state_ids.get((EventTypes.JoinRules, ""))
if join_rules_id:
join_rule_ev = yield self.get_event(join_rules_id, allow_none=True)
if join_rule_ev:
if join_rule_ev.content.get("join_rules") == JoinRules.PUBLIC:
defer.returnValue(True)
hist_vis_id = current_state_ids.get((EventTypes.RoomHistoryVisibility, ""))
if hist_vis_id:
hist_vis_ev = yield self.get_event(hist_vis_id, allow_none=True)
if hist_vis_ev:
if hist_vis_ev.content.get("history_visibility") == "world_readable":
defer.returnValue(True)
defer.returnValue(False)
def add_profiles_to_user_dir(self, room_id, users_with_profile):
if isinstance(self.database_engine, PostgresEngine):
sql = """
INSERT INTO user_directory
(user_id, room_id, display_name, avatar_url, vector)
VALUES (?,?,?,?,to_tsvector('english', ?))
"""
elif isinstance(self.database_engine, Sqlite3Engine):
sql = """
INSERT INTO user_directory
(user_id, room_id, display_name, avatar_url, value)
VALUES (?,?,?,?,?)
"""
else:
# This should be unreachable.
raise Exception("Unrecognized database engine")
def _add_profiles_to_user_dir_txn(txn):
txn.executemany(sql, (
(
user_id, room_id, p.display_name, p.avatar_url,
"%s %s" % (user_id, p.display_name,) if p.display_name else user_id
)
for user_id, p in users_with_profile.iteritems()
))
for user_id in users_with_profile:
txn.call_after(
self.get_user_in_directory.invalidate, (user_id,)
)
return self.runInteraction(
"add_profiles_to_user_dir", _add_profiles_to_user_dir_txn
)
@defer.inlineCallbacks
def remove_from_user_dir(self, user_id):
yield self._simple_delete(
table="user_directory",
keyvalues={"user_id": user_id},
desc="remove_from_user_dir",
)
self.get_user_in_directory.invalidate((user_id,))
def get_all_rooms(self):
return self._simple_select_onecol(
table="current_state_events",
keyvalues={},
retcol="DISTINCT room_id",
desc="get_all_rooms",
)
def delete_all_from_user_dir(self):
def _delete_all_from_user_dir_txn(txn):
txn.execute("DELETE FROM user_directory")
txn.call_after(self.get_user_in_directory.invalidate_all)
return self.runInteraction(
"delete_all_from_user_dir", _delete_all_from_user_dir_txn
)
@cached()
def get_user_in_directory(self, user_id):
return self._simple_select_one(
table="user_directory",
keyvalues={"user_id": user_id},
retcols=("room_id", "display_name", "avatar_url",),
allow_none=True,
desc="get_user_in_directory",
)
def get_user_directory_stream_pos(self):
return self._simple_select_one_onecol(
table="user_directory_stream_pos",
keyvalues={},
retcol="stream_id",
desc="get_user_directory_stream_pos",
)
def update_user_directory_stream_pos(self, stream_id):
return self._simple_update_one(
table="user_directory_stream_pos",
keyvalues={},
updatevalues={"stream_id": stream_id},
desc="update_user_directory_stream_pos",
)
def get_current_state_deltas(self, prev_stream_id):
# TODO: Add stream change cache
# TODO: Add limit
sql = """
SELECT stream_id, room_id, type, state_key, event_id, prev_event_id
FROM current_state_delta_stream
WHERE stream_id > ?
ORDER BY stream_id ASC
"""
return self._execute(
"get_current_state_deltas", self.cursor_to_dict, sql, prev_stream_id
)