forked from MirrorHub/synapse
Merge pull request #2769 from matrix-org/matthew/hit_the_gin
switch back from GIST to GIN indexes
This commit is contained in:
commit
d28ec43e15
5 changed files with 109 additions and 19 deletions
|
@ -242,6 +242,25 @@ class BackgroundUpdateStore(SQLBaseStore):
|
||||||
"""
|
"""
|
||||||
self._background_update_handlers[update_name] = update_handler
|
self._background_update_handlers[update_name] = update_handler
|
||||||
|
|
||||||
|
def register_noop_background_update(self, update_name):
|
||||||
|
"""Register a noop handler for a background update.
|
||||||
|
|
||||||
|
This is useful when we previously did a background update, but no
|
||||||
|
longer wish to do the update. In this case the background update should
|
||||||
|
be removed from the schema delta files, but there may still be some
|
||||||
|
users who have the background update queued, so this method should
|
||||||
|
also be called to clear the update.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
update_name (str): Name of update
|
||||||
|
"""
|
||||||
|
@defer.inlineCallbacks
|
||||||
|
def noop_update(progress, batch_size):
|
||||||
|
yield self._end_background_update(update_name)
|
||||||
|
defer.returnValue(1)
|
||||||
|
|
||||||
|
self.register_background_update_handler(update_name, noop_update)
|
||||||
|
|
||||||
def register_background_index_update(self, update_name, index_name,
|
def register_background_index_update(self, update_name, index_name,
|
||||||
table, columns, where_clause=None,
|
table, columns, where_clause=None,
|
||||||
unique=False,
|
unique=False,
|
||||||
|
|
|
@ -39,12 +39,7 @@ class RegistrationStore(background_updates.BackgroundUpdateStore):
|
||||||
# we no longer use refresh tokens, but it's possible that some people
|
# we no longer use refresh tokens, but it's possible that some people
|
||||||
# might have a background update queued to build this index. Just
|
# might have a background update queued to build this index. Just
|
||||||
# clear the background update.
|
# clear the background update.
|
||||||
@defer.inlineCallbacks
|
self.register_noop_background_update("refresh_tokens_device_index")
|
||||||
def noop_update(progress, batch_size):
|
|
||||||
yield self._end_background_update("refresh_tokens_device_index")
|
|
||||||
defer.returnValue(1)
|
|
||||||
self.register_background_update_handler(
|
|
||||||
"refresh_tokens_device_index", noop_update)
|
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def add_access_token_to_user(self, user_id, token, device_id=None):
|
def add_access_token_to_user(self, user_id, token, device_id=None):
|
||||||
|
|
|
@ -13,5 +13,7 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
INSERT into background_updates (update_name, progress_json)
|
-- We no longer do this given we back it out again in schema 47
|
||||||
VALUES ('event_search_postgres_gist', '{}');
|
|
||||||
|
-- INSERT into background_updates (update_name, progress_json)
|
||||||
|
-- VALUES ('event_search_postgres_gist', '{}');
|
||||||
|
|
17
synapse/storage/schema/delta/47/postgres_fts_gin.sql
Normal file
17
synapse/storage/schema/delta/47/postgres_fts_gin.sql
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
/* Copyright 2018 New Vector Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
INSERT into background_updates (update_name, progress_json)
|
||||||
|
VALUES ('event_search_postgres_gin', '{}');
|
|
@ -38,6 +38,7 @@ class SearchStore(BackgroundUpdateStore):
|
||||||
EVENT_SEARCH_UPDATE_NAME = "event_search"
|
EVENT_SEARCH_UPDATE_NAME = "event_search"
|
||||||
EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order"
|
EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order"
|
||||||
EVENT_SEARCH_USE_GIST_POSTGRES_NAME = "event_search_postgres_gist"
|
EVENT_SEARCH_USE_GIST_POSTGRES_NAME = "event_search_postgres_gist"
|
||||||
|
EVENT_SEARCH_USE_GIN_POSTGRES_NAME = "event_search_postgres_gin"
|
||||||
|
|
||||||
def __init__(self, db_conn, hs):
|
def __init__(self, db_conn, hs):
|
||||||
super(SearchStore, self).__init__(db_conn, hs)
|
super(SearchStore, self).__init__(db_conn, hs)
|
||||||
|
@ -48,9 +49,19 @@ class SearchStore(BackgroundUpdateStore):
|
||||||
self.EVENT_SEARCH_ORDER_UPDATE_NAME,
|
self.EVENT_SEARCH_ORDER_UPDATE_NAME,
|
||||||
self._background_reindex_search_order
|
self._background_reindex_search_order
|
||||||
)
|
)
|
||||||
self.register_background_update_handler(
|
|
||||||
|
# we used to have a background update to turn the GIN index into a
|
||||||
|
# GIST one; we no longer do that (obviously) because we actually want
|
||||||
|
# a GIN index. However, it's possible that some people might still have
|
||||||
|
# the background update queued, so we register a handler to clear the
|
||||||
|
# background update.
|
||||||
|
self.register_noop_background_update(
|
||||||
self.EVENT_SEARCH_USE_GIST_POSTGRES_NAME,
|
self.EVENT_SEARCH_USE_GIST_POSTGRES_NAME,
|
||||||
self._background_reindex_gist_search
|
)
|
||||||
|
|
||||||
|
self.register_background_update_handler(
|
||||||
|
self.EVENT_SEARCH_USE_GIN_POSTGRES_NAME,
|
||||||
|
self._background_reindex_gin_search
|
||||||
)
|
)
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
|
@ -151,25 +162,48 @@ class SearchStore(BackgroundUpdateStore):
|
||||||
defer.returnValue(result)
|
defer.returnValue(result)
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def _background_reindex_gist_search(self, progress, batch_size):
|
def _background_reindex_gin_search(self, progress, batch_size):
|
||||||
|
"""This handles old synapses which used GIST indexes, if any;
|
||||||
|
converting them back to be GIN as per the actual schema.
|
||||||
|
"""
|
||||||
|
|
||||||
def create_index(conn):
|
def create_index(conn):
|
||||||
conn.rollback()
|
conn.rollback()
|
||||||
|
|
||||||
|
# we have to set autocommit, because postgres refuses to
|
||||||
|
# CREATE INDEX CONCURRENTLY without it.
|
||||||
conn.set_session(autocommit=True)
|
conn.set_session(autocommit=True)
|
||||||
c = conn.cursor()
|
|
||||||
|
|
||||||
c.execute(
|
try:
|
||||||
"CREATE INDEX CONCURRENTLY event_search_fts_idx_gist"
|
c = conn.cursor()
|
||||||
" ON event_search USING GIST (vector)"
|
|
||||||
)
|
|
||||||
|
|
||||||
c.execute("DROP INDEX event_search_fts_idx")
|
# if we skipped the conversion to GIST, we may already/still
|
||||||
|
# have an event_search_fts_idx; unfortunately postgres 9.4
|
||||||
|
# doesn't support CREATE INDEX IF EXISTS so we just catch the
|
||||||
|
# exception and ignore it.
|
||||||
|
import psycopg2
|
||||||
|
try:
|
||||||
|
c.execute(
|
||||||
|
"CREATE INDEX CONCURRENTLY event_search_fts_idx"
|
||||||
|
" ON event_search USING GIN (vector)"
|
||||||
|
)
|
||||||
|
except psycopg2.ProgrammingError as e:
|
||||||
|
logger.warn(
|
||||||
|
"Ignoring error %r when trying to switch from GIST to GIN",
|
||||||
|
e
|
||||||
|
)
|
||||||
|
|
||||||
conn.set_session(autocommit=False)
|
# we should now be able to delete the GIST index.
|
||||||
|
c.execute(
|
||||||
|
"DROP INDEX IF EXISTS event_search_fts_idx_gist"
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
conn.set_session(autocommit=False)
|
||||||
|
|
||||||
if isinstance(self.database_engine, PostgresEngine):
|
if isinstance(self.database_engine, PostgresEngine):
|
||||||
yield self.runWithConnection(create_index)
|
yield self.runWithConnection(create_index)
|
||||||
|
|
||||||
yield self._end_background_update(self.EVENT_SEARCH_USE_GIST_POSTGRES_NAME)
|
yield self._end_background_update(self.EVENT_SEARCH_USE_GIN_POSTGRES_NAME)
|
||||||
defer.returnValue(1)
|
defer.returnValue(1)
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
|
@ -289,7 +323,30 @@ class SearchStore(BackgroundUpdateStore):
|
||||||
entry.stream_ordering, entry.origin_server_ts,
|
entry.stream_ordering, entry.origin_server_ts,
|
||||||
) for entry in entries)
|
) for entry in entries)
|
||||||
|
|
||||||
|
# inserts to a GIN index are normally batched up into a pending
|
||||||
|
# list, and then all committed together once the list gets to a
|
||||||
|
# certain size. The trouble with that is that postgres (pre-9.5)
|
||||||
|
# uses work_mem to determine the length of the list, and work_mem
|
||||||
|
# is typically very large.
|
||||||
|
#
|
||||||
|
# We therefore reduce work_mem while we do the insert.
|
||||||
|
#
|
||||||
|
# (postgres 9.5 uses the separate gin_pending_list_limit setting,
|
||||||
|
# so doesn't suffer the same problem, but changing work_mem will
|
||||||
|
# be harmless)
|
||||||
|
#
|
||||||
|
# Note that we don't need to worry about restoring it on
|
||||||
|
# exception, because exceptions will cause the transaction to be
|
||||||
|
# rolled back, including the effects of the SET command.
|
||||||
|
#
|
||||||
|
# Also: we use SET rather than SET LOCAL because there's lots of
|
||||||
|
# other stuff going on in this transaction, which want to have the
|
||||||
|
# normal work_mem setting.
|
||||||
|
|
||||||
|
txn.execute("SET work_mem='256kB'")
|
||||||
txn.executemany(sql, args)
|
txn.executemany(sql, args)
|
||||||
|
txn.execute("RESET work_mem")
|
||||||
|
|
||||||
elif isinstance(self.database_engine, Sqlite3Engine):
|
elif isinstance(self.database_engine, Sqlite3Engine):
|
||||||
sql = (
|
sql = (
|
||||||
"INSERT INTO event_search (event_id, room_id, key, value)"
|
"INSERT INTO event_search (event_id, room_id, key, value)"
|
||||||
|
|
Loading…
Reference in a new issue