Merge pull request #2769 from matrix-org/matthew/hit_the_gin

switch back from GIST to GIN indexes
Richard van der Hoff 2018-02-14 16:59:03 +00:00 committed by GitHub
commit d28ec43e15
5 changed files with 109 additions and 19 deletions


@@ -242,6 +242,25 @@ class BackgroundUpdateStore(SQLBaseStore):
         """
         self._background_update_handlers[update_name] = update_handler
 
+    def register_noop_background_update(self, update_name):
+        """Register a noop handler for a background update.
+
+        This is useful when we previously did a background update, but no
+        longer wish to do the update. In this case the background update should
+        be removed from the schema delta files, but there may still be some
+        users who have the background update queued, so this method should
+        also be called to clear the update.
+
+        Args:
+            update_name (str): Name of update
+        """
+        @defer.inlineCallbacks
+        def noop_update(progress, batch_size):
+            yield self._end_background_update(update_name)
+            defer.returnValue(1)
+
+        self.register_background_update_handler(update_name, noop_update)
+
     def register_background_index_update(self, update_name, index_name,
                                          table, columns, where_clause=None,
                                          unique=False,
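To illustrate the pattern this hunk introduces (a handler that does no real work and simply marks a still-queued update as finished), here is a minimal sketch. It is not Synapse code: the FakeUpdater class, its _pending list and run_pending_updates() are hypothetical stand-ins for the real BackgroundUpdateStore machinery, with Twisted and the database left out.

    # minimal sketch of the noop-background-update pattern (hypothetical code)
    class FakeUpdater(object):
        def __init__(self):
            self._handlers = {}
            # an update that some deployments may still have queued
            self._pending = ["refresh_tokens_device_index"]

        def register_background_update_handler(self, update_name, handler):
            self._handlers[update_name] = handler

        def register_noop_background_update(self, update_name):
            def noop_update(progress, batch_size):
                # do no work; just mark the queued update as complete
                self._pending.remove(update_name)
                return 1
            self.register_background_update_handler(update_name, noop_update)

        def run_pending_updates(self):
            for update_name in list(self._pending):
                self._handlers[update_name](progress={}, batch_size=100)

    updater = FakeUpdater()
    updater.register_noop_background_update("refresh_tokens_device_index")
    updater.run_pending_updates()
    assert updater._pending == []  # the stale update has been drained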


@@ -39,12 +39,7 @@ class RegistrationStore(background_updates.BackgroundUpdateStore):
         # we no longer use refresh tokens, but it's possible that some people
         # might have a background update queued to build this index. Just
         # clear the background update.
-        @defer.inlineCallbacks
-        def noop_update(progress, batch_size):
-            yield self._end_background_update("refresh_tokens_device_index")
-            defer.returnValue(1)
-        self.register_background_update_handler(
-            "refresh_tokens_device_index", noop_update)
+        self.register_noop_background_update("refresh_tokens_device_index")
 
     @defer.inlineCallbacks
     def add_access_token_to_user(self, user_id, token, device_id=None):


@@ -13,5 +13,7 @@
  * limitations under the License.
  */
 
-INSERT into background_updates (update_name, progress_json)
-    VALUES ('event_search_postgres_gist', '{}');
+-- We no longer do this given we back it out again in schema 47
+
+-- INSERT into background_updates (update_name, progress_json)
+--     VALUES ('event_search_postgres_gist', '{}');


@@ -0,0 +1,17 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT into background_updates (update_name, progress_json)
+    VALUES ('event_search_postgres_gin', '{}');


@@ -38,6 +38,7 @@ class SearchStore(BackgroundUpdateStore):
     EVENT_SEARCH_UPDATE_NAME = "event_search"
     EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order"
     EVENT_SEARCH_USE_GIST_POSTGRES_NAME = "event_search_postgres_gist"
+    EVENT_SEARCH_USE_GIN_POSTGRES_NAME = "event_search_postgres_gin"
 
     def __init__(self, db_conn, hs):
         super(SearchStore, self).__init__(db_conn, hs)
@@ -48,9 +49,19 @@ class SearchStore(BackgroundUpdateStore):
             self.EVENT_SEARCH_ORDER_UPDATE_NAME,
             self._background_reindex_search_order
         )
-        self.register_background_update_handler(
+
+        # we used to have a background update to turn the GIN index into a
+        # GIST one; we no longer do that (obviously) because we actually want
+        # a GIN index. However, it's possible that some people might still have
+        # the background update queued, so we register a handler to clear the
+        # background update.
+        self.register_noop_background_update(
             self.EVENT_SEARCH_USE_GIST_POSTGRES_NAME,
-            self._background_reindex_gist_search
+        )
+
+        self.register_background_update_handler(
+            self.EVENT_SEARCH_USE_GIN_POSTGRES_NAME,
+            self._background_reindex_gin_search
         )
 
     @defer.inlineCallbacks
@@ -151,25 +162,48 @@ class SearchStore(BackgroundUpdateStore):
         defer.returnValue(result)
 
     @defer.inlineCallbacks
-    def _background_reindex_gist_search(self, progress, batch_size):
+    def _background_reindex_gin_search(self, progress, batch_size):
+        """This handles old synapses which used GIST indexes, if any;
+        converting them back to be GIN as per the actual schema.
+        """
 
         def create_index(conn):
             conn.rollback()
+
+            # we have to set autocommit, because postgres refuses to
+            # CREATE INDEX CONCURRENTLY without it.
             conn.set_session(autocommit=True)
-            c = conn.cursor()
-            c.execute(
-                "CREATE INDEX CONCURRENTLY event_search_fts_idx_gist"
-                " ON event_search USING GIST (vector)"
-            )
-            c.execute("DROP INDEX event_search_fts_idx")
-            conn.set_session(autocommit=False)
+
+            try:
+                c = conn.cursor()
+
+                # if we skipped the conversion to GIST, we may already/still
+                # have an event_search_fts_idx; unfortunately postgres 9.4
+                # doesn't support CREATE INDEX IF EXISTS so we just catch the
+                # exception and ignore it.
+                import psycopg2
+                try:
+                    c.execute(
+                        "CREATE INDEX CONCURRENTLY event_search_fts_idx"
+                        " ON event_search USING GIN (vector)"
+                    )
+                except psycopg2.ProgrammingError as e:
+                    logger.warn(
+                        "Ignoring error %r when trying to switch from GIST to GIN",
+                        e
+                    )
+
+                # we should now be able to delete the GIST index.
+                c.execute(
+                    "DROP INDEX IF EXISTS event_search_fts_idx_gist"
+                )
+            finally:
+                conn.set_session(autocommit=False)
 
         if isinstance(self.database_engine, PostgresEngine):
             yield self.runWithConnection(create_index)
-        yield self._end_background_update(self.EVENT_SEARCH_USE_GIST_POSTGRES_NAME)
+
+        yield self._end_background_update(self.EVENT_SEARCH_USE_GIN_POSTGRES_NAME)
         defer.returnValue(1)
 
     @defer.inlineCallbacks
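The create_index helper above leans on two Postgres details: CREATE INDEX CONCURRENTLY refuses to run inside a transaction block (hence the autocommit dance), and Postgres 9.4 has no CREATE INDEX ... IF NOT EXISTS, so a duplicate index has to be detected by catching the error. Below is a standalone sketch of the same sequence using psycopg2 directly; the connection string is hypothetical and it assumes the event_search table with its vector column already exists.

    # sketch only: assumes a reachable Postgres with an event_search table
    import psycopg2

    conn = psycopg2.connect("dbname=synapse")  # hypothetical DSN
    conn.set_session(autocommit=True)  # CONCURRENTLY cannot run in a txn block
    try:
        cur = conn.cursor()
        try:
            cur.execute(
                "CREATE INDEX CONCURRENTLY event_search_fts_idx"
                " ON event_search USING GIN (vector)"
            )
        except psycopg2.ProgrammingError as e:
            # no CREATE INDEX IF NOT EXISTS before Postgres 9.5, so treat
            # "already exists" as success
            print("ignoring %r; index probably exists already" % (e,))
        # the old GIST index, if any, can now be dropped
        cur.execute("DROP INDEX IF EXISTS event_search_fts_idx_gist")
    finally:
        conn.set_session(autocommit=False)
        conn.close()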
@@ -289,7 +323,30 @@ class SearchStore(BackgroundUpdateStore):
                 entry.stream_ordering, entry.origin_server_ts,
             ) for entry in entries)
 
+            # inserts to a GIN index are normally batched up into a pending
+            # list, and then all committed together once the list gets to a
+            # certain size. The trouble with that is that postgres (pre-9.5)
+            # uses work_mem to determine the length of the list, and work_mem
+            # is typically very large.
+            #
+            # We therefore reduce work_mem while we do the insert.
+            #
+            # (postgres 9.5 uses the separate gin_pending_list_limit setting,
+            # so doesn't suffer the same problem, but changing work_mem will
+            # be harmless)
+            #
+            # Note that we don't need to worry about restoring it on
+            # exception, because exceptions will cause the transaction to be
+            # rolled back, including the effects of the SET command.
+            #
+            # Also: we use SET rather than SET LOCAL because there's lots of
+            # other stuff going on in this transaction, which want to have the
+            # normal work_mem setting.
+
+            txn.execute("SET work_mem='256kB'")
             txn.executemany(sql, args)
+            txn.execute("RESET work_mem")
 
         elif isinstance(self.database_engine, Sqlite3Engine):
             sql = (
                 "INSERT INTO event_search (event_id, room_id, key, value)"