forked from MirrorHub/synapse
Use a background task to update databases to use the full text search
This commit is contained in:
parent
36c58b18a3
commit
90b503216c
2 changed files with 25 additions and 77 deletions
|
@ -22,7 +22,7 @@ import ujson
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
POSTGRES_SQL = """
|
POSTGRES_TABLE = """
|
||||||
CREATE TABLE IF NOT EXISTS event_search (
|
CREATE TABLE IF NOT EXISTS event_search (
|
||||||
event_id TEXT,
|
event_id TEXT,
|
||||||
room_id TEXT,
|
room_id TEXT,
|
||||||
|
@ -31,22 +31,6 @@ CREATE TABLE IF NOT EXISTS event_search (
|
||||||
vector tsvector
|
vector tsvector
|
||||||
);
|
);
|
||||||
|
|
||||||
INSERT INTO event_search SELECT
|
|
||||||
event_id, room_id, json::json->>'sender', 'content.body',
|
|
||||||
to_tsvector('english', json::json->'content'->>'body')
|
|
||||||
FROM events NATURAL JOIN event_json WHERE type = 'm.room.message';
|
|
||||||
|
|
||||||
INSERT INTO event_search SELECT
|
|
||||||
event_id, room_id, json::json->>'sender', 'content.name',
|
|
||||||
to_tsvector('english', json::json->'content'->>'name')
|
|
||||||
FROM events NATURAL JOIN event_json WHERE type = 'm.room.name';
|
|
||||||
|
|
||||||
INSERT INTO event_search SELECT
|
|
||||||
event_id, room_id, json::json->>'sender', 'content.topic',
|
|
||||||
to_tsvector('english', json::json->'content'->>'topic')
|
|
||||||
FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic';
|
|
||||||
|
|
||||||
|
|
||||||
CREATE INDEX event_search_fts_idx ON event_search USING gin(vector);
|
CREATE INDEX event_search_fts_idx ON event_search USING gin(vector);
|
||||||
CREATE INDEX event_search_ev_idx ON event_search(event_id);
|
CREATE INDEX event_search_ev_idx ON event_search(event_id);
|
||||||
CREATE INDEX event_search_ev_ridx ON event_search(room_id);
|
CREATE INDEX event_search_ev_ridx ON event_search(room_id);
|
||||||
|
@ -61,67 +45,31 @@ SQLITE_TABLE = (
|
||||||
|
|
||||||
def run_upgrade(cur, database_engine, *args, **kwargs):
|
def run_upgrade(cur, database_engine, *args, **kwargs):
|
||||||
if isinstance(database_engine, PostgresEngine):
|
if isinstance(database_engine, PostgresEngine):
|
||||||
run_postgres_upgrade(cur)
|
for statement in get_statements(POSTGRES_TABLE.splitlines()):
|
||||||
|
cur.execute(statement)
|
||||||
return
|
return
|
||||||
|
|
||||||
if isinstance(database_engine, Sqlite3Engine):
|
if isinstance(database_engine, Sqlite3Engine):
|
||||||
run_sqlite_upgrade(cur)
|
cur.execute(SQLITE_TABLE)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
cur.execute("SELECT MIN(stream_ordering) FROM events")
|
||||||
|
rows = cur.fetchall()
|
||||||
|
min_stream_id = rows[0][0]
|
||||||
|
|
||||||
def run_postgres_upgrade(cur):
|
cur.execute("SELECT MAX(stream_ordering) FROM events")
|
||||||
for statement in get_statements(POSTGRES_SQL.splitlines()):
|
rows = cur.fetchall()
|
||||||
cur.execute(statement)
|
max_stream_id = rows[0][0]
|
||||||
|
|
||||||
|
if min_stream_id is not None and max_stream_id is not None:
|
||||||
|
progress = {
|
||||||
|
"target_min_stream_id_inclusive": min_stream_id,
|
||||||
|
"max_stream_id_exclusive": max_stream_id + 1,
|
||||||
|
"rows_inserted": 0,
|
||||||
|
}
|
||||||
|
progress_json = ujson.dumps(progress)
|
||||||
|
|
||||||
def run_sqlite_upgrade(cur):
|
cur.execute(
|
||||||
cur.execute(SQLITE_TABLE)
|
"INSERT into background_updates (update_name, progress_json)"
|
||||||
|
" VALUES (?, ?)", ("event_search", progress_json)
|
||||||
rowid = -1
|
)
|
||||||
while True:
|
|
||||||
cur.execute(
|
|
||||||
"SELECT rowid, json FROM event_json"
|
|
||||||
" WHERE rowid > ?"
|
|
||||||
" ORDER BY rowid ASC LIMIT 100",
|
|
||||||
(rowid,)
|
|
||||||
)
|
|
||||||
|
|
||||||
res = cur.fetchall()
|
|
||||||
|
|
||||||
if not res:
|
|
||||||
break
|
|
||||||
|
|
||||||
events = [
|
|
||||||
ujson.loads(js)
|
|
||||||
for _, js in res
|
|
||||||
]
|
|
||||||
|
|
||||||
rowid = max(rid for rid, _ in res)
|
|
||||||
|
|
||||||
rows = []
|
|
||||||
for ev in events:
|
|
||||||
content = ev.get("content", {})
|
|
||||||
body = content.get("body", None)
|
|
||||||
name = content.get("name", None)
|
|
||||||
topic = content.get("topic", None)
|
|
||||||
sender = ev.get("sender", None)
|
|
||||||
if ev["type"] == "m.room.message" and body:
|
|
||||||
rows.append((
|
|
||||||
ev["event_id"], ev["room_id"], sender, "content.body", body
|
|
||||||
))
|
|
||||||
if ev["type"] == "m.room.name" and name:
|
|
||||||
rows.append((
|
|
||||||
ev["event_id"], ev["room_id"], sender, "content.name", name
|
|
||||||
))
|
|
||||||
if ev["type"] == "m.room.topic" and topic:
|
|
||||||
rows.append((
|
|
||||||
ev["event_id"], ev["room_id"], sender, "content.topic", topic
|
|
||||||
))
|
|
||||||
|
|
||||||
if rows:
|
|
||||||
logger.info(rows)
|
|
||||||
cur.executemany(
|
|
||||||
"INSERT INTO event_search (event_id, room_id, sender, key, value)"
|
|
||||||
" VALUES (?,?,?,?,?)",
|
|
||||||
rows
|
|
||||||
)
|
|
||||||
|
|
|
@ -37,8 +37,8 @@ class SearchStore(BackgroundUpdateStore):
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def _background_reindex_search(self, progress, batch_size):
|
def _background_reindex_search(self, progress, batch_size):
|
||||||
target_min_stream_id = progress["target_min_stream_id"]
|
target_min_stream_id = progress["target_min_stream_id_inclusive"]
|
||||||
max_stream_id = progress["max_stream_id"]
|
max_stream_id = progress["max_stream_id_exclusive"]
|
||||||
rows_inserted = progress.get("rows_inserted", 0)
|
rows_inserted = progress.get("rows_inserted", 0)
|
||||||
|
|
||||||
INSERT_CLUMP_SIZE = 1000
|
INSERT_CLUMP_SIZE = 1000
|
||||||
|
@ -105,8 +105,8 @@ class SearchStore(BackgroundUpdateStore):
|
||||||
txn.execute_many(sql, clump)
|
txn.execute_many(sql, clump)
|
||||||
|
|
||||||
progress = {
|
progress = {
|
||||||
"target_max_stream_id": target_min_stream_id,
|
"target_min_stream_id_inclusive": target_min_stream_id,
|
||||||
"max_stream_id": min_stream_id,
|
"max_stream_id_exclusive": min_stream_id,
|
||||||
"rows_inserted": rows_inserted + len(event_search_rows)
|
"rows_inserted": rows_inserted + len(event_search_rows)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue