From 12649ccedd858cbe075271ea234b1e268f973c5a Mon Sep 17 00:00:00 2001 From: PiotrKozimor <37144818+PiotrKozimor@users.noreply.github.com> Date: Tue, 27 Sep 2022 10:41:36 +0200 Subject: [PATCH] Improve selectRoomIDsWithAnyMembershipSQL performance (#2738) Recently I have observed that dendrite spends a lot of time (~390s) in `selectRoomIDsWithAnyMembershipSQL` query ``` dendrite_syncapi=# select total_exec_time, left(query,100) from pg_stat_statements order by total_exec_time desc limit 5 ; total_exec_time | left --------------------+------------------------------------------------------------------------------------------------------ 747826.5800519128 | SELECT event_id, id, headered_event_json, session_id, exclude_from_sync, transaction_id, history_vis 389130.5490339942 | SELECT DISTINCT room_id, membership FROM syncapi_current_room_state WHERE type = $2 AND state_key = 376104.17514700035 | SELECT psd.datname, xact_commit, xact_rollback, blks_read, blks_hit, tup_returned, tup_fetched, tup_ 363644.164092031 | SELECT event_type_nid, event_state_key_nid, event_nid FROM roomserver_events WHERE event_nid = ANY($ 58570.48104699995 | SELECT event_id, headered_event_json FROM syncapi_current_room_state WHERE room_id = $1 AND ( $2::te (5 rows) ``` Explain analyze showed correct usage of `syncapi_room_state_unique` index: ``` dendrite_syncapi=# explain analyze SELECT distinct room_id, membership FROM syncapi_current_room_state WHERE type = 'm.room.member' AND state_key = '@qjfl:dendrite.stg.globekeeper.com'; QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Unique (cost=2749.38..2749.56 rows=24 width=52) (actual time=2.933..2.956 rows=65 loops=1) -> Sort (cost=2749.38..2749.44 rows=24 width=52) (actual time=2.932..2.937 rows=65 loops=1) Sort Key: room_id, membership Sort Method: quicksort Memory: 34kB -> Index Scan using syncapi_room_state_unique on syncapi_current_room_state (cost=0.41..2748.83 rows=24 width=52) (actual time=0.030..2.890 rows=65 loops=1) Index Cond: ((type = 'm.room.member'::text) AND (state_key = '@qjfl:dendrite.stg.globekeeper.com'::text)) Planning Time: 0.140 ms Execution Time: 2.990 ms (8 rows) ``` Multi-column indexes in Postgres shall perform well for leftmost columns, but I gave it a try and created `syncapi_current_room_state_type_state_key_idx` index. I could observe significant performance improvement. Execution time dropped from 2.9 ms to 0.24 ms: ``` explain analyze SELECT distinct room_id, membership FROM syncapi_current_room_state WHERE type = 'm.room.member' AND state_key = '@qjfl:dendrite.stg.globekeeper.com'; QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------------------- Unique (cost=96.46..96.64 rows=24 width=52) (actual time=0.199..0.218 rows=65 loops=1) -> Sort (cost=96.46..96.52 rows=24 width=52) (actual time=0.199..0.202 rows=65 loops=1) Sort Key: room_id, membership Sort Method: quicksort Memory: 34kB -> Bitmap Heap Scan on syncapi_current_room_state (cost=4.53..95.91 rows=24 width=52) (actual time=0.048..0.139 rows=65 loops=1) Recheck Cond: ((type = 'm.room.member'::text) AND (state_key = '@qjfl:dendrite.stg.globekeeper.com'::text)) Heap Blocks: exact=59 -> Bitmap Index Scan on syncapi_current_room_state_type_state_key_idx (cost=0.00..4.53 rows=24 width=0) (actual time=0.037..0.037 rows=65 loops=1) Index Cond: ((type = 'm.room.member'::text) AND (state_key = '@qjfl:dendrite.stg.globekeeper.com'::text)) Planning Time: 0.236 ms Execution Time: 0.242 ms (11 rows) ``` Next improvement is skipping DISTINCT and rely on map assignment in `SelectRoomIDsWithAnyMembership`. Execution time drops by almost half: ``` explain analyze SELECT room_id, membership FROM syncapi_current_room_state WHERE type = 'm.room.member' AND state_key = '@qjfl:dendrite.stg.globekeeper.com'; QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------- Bitmap Heap Scan on syncapi_current_room_state (cost=4.53..95.91 rows=24 width=52) (actual time=0.032..0.113 rows=65 loops=1) Recheck Cond: ((type = 'm.room.member'::text) AND (state_key = '@qjfl:dendrite.stg.globekeeper.com'::text)) Heap Blocks: exact=59 -> Bitmap Index Scan on syncapi_current_room_state_type_state_key_idx (cost=0.00..4.53 rows=24 width=0) (actual time=0.021..0.021 rows=65 loops=1) Index Cond: ((type = 'm.room.member'::text) AND (state_key = '@qjfl:dendrite.stg.globekeeper.com'::text)) Planning Time: 0.087 ms Execution Time: 0.136 ms (7 rows) ``` In our env we spend only 1s on inserting to table, so the write penalty of creating an index should be small. ``` dendrite_syncapi=# select total_exec_time, left(query,100) from pg_stat_statements where query like '%INSERT%syncapi_current_room_state%' order by total_exec_time desc; total_exec_time | left --------------------+------------------------------------------------------------------------------------------------------ 1139.9057619999971 | INSERT INTO syncapi_current_room_state (room_id, event_id, type, sender, contains_url, state_key, he (1 row) ``` This PR does not require test modifications. ### Pull Request Checklist * [x] I have added added tests for PR _or_ I have justified why this PR doesn't need tests. * [x] Pull request includes a [sign off](https://github.com/matrix-org/dendrite/blob/main/docs/CONTRIBUTING.md#sign-off) Signed-off-by: `Piotr Kozimor ` --- syncapi/storage/postgres/current_room_state_table.go | 4 +++- syncapi/storage/sqlite3/current_room_state_table.go | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/syncapi/storage/postgres/current_room_state_table.go b/syncapi/storage/postgres/current_room_state_table.go index 083d10b88..5e6daaaf8 100644 --- a/syncapi/storage/postgres/current_room_state_table.go +++ b/syncapi/storage/postgres/current_room_state_table.go @@ -62,6 +62,8 @@ CREATE UNIQUE INDEX IF NOT EXISTS syncapi_event_id_idx ON syncapi_current_room_s CREATE INDEX IF NOT EXISTS syncapi_membership_idx ON syncapi_current_room_state(type, state_key, membership) WHERE membership IS NOT NULL AND membership != 'leave'; -- for querying state by event IDs CREATE UNIQUE INDEX IF NOT EXISTS syncapi_current_room_state_eventid_idx ON syncapi_current_room_state(event_id); +-- for improving selectRoomIDsWithAnyMembershipSQL +CREATE INDEX IF NOT EXISTS syncapi_current_room_state_type_state_key_idx ON syncapi_current_room_state(type, state_key); ` const upsertRoomStateSQL = "" + @@ -80,7 +82,7 @@ const selectRoomIDsWithMembershipSQL = "" + "SELECT DISTINCT room_id FROM syncapi_current_room_state WHERE type = 'm.room.member' AND state_key = $1 AND membership = $2" const selectRoomIDsWithAnyMembershipSQL = "" + - "SELECT DISTINCT room_id, membership FROM syncapi_current_room_state WHERE type = 'm.room.member' AND state_key = $1" + "SELECT room_id, membership FROM syncapi_current_room_state WHERE type = 'm.room.member' AND state_key = $1" const selectCurrentStateSQL = "" + "SELECT event_id, headered_event_json FROM syncapi_current_room_state WHERE room_id = $1" + diff --git a/syncapi/storage/sqlite3/current_room_state_table.go b/syncapi/storage/sqlite3/current_room_state_table.go index b88c11f88..bd1271dd6 100644 --- a/syncapi/storage/sqlite3/current_room_state_table.go +++ b/syncapi/storage/sqlite3/current_room_state_table.go @@ -51,6 +51,8 @@ CREATE UNIQUE INDEX IF NOT EXISTS syncapi_event_id_idx ON syncapi_current_room_s -- CREATE INDEX IF NOT EXISTS syncapi_membership_idx ON syncapi_current_room_state(type, state_key, membership) WHERE membership IS NOT NULL AND membership != 'leave'; -- for querying state by event IDs CREATE UNIQUE INDEX IF NOT EXISTS syncapi_current_room_state_eventid_idx ON syncapi_current_room_state(event_id); +-- for improving selectRoomIDsWithAnyMembershipSQL +CREATE INDEX IF NOT EXISTS syncapi_current_room_state_type_state_key_idx ON syncapi_current_room_state(type, state_key); ` const upsertRoomStateSQL = "" + @@ -69,7 +71,7 @@ const selectRoomIDsWithMembershipSQL = "" + "SELECT DISTINCT room_id FROM syncapi_current_room_state WHERE type = 'm.room.member' AND state_key = $1 AND membership = $2" const selectRoomIDsWithAnyMembershipSQL = "" + - "SELECT DISTINCT room_id, membership FROM syncapi_current_room_state WHERE type = 'm.room.member' AND state_key = $1" + "SELECT room_id, membership FROM syncapi_current_room_state WHERE type = 'm.room.member' AND state_key = $1" const selectCurrentStateSQL = "" + "SELECT event_id, headered_event_json FROM syncapi_current_room_state WHERE room_id = $1"