mirror of
https://mau.dev/maunium/synapse.git
synced 2024-12-20 23:03:53 +01:00
Optimise _update_client_ips_batch_txn
to batch together database operations. (#12252)
Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
This commit is contained in:
parent
0cd182f296
commit
e630722f11
4 changed files with 191 additions and 52 deletions
1
changelog.d/12252.feature
Normal file
1
changelog.d/12252.feature
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Move `update_client_ip` background job from the main process to the background worker.
|
|
@ -1268,6 +1268,7 @@ class DatabasePool:
|
||||||
value_names: Collection[str],
|
value_names: Collection[str],
|
||||||
value_values: Collection[Collection[Any]],
|
value_values: Collection[Collection[Any]],
|
||||||
desc: str,
|
desc: str,
|
||||||
|
lock: bool = True,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Upsert, many times.
|
Upsert, many times.
|
||||||
|
@ -1279,6 +1280,8 @@ class DatabasePool:
|
||||||
value_names: The value column names
|
value_names: The value column names
|
||||||
value_values: A list of each row's value column values.
|
value_values: A list of each row's value column values.
|
||||||
Ignored if value_names is empty.
|
Ignored if value_names is empty.
|
||||||
|
lock: True to lock the table when doing the upsert. Unused if the database engine
|
||||||
|
supports native upserts.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# We can autocommit if we are going to use native upserts
|
# We can autocommit if we are going to use native upserts
|
||||||
|
@ -1286,7 +1289,7 @@ class DatabasePool:
|
||||||
self.engine.can_native_upsert and table not in self._unsafe_to_upsert_tables
|
self.engine.can_native_upsert and table not in self._unsafe_to_upsert_tables
|
||||||
)
|
)
|
||||||
|
|
||||||
return await self.runInteraction(
|
await self.runInteraction(
|
||||||
desc,
|
desc,
|
||||||
self.simple_upsert_many_txn,
|
self.simple_upsert_many_txn,
|
||||||
table,
|
table,
|
||||||
|
@ -1294,6 +1297,7 @@ class DatabasePool:
|
||||||
key_values,
|
key_values,
|
||||||
value_names,
|
value_names,
|
||||||
value_values,
|
value_values,
|
||||||
|
lock=lock,
|
||||||
db_autocommit=autocommit,
|
db_autocommit=autocommit,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -1305,6 +1309,7 @@ class DatabasePool:
|
||||||
key_values: Collection[Iterable[Any]],
|
key_values: Collection[Iterable[Any]],
|
||||||
value_names: Collection[str],
|
value_names: Collection[str],
|
||||||
value_values: Iterable[Iterable[Any]],
|
value_values: Iterable[Iterable[Any]],
|
||||||
|
lock: bool = True,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Upsert, many times.
|
Upsert, many times.
|
||||||
|
@ -1316,6 +1321,8 @@ class DatabasePool:
|
||||||
value_names: The value column names
|
value_names: The value column names
|
||||||
value_values: A list of each row's value column values.
|
value_values: A list of each row's value column values.
|
||||||
Ignored if value_names is empty.
|
Ignored if value_names is empty.
|
||||||
|
lock: True to lock the table when doing the upsert. Unused if the database engine
|
||||||
|
supports native upserts.
|
||||||
"""
|
"""
|
||||||
if self.engine.can_native_upsert and table not in self._unsafe_to_upsert_tables:
|
if self.engine.can_native_upsert and table not in self._unsafe_to_upsert_tables:
|
||||||
return self.simple_upsert_many_txn_native_upsert(
|
return self.simple_upsert_many_txn_native_upsert(
|
||||||
|
@ -1323,7 +1330,7 @@ class DatabasePool:
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return self.simple_upsert_many_txn_emulated(
|
return self.simple_upsert_many_txn_emulated(
|
||||||
txn, table, key_names, key_values, value_names, value_values
|
txn, table, key_names, key_values, value_names, value_values, lock=lock
|
||||||
)
|
)
|
||||||
|
|
||||||
def simple_upsert_many_txn_emulated(
|
def simple_upsert_many_txn_emulated(
|
||||||
|
@ -1334,6 +1341,7 @@ class DatabasePool:
|
||||||
key_values: Collection[Iterable[Any]],
|
key_values: Collection[Iterable[Any]],
|
||||||
value_names: Collection[str],
|
value_names: Collection[str],
|
||||||
value_values: Iterable[Iterable[Any]],
|
value_values: Iterable[Iterable[Any]],
|
||||||
|
lock: bool = True,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Upsert, many times, but without native UPSERT support or batching.
|
Upsert, many times, but without native UPSERT support or batching.
|
||||||
|
@ -1345,17 +1353,24 @@ class DatabasePool:
|
||||||
value_names: The value column names
|
value_names: The value column names
|
||||||
value_values: A list of each row's value column values.
|
value_values: A list of each row's value column values.
|
||||||
Ignored if value_names is empty.
|
Ignored if value_names is empty.
|
||||||
|
lock: True to lock the table when doing the upsert.
|
||||||
"""
|
"""
|
||||||
# No value columns, therefore make a blank list so that the following
|
# No value columns, therefore make a blank list so that the following
|
||||||
# zip() works correctly.
|
# zip() works correctly.
|
||||||
if not value_names:
|
if not value_names:
|
||||||
value_values = [() for x in range(len(key_values))]
|
value_values = [() for x in range(len(key_values))]
|
||||||
|
|
||||||
|
if lock:
|
||||||
|
# Lock the table just once, to prevent it being done once per row.
|
||||||
|
# Note that, according to Postgres' documentation, once obtained,
|
||||||
|
# the lock is held for the remainder of the current transaction.
|
||||||
|
self.engine.lock_table(txn, "user_ips")
|
||||||
|
|
||||||
for keyv, valv in zip(key_values, value_values):
|
for keyv, valv in zip(key_values, value_values):
|
||||||
_keys = {x: y for x, y in zip(key_names, keyv)}
|
_keys = {x: y for x, y in zip(key_names, keyv)}
|
||||||
_vals = {x: y for x, y in zip(value_names, valv)}
|
_vals = {x: y for x, y in zip(value_names, valv)}
|
||||||
|
|
||||||
self.simple_upsert_txn_emulated(txn, table, _keys, _vals)
|
self.simple_upsert_txn_emulated(txn, table, _keys, _vals, lock=False)
|
||||||
|
|
||||||
def simple_upsert_many_txn_native_upsert(
|
def simple_upsert_many_txn_native_upsert(
|
||||||
self,
|
self,
|
||||||
|
@ -1792,6 +1807,86 @@ class DatabasePool:
|
||||||
|
|
||||||
return txn.rowcount
|
return txn.rowcount
|
||||||
|
|
||||||
|
async def simple_update_many(
    self,
    table: str,
    key_names: Collection[str],
    key_values: Collection[Iterable[Any]],
    value_names: Collection[str],
    value_values: Iterable[Iterable[Any]],
    desc: str,
) -> None:
    """
    Update, many times, using batching where possible.
    If the keys don't match anything, nothing will be updated.

    This is a thin asynchronous wrapper which runs `simple_update_many_txn`
    via `runInteraction`.

    Args:
        table: The table to update
        key_names: The key column names.
        key_values: A list of each row's key column values.
        value_names: The names of value columns to update.
        value_values: Each row's value column values, one row per entry of
            `key_values`. Any iterable is accepted; it is copied into a
            list before the transaction function is invoked.
        desc: Passed to `runInteraction` as the description of the
            interaction.
    """

    # `simple_update_many_txn` calls len() on the value rows, which a lazy
    # iterable (e.g. a generator) does not support. Materialise the rows here
    # so that this method really does accept the `Iterable` it is annotated
    # with.
    value_rows = list(value_values)

    await self.runInteraction(
        desc,
        self.simple_update_many_txn,
        table,
        key_names,
        key_values,
        value_names,
        value_rows,
    )
|
||||||
|
|
||||||
|
@staticmethod
def simple_update_many_txn(
    txn: "LoggingTransaction",
    table: str,
    key_names: Collection[str],
    key_values: Collection[Iterable[Any]],
    value_names: Collection[str],
    value_values: Collection[Iterable[Any]],
) -> None:
    """
    Update, many times, using batching where possible.
    If the keys don't match anything, nothing will be updated.

    A single parameterised `UPDATE` statement is built and handed to
    `txn.execute_batch` together with one argument tuple per row.

    Args:
        txn: The transaction to run the batched update on.
        table: The table to update
        key_names: The key column names.
        key_values: A list of each row's key column values.
        value_names: The names of value columns to update.
        value_values: A list of each row's value column values.

    Raises:
        ValueError: if `key_values` and `value_values` do not contain the
            same number of rows.
    """

    if len(value_values) != len(key_values):
        raise ValueError(
            f"{len(key_values)} key rows and {len(value_values)} value rows: should be the same number."
        )

    # List of tuples of (value values, then key values)
    # (This matches the order needed for the query)
    #
    # Each row is built exactly once: adding the rows a second time would
    # make every UPDATE run twice, and would consume one-shot row iterables
    # twice (yielding empty, wrongly-sized argument tuples).
    args = [tuple(vs) + tuple(ks) for vs, ks in zip(value_values, key_values)]

    # 'col1 = ?, col2 = ?, ...'
    set_clause = ", ".join(f"{n} = ?" for n in value_names)

    if key_names:
        # 'WHERE col3 = ? AND col4 = ? AND col5 = ?'
        where_clause = "WHERE " + (" AND ".join(f"{n} = ?" for n in key_names))
    else:
        # No key columns: the statement applies to every row of the table.
        where_clause = ""

    # UPDATE mytable SET col1 = ?, col2 = ? WHERE col3 = ? AND col4 = ?
    sql = f"""
        UPDATE {table} SET {set_clause} {where_clause}
    """

    txn.execute_batch(sql, args)
|
||||||
|
|
||||||
async def simple_update_one(
|
async def simple_update_one(
|
||||||
self,
|
self,
|
||||||
table: str,
|
table: str,
|
||||||
|
|
|
@ -616,6 +616,7 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore, MonthlyActiveUsersWorke
|
||||||
to_update = self._batch_row_update
|
to_update = self._batch_row_update
|
||||||
self._batch_row_update = {}
|
self._batch_row_update = {}
|
||||||
|
|
||||||
|
if to_update:
|
||||||
await self.db_pool.runInteraction(
|
await self.db_pool.runInteraction(
|
||||||
"_update_client_ips_batch", self._update_client_ips_batch_txn, to_update
|
"_update_client_ips_batch", self._update_client_ips_batch_txn, to_update
|
||||||
)
|
)
|
||||||
|
@ -629,41 +630,42 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore, MonthlyActiveUsersWorke
|
||||||
self._update_on_this_worker
|
self._update_on_this_worker
|
||||||
), "This worker is not designated to update client IPs"
|
), "This worker is not designated to update client IPs"
|
||||||
|
|
||||||
if "user_ips" in self.db_pool._unsafe_to_upsert_tables or (
|
# Keys and values for the `user_ips` upsert.
|
||||||
not self.database_engine.can_native_upsert
|
user_ips_keys = []
|
||||||
):
|
user_ips_values = []
|
||||||
self.database_engine.lock_table(txn, "user_ips")
|
|
||||||
|
# Keys and values for the `devices` update.
|
||||||
|
devices_keys = []
|
||||||
|
devices_values = []
|
||||||
|
|
||||||
for entry in to_update.items():
|
for entry in to_update.items():
|
||||||
(user_id, access_token, ip), (user_agent, device_id, last_seen) = entry
|
(user_id, access_token, ip), (user_agent, device_id, last_seen) = entry
|
||||||
|
user_ips_keys.append((user_id, access_token, ip))
|
||||||
self.db_pool.simple_upsert_txn(
|
user_ips_values.append((user_agent, device_id, last_seen))
|
||||||
txn,
|
|
||||||
table="user_ips",
|
|
||||||
keyvalues={"user_id": user_id, "access_token": access_token, "ip": ip},
|
|
||||||
values={
|
|
||||||
"user_agent": user_agent,
|
|
||||||
"device_id": device_id,
|
|
||||||
"last_seen": last_seen,
|
|
||||||
},
|
|
||||||
lock=False,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Technically an access token might not be associated with
|
# Technically an access token might not be associated with
|
||||||
# a device so we need to check.
|
# a device so we need to check.
|
||||||
if device_id:
|
if device_id:
|
||||||
# this is always an update rather than an upsert: the row should
|
devices_keys.append((user_id, device_id))
|
||||||
# already exist, and if it doesn't, that may be because it has been
|
devices_values.append((user_agent, last_seen, ip))
|
||||||
# deleted, and we don't want to re-create it.
|
|
||||||
self.db_pool.simple_update_txn(
|
self.db_pool.simple_upsert_many_txn(
|
||||||
|
txn,
|
||||||
|
table="user_ips",
|
||||||
|
key_names=("user_id", "access_token", "ip"),
|
||||||
|
key_values=user_ips_keys,
|
||||||
|
value_names=("user_agent", "device_id", "last_seen"),
|
||||||
|
value_values=user_ips_values,
|
||||||
|
)
|
||||||
|
|
||||||
|
if devices_values:
|
||||||
|
self.db_pool.simple_update_many_txn(
|
||||||
txn,
|
txn,
|
||||||
table="devices",
|
table="devices",
|
||||||
keyvalues={"user_id": user_id, "device_id": device_id},
|
key_names=("user_id", "device_id"),
|
||||||
updatevalues={
|
key_values=devices_keys,
|
||||||
"user_agent": user_agent,
|
value_names=("user_agent", "last_seen", "ip"),
|
||||||
"last_seen": last_seen,
|
value_values=devices_values,
|
||||||
"ip": ip,
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
|
|
||||||
async def get_last_client_ip_by_device(
|
async def get_last_client_ip_by_device(
|
||||||
|
|
|
@ -14,7 +14,7 @@
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
import secrets
|
import secrets
|
||||||
from typing import Any, Dict, Generator, List, Tuple
|
from typing import Generator, Tuple
|
||||||
|
|
||||||
from twisted.test.proto_helpers import MemoryReactor
|
from twisted.test.proto_helpers import MemoryReactor
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ from synapse.util import Clock
|
||||||
from tests import unittest
|
from tests import unittest
|
||||||
|
|
||||||
|
|
||||||
class UpsertManyTests(unittest.HomeserverTestCase):
|
class UpdateUpsertManyTests(unittest.HomeserverTestCase):
|
||||||
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
|
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
|
||||||
self.storage = hs.get_datastores().main
|
self.storage = hs.get_datastores().main
|
||||||
|
|
||||||
|
@ -46,9 +46,13 @@ class UpsertManyTests(unittest.HomeserverTestCase):
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
def _dump_to_tuple(
|
def _dump_table_to_tuple(self) -> Generator[Tuple[int, str, str], None, None]:
|
||||||
self, res: List[Dict[str, Any]]
|
res = self.get_success(
|
||||||
) -> Generator[Tuple[int, str, str], None, None]:
|
self.storage.db_pool.simple_select_list(
|
||||||
|
self.table_name, None, ["id, username, value"]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
for i in res:
|
for i in res:
|
||||||
yield (i["id"], i["username"], i["value"])
|
yield (i["id"], i["username"], i["value"])
|
||||||
|
|
||||||
|
@ -75,13 +79,8 @@ class UpsertManyTests(unittest.HomeserverTestCase):
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check results are what we expect
|
# Check results are what we expect
|
||||||
res = self.get_success(
|
|
||||||
self.storage.db_pool.simple_select_list(
|
|
||||||
self.table_name, None, ["id, username, value"]
|
|
||||||
)
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
set(self._dump_to_tuple(res)),
|
set(self._dump_table_to_tuple()),
|
||||||
{(1, "user1", "hello"), (2, "user2", "there")},
|
{(1, "user1", "hello"), (2, "user2", "there")},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -102,12 +101,54 @@ class UpsertManyTests(unittest.HomeserverTestCase):
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check results are what we expect
|
# Check results are what we expect
|
||||||
res = self.get_success(
|
|
||||||
self.storage.db_pool.simple_select_list(
|
|
||||||
self.table_name, None, ["id, username, value"]
|
|
||||||
)
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
set(self._dump_to_tuple(res)),
|
set(self._dump_table_to_tuple()),
|
||||||
{(1, "user1", "hello"), (2, "user2", "bleb")},
|
{(1, "user1", "hello"), (2, "user2", "bleb")},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_simple_update_many(self):
    """
    simple_update_many applies a batch of updates in one call, leaving
    rows whose keys are not mentioned (or do not exist) untouched.
    """
    seed_rows = [(1, "alice", "A"), (2, "bob", "B"), (3, "charlie", "C")]

    # Populate the table with three known rows.
    insertion = self.storage.db_pool.simple_insert_many(
        table=self.table_name,
        keys=("id", "username", "value"),
        values=seed_rows,
        desc="insert",
    )
    self.get_success(insertion)

    # Sanity check: the rows landed in the table.
    self.assertEqual(
        set(self._dump_table_to_tuple()),
        {(1, "alice", "A"), (2, "bob", "B"), (3, "charlie", "C")},
    )

    # Update two existing users plus one user that does not exist.
    usernames = (("alice",), ("bob",), ("stranger",))
    new_values = (("aaa!",), ("bbb!",), ("???",))
    update = self.storage.db_pool.simple_update_many(
        table=self.table_name,
        key_names=("username",),
        key_values=usernames,
        value_names=("value",),
        value_values=new_values,
        desc="update_many1",
    )
    self.get_success(update)

    # alice and bob carry their new values; charlie has been left alone and
    # no row was created for the unknown "stranger".
    self.assertEqual(
        set(self._dump_table_to_tuple()),
        {(1, "alice", "aaa!"), (2, "bob", "bbb!"), (3, "charlie", "C")},
    )
|
||||||
|
|
Loading…
Reference in a new issue