From 2ade05dca3d6da67e35c3a8ccdd278221f2566ed Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Sep 2019 14:16:10 +0100 Subject: [PATCH 1/6] Add last seen info to devices table. This allows us to purge old user_ips entries without having to preserve the latest last seen info for active devices. --- synapse/storage/client_ips.py | 15 +++++++++++++ .../schema/delta/56/devices_last_seen.sql | 21 +++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 synapse/storage/schema/delta/56/devices_last_seen.sql diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 6db8c5407..4db2e7f48 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -354,6 +354,21 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): }, lock=False, ) + + # Technically an access token might not be associated with + # a device so we need to check. + if device_id: + self._simple_upsert_txn( + txn, + table="devices", + keyvalues={"user_id": user_id, "device_id": device_id}, + values={ + "user_agent": user_agent, + "last_seen": last_seen, + "ip": ip, + }, + lock=False, + ) except Exception as e: # Failed to upsert, log and continue logger.error("Failed to insert client IP %r: %r", entry, e) diff --git a/synapse/storage/schema/delta/56/devices_last_seen.sql b/synapse/storage/schema/delta/56/devices_last_seen.sql new file mode 100644 index 000000000..8818eeeb7 --- /dev/null +++ b/synapse/storage/schema/delta/56/devices_last_seen.sql @@ -0,0 +1,21 @@ +/* Copyright 2019 Matrix.org Foundation CIC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Track last seen information for a device in the devices table, rather +-- than relying on it being in the user_ips table (which we want to be able +-- to purge old entries from) +ALTER TABLE devices ADD COLUMN last_seen BIGINT; +ALTER TABLE devices ADD COLUMN ip TEXT; +ALTER TABLE devices ADD COLUMN user_agent TEXT; From ed80231ade20ce7881bb2026692fe3a6252f1c02 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Sep 2019 15:59:43 +0100 Subject: [PATCH 2/6] Add BG update to populate devices last seen info --- synapse/storage/client_ips.py | 52 +++++++++++++++++++ .../schema/delta/56/devices_last_seen.sql | 3 ++ 2 files changed, 55 insertions(+) diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 4db2e7f48..883956226 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -85,6 +85,11 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): "user_ips_drop_nonunique_index", self._remove_user_ip_nonunique ) + # Update the last seen info in devices. + self.register_background_update_handler( + "devices_last_seen", self._devices_last_seen_update + ) + # (user_id, access_token, ip,) -> (user_agent, device_id, last_seen) self._batch_row_update = {} @@ -485,3 +490,50 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): } for (access_token, ip), (user_agent, last_seen) in iteritems(results) ) + + @defer.inlineCallbacks + def _devices_last_seen_update(self, progress, batch_size): + """Background update to insert last seen info into devices table + """ + + last_user_id = progress.get("last_user_id", "") + last_device_id = progress.get("last_device_id", "") + + def _devices_last_seen_update_txn(txn): + sql = """ + SELECT u.last_seen, u.ip, u.user_agent, user_id, device_id FROM devices + INNER JOIN user_ips AS u USING (user_id, device_id) + WHERE user_id > ? OR (user_id = ? AND device_id > ?) + ORDER BY user_id ASC, device_id ASC + LIMIT ? + """ + txn.execute(sql, (last_user_id, last_user_id, last_device_id, batch_size)) + + rows = txn.fetchall() + if not rows: + return 0 + + sql = """ + UPDATE devices + SET last_seen = ?, ip = ?, user_agent = ? + WHERE user_id = ? AND device_id = ? + """ + txn.execute_batch(sql, rows) + + _, _, _, user_id, device_id = rows[-1] + self._background_update_progress_txn( + txn, + "devices_last_seen", + {"last_user_id": user_id, "last_device_id": device_id}, + ) + + return len(rows) + + updated = yield self.runInteraction( + "_devices_last_seen_update", _devices_last_seen_update_txn + ) + + if not updated: + yield self._end_background_update("devices_last_seen") + + return updated diff --git a/synapse/storage/schema/delta/56/devices_last_seen.sql b/synapse/storage/schema/delta/56/devices_last_seen.sql index 8818eeeb7..dfa902d0b 100644 --- a/synapse/storage/schema/delta/56/devices_last_seen.sql +++ b/synapse/storage/schema/delta/56/devices_last_seen.sql @@ -19,3 +19,6 @@ ALTER TABLE devices ADD COLUMN last_seen BIGINT; ALTER TABLE devices ADD COLUMN ip TEXT; ALTER TABLE devices ADD COLUMN user_agent TEXT; + +INSERT INTO background_updates (update_name, progress_json) VALUES + ('devices_last_seen', '{}'); From 51d28272e20d799b2e35a8a14b3c1d9d5f555d10 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Sep 2019 16:00:18 +0100 Subject: [PATCH 3/6] Query devices table for last seen info. This is a) simpler than querying user_ips directly and b) means we can purge older entries from user_ips without losing the required info. The storage functions now no longer return the access_token, since it was unused. --- synapse/storage/client_ips.py | 57 +++++--------------------------- tests/storage/test_client_ips.py | 1 - 2 files changed, 8 insertions(+), 50 deletions(-) diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 883956226..a4e6d9dbe 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -392,19 +392,14 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): keys giving the column names """ - res = yield self.runInteraction( - "get_last_client_ip_by_device", - self._get_last_client_ip_by_device_txn, - user_id, - device_id, - retcols=( - "user_id", - "access_token", - "ip", - "user_agent", - "device_id", - "last_seen", - ), + keyvalues = {"user_id": user_id} + if device_id: + keyvalues["device_id"] = device_id + + res = yield self._simple_select_list( + table="devices", + keyvalues=keyvalues, + retcols=("user_id", "ip", "user_agent", "device_id", "last_seen"), ) ret = {(d["user_id"], d["device_id"]): d for d in res} @@ -423,42 +418,6 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): } return ret - @classmethod - def _get_last_client_ip_by_device_txn(cls, txn, user_id, device_id, retcols): - where_clauses = [] - bindings = [] - if device_id is None: - where_clauses.append("user_id = ?") - bindings.extend((user_id,)) - else: - where_clauses.append("(user_id = ? AND device_id = ?)") - bindings.extend((user_id, device_id)) - - if not where_clauses: - return [] - - inner_select = ( - "SELECT MAX(last_seen) mls, user_id, device_id FROM user_ips " - "WHERE %(where)s " - "GROUP BY user_id, device_id" - ) % {"where": " OR ".join(where_clauses)} - - sql = ( - "SELECT %(retcols)s FROM user_ips " - "JOIN (%(inner_select)s) ips ON" - " user_ips.last_seen = ips.mls AND" - " user_ips.user_id = ips.user_id AND" - " (user_ips.device_id = ips.device_id OR" - " (user_ips.device_id IS NULL AND ips.device_id IS NULL)" - " )" - ) % { - "retcols": ",".join("user_ips." + c for c in retcols), - "inner_select": inner_select, - } - - txn.execute(sql, bindings) - return cls.cursor_to_dict(txn) - @defer.inlineCallbacks def get_user_ip_and_agents(self, user): user_id = user.to_string() diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index 09305c3bf..6ac465408 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -55,7 +55,6 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): { "user_id": user_id, "device_id": "device_id", - "access_token": "access_token", "ip": "ip", "user_agent": "user_agent", "last_seen": 12345678000, From 691a70190b76aa29481f6299580b71160068ef8e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Sep 2019 16:04:41 +0100 Subject: [PATCH 4/6] Newsfile --- changelog.d/6089.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6089.misc diff --git a/changelog.d/6089.misc b/changelog.d/6089.misc new file mode 100644 index 000000000..fa3c197c5 --- /dev/null +++ b/changelog.d/6089.misc @@ -0,0 +1 @@ +Move last seen info into devices table. From acb62a7cc6973618397a868289b5881f1c3c1ec3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Sep 2019 16:50:31 +0100 Subject: [PATCH 5/6] Test background update --- tests/storage/test_client_ips.py | 79 ++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index 6ac465408..76fe65b59 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -200,6 +200,85 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): active = self.get_success(self.store.user_last_seen_monthly_active(user_id)) self.assertTrue(active) + def test_devices_last_seen_bg_update(self): + # First make sure we have completed all updates. + while not self.get_success(self.store.has_completed_background_updates()): + self.get_success(self.store.do_next_background_update(100), by=0.1) + + # Insert a user IP + user_id = "@user:id" + self.get_success( + self.store.insert_client_ip( + user_id, "access_token", "ip", "user_agent", "device_id" + ) + ) + + # Force persisting to disk + self.reactor.advance(200) + + # But clear the associated entry in devices table + self.get_success( + self.store._simple_update( + table="devices", + keyvalues={"user_id": user_id, "device_id": "device_id"}, + updatevalues={"last_seen": None, "ip": None, "user_agent": None}, + desc="test_devices_last_seen_bg_update", + ) + ) + + # We should now get nulls when querying + result = self.get_success( + self.store.get_last_client_ip_by_device(user_id, "device_id") + ) + + r = result[(user_id, "device_id")] + self.assertDictContainsSubset( + { + "user_id": user_id, + "device_id": "device_id", + "ip": None, + "user_agent": None, + "last_seen": None, + }, + r, + ) + + # Register the background update to run again. + self.get_success( + self.store._simple_insert( + table="background_updates", + values={ + "update_name": "devices_last_seen", + "progress_json": "{}", + "depends_on": None, + }, + ) + ) + + # ... and tell the DataStore that it hasn't finished all updates yet + self.store._all_done = False + + # Now let's actually drive the updates to completion + while not self.get_success(self.store.has_completed_background_updates()): + self.get_success(self.store.do_next_background_update(100), by=0.1) + + # We should now get the correct result again + result = self.get_success( + self.store.get_last_client_ip_by_device(user_id, "device_id") + ) + + r = result[(user_id, "device_id")] + self.assertDictContainsSubset( + { + "user_id": user_id, + "device_id": "device_id", + "ip": "ip", + "user_agent": "user_agent", + "last_seen": 0, + }, + r, + ) + class ClientIpAuthTestCase(unittest.HomeserverTestCase): From 50572db837f3e6a0869e9ec573e02d4af72548ea Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Sep 2019 17:00:23 +0100 Subject: [PATCH 6/6] Use if `is not None` Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/storage/client_ips.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index a4e6d9dbe..899668974 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -393,7 +393,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): """ keyvalues = {"user_id": user_id} - if device_id: + if device_id is not None: keyvalues["device_id"] = device_id res = yield self._simple_select_list(