From 6ef983ce5cc0a1cd7323ac82c8eed41d72ff3a99 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Tue, 31 Jul 2018 16:36:24 +0100 Subject: [PATCH 01/26] api into monthly_active_users table --- synapse/app/homeserver.py | 4 + synapse/storage/monthly_active_users.py | 89 +++++++++++++++++++ synapse/storage/prepare_database.py | 2 +- .../delta/50/make_event_content_nullable.py | 2 +- .../schema/delta/51/monthly_active_users.sql | 23 +++++ tests/storage/test_monthly_active_users.py | 42 +++++++++ 6 files changed, 160 insertions(+), 2 deletions(-) create mode 100644 synapse/storage/monthly_active_users.py create mode 100644 synapse/storage/schema/delta/51/monthly_active_users.sql create mode 100644 tests/storage/test_monthly_active_users.py diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 57b815d77..79772fa61 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -62,6 +62,7 @@ from synapse.rest.media.v0.content_repository import ContentRepoResource from synapse.server import HomeServer from synapse.storage import are_all_users_on_domain from synapse.storage.engines import IncorrectDatabaseSetup, create_engine +from synapse.storage.monthly_active_users import MonthlyActiveUsersStore from synapse.storage.prepare_database import UpgradeDatabaseException, prepare_database from synapse.util.caches import CACHE_SIZE_FACTOR from synapse.util.httpresourcetree import create_resource_tree @@ -511,6 +512,9 @@ def run(hs): # If you increase the loop period, the accuracy of user_daily_visits # table will decrease clock.looping_call(generate_user_daily_visit_stats, 5 * 60 * 1000) + clock.looping_call( + MonthlyActiveUsersStore(hs).reap_monthly_active_users, 1000 * 60 * 60 + ) if hs.config.report_stats: logger.info("Scheduling stats reporting for 3 hour intervals") diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py new file mode 100644 index 000000000..373e828c0 --- /dev/null +++ b/synapse/storage/monthly_active_users.py @@ -0,0 +1,89 @@ +from twisted.internet import defer + +from ._base import SQLBaseStore + + +class MonthlyActiveUsersStore(SQLBaseStore): + def __init__(self, hs): + super(MonthlyActiveUsersStore, self).__init__(None, hs) + self._clock = hs.get_clock() + + def reap_monthly_active_users(self): + """ + Cleans out monthly active user table to ensure that no stale + entries exist. + Return: + defered, no return type + """ + def _reap_users(txn): + thirty_days_ago = ( + int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30) + ) + sql = "DELETE FROM monthly_active_users WHERE timestamp < ?" + txn.execute(sql, (thirty_days_ago,)) + + return self.runInteraction("reap_monthly_active_users", _reap_users) + + def get_monthly_active_count(self): + """ + Generates current count of monthly active users.abs + return: + defered resolves to int + """ + def _count_users(txn): + sql = """ + SELECT COALESCE(count(*), 0) FROM ( + SELECT user_id FROM monthly_active_users + ) u + """ + txn.execute(sql) + count, = txn.fetchone() + return count + return self.runInteraction("count_users", _count_users) + + def upsert_monthly_active_user(self, user_id): + """ + Updates or inserts monthly active user member + Arguments: + user_id (str): user to add/update + """ + return self._simple_upsert( + desc="upsert_monthly_active_user", + table="monthly_active_users", + keyvalues={ + "user_id": user_id, + }, + values={ + "timestamp": int(self._clock.time_msec()), + }, + lock=False, + ) + + def clean_out_monthly_active_users(self): + pass + + @defer.inlineCallbacks + def is_user_monthly_active(self, user_id): + """ + Checks if a given user is part of the monthly active user group + Arguments: + user_id (str): user to add/update + Return: + bool : True if user part of group, False otherwise + """ + user_present = yield self._simple_select_onecol( + table="monthly_active_users", + keyvalues={ + "user_id": user_id, + }, + retcol="user_id", + desc="is_user_monthly_active", + ) + # jeff = self.cursor_to_dict(res) + result = False + if user_present: + result = True + + defer.returnValue( + result + ) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index b290f834b..b36471931 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 50 +SCHEMA_VERSION = 51 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/50/make_event_content_nullable.py b/synapse/storage/schema/delta/50/make_event_content_nullable.py index 7d27342e3..6dd467b6c 100644 --- a/synapse/storage/schema/delta/50/make_event_content_nullable.py +++ b/synapse/storage/schema/delta/50/make_event_content_nullable.py @@ -88,5 +88,5 @@ def run_upgrade(cur, database_engine, *args, **kwargs): "UPDATE sqlite_master SET sql=? WHERE tbl_name='events' AND type='table'", (sql, ), ) - cur.execute("PRAGMA schema_version=%i" % (oldver+1,)) + cur.execute("PRAGMA schema_version=%i" % (oldver + 1,)) cur.execute("PRAGMA writable_schema=OFF") diff --git a/synapse/storage/schema/delta/51/monthly_active_users.sql b/synapse/storage/schema/delta/51/monthly_active_users.sql new file mode 100644 index 000000000..b3c0e1a67 --- /dev/null +++ b/synapse/storage/schema/delta/51/monthly_active_users.sql @@ -0,0 +1,23 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- a table of users who have requested that their details be erased +CREATE TABLE monthly_active_users ( + user_id TEXT NOT NULL, + timestamp BIGINT NOT NULL +); + +CREATE UNIQUE INDEX monthly_active_users_users ON monthly_active_users(user_id); +CREATE INDEX monthly_active_users_time_stamp ON monthly_active_users(timestamp); diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py new file mode 100644 index 000000000..9b1ffc636 --- /dev/null +++ b/tests/storage/test_monthly_active_users.py @@ -0,0 +1,42 @@ +from twisted.internet import defer + +from synapse.storage.monthly_active_users import MonthlyActiveUsersStore + +import tests.unittest +import tests.utils +from tests.utils import setup_test_homeserver + + +class MonthlyActiveUsersTestCase(tests.unittest.TestCase): + def __init__(self, *args, **kwargs): + super(MonthlyActiveUsersTestCase, self).__init__(*args, **kwargs) + self.mau = None + + @defer.inlineCallbacks + def setUp(self): + hs = yield setup_test_homeserver() + self.mau = MonthlyActiveUsersStore(hs) + + @defer.inlineCallbacks + def test_can_insert_and_count_mau(self): + count = yield self.mau.get_monthly_active_count() + self.assertEqual(0, count) + + yield self.mau.upsert_monthly_active_user("@user:server") + count = yield self.mau.get_monthly_active_count() + + self.assertEqual(1, count) + + @defer.inlineCallbacks + def test_is_user_monthly_active(self): + user_id1 = "@user1:server" + user_id2 = "@user2:server" + user_id3 = "@user3:server" + result = yield self.mau.is_user_monthly_active(user_id1) + self.assertFalse(result) + yield self.mau.upsert_monthly_active_user(user_id1) + yield self.mau.upsert_monthly_active_user(user_id2) + result = yield self.mau.is_user_monthly_active(user_id1) + self.assertTrue(result) + result = yield self.mau.is_user_monthly_active(user_id3) + self.assertFalse(result) From f9f55599718ba9849b2dee18e253dcaf8f5fcfe7 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 1 Aug 2018 12:03:42 +0100 Subject: [PATCH 02/26] fix comment --- synapse/storage/schema/delta/51/monthly_active_users.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/schema/delta/51/monthly_active_users.sql b/synapse/storage/schema/delta/51/monthly_active_users.sql index b3c0e1a67..f2b6d3e31 100644 --- a/synapse/storage/schema/delta/51/monthly_active_users.sql +++ b/synapse/storage/schema/delta/51/monthly_active_users.sql @@ -13,7 +13,7 @@ * limitations under the License. */ --- a table of users who have requested that their details be erased +-- a table of monthly active users, for use where blocking based on mau limits CREATE TABLE monthly_active_users ( user_id TEXT NOT NULL, timestamp BIGINT NOT NULL From 4e5ac901ddd15e8938605c5ac4312be804997ed5 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 1 Aug 2018 12:03:57 +0100 Subject: [PATCH 03/26] clean up --- synapse/storage/monthly_active_users.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index 373e828c0..03eeea792 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -31,11 +31,8 @@ class MonthlyActiveUsersStore(SQLBaseStore): defered resolves to int """ def _count_users(txn): - sql = """ - SELECT COALESCE(count(*), 0) FROM ( - SELECT user_id FROM monthly_active_users - ) u - """ + sql = "SELECT COALESCE(count(*), 0) FROM monthly_active_users" + txn.execute(sql) count, = txn.fetchone() return count @@ -59,9 +56,6 @@ class MonthlyActiveUsersStore(SQLBaseStore): lock=False, ) - def clean_out_monthly_active_users(self): - pass - @defer.inlineCallbacks def is_user_monthly_active(self, user_id): """ @@ -79,11 +73,5 @@ class MonthlyActiveUsersStore(SQLBaseStore): retcol="user_id", desc="is_user_monthly_active", ) - # jeff = self.cursor_to_dict(res) - result = False - if user_present: - result = True - defer.returnValue( - result - ) + defer.returnValue(bool(user_present)) From ec716a35b219d147dee51733b55573952799a549 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 1 Aug 2018 17:54:37 +0100 Subject: [PATCH 04/26] change monthly_active_users table to be a single column --- synapse/storage/monthly_active_users.py | 10 +++------- .../storage/schema/delta/51/monthly_active_users.sql | 4 +--- tests/storage/test_monthly_active_users.py | 6 +++--- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index 03eeea792..7b3f13aed 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -38,22 +38,18 @@ class MonthlyActiveUsersStore(SQLBaseStore): return count return self.runInteraction("count_users", _count_users) - def upsert_monthly_active_user(self, user_id): + def insert_monthly_active_user(self, user_id): """ Updates or inserts monthly active user member Arguments: user_id (str): user to add/update """ - return self._simple_upsert( + return self._simple_insert( desc="upsert_monthly_active_user", table="monthly_active_users", - keyvalues={ + values={ "user_id": user_id, }, - values={ - "timestamp": int(self._clock.time_msec()), - }, - lock=False, ) @defer.inlineCallbacks diff --git a/synapse/storage/schema/delta/51/monthly_active_users.sql b/synapse/storage/schema/delta/51/monthly_active_users.sql index f2b6d3e31..590b1abab 100644 --- a/synapse/storage/schema/delta/51/monthly_active_users.sql +++ b/synapse/storage/schema/delta/51/monthly_active_users.sql @@ -15,9 +15,7 @@ -- a table of monthly active users, for use where blocking based on mau limits CREATE TABLE monthly_active_users ( - user_id TEXT NOT NULL, - timestamp BIGINT NOT NULL + user_id TEXT NOT NULL ); CREATE UNIQUE INDEX monthly_active_users_users ON monthly_active_users(user_id); -CREATE INDEX monthly_active_users_time_stamp ON monthly_active_users(timestamp); diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py index 9b1ffc636..7a8432ce6 100644 --- a/tests/storage/test_monthly_active_users.py +++ b/tests/storage/test_monthly_active_users.py @@ -22,7 +22,7 @@ class MonthlyActiveUsersTestCase(tests.unittest.TestCase): count = yield self.mau.get_monthly_active_count() self.assertEqual(0, count) - yield self.mau.upsert_monthly_active_user("@user:server") + yield self.mau.insert_monthly_active_user("@user:server") count = yield self.mau.get_monthly_active_count() self.assertEqual(1, count) @@ -34,8 +34,8 @@ class MonthlyActiveUsersTestCase(tests.unittest.TestCase): user_id3 = "@user3:server" result = yield self.mau.is_user_monthly_active(user_id1) self.assertFalse(result) - yield self.mau.upsert_monthly_active_user(user_id1) - yield self.mau.upsert_monthly_active_user(user_id2) + yield self.mau.insert_monthly_active_user(user_id1) + yield self.mau.insert_monthly_active_user(user_id2) result = yield self.mau.is_user_monthly_active(user_id1) self.assertTrue(result) result = yield self.mau.is_user_monthly_active(user_id3) From c21d82bab3b32e2f59c9ef09a1a10b337ce45db4 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 1 Aug 2018 23:24:38 +0100 Subject: [PATCH 05/26] normalise reaping query --- synapse/storage/monthly_active_users.py | 41 +++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index 7b3f13aed..0741c7fa6 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -7,6 +7,7 @@ class MonthlyActiveUsersStore(SQLBaseStore): def __init__(self, hs): super(MonthlyActiveUsersStore, self).__init__(None, hs) self._clock = hs.get_clock() + self.max_mau_value = hs.config.max_mau_value def reap_monthly_active_users(self): """ @@ -19,8 +20,42 @@ class MonthlyActiveUsersStore(SQLBaseStore): thirty_days_ago = ( int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30) ) - sql = "DELETE FROM monthly_active_users WHERE timestamp < ?" - txn.execute(sql, (thirty_days_ago,)) + + # Query deletes the union of users that have either: + # * not visited in the last 30 days + # * exceeded the total max_mau_value threshold. Where there is + # an excess, more recent users are favoured - this is to cover + # the case where the limit has been step change reduced. + # + sql = """ + DELETE FROM monthly_active_users + WHERE user_id + IN ( + SELECT * FROM ( + SELECT monthly_active_users.user_id + FROM monthly_active_users + LEFT JOIN ( + SELECT user_id, max(last_seen) AS last_seen + FROM user_ips + GROUP BY user_id + ) AS uip ON uip.user_id=monthly_active_users.user_id + ORDER BY uip.last_seen desc LIMIT -1 OFFSET ? + ) + UNION + SELECT * FROM ( + SELECT monthly_active_users.user_id + FROM monthly_active_users + LEFT JOIN ( + SELECT user_id, max(last_seen) AS last_seen + FROM user_ips + GROUP BY user_id + ) AS uip ON uip.user_id=monthly_active_users.user_id + WHERE uip.last_seen < ? + ) + ) + """ + + txn.execute(sql, (self.max_mau_value, thirty_days_ago,)) return self.runInteraction("reap_monthly_active_users", _reap_users) @@ -45,7 +80,7 @@ class MonthlyActiveUsersStore(SQLBaseStore): user_id (str): user to add/update """ return self._simple_insert( - desc="upsert_monthly_active_user", + desc="insert_monthly_active_user", table="monthly_active_users", values={ "user_id": user_id, From 08281fe6b7cdd9620d28d2aa6b725bdb0a351bbc Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 1 Aug 2018 23:26:24 +0100 Subject: [PATCH 06/26] self.db_conn unused --- synapse/storage/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 134e4a80f..04ff1f800 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -94,7 +94,6 @@ class DataStore(RoomMemberStore, RoomStore, self._clock = hs.get_clock() self.database_engine = hs.database_engine - self.db_conn = db_conn self._stream_id_gen = StreamIdGenerator( db_conn, "events", "stream_ordering", extra_tables=[("local_invites", "stream_id")] From 165e06703340667dd88eb73dfe299a4d3298b94b Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 2 Aug 2018 10:59:58 +0100 Subject: [PATCH 07/26] Revert "change monthly_active_users table to be a single column" This reverts commit ec716a35b219d147dee51733b55573952799a549. --- synapse/storage/monthly_active_users.py | 10 +++++++--- .../storage/schema/delta/51/monthly_active_users.sql | 4 +++- tests/storage/test_monthly_active_users.py | 6 +++--- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index 7b3f13aed..03eeea792 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -38,18 +38,22 @@ class MonthlyActiveUsersStore(SQLBaseStore): return count return self.runInteraction("count_users", _count_users) - def insert_monthly_active_user(self, user_id): + def upsert_monthly_active_user(self, user_id): """ Updates or inserts monthly active user member Arguments: user_id (str): user to add/update """ - return self._simple_insert( + return self._simple_upsert( desc="upsert_monthly_active_user", table="monthly_active_users", - values={ + keyvalues={ "user_id": user_id, }, + values={ + "timestamp": int(self._clock.time_msec()), + }, + lock=False, ) @defer.inlineCallbacks diff --git a/synapse/storage/schema/delta/51/monthly_active_users.sql b/synapse/storage/schema/delta/51/monthly_active_users.sql index 590b1abab..f2b6d3e31 100644 --- a/synapse/storage/schema/delta/51/monthly_active_users.sql +++ b/synapse/storage/schema/delta/51/monthly_active_users.sql @@ -15,7 +15,9 @@ -- a table of monthly active users, for use where blocking based on mau limits CREATE TABLE monthly_active_users ( - user_id TEXT NOT NULL + user_id TEXT NOT NULL, + timestamp BIGINT NOT NULL ); CREATE UNIQUE INDEX monthly_active_users_users ON monthly_active_users(user_id); +CREATE INDEX monthly_active_users_time_stamp ON monthly_active_users(timestamp); diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py index 7a8432ce6..9b1ffc636 100644 --- a/tests/storage/test_monthly_active_users.py +++ b/tests/storage/test_monthly_active_users.py @@ -22,7 +22,7 @@ class MonthlyActiveUsersTestCase(tests.unittest.TestCase): count = yield self.mau.get_monthly_active_count() self.assertEqual(0, count) - yield self.mau.insert_monthly_active_user("@user:server") + yield self.mau.upsert_monthly_active_user("@user:server") count = yield self.mau.get_monthly_active_count() self.assertEqual(1, count) @@ -34,8 +34,8 @@ class MonthlyActiveUsersTestCase(tests.unittest.TestCase): user_id3 = "@user3:server" result = yield self.mau.is_user_monthly_active(user_id1) self.assertFalse(result) - yield self.mau.insert_monthly_active_user(user_id1) - yield self.mau.insert_monthly_active_user(user_id2) + yield self.mau.upsert_monthly_active_user(user_id1) + yield self.mau.upsert_monthly_active_user(user_id2) result = yield self.mau.is_user_monthly_active(user_id1) self.assertTrue(result) result = yield self.mau.is_user_monthly_active(user_id3) From 00f99f74b1b875bb7ac6b0623994cabad4a59cc6 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 2 Aug 2018 13:47:19 +0100 Subject: [PATCH 08/26] insertion into monthly_active_users --- synapse/api/auth.py | 2 +- synapse/storage/__init__.py | 2 + synapse/storage/client_ips.py | 22 ++++++++- synapse/storage/monthly_active_users.py | 18 ++++--- tests/storage/test_client_ips.py | 66 +++++++++++++++++++++++-- 5 files changed, 99 insertions(+), 11 deletions(-) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 5bbbe8e2e..d8022bcf8 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -213,7 +213,7 @@ class Auth(object): default=[b""] )[0] if user and access_token and ip_addr: - self.store.insert_client_ip( + yield self.store.insert_client_ip( user_id=user.to_string(), access_token=access_token, ip=ip_addr, diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 04ff1f800..2120f46ed 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -39,6 +39,7 @@ from .filtering import FilteringStore from .group_server import GroupServerStore from .keys import KeyStore from .media_repository import MediaRepositoryStore +from .monthly_active_users import MonthlyActiveUsersStore from .openid import OpenIdStore from .presence import PresenceStore, UserPresenceState from .profile import ProfileStore @@ -87,6 +88,7 @@ class DataStore(RoomMemberStore, RoomStore, UserDirectoryStore, GroupServerStore, UserErasureStore, + MonthlyActiveUsersStore, ): def __init__(self, db_conn, hs): diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index b8cefd43d..506915a1e 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -35,6 +35,7 @@ LAST_SEEN_GRANULARITY = 120 * 1000 class ClientIpStore(background_updates.BackgroundUpdateStore): def __init__(self, db_conn, hs): + self.client_ip_last_seen = Cache( name="client_ip_last_seen", keylen=4, @@ -74,6 +75,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): "before", "shutdown", self._update_client_ips_batch ) + @defer.inlineCallbacks def insert_client_ip(self, user_id, access_token, ip, user_agent, device_id, now=None): if not now: @@ -84,7 +86,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): last_seen = self.client_ip_last_seen.get(key) except KeyError: last_seen = None - + yield self._populate_monthly_active_users(user_id) # Rate-limited inserts if last_seen is not None and (now - last_seen) < LAST_SEEN_GRANULARITY: return @@ -93,6 +95,24 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): self._batch_row_update[key] = (user_agent, device_id, now) + @defer.inlineCallbacks + def _populate_monthly_active_users(self, user_id): + store = self.hs.get_datastore() + print "entering _populate_monthly_active_users" + if self.hs.config.limit_usage_by_mau: + print "self.hs.config.limit_usage_by_mau is TRUE" + is_user_monthly_active = yield store.is_user_monthly_active(user_id) + print "is_user_monthly_active is %r" % is_user_monthly_active + if is_user_monthly_active: + yield store.upsert_monthly_active_user(user_id) + else: + count = yield store.get_monthly_active_count() + print "count is %d" % count + if count < self.hs.config.max_mau_value: + print "count is less than self.hs.config.max_mau_value " + res = yield store.upsert_monthly_active_user(user_id) + print "upsert response is %r" % res + def _update_client_ips_batch(self): def update(): to_update = self._batch_row_update diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index 2337438c5..aada9bd2b 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -4,7 +4,7 @@ from ._base import SQLBaseStore class MonthlyActiveUsersStore(SQLBaseStore): - def __init__(self, hs): + def __init__(self, dbconn, hs): super(MonthlyActiveUsersStore, self).__init__(None, hs) self._clock = hs.get_clock() self.max_mau_value = hs.config.max_mau_value @@ -14,24 +14,28 @@ class MonthlyActiveUsersStore(SQLBaseStore): Cleans out monthly active user table to ensure that no stale entries exist. Return: - defered, no return type + Defered() """ def _reap_users(txn): thirty_days_ago = ( int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30) ) - sql = "DELETE FROM monthly_active_users WHERE timestamp < ?" - txn.execute(sql, (thirty_days_ago,)) + sql = """ + DELETE FROM monthly_active_users + ORDER BY timestamp desc + LIMIT -1 OFFSET ? + """ + txn.execute(sql, (self.max_mau_value,)) return self.runInteraction("reap_monthly_active_users", _reap_users) def get_monthly_active_count(self): """ Generates current count of monthly active users.abs - return: - defered resolves to int + Return: + Defered(int): Number of current monthly active users """ def _count_users(txn): sql = "SELECT COALESCE(count(*), 0) FROM monthly_active_users" @@ -46,6 +50,8 @@ class MonthlyActiveUsersStore(SQLBaseStore): Updates or inserts monthly active user member Arguments: user_id (str): user to add/update + Deferred(bool): True if a new entry was created, False if an + existing one was updated. """ return self._simple_upsert( desc="upsert_monthly_active_user", diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index bd6fda6cb..e1552510c 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from mock import Mock from twisted.internet import defer @@ -27,9 +28,9 @@ class ClientIpStoreTestCase(tests.unittest.TestCase): @defer.inlineCallbacks def setUp(self): - hs = yield tests.utils.setup_test_homeserver() - self.store = hs.get_datastore() - self.clock = hs.get_clock() + self.hs = yield tests.utils.setup_test_homeserver() + self.store = self.hs.get_datastore() + self.clock = self.hs.get_clock() @defer.inlineCallbacks def test_insert_new_client_ip(self): @@ -54,3 +55,62 @@ class ClientIpStoreTestCase(tests.unittest.TestCase): }, r ) + + @defer.inlineCallbacks + def test_disabled_monthly_active_user(self): + self.hs.config.limit_usage_by_mau = False + self.hs.config.max_mau_value = 50 + user_id = "@user:server" + yield self.store.insert_client_ip( + user_id, "access_token", "ip", "user_agent", "device_id", + ) + active = yield self.store.is_user_monthly_active(user_id) + self.assertFalse(active) + + @defer.inlineCallbacks + def test_adding_monthly_active_user_when_full(self): + self.hs.config.limit_usage_by_mau = True + self.hs.config.max_mau_value = 50 + lots_of_users = 100 + user_id = "@user:server" + + self.store.get_monthly_active_count = Mock( + return_value=defer.succeed(lots_of_users) + ) + yield self.store.insert_client_ip( + user_id, "access_token", "ip", "user_agent", "device_id", + ) + active = yield self.store.is_user_monthly_active(user_id) + self.assertFalse(active) + + @defer.inlineCallbacks + def test_adding_monthly_active_user_when_space(self): + self.hs.config.limit_usage_by_mau = True + self.hs.config.max_mau_value = 50 + user_id = "@user:server" + active = yield self.store.is_user_monthly_active(user_id) + self.assertFalse(active) + + yield self.store.insert_client_ip( + user_id, "access_token", "ip", "user_agent", "device_id", + ) + active = yield self.store.is_user_monthly_active(user_id) + self.assertTrue(active) + + @defer.inlineCallbacks + def test_updating_monthly_active_user_when_space(self): + self.hs.config.limit_usage_by_mau = True + self.hs.config.max_mau_value = 50 + user_id = "@user:server" + + active = yield self.store.is_user_monthly_active(user_id) + self.assertFalse(active) + + yield self.store.insert_client_ip( + user_id, "access_token", "ip", "user_agent", "device_id", + ) + yield self.store.insert_client_ip( + user_id, "access_token", "ip", "user_agent", "device_id", + ) + active = yield self.store.is_user_monthly_active(user_id) + self.assertTrue(active) From c4ffbecb6856602743198f1a8d4c25a8bb7afd4d Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 2 Aug 2018 13:51:05 +0100 Subject: [PATCH 09/26] fix test, update constructor call --- tests/storage/test_monthly_active_users.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py index 9b1ffc636..d8d25a606 100644 --- a/tests/storage/test_monthly_active_users.py +++ b/tests/storage/test_monthly_active_users.py @@ -15,7 +15,7 @@ class MonthlyActiveUsersTestCase(tests.unittest.TestCase): @defer.inlineCallbacks def setUp(self): hs = yield setup_test_homeserver() - self.mau = MonthlyActiveUsersStore(hs) + self.mau = MonthlyActiveUsersStore(None, hs) @defer.inlineCallbacks def test_can_insert_and_count_mau(self): From 9180061b49907da363f2e3bfa0061f913d7ccf7a Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 2 Aug 2018 15:55:29 +0100 Subject: [PATCH 10/26] remove unused count_monthly_users --- synapse/storage/__init__.py | 25 -------------- tests/storage/test__init__.py | 65 ----------------------------------- 2 files changed, 90 deletions(-) delete mode 100644 tests/storage/test__init__.py diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 2120f46ed..23b4a8d76 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -268,31 +268,6 @@ class DataStore(RoomMemberStore, RoomStore, return self.runInteraction("count_users", _count_users) - def count_monthly_users(self): - """Counts the number of users who used this homeserver in the last 30 days - - This method should be refactored with count_daily_users - the only - reason not to is waiting on definition of mau - - Returns: - Defered[int] - """ - def _count_monthly_users(txn): - thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30) - sql = """ - SELECT COALESCE(count(*), 0) FROM ( - SELECT user_id FROM user_ips - WHERE last_seen > ? - GROUP BY user_id - ) u - """ - - txn.execute(sql, (thirty_days_ago,)) - count, = txn.fetchone() - return count - - return self.runInteraction("count_monthly_users", _count_monthly_users) - def count_r30_users(self): """ Counts the number of 30 day retained users, defined as:- diff --git a/tests/storage/test__init__.py b/tests/storage/test__init__.py deleted file mode 100644 index f19cb1265..000000000 --- a/tests/storage/test__init__.py +++ /dev/null @@ -1,65 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2018 New Vector Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from twisted.internet import defer - -import tests.utils - - -class InitTestCase(tests.unittest.TestCase): - def __init__(self, *args, **kwargs): - super(InitTestCase, self).__init__(*args, **kwargs) - self.store = None # type: synapse.storage.DataStore - - @defer.inlineCallbacks - def setUp(self): - hs = yield tests.utils.setup_test_homeserver() - - hs.config.max_mau_value = 50 - hs.config.limit_usage_by_mau = True - self.store = hs.get_datastore() - self.clock = hs.get_clock() - - @defer.inlineCallbacks - def test_count_monthly_users(self): - count = yield self.store.count_monthly_users() - self.assertEqual(0, count) - - yield self._insert_user_ips("@user:server1") - yield self._insert_user_ips("@user:server2") - - count = yield self.store.count_monthly_users() - self.assertEqual(2, count) - - @defer.inlineCallbacks - def _insert_user_ips(self, user): - """ - Helper function to populate user_ips without using batch insertion infra - args: - user (str): specify username i.e. @user:server.com - """ - yield self.store._simple_upsert( - table="user_ips", - keyvalues={ - "user_id": user, - "access_token": "access_token", - "ip": "ip", - "user_agent": "user_agent", - "device_id": "device_id", - }, - values={ - "last_seen": self.clock.time_msec(), - } - ) From 74b1d46ad9ae692774f2e9d71cbbe1cea91b4070 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 2 Aug 2018 16:57:35 +0100 Subject: [PATCH 11/26] do mau checks based on monthly_active_users table --- synapse/api/auth.py | 13 ++++++ synapse/handlers/auth.py | 10 ++--- synapse/handlers/register.py | 10 ++--- synapse/storage/client_ips.py | 15 +++---- tests/api/test_auth.py | 31 +++++++++++++- tests/handlers/test_auth.py | 8 ++-- tests/handlers/test_register.py | 71 ++++++++++++++++----------------- 7 files changed, 97 insertions(+), 61 deletions(-) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index d8022bcf8..943a48833 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -773,3 +773,16 @@ class Auth(object): raise AuthError( 403, "Guest access not allowed", errcode=Codes.GUEST_ACCESS_FORBIDDEN ) + + @defer.inlineCallbacks + def check_auth_blocking(self, error): + """Checks if the user should be rejected for some external reason, + such as monthly active user limiting or global disable flag + Args: + error (Error): The error that should be raised if user is to be + blocked + """ + if self.hs.config.limit_usage_by_mau is True: + current_mau = yield self.store.get_monthly_active_count() + if current_mau >= self.hs.config.max_mau_value: + raise error diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 184eef09d..8f9cff92e 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -913,12 +913,10 @@ class AuthHandler(BaseHandler): Ensure that if mau blocking is enabled that invalid users cannot log in. """ - if self.hs.config.limit_usage_by_mau is True: - current_mau = yield self.store.count_monthly_users() - if current_mau >= self.hs.config.max_mau_value: - raise AuthError( - 403, "MAU Limit Exceeded", errcode=Codes.MAU_LIMIT_EXCEEDED - ) + error = AuthError( + 403, "Monthly Active User limits exceeded", errcode=Codes.MAU_LIMIT_EXCEEDED + ) + yield self.auth.check_auth_blocking(error) @attr.s diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 289704b24..706ed8c29 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -540,9 +540,7 @@ class RegistrationHandler(BaseHandler): Do not accept registrations if monthly active user limits exceeded and limiting is enabled """ - if self.hs.config.limit_usage_by_mau is True: - current_mau = yield self.store.count_monthly_users() - if current_mau >= self.hs.config.max_mau_value: - raise RegistrationError( - 403, "MAU Limit Exceeded", Codes.MAU_LIMIT_EXCEEDED - ) + error = RegistrationError( + 403, "Monthly Active User limits exceeded", errcode=Codes.MAU_LIMIT_EXCEEDED + ) + yield self.auth.check_auth_blocking(error) diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 506915a1e..83d64d156 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -97,21 +97,22 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): @defer.inlineCallbacks def _populate_monthly_active_users(self, user_id): + """Checks on the state of monthly active user limits and optionally + add the user to the monthly active tables + + Args: + user_id(str): the user_id to query + """ + store = self.hs.get_datastore() - print "entering _populate_monthly_active_users" if self.hs.config.limit_usage_by_mau: - print "self.hs.config.limit_usage_by_mau is TRUE" is_user_monthly_active = yield store.is_user_monthly_active(user_id) - print "is_user_monthly_active is %r" % is_user_monthly_active if is_user_monthly_active: yield store.upsert_monthly_active_user(user_id) else: count = yield store.get_monthly_active_count() - print "count is %d" % count if count < self.hs.config.max_mau_value: - print "count is less than self.hs.config.max_mau_value " - res = yield store.upsert_monthly_active_user(user_id) - print "upsert response is %r" % res + yield store.upsert_monthly_active_user(user_id) def _update_client_ips_batch(self): def update(): diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py index a82d737e7..54bdf2866 100644 --- a/tests/api/test_auth.py +++ b/tests/api/test_auth.py @@ -21,7 +21,7 @@ from twisted.internet import defer import synapse.handlers.auth from synapse.api.auth import Auth -from synapse.api.errors import AuthError +from synapse.api.errors import AuthError, Codes from synapse.types import UserID from tests import unittest @@ -444,3 +444,32 @@ class AuthTestCase(unittest.TestCase): self.assertEqual("Guest access token used for regular user", cm.exception.msg) self.store.get_user_by_id.assert_called_with(USER_ID) + + @defer.inlineCallbacks + def test_blocking_mau(self): + self.hs.config.limit_usage_by_mau = False + self.hs.config.max_mau_value = 50 + lots_of_users = 100 + small_number_of_users = 1 + + error = AuthError( + 403, "MAU Limit Exceeded", errcode=Codes.MAU_LIMIT_EXCEEDED + ) + + # Ensure no error thrown + yield self.auth.check_auth_blocking(error) + + self.hs.config.limit_usage_by_mau = True + + self.store.get_monthly_active_count = Mock( + return_value=defer.succeed(lots_of_users) + ) + + with self.assertRaises(AuthError): + yield self.auth.check_auth_blocking(error) + + # Ensure does not throw an error + self.store.get_monthly_active_count = Mock( + return_value=defer.succeed(small_number_of_users) + ) + yield self.auth.check_auth_blocking(error) diff --git a/tests/handlers/test_auth.py b/tests/handlers/test_auth.py index 55eab9e9c..8a9bf2d5f 100644 --- a/tests/handlers/test_auth.py +++ b/tests/handlers/test_auth.py @@ -132,14 +132,14 @@ class AuthTestCase(unittest.TestCase): @defer.inlineCallbacks def test_mau_limits_exceeded(self): self.hs.config.limit_usage_by_mau = True - self.hs.get_datastore().count_monthly_users = Mock( + self.hs.get_datastore().get_monthly_active_count = Mock( return_value=defer.succeed(self.large_number_of_users) ) with self.assertRaises(AuthError): yield self.auth_handler.get_access_token_for_user_id('user_a') - self.hs.get_datastore().count_monthly_users = Mock( + self.hs.get_datastore().get_monthly_active_count = Mock( return_value=defer.succeed(self.large_number_of_users) ) with self.assertRaises(AuthError): @@ -151,13 +151,13 @@ class AuthTestCase(unittest.TestCase): def test_mau_limits_not_exceeded(self): self.hs.config.limit_usage_by_mau = True - self.hs.get_datastore().count_monthly_users = Mock( + self.hs.get_datastore().get_monthly_active_count = Mock( return_value=defer.succeed(self.small_number_of_users) ) # Ensure does not raise exception yield self.auth_handler.get_access_token_for_user_id('user_a') - self.hs.get_datastore().count_monthly_users = Mock( + self.hs.get_datastore().get_monthly_active_count = Mock( return_value=defer.succeed(self.small_number_of_users) ) yield self.auth_handler.validate_short_term_login_token_and_get_user_id( diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py index 0937d71cf..6b5b8b377 100644 --- a/tests/handlers/test_register.py +++ b/tests/handlers/test_register.py @@ -50,6 +50,10 @@ class RegistrationTestCase(unittest.TestCase): self.hs.get_macaroon_generator = Mock(return_value=self.macaroon_generator) self.hs.handlers = RegistrationHandlers(self.hs) self.handler = self.hs.get_handlers().registration_handler + self.store = self.hs.get_datastore() + self.hs.config.max_mau_value = 50 + self.lots_of_users = 100 + self.small_number_of_users = 1 @defer.inlineCallbacks def test_user_is_created_and_logged_in_if_doesnt_exist(self): @@ -80,51 +84,44 @@ class RegistrationTestCase(unittest.TestCase): self.assertEquals(result_token, 'secret') @defer.inlineCallbacks - def test_cannot_register_when_mau_limits_exceeded(self): - local_part = "someone" - display_name = "someone" - requester = create_requester("@as:test") - store = self.hs.get_datastore() + def test_mau_limits_when_disabled(self): self.hs.config.limit_usage_by_mau = False - self.hs.config.max_mau_value = 50 - lots_of_users = 100 - small_number_users = 1 - - store.count_monthly_users = Mock(return_value=defer.succeed(lots_of_users)) - # Ensure does not throw exception - yield self.handler.get_or_create_user(requester, 'a', display_name) + yield self.handler.get_or_create_user("requester", 'a', "display_name") + @defer.inlineCallbacks + def test_get_or_create_user_mau_not_blocked(self): self.hs.config.limit_usage_by_mau = True - - with self.assertRaises(RegistrationError): - yield self.handler.get_or_create_user(requester, 'b', display_name) - - store.count_monthly_users = Mock(return_value=defer.succeed(small_number_users)) - - self._macaroon_mock_generator("another_secret") - + self.store.count_monthly_users = Mock( + return_value=defer.succeed(self.small_number_of_users) + ) # Ensure does not throw exception - yield self.handler.get_or_create_user("@neil:matrix.org", 'c', "Neil") + yield self.handler.get_or_create_user("@user:server", 'c', "User") - self._macaroon_mock_generator("another another secret") - store.count_monthly_users = Mock(return_value=defer.succeed(lots_of_users)) + @defer.inlineCallbacks + def test_get_or_create_user_mau_blocked(self): + self.hs.config.limit_usage_by_mau = True + self.store.get_monthly_active_count = Mock( + return_value=defer.succeed(self.lots_of_users) + ) with self.assertRaises(RegistrationError): - yield self.handler.register(localpart=local_part) - - self._macaroon_mock_generator("another another secret") - store.count_monthly_users = Mock(return_value=defer.succeed(lots_of_users)) + yield self.handler.get_or_create_user("requester", 'b', "display_name") + @defer.inlineCallbacks + def test_register_mau_blocked(self): + self.hs.config.limit_usage_by_mau = True + self.store.get_monthly_active_count = Mock( + return_value=defer.succeed(self.lots_of_users) + ) with self.assertRaises(RegistrationError): - yield self.handler.register_saml2(local_part) + yield self.handler.register(localpart="local_part") - def _macaroon_mock_generator(self, secret): - """ - Reset macaroon generator in the case where the test creates multiple users - """ - macaroon_generator = Mock( - generate_access_token=Mock(return_value=secret)) - self.hs.get_macaroon_generator = Mock(return_value=macaroon_generator) - self.hs.handlers = RegistrationHandlers(self.hs) - self.handler = self.hs.get_handlers().registration_handler + @defer.inlineCallbacks + def test_register_saml2_mau_blocked(self): + self.hs.config.limit_usage_by_mau = True + self.store.get_monthly_active_count = Mock( + return_value=defer.succeed(self.lots_of_users) + ) + with self.assertRaises(RegistrationError): + yield self.handler.register_saml2(localpart="local_part") From 4ecb4bdac963ae967da810de6134d85b50c999f9 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 2 Aug 2018 22:41:05 +0100 Subject: [PATCH 12/26] wip attempt at caching --- synapse/storage/monthly_active_users.py | 56 +++++++++++++++++++++---- 1 file changed, 47 insertions(+), 9 deletions(-) diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index aada9bd2b..19846cefd 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -1,4 +1,6 @@ from twisted.internet import defer +from synapse.util.caches.descriptors import cachedInlineCallbacks +from synapse.storage.engines import PostgresEngine, Sqlite3Engine from ._base import SQLBaseStore @@ -20,17 +22,53 @@ class MonthlyActiveUsersStore(SQLBaseStore): thirty_days_ago = ( int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30) ) - sql = "DELETE FROM monthly_active_users WHERE timestamp < ?" - txn.execute(sql, (thirty_days_ago,)) - sql = """ - DELETE FROM monthly_active_users - ORDER BY timestamp desc - LIMIT -1 OFFSET ? - """ - txn.execute(sql, (self.max_mau_value,)) + + if isinstance(self.database_engine, PostgresEngine): + sql = """ + DELETE FROM monthly_active_users + WHERE timestamp < ? + RETURNING user_id + """ + txn.execute(sql, (thirty_days_ago,)) + res = txn.fetchall() + for r in res: + self.is_user_monthly_active.invalidate(r) + + sql = """ + DELETE FROM monthly_active_users + ORDER BY timestamp desc + LIMIT -1 OFFSET ? + RETURNING user_id + """ + txn.execute(sql, (self.max_mau_value,)) + res = txn.fetchall() + for r in res: + self.is_user_monthly_active.invalidate(r) + print r + self.get_monthly_active_count.invalidate() + elif isinstance(self.database_engine, Sqlite3Engine): + sql = "DELETE FROM monthly_active_users WHERE timestamp < ?" + + txn.execute(sql, (thirty_days_ago,)) + sql = """ + DELETE FROM monthly_active_users + ORDER BY timestamp desc + LIMIT -1 OFFSET ? + """ + txn.execute(sql, (self.max_mau_value,)) + + # It seems poor to invalidate the whole cache, but the alternative + # is to select then delete which has it's own problem. + # It seems unlikely that anyone using this feature on large datasets + # would be using sqlite and if they are then there will be + # larger perf issues than this one to encourage an upgrade to postgres. + + self.is_user_monthly_active.invalidate_all() + self.get_monthly_active_count.invalidate_all() return self.runInteraction("reap_monthly_active_users", _reap_users) + @cachedInlineCallbacks(num_args=0) def get_monthly_active_count(self): """ Generates current count of monthly active users.abs @@ -65,7 +103,7 @@ class MonthlyActiveUsersStore(SQLBaseStore): lock=False, ) - @defer.inlineCallbacks + @cachedInlineCallbacks(num_args=1) def is_user_monthly_active(self, user_id): """ Checks if a given user is part of the monthly active user group From 5e2f7b80844bc8ee401beab494948c093338f404 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 2 Aug 2018 22:47:48 +0100 Subject: [PATCH 13/26] typo --- synapse/storage/monthly_active_users.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index 19846cefd..2872ba4ca 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -58,7 +58,7 @@ class MonthlyActiveUsersStore(SQLBaseStore): txn.execute(sql, (self.max_mau_value,)) # It seems poor to invalidate the whole cache, but the alternative - # is to select then delete which has it's own problem. + # is to select then delete which has its own problems. # It seems unlikely that anyone using this feature on large datasets # would be using sqlite and if they are then there will be # larger perf issues than this one to encourage an upgrade to postgres. From c0affa7b4ff1e0775a8a5e6d704596b5383112a5 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 2 Aug 2018 23:03:01 +0100 Subject: [PATCH 14/26] update generate_monthly_active_users, and reap_monthly_active_users --- synapse/app/homeserver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 5f0ca51ac..7d4ea493b 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -520,14 +520,14 @@ def run(hs): # table will decrease clock.looping_call(generate_user_daily_visit_stats, 5 * 60 * 1000) clock.looping_call( - MonthlyActiveUsersStore(hs).reap_monthly_active_users, 1000 * 60 * 60 + hs.get_datastore().reap_monthly_active_users, 1000 * 60 * 60 ) @defer.inlineCallbacks def generate_monthly_active_users(): count = 0 if hs.config.limit_usage_by_mau: - count = yield hs.get_datastore().count_monthly_users() + count = yield hs.get_datastore().get_monthly_active_count() current_mau_gauge.set(float(count)) max_mau_value_gauge.set(float(hs.config.max_mau_value)) From 950807d93a264b6d10ece386d227dc4069f7d0da Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Fri, 3 Aug 2018 13:49:53 +0100 Subject: [PATCH 15/26] fix caching and tests --- synapse/app/homeserver.py | 1 - synapse/storage/monthly_active_users.py | 91 ++++++++++------------ tests/storage/test_monthly_active_users.py | 50 ++++++++---- 3 files changed, 80 insertions(+), 62 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 7d4ea493b..3a67db8b3 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -64,7 +64,6 @@ from synapse.rest.media.v0.content_repository import ContentRepoResource from synapse.server import HomeServer from synapse.storage import are_all_users_on_domain from synapse.storage.engines import IncorrectDatabaseSetup, create_engine -from synapse.storage.monthly_active_users import MonthlyActiveUsersStore from synapse.storage.prepare_database import UpgradeDatabaseException, prepare_database from synapse.util.caches import CACHE_SIZE_FACTOR from synapse.util.httpresourcetree import create_resource_tree diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index 2872ba4ca..d06c90cbc 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -1,6 +1,21 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from twisted.internet import defer -from synapse.util.caches.descriptors import cachedInlineCallbacks -from synapse.storage.engines import PostgresEngine, Sqlite3Engine + +from synapse.util.caches.descriptors import cached, cachedInlineCallbacks from ._base import SQLBaseStore @@ -9,7 +24,7 @@ class MonthlyActiveUsersStore(SQLBaseStore): def __init__(self, dbconn, hs): super(MonthlyActiveUsersStore, self).__init__(None, hs) self._clock = hs.get_clock() - self.max_mau_value = hs.config.max_mau_value + self.hs = hs def reap_monthly_active_users(self): """ @@ -19,62 +34,41 @@ class MonthlyActiveUsersStore(SQLBaseStore): Defered() """ def _reap_users(txn): + thirty_days_ago = ( int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30) ) - if isinstance(self.database_engine, PostgresEngine): - sql = """ - DELETE FROM monthly_active_users - WHERE timestamp < ? - RETURNING user_id - """ - txn.execute(sql, (thirty_days_ago,)) - res = txn.fetchall() - for r in res: - self.is_user_monthly_active.invalidate(r) + sql = "DELETE FROM monthly_active_users WHERE timestamp < ?" - sql = """ - DELETE FROM monthly_active_users - ORDER BY timestamp desc - LIMIT -1 OFFSET ? - RETURNING user_id - """ - txn.execute(sql, (self.max_mau_value,)) - res = txn.fetchall() - for r in res: - self.is_user_monthly_active.invalidate(r) - print r - self.get_monthly_active_count.invalidate() - elif isinstance(self.database_engine, Sqlite3Engine): - sql = "DELETE FROM monthly_active_users WHERE timestamp < ?" + txn.execute(sql, (thirty_days_ago,)) + sql = """ + DELETE FROM monthly_active_users + ORDER BY timestamp desc + LIMIT -1 OFFSET ? + """ + txn.execute(sql, (self.hs.config.max_mau_value,)) - txn.execute(sql, (thirty_days_ago,)) - sql = """ - DELETE FROM monthly_active_users - ORDER BY timestamp desc - LIMIT -1 OFFSET ? - """ - txn.execute(sql, (self.max_mau_value,)) + res = self.runInteraction("reap_monthly_active_users", _reap_users) + # It seems poor to invalidate the whole cache, Postgres supports + # 'Returning' which would allow me to invalidate only the + # specific users, but sqlite has no way to do this and instead + # I would need to SELECT and the DELETE which without locking + # is racy. + # Have resolved to invalidate the whole cache for now and do + # something about it if and when the perf becomes significant + self.is_user_monthly_active.invalidate_all() + self.get_monthly_active_count.invalidate_all() + return res - # It seems poor to invalidate the whole cache, but the alternative - # is to select then delete which has its own problems. - # It seems unlikely that anyone using this feature on large datasets - # would be using sqlite and if they are then there will be - # larger perf issues than this one to encourage an upgrade to postgres. - - self.is_user_monthly_active.invalidate_all() - self.get_monthly_active_count.invalidate_all() - - return self.runInteraction("reap_monthly_active_users", _reap_users) - - @cachedInlineCallbacks(num_args=0) + @cached(num_args=0) def get_monthly_active_count(self): """ Generates current count of monthly active users.abs Return: Defered(int): Number of current monthly active users """ + def _count_users(txn): sql = "SELECT COALESCE(count(*), 0) FROM monthly_active_users" @@ -91,7 +85,7 @@ class MonthlyActiveUsersStore(SQLBaseStore): Deferred(bool): True if a new entry was created, False if an existing one was updated. """ - return self._simple_upsert( + self._simple_upsert( desc="upsert_monthly_active_user", table="monthly_active_users", keyvalues={ @@ -102,6 +96,8 @@ class MonthlyActiveUsersStore(SQLBaseStore): }, lock=False, ) + self.is_user_monthly_active.invalidate((user_id,)) + self.get_monthly_active_count.invalidate(()) @cachedInlineCallbacks(num_args=1) def is_user_monthly_active(self, user_id): @@ -120,5 +116,4 @@ class MonthlyActiveUsersStore(SQLBaseStore): retcol="user_id", desc="is_user_monthly_active", ) - defer.returnValue(bool(user_present)) diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py index d8d25a606..c32109ecc 100644 --- a/tests/storage/test_monthly_active_users.py +++ b/tests/storage/test_monthly_active_users.py @@ -1,6 +1,19 @@ -from twisted.internet import defer +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. -from synapse.storage.monthly_active_users import MonthlyActiveUsersStore +from twisted.internet import defer import tests.unittest import tests.utils @@ -10,20 +23,19 @@ from tests.utils import setup_test_homeserver class MonthlyActiveUsersTestCase(tests.unittest.TestCase): def __init__(self, *args, **kwargs): super(MonthlyActiveUsersTestCase, self).__init__(*args, **kwargs) - self.mau = None @defer.inlineCallbacks def setUp(self): - hs = yield setup_test_homeserver() - self.mau = MonthlyActiveUsersStore(None, hs) + self.hs = yield setup_test_homeserver() + self.store = self.hs.get_datastore() @defer.inlineCallbacks def test_can_insert_and_count_mau(self): - count = yield self.mau.get_monthly_active_count() + count = yield self.store.get_monthly_active_count() self.assertEqual(0, count) - yield self.mau.upsert_monthly_active_user("@user:server") - count = yield self.mau.get_monthly_active_count() + yield self.store.upsert_monthly_active_user("@user:server") + count = yield self.store.get_monthly_active_count() self.assertEqual(1, count) @@ -32,11 +44,23 @@ class MonthlyActiveUsersTestCase(tests.unittest.TestCase): user_id1 = "@user1:server" user_id2 = "@user2:server" user_id3 = "@user3:server" - result = yield self.mau.is_user_monthly_active(user_id1) + result = yield self.store.is_user_monthly_active(user_id1) self.assertFalse(result) - yield self.mau.upsert_monthly_active_user(user_id1) - yield self.mau.upsert_monthly_active_user(user_id2) - result = yield self.mau.is_user_monthly_active(user_id1) + yield self.store.upsert_monthly_active_user(user_id1) + yield self.store.upsert_monthly_active_user(user_id2) + result = yield self.store.is_user_monthly_active(user_id1) self.assertTrue(result) - result = yield self.mau.is_user_monthly_active(user_id3) + result = yield self.store.is_user_monthly_active(user_id3) self.assertFalse(result) + + @defer.inlineCallbacks + def test_reap_monthly_active_users(self): + self.hs.config.max_mau_value = 5 + initial_users = 10 + for i in range(initial_users): + yield self.store.upsert_monthly_active_user("@user%d:server" % i) + count = yield self.store.get_monthly_active_count() + self.assertTrue(count, initial_users) + yield self.store.reap_monthly_active_users() + count = yield self.store.get_monthly_active_count() + self.assertTrue(count, initial_users - self.hs.config.max_mau_value) From da90337d8952999f6c2c7dbcbb006b46bce17020 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Fri, 3 Aug 2018 14:05:20 +0100 Subject: [PATCH 16/26] Add ability to limit number of monthly active users on the server --- changelog.d/3633.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3633.feature diff --git a/changelog.d/3633.feature b/changelog.d/3633.feature new file mode 100644 index 000000000..8007a0484 --- /dev/null +++ b/changelog.d/3633.feature @@ -0,0 +1 @@ +Add ability to limit number of monthly active users on the server From b2aab04d2c6021e6999fdc3c204db63d7c6f9334 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Fri, 3 Aug 2018 14:12:56 +0100 Subject: [PATCH 17/26] fix py3 test failure --- tests/storage/test_client_ips.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index e1552510c..0cd290176 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -34,6 +34,7 @@ class ClientIpStoreTestCase(tests.unittest.TestCase): @defer.inlineCallbacks def test_insert_new_client_ip(self): + self.hs.config.max_mau_value = 50 self.clock.now = 12345678 user_id = "@user:id" yield self.store.insert_client_ip( From 5593ff6773c7556b85514ee172c4b6c06898f35e Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Fri, 3 Aug 2018 14:59:17 +0100 Subject: [PATCH 18/26] fix (lots of) py3 test failures --- synapse/config/server.py | 4 ++-- tests/storage/test_client_ips.py | 1 - tests/utils.py | 1 + 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/config/server.py b/synapse/config/server.py index 6a471a0a5..8fd231975 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -69,12 +69,12 @@ class ServerConfig(Config): # Options to control access by tracking MAU self.limit_usage_by_mau = config.get("limit_usage_by_mau", False) + self.max_mau_value = 0 if self.limit_usage_by_mau: self.max_mau_value = config.get( "max_mau_value", 0, ) - else: - self.max_mau_value = 0 + # FIXME: federation_domain_whitelist needs sytests self.federation_domain_whitelist = None federation_domain_whitelist = config.get( diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index 0cd290176..e1552510c 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -34,7 +34,6 @@ class ClientIpStoreTestCase(tests.unittest.TestCase): @defer.inlineCallbacks def test_insert_new_client_ip(self): - self.hs.config.max_mau_value = 50 self.clock.now = 12345678 user_id = "@user:id" yield self.store.insert_client_ip( diff --git a/tests/utils.py b/tests/utils.py index 9bff3ff3b..ec40428e7 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -73,6 +73,7 @@ def setup_test_homeserver(name="test", datastore=None, config=None, reactor=None config.block_events_without_consent_error = None config.media_storage_providers = [] config.auto_join_rooms = [] + config.limit_usage_by_mau = False # disable user directory updates, because they get done in the # background, which upsets the test runner. From e10830e9765eb3897da5af6ffb4809badb8e3009 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Fri, 3 Aug 2018 17:55:50 +0100 Subject: [PATCH 19/26] wip commit - tests failing --- synapse/api/auth.py | 6 +- synapse/storage/client_ips.py | 21 +------ synapse/storage/monthly_active_users.py | 66 ++++++++++++++++------ tests/storage/test_client_ips.py | 12 ++-- tests/storage/test_monthly_active_users.py | 14 ++--- 5 files changed, 66 insertions(+), 53 deletions(-) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 943a48833..d8ebbbc6e 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -775,7 +775,7 @@ class Auth(object): ) @defer.inlineCallbacks - def check_auth_blocking(self, error): + def check_auth_blocking(self): """Checks if the user should be rejected for some external reason, such as monthly active user limiting or global disable flag Args: @@ -785,4 +785,6 @@ class Auth(object): if self.hs.config.limit_usage_by_mau is True: current_mau = yield self.store.get_monthly_active_count() if current_mau >= self.hs.config.max_mau_value: - raise error + raise AuthError( + 403, "MAU Limit Exceeded", errcode=Codes.MAU_LIMIT_EXCEEDED + ) diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 83d64d156..2489527f2 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -86,7 +86,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): last_seen = self.client_ip_last_seen.get(key) except KeyError: last_seen = None - yield self._populate_monthly_active_users(user_id) + yield self.populate_monthly_active_users(user_id) # Rate-limited inserts if last_seen is not None and (now - last_seen) < LAST_SEEN_GRANULARITY: return @@ -95,25 +95,6 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): self._batch_row_update[key] = (user_agent, device_id, now) - @defer.inlineCallbacks - def _populate_monthly_active_users(self, user_id): - """Checks on the state of monthly active user limits and optionally - add the user to the monthly active tables - - Args: - user_id(str): the user_id to query - """ - - store = self.hs.get_datastore() - if self.hs.config.limit_usage_by_mau: - is_user_monthly_active = yield store.is_user_monthly_active(user_id) - if is_user_monthly_active: - yield store.upsert_monthly_active_user(user_id) - else: - count = yield store.get_monthly_active_count() - if count < self.hs.config.max_mau_value: - yield store.upsert_monthly_active_user(user_id) - def _update_client_ips_batch(self): def update(): to_update = self._batch_row_update diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index d06c90cbc..6def6830d 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -19,6 +19,10 @@ from synapse.util.caches.descriptors import cached, cachedInlineCallbacks from ._base import SQLBaseStore +# Number of msec of granularity to store the monthly_active_user timestamp +# This means it is not necessary to update the table on every request +LAST_SEEN_GRANULARITY = 60 * 60 * 1000 + class MonthlyActiveUsersStore(SQLBaseStore): def __init__(self, dbconn, hs): @@ -30,8 +34,9 @@ class MonthlyActiveUsersStore(SQLBaseStore): """ Cleans out monthly active user table to ensure that no stale entries exist. - Return: - Defered() + + Returns: + Deferred[] """ def _reap_users(txn): @@ -49,7 +54,7 @@ class MonthlyActiveUsersStore(SQLBaseStore): """ txn.execute(sql, (self.hs.config.max_mau_value,)) - res = self.runInteraction("reap_monthly_active_users", _reap_users) + res = yield self.runInteraction("reap_monthly_active_users", _reap_users) # It seems poor to invalidate the whole cache, Postgres supports # 'Returning' which would allow me to invalidate only the # specific users, but sqlite has no way to do this and instead @@ -57,16 +62,16 @@ class MonthlyActiveUsersStore(SQLBaseStore): # is racy. # Have resolved to invalidate the whole cache for now and do # something about it if and when the perf becomes significant - self.is_user_monthly_active.invalidate_all() + self._user_last_seen_monthly_active.invalidate_all() self.get_monthly_active_count.invalidate_all() return res @cached(num_args=0) def get_monthly_active_count(self): - """ - Generates current count of monthly active users.abs - Return: - Defered(int): Number of current monthly active users + """Generates current count of monthly active users.abs + + Returns: + Defered[int]: Number of current monthly active users """ def _count_users(txn): @@ -82,10 +87,10 @@ class MonthlyActiveUsersStore(SQLBaseStore): Updates or inserts monthly active user member Arguments: user_id (str): user to add/update - Deferred(bool): True if a new entry was created, False if an + Deferred[bool]: True if a new entry was created, False if an existing one was updated. """ - self._simple_upsert( + is_insert = self._simple_upsert( desc="upsert_monthly_active_user", table="monthly_active_users", keyvalues={ @@ -96,24 +101,49 @@ class MonthlyActiveUsersStore(SQLBaseStore): }, lock=False, ) - self.is_user_monthly_active.invalidate((user_id,)) - self.get_monthly_active_count.invalidate(()) + if is_insert: + self._user_last_seen_monthly_active.invalidate((user_id,)) + self.get_monthly_active_count.invalidate(()) @cachedInlineCallbacks(num_args=1) - def is_user_monthly_active(self, user_id): + def _user_last_seen_monthly_active(self, user_id): """ Checks if a given user is part of the monthly active user group Arguments: user_id (str): user to add/update Return: - bool : True if user part of group, False otherwise + int : timestamp since last seen, None if never seen + """ - user_present = yield self._simple_select_onecol( + result = yield self._simple_select_onecol( table="monthly_active_users", keyvalues={ "user_id": user_id, }, - retcol="user_id", - desc="is_user_monthly_active", + retcol="timestamp", + desc="_user_last_seen_monthly_active", ) - defer.returnValue(bool(user_present)) + timestamp = None + if len(result) > 0: + timestamp = result[0] + defer.returnValue(timestamp) + + @defer.inlineCallbacks + def populate_monthly_active_users(self, user_id): + """Checks on the state of monthly active user limits and optionally + add the user to the monthly active tables + + Args: + user_id(str): the user_id to query + """ + + if self.hs.config.limit_usage_by_mau: + last_seen_timestamp = yield self._user_last_seen_monthly_active(user_id) + now = self.hs.get_clock().time_msec() + + if last_seen_timestamp is None: + count = yield self.get_monthly_active_count() + if count < self.hs.config.max_mau_value: + yield self.upsert_monthly_active_user(user_id) + elif now - last_seen_timestamp > LAST_SEEN_GRANULARITY: + yield self.upsert_monthly_active_user(user_id) diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index e1552510c..7a58c6eb2 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -64,7 +64,7 @@ class ClientIpStoreTestCase(tests.unittest.TestCase): yield self.store.insert_client_ip( user_id, "access_token", "ip", "user_agent", "device_id", ) - active = yield self.store.is_user_monthly_active(user_id) + active = yield self.store._user_last_seen_monthly_active(user_id) self.assertFalse(active) @defer.inlineCallbacks @@ -80,7 +80,7 @@ class ClientIpStoreTestCase(tests.unittest.TestCase): yield self.store.insert_client_ip( user_id, "access_token", "ip", "user_agent", "device_id", ) - active = yield self.store.is_user_monthly_active(user_id) + active = yield self.store._user_last_seen_monthly_active(user_id) self.assertFalse(active) @defer.inlineCallbacks @@ -88,13 +88,13 @@ class ClientIpStoreTestCase(tests.unittest.TestCase): self.hs.config.limit_usage_by_mau = True self.hs.config.max_mau_value = 50 user_id = "@user:server" - active = yield self.store.is_user_monthly_active(user_id) + active = yield self.store._user_last_seen_monthly_active(user_id) self.assertFalse(active) yield self.store.insert_client_ip( user_id, "access_token", "ip", "user_agent", "device_id", ) - active = yield self.store.is_user_monthly_active(user_id) + active = yield self.store._user_last_seen_monthly_active(user_id) self.assertTrue(active) @defer.inlineCallbacks @@ -103,7 +103,7 @@ class ClientIpStoreTestCase(tests.unittest.TestCase): self.hs.config.max_mau_value = 50 user_id = "@user:server" - active = yield self.store.is_user_monthly_active(user_id) + active = yield self.store._user_last_seen_monthly_active(user_id) self.assertFalse(active) yield self.store.insert_client_ip( @@ -112,5 +112,5 @@ class ClientIpStoreTestCase(tests.unittest.TestCase): yield self.store.insert_client_ip( user_id, "access_token", "ip", "user_agent", "device_id", ) - active = yield self.store.is_user_monthly_active(user_id) + active = yield self.store._user_last_seen_monthly_active(user_id) self.assertTrue(active) diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py index c32109ecc..0bfd24a7f 100644 --- a/tests/storage/test_monthly_active_users.py +++ b/tests/storage/test_monthly_active_users.py @@ -40,18 +40,18 @@ class MonthlyActiveUsersTestCase(tests.unittest.TestCase): self.assertEqual(1, count) @defer.inlineCallbacks - def test_is_user_monthly_active(self): + def test__user_last_seen_monthly_active(self): user_id1 = "@user1:server" user_id2 = "@user2:server" user_id3 = "@user3:server" - result = yield self.store.is_user_monthly_active(user_id1) - self.assertFalse(result) + result = yield self.store._user_last_seen_monthly_active(user_id1) + self.assertFalse(result == 0) yield self.store.upsert_monthly_active_user(user_id1) yield self.store.upsert_monthly_active_user(user_id2) - result = yield self.store.is_user_monthly_active(user_id1) - self.assertTrue(result) - result = yield self.store.is_user_monthly_active(user_id3) - self.assertFalse(result) + result = yield self.store._user_last_seen_monthly_active(user_id1) + self.assertTrue(result > 0) + result = yield self.store._user_last_seen_monthly_active(user_id3) + self.assertFalse(result == 0) @defer.inlineCallbacks def test_reap_monthly_active_users(self): From 886be75ad1bc60e016611b453b9644e8db17a9f1 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Fri, 3 Aug 2018 22:29:03 +0100 Subject: [PATCH 20/26] bug fixes --- synapse/handlers/auth.py | 15 ++------------- synapse/handlers/register.py | 8 ++++---- synapse/storage/monthly_active_users.py | 3 +-- tests/api/test_auth.py | 10 +++------- tests/handlers/test_register.py | 1 - 5 files changed, 10 insertions(+), 27 deletions(-) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 8f9cff92e..7ea8ce9f9 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -520,7 +520,7 @@ class AuthHandler(BaseHandler): """ logger.info("Logging in user %s on device %s", user_id, device_id) access_token = yield self.issue_access_token(user_id, device_id) - yield self._check_mau_limits() + yield self.auth.check_auth_blocking() # the device *should* have been registered before we got here; however, # it's possible we raced against a DELETE operation. The thing we @@ -734,7 +734,7 @@ class AuthHandler(BaseHandler): @defer.inlineCallbacks def validate_short_term_login_token_and_get_user_id(self, login_token): - yield self._check_mau_limits() + yield self.auth.check_auth_blocking() auth_api = self.hs.get_auth() user_id = None try: @@ -907,17 +907,6 @@ class AuthHandler(BaseHandler): else: return defer.succeed(False) - @defer.inlineCallbacks - def _check_mau_limits(self): - """ - Ensure that if mau blocking is enabled that invalid users cannot - log in. - """ - error = AuthError( - 403, "Monthly Active User limits exceeded", errcode=Codes.MAU_LIMIT_EXCEEDED - ) - yield self.auth.check_auth_blocking(error) - @attr.s class MacaroonGenerator(object): diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 706ed8c29..8cf0a36a8 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -540,7 +540,7 @@ class RegistrationHandler(BaseHandler): Do not accept registrations if monthly active user limits exceeded and limiting is enabled """ - error = RegistrationError( - 403, "Monthly Active User limits exceeded", errcode=Codes.MAU_LIMIT_EXCEEDED - ) - yield self.auth.check_auth_blocking(error) + try: + yield self.auth.check_auth_blocking() + except AuthError as e: + raise RegistrationError(e.code, e.message, e.errcode) diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index 6def6830d..135837507 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -54,7 +54,7 @@ class MonthlyActiveUsersStore(SQLBaseStore): """ txn.execute(sql, (self.hs.config.max_mau_value,)) - res = yield self.runInteraction("reap_monthly_active_users", _reap_users) + yield self.runInteraction("reap_monthly_active_users", _reap_users) # It seems poor to invalidate the whole cache, Postgres supports # 'Returning' which would allow me to invalidate only the # specific users, but sqlite has no way to do this and instead @@ -64,7 +64,6 @@ class MonthlyActiveUsersStore(SQLBaseStore): # something about it if and when the perf becomes significant self._user_last_seen_monthly_active.invalidate_all() self.get_monthly_active_count.invalidate_all() - return res @cached(num_args=0) def get_monthly_active_count(self): diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py index 54bdf2866..e963963c7 100644 --- a/tests/api/test_auth.py +++ b/tests/api/test_auth.py @@ -452,12 +452,8 @@ class AuthTestCase(unittest.TestCase): lots_of_users = 100 small_number_of_users = 1 - error = AuthError( - 403, "MAU Limit Exceeded", errcode=Codes.MAU_LIMIT_EXCEEDED - ) - # Ensure no error thrown - yield self.auth.check_auth_blocking(error) + yield self.auth.check_auth_blocking() self.hs.config.limit_usage_by_mau = True @@ -466,10 +462,10 @@ class AuthTestCase(unittest.TestCase): ) with self.assertRaises(AuthError): - yield self.auth.check_auth_blocking(error) + yield self.auth.check_auth_blocking() # Ensure does not throw an error self.store.get_monthly_active_count = Mock( return_value=defer.succeed(small_number_of_users) ) - yield self.auth.check_auth_blocking(error) + yield self.auth.check_auth_blocking() diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py index 6b5b8b377..4ea59a58d 100644 --- a/tests/handlers/test_register.py +++ b/tests/handlers/test_register.py @@ -104,7 +104,6 @@ class RegistrationTestCase(unittest.TestCase): self.store.get_monthly_active_count = Mock( return_value=defer.succeed(self.lots_of_users) ) - with self.assertRaises(RegistrationError): yield self.handler.get_or_create_user("requester", 'b', "display_name") From d08296f9f278f39372bb5db99266d627614ecc96 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Fri, 3 Aug 2018 23:00:16 +0100 Subject: [PATCH 21/26] remove unused import --- tests/api/test_auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py index e963963c7..5dc339830 100644 --- a/tests/api/test_auth.py +++ b/tests/api/test_auth.py @@ -21,7 +21,7 @@ from twisted.internet import defer import synapse.handlers.auth from synapse.api.auth import Auth -from synapse.api.errors import AuthError, Codes +from synapse.api.errors import AuthError from synapse.types import UserID from tests import unittest From e40a510fbff931b5e6b295351847b95fb8c69e71 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Fri, 3 Aug 2018 23:19:13 +0100 Subject: [PATCH 22/26] py3 fix --- synapse/handlers/register.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 8cf0a36a8..0e16bbe0e 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -543,4 +543,4 @@ class RegistrationHandler(BaseHandler): try: yield self.auth.check_auth_blocking() except AuthError as e: - raise RegistrationError(e.code, e.message, e.errcode) + raise RegistrationError(e.code, str(e), e.errcode) From 16d78be315fd71d8bdb39af53317b3f7dd1dbb32 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Mon, 6 Aug 2018 17:51:15 +0100 Subject: [PATCH 23/26] make use of _simple_select_one_onecol, improved comments --- synapse/storage/monthly_active_users.py | 19 +++++++++++-------- .../schema/delta/51/monthly_active_users.sql | 4 ++++ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index 135837507..c28c698c6 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -15,7 +15,7 @@ from twisted.internet import defer -from synapse.util.caches.descriptors import cached, cachedInlineCallbacks +from synapse.util.caches.descriptors import cached from ._base import SQLBaseStore @@ -104,7 +104,7 @@ class MonthlyActiveUsersStore(SQLBaseStore): self._user_last_seen_monthly_active.invalidate((user_id,)) self.get_monthly_active_count.invalidate(()) - @cachedInlineCallbacks(num_args=1) + @cached(num_args=1) def _user_last_seen_monthly_active(self, user_id): """ Checks if a given user is part of the monthly active user group @@ -114,18 +114,16 @@ class MonthlyActiveUsersStore(SQLBaseStore): int : timestamp since last seen, None if never seen """ - result = yield self._simple_select_onecol( + + return(self._simple_select_one_onecol( table="monthly_active_users", keyvalues={ "user_id": user_id, }, retcol="timestamp", + allow_none=True, desc="_user_last_seen_monthly_active", - ) - timestamp = None - if len(result) > 0: - timestamp = result[0] - defer.returnValue(timestamp) + )) @defer.inlineCallbacks def populate_monthly_active_users(self, user_id): @@ -140,6 +138,11 @@ class MonthlyActiveUsersStore(SQLBaseStore): last_seen_timestamp = yield self._user_last_seen_monthly_active(user_id) now = self.hs.get_clock().time_msec() + # We want to reduce to the total number of db writes, and are happy + # to trade accuracy of timestamp in order to lighten load. This means + # We always insert new users (where MAU threshold has not been reached), + # but only update if we have not previously seen the user for + # LAST_SEEN_GRANULARITY ms if last_seen_timestamp is None: count = yield self.get_monthly_active_count() if count < self.hs.config.max_mau_value: diff --git a/synapse/storage/schema/delta/51/monthly_active_users.sql b/synapse/storage/schema/delta/51/monthly_active_users.sql index f2b6d3e31..10aac90ce 100644 --- a/synapse/storage/schema/delta/51/monthly_active_users.sql +++ b/synapse/storage/schema/delta/51/monthly_active_users.sql @@ -16,6 +16,10 @@ -- a table of monthly active users, for use where blocking based on mau limits CREATE TABLE monthly_active_users ( user_id TEXT NOT NULL, + -- Last time we saw the user. Not guaranteed to be accurate due to rate limiting + -- on updates, Granularity of updates governed by + -- syanpse.storage.monthly_active_users.LAST_SEEN_GRANULARITY + -- Measured in ms since epoch. timestamp BIGINT NOT NULL ); From 1911c037cbace498c0827eb5abe6ed3f64acb671 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Mon, 6 Aug 2018 18:01:46 +0100 Subject: [PATCH 24/26] update comments to reflect new sig --- synapse/api/auth.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index d8ebbbc6e..91b23ff1d 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -778,10 +778,7 @@ class Auth(object): def check_auth_blocking(self): """Checks if the user should be rejected for some external reason, such as monthly active user limiting or global disable flag - Args: - error (Error): The error that should be raised if user is to be - blocked - """ + """ if self.hs.config.limit_usage_by_mau is True: current_mau = yield self.store.get_monthly_active_count() if current_mau >= self.hs.config.max_mau_value: From e54794f5b658992627eab5400b13199128eec0a1 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Mon, 6 Aug 2018 21:55:54 +0100 Subject: [PATCH 25/26] Fix postgres compatibility bug --- synapse/storage/monthly_active_users.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index c28c698c6..abe1e6bb9 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -43,14 +43,25 @@ class MonthlyActiveUsersStore(SQLBaseStore): thirty_days_ago = ( int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30) ) - + # Purge stale users sql = "DELETE FROM monthly_active_users WHERE timestamp < ?" - txn.execute(sql, (thirty_days_ago,)) + + # If MAU user count still exceeds the MAU threshold, then delete on + # a least recently active basis. + # Note it is not possible to write this query using OFFSET due to + # incompatibilities in how sqlite an postgres support the feature. + # sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present + # While Postgres does not require 'LIMIT', but also does not support + # negative LIMIT values. So there is no way to write it that both can + # support sql = """ DELETE FROM monthly_active_users - ORDER BY timestamp desc - LIMIT -1 OFFSET ? + WHERE user_id NOT IN ( + SELECT user_id FROM monthly_active_users + ORDER BY timestamp DESC + LIMIT ? + ) """ txn.execute(sql, (self.hs.config.max_mau_value,)) From 312ae747466f7c60c0148b75446f357263330148 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 8 Aug 2018 13:33:16 +0100 Subject: [PATCH 26/26] typos --- synapse/storage/monthly_active_users.py | 4 ++-- synapse/storage/schema/delta/51/monthly_active_users.sql | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index abe1e6bb9..8b3beaf26 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -50,7 +50,7 @@ class MonthlyActiveUsersStore(SQLBaseStore): # If MAU user count still exceeds the MAU threshold, then delete on # a least recently active basis. # Note it is not possible to write this query using OFFSET due to - # incompatibilities in how sqlite an postgres support the feature. + # incompatibilities in how sqlite and postgres support the feature. # sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present # While Postgres does not require 'LIMIT', but also does not support # negative LIMIT values. So there is no way to write it that both can @@ -78,7 +78,7 @@ class MonthlyActiveUsersStore(SQLBaseStore): @cached(num_args=0) def get_monthly_active_count(self): - """Generates current count of monthly active users.abs + """Generates current count of monthly active users Returns: Defered[int]: Number of current monthly active users diff --git a/synapse/storage/schema/delta/51/monthly_active_users.sql b/synapse/storage/schema/delta/51/monthly_active_users.sql index 10aac90ce..c9d537d5a 100644 --- a/synapse/storage/schema/delta/51/monthly_active_users.sql +++ b/synapse/storage/schema/delta/51/monthly_active_users.sql @@ -18,7 +18,7 @@ CREATE TABLE monthly_active_users ( user_id TEXT NOT NULL, -- Last time we saw the user. Not guaranteed to be accurate due to rate limiting -- on updates, Granularity of updates governed by - -- syanpse.storage.monthly_active_users.LAST_SEEN_GRANULARITY + -- synapse.storage.monthly_active_users.LAST_SEEN_GRANULARITY -- Measured in ms since epoch. timestamp BIGINT NOT NULL );