From 898fef2789c9b1a20ef53c7d588f536f51f0fe2f Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 5 Sep 2022 12:26:43 +0200 Subject: [PATCH] Share some metrics between the Prometheus exporter and the phone home stats (#13671) --- changelog.d/13671.misc | 1 + synapse/app/_base.py | 5 +- synapse/app/phone_stats_home.py | 13 +++- synapse/metrics/common_usage_metrics.py | 79 +++++++++++++++++++++++++ synapse/server.py | 6 ++ tests/test_phone_home.py | 46 +++++++++++++- 6 files changed, 146 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13671.misc create mode 100644 synapse/metrics/common_usage_metrics.py diff --git a/changelog.d/13671.misc b/changelog.d/13671.misc new file mode 100644 index 000000000..f1c62b5b1 --- /dev/null +++ b/changelog.d/13671.misc @@ -0,0 +1 @@ +Introduce a `CommonUsageMetrics` class to share some usage metrics between the Prometheus exporter and the phone home stats. diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 4742435d3..9a24bed0a 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -511,9 +511,10 @@ async def start(hs: "HomeServer") -> None: setup_sentry(hs) setup_sdnotify(hs) - # If background tasks are running on the main process, start collecting the - # phone home stats. + # If background tasks are running on the main process or this is the worker in + # charge of them, start collecting the phone home stats and shared usage metrics. if hs.config.worker.run_background_tasks: + await hs.get_common_usage_metrics_manager().setup() start_phone_stats_home(hs) # We now freeze all allocated objects in the hopes that (almost) diff --git a/synapse/app/phone_stats_home.py b/synapse/app/phone_stats_home.py index 40dbdace8..51c8d1571 100644 --- a/synapse/app/phone_stats_home.py +++ b/synapse/app/phone_stats_home.py @@ -51,6 +51,16 @@ async def phone_stats_home( stats: JsonDict, stats_process: List[Tuple[int, "resource.struct_rusage"]] = _stats_process, ) -> None: + """Collect usage statistics and send them to the configured endpoint. + + Args: + hs: the HomeServer object to use for gathering usage data. + stats: the dict in which to store the statistics sent to the configured + endpoint. Mostly used in tests to figure out the data that is supposed to + be sent. + stats_process: statistics about resource usage of the process. + """ + logger.info("Gathering stats for reporting") now = int(hs.get_clock().time()) # Ensure the homeserver has started. @@ -83,6 +93,7 @@ async def phone_stats_home( # store = hs.get_datastores().main + common_metrics = await hs.get_common_usage_metrics_manager().get_metrics() stats["homeserver"] = hs.config.server.server_name stats["server_context"] = hs.config.server.server_context @@ -104,7 +115,7 @@ async def phone_stats_home( room_count = await store.get_room_count() stats["total_room_count"] = room_count - stats["daily_active_users"] = await store.count_daily_users() + stats["daily_active_users"] = common_metrics.daily_active_users stats["monthly_active_users"] = await store.count_monthly_users() daily_active_e2ee_rooms = await store.count_daily_active_e2ee_rooms() stats["daily_active_e2ee_rooms"] = daily_active_e2ee_rooms diff --git a/synapse/metrics/common_usage_metrics.py b/synapse/metrics/common_usage_metrics.py new file mode 100644 index 000000000..0a22ea3d9 --- /dev/null +++ b/synapse/metrics/common_usage_metrics.py @@ -0,0 +1,79 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import TYPE_CHECKING + +import attr + +from synapse.metrics.background_process_metrics import run_as_background_process + +if TYPE_CHECKING: + from synapse.server import HomeServer + +from prometheus_client import Gauge + +# Gauge to expose daily active users metrics +current_dau_gauge = Gauge( + "synapse_admin_daily_active_users", + "Current daily active users count", +) + + +@attr.s(auto_attribs=True) +class CommonUsageMetrics: + """Usage metrics shared between the phone home stats and the prometheus exporter.""" + + daily_active_users: int + + +class CommonUsageMetricsManager: + """Collects common usage metrics.""" + + def __init__(self, hs: "HomeServer") -> None: + self._store = hs.get_datastores().main + self._clock = hs.get_clock() + + async def get_metrics(self) -> CommonUsageMetrics: + """Get the CommonUsageMetrics object. If no collection has happened yet, do it + before returning the metrics. + + Returns: + The CommonUsageMetrics object to read common metrics from. + """ + return await self._collect() + + async def setup(self) -> None: + """Keep the gauges for common usage metrics up to date.""" + await self._update_gauges() + self._clock.looping_call( + run_as_background_process, + 5 * 60 * 1000, + desc="common_usage_metrics_update_gauges", + func=self._update_gauges, + ) + + async def _collect(self) -> CommonUsageMetrics: + """Collect the common metrics and either create the CommonUsageMetrics object to + use if it doesn't exist yet, or update it. + """ + dau_count = await self._store.count_daily_users() + + return CommonUsageMetrics( + daily_active_users=dau_count, + ) + + async def _update_gauges(self) -> None: + """Update the Prometheus gauges.""" + metrics = await self._collect() + + current_dau_gauge.set(float(metrics.daily_active_users)) diff --git a/synapse/server.py b/synapse/server.py index c2e55bf0b..5a99c0b34 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -105,6 +105,7 @@ from synapse.handlers.typing import FollowerTypingHandler, TypingWriterHandler from synapse.handlers.user_directory import UserDirectoryHandler from synapse.http.client import InsecureInterceptableContextFactory, SimpleHttpClient from synapse.http.matrixfederationclient import MatrixFederationHttpClient +from synapse.metrics.common_usage_metrics import CommonUsageMetricsManager from synapse.module_api import ModuleApi from synapse.notifier import Notifier from synapse.push.bulk_push_rule_evaluator import BulkPushRuleEvaluator @@ -829,3 +830,8 @@ class HomeServer(metaclass=abc.ABCMeta): self.config.ratelimiting.rc_message, self.config.ratelimiting.rc_admin_redaction, ) + + @cache_in_self + def get_common_usage_metrics_manager(self) -> CommonUsageMetricsManager: + """Usage metrics shared between phone home stats and the prometheus exporter.""" + return CommonUsageMetricsManager(self) diff --git a/tests/test_phone_home.py b/tests/test_phone_home.py index b01cae6e5..cc1a98f1c 100644 --- a/tests/test_phone_home.py +++ b/tests/test_phone_home.py @@ -15,8 +15,14 @@ import resource from unittest import mock +from twisted.test.proto_helpers import MemoryReactor + from synapse.app.phone_stats_home import phone_stats_home +from synapse.rest import admin +from synapse.rest.client import login, sync +from synapse.server import HomeServer from synapse.types import JsonDict +from synapse.util import Clock from tests.unittest import HomeserverTestCase @@ -47,5 +53,43 @@ class PhoneHomeStatsTestCase(HomeserverTestCase): stats: JsonDict = {} self.reactor.advance(1) # `old_resource` has type `Mock` instead of `struct_rusage` - self.get_success(phone_stats_home(self.hs, stats, past_stats)) # type: ignore[arg-type] + self.get_success( + phone_stats_home(self.hs, stats, past_stats) # type: ignore[arg-type] + ) self.assertApproximates(stats["cpu_average"], 100, tolerance=2.5) + + +class CommonMetricsTestCase(HomeserverTestCase): + servlets = [ + admin.register_servlets, + login.register_servlets, + sync.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.metrics_manager = hs.get_common_usage_metrics_manager() + self.get_success(self.metrics_manager.setup()) + + def test_dau(self) -> None: + """Tests that the daily active users count is correctly updated.""" + self._assert_metric_value("daily_active_users", 0) + + self.register_user("user", "password") + tok = self.login("user", "password") + self.make_request("GET", "/sync", access_token=tok) + + self.pump(1) + + self._assert_metric_value("daily_active_users", 1) + + def _assert_metric_value(self, metric_name: str, expected: int) -> None: + """Compare the given value to the current value of the common usage metric with + the given name. + + Args: + metric_name: The metric to look up. + expected: Expected value for this metric. + """ + metrics = self.get_success(self.metrics_manager.get_metrics()) + value = getattr(metrics, metric_name) + self.assertEqual(value, expected)