Share some metrics between the Prometheus exporter and the phone home stats (#13671)

This commit is contained in:
Brendan Abolivier 2022-09-05 12:26:43 +02:00 committed by GitHub
parent ad7fc8e92f
commit 898fef2789
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 146 additions and 4 deletions

1
changelog.d/13671.misc Normal file
View file

@ -0,0 +1 @@
Introduce a `CommonUsageMetrics` class to share some usage metrics between the Prometheus exporter and the phone home stats.

View file

@ -511,9 +511,10 @@ async def start(hs: "HomeServer") -> None:
setup_sentry(hs) setup_sentry(hs)
setup_sdnotify(hs) setup_sdnotify(hs)
# If background tasks are running on the main process, start collecting the # If background tasks are running on the main process or this is the worker in
# phone home stats. # charge of them, start collecting the phone home stats and shared usage metrics.
if hs.config.worker.run_background_tasks: if hs.config.worker.run_background_tasks:
await hs.get_common_usage_metrics_manager().setup()
start_phone_stats_home(hs) start_phone_stats_home(hs)
# We now freeze all allocated objects in the hopes that (almost) # We now freeze all allocated objects in the hopes that (almost)

View file

@ -51,6 +51,16 @@ async def phone_stats_home(
stats: JsonDict, stats: JsonDict,
stats_process: List[Tuple[int, "resource.struct_rusage"]] = _stats_process, stats_process: List[Tuple[int, "resource.struct_rusage"]] = _stats_process,
) -> None: ) -> None:
"""Collect usage statistics and send them to the configured endpoint.
Args:
hs: the HomeServer object to use for gathering usage data.
stats: the dict in which to store the statistics sent to the configured
endpoint. Mostly used in tests to figure out the data that is supposed to
be sent.
stats_process: statistics about resource usage of the process.
"""
logger.info("Gathering stats for reporting") logger.info("Gathering stats for reporting")
now = int(hs.get_clock().time()) now = int(hs.get_clock().time())
# Ensure the homeserver has started. # Ensure the homeserver has started.
@ -83,6 +93,7 @@ async def phone_stats_home(
# #
store = hs.get_datastores().main store = hs.get_datastores().main
common_metrics = await hs.get_common_usage_metrics_manager().get_metrics()
stats["homeserver"] = hs.config.server.server_name stats["homeserver"] = hs.config.server.server_name
stats["server_context"] = hs.config.server.server_context stats["server_context"] = hs.config.server.server_context
@ -104,7 +115,7 @@ async def phone_stats_home(
room_count = await store.get_room_count() room_count = await store.get_room_count()
stats["total_room_count"] = room_count stats["total_room_count"] = room_count
stats["daily_active_users"] = await store.count_daily_users() stats["daily_active_users"] = common_metrics.daily_active_users
stats["monthly_active_users"] = await store.count_monthly_users() stats["monthly_active_users"] = await store.count_monthly_users()
daily_active_e2ee_rooms = await store.count_daily_active_e2ee_rooms() daily_active_e2ee_rooms = await store.count_daily_active_e2ee_rooms()
stats["daily_active_e2ee_rooms"] = daily_active_e2ee_rooms stats["daily_active_e2ee_rooms"] = daily_active_e2ee_rooms

View file

@ -0,0 +1,79 @@
# Copyright 2022 The Matrix.org Foundation C.I.C
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING
import attr
from synapse.metrics.background_process_metrics import run_as_background_process
if TYPE_CHECKING:
from synapse.server import HomeServer
from prometheus_client import Gauge
# Gauge to expose daily active users metrics
current_dau_gauge = Gauge(
"synapse_admin_daily_active_users",
"Current daily active users count",
)
@attr.s(auto_attribs=True)
class CommonUsageMetrics:
"""Usage metrics shared between the phone home stats and the prometheus exporter."""
daily_active_users: int
class CommonUsageMetricsManager:
"""Collects common usage metrics."""
def __init__(self, hs: "HomeServer") -> None:
self._store = hs.get_datastores().main
self._clock = hs.get_clock()
async def get_metrics(self) -> CommonUsageMetrics:
"""Get the CommonUsageMetrics object. If no collection has happened yet, do it
before returning the metrics.
Returns:
The CommonUsageMetrics object to read common metrics from.
"""
return await self._collect()
async def setup(self) -> None:
"""Keep the gauges for common usage metrics up to date."""
await self._update_gauges()
self._clock.looping_call(
run_as_background_process,
5 * 60 * 1000,
desc="common_usage_metrics_update_gauges",
func=self._update_gauges,
)
async def _collect(self) -> CommonUsageMetrics:
"""Collect the common metrics and either create the CommonUsageMetrics object to
use if it doesn't exist yet, or update it.
"""
dau_count = await self._store.count_daily_users()
return CommonUsageMetrics(
daily_active_users=dau_count,
)
async def _update_gauges(self) -> None:
"""Update the Prometheus gauges."""
metrics = await self._collect()
current_dau_gauge.set(float(metrics.daily_active_users))

View file

@ -105,6 +105,7 @@ from synapse.handlers.typing import FollowerTypingHandler, TypingWriterHandler
from synapse.handlers.user_directory import UserDirectoryHandler from synapse.handlers.user_directory import UserDirectoryHandler
from synapse.http.client import InsecureInterceptableContextFactory, SimpleHttpClient from synapse.http.client import InsecureInterceptableContextFactory, SimpleHttpClient
from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.http.matrixfederationclient import MatrixFederationHttpClient
from synapse.metrics.common_usage_metrics import CommonUsageMetricsManager
from synapse.module_api import ModuleApi from synapse.module_api import ModuleApi
from synapse.notifier import Notifier from synapse.notifier import Notifier
from synapse.push.bulk_push_rule_evaluator import BulkPushRuleEvaluator from synapse.push.bulk_push_rule_evaluator import BulkPushRuleEvaluator
@ -829,3 +830,8 @@ class HomeServer(metaclass=abc.ABCMeta):
self.config.ratelimiting.rc_message, self.config.ratelimiting.rc_message,
self.config.ratelimiting.rc_admin_redaction, self.config.ratelimiting.rc_admin_redaction,
) )
@cache_in_self
def get_common_usage_metrics_manager(self) -> CommonUsageMetricsManager:
"""Usage metrics shared between phone home stats and the prometheus exporter."""
return CommonUsageMetricsManager(self)

View file

@ -15,8 +15,14 @@
import resource import resource
from unittest import mock from unittest import mock
from twisted.test.proto_helpers import MemoryReactor
from synapse.app.phone_stats_home import phone_stats_home from synapse.app.phone_stats_home import phone_stats_home
from synapse.rest import admin
from synapse.rest.client import login, sync
from synapse.server import HomeServer
from synapse.types import JsonDict from synapse.types import JsonDict
from synapse.util import Clock
from tests.unittest import HomeserverTestCase from tests.unittest import HomeserverTestCase
@ -47,5 +53,43 @@ class PhoneHomeStatsTestCase(HomeserverTestCase):
stats: JsonDict = {} stats: JsonDict = {}
self.reactor.advance(1) self.reactor.advance(1)
# `old_resource` has type `Mock` instead of `struct_rusage` # `old_resource` has type `Mock` instead of `struct_rusage`
self.get_success(phone_stats_home(self.hs, stats, past_stats)) # type: ignore[arg-type] self.get_success(
phone_stats_home(self.hs, stats, past_stats) # type: ignore[arg-type]
)
self.assertApproximates(stats["cpu_average"], 100, tolerance=2.5) self.assertApproximates(stats["cpu_average"], 100, tolerance=2.5)
class CommonMetricsTestCase(HomeserverTestCase):
servlets = [
admin.register_servlets,
login.register_servlets,
sync.register_servlets,
]
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
self.metrics_manager = hs.get_common_usage_metrics_manager()
self.get_success(self.metrics_manager.setup())
def test_dau(self) -> None:
"""Tests that the daily active users count is correctly updated."""
self._assert_metric_value("daily_active_users", 0)
self.register_user("user", "password")
tok = self.login("user", "password")
self.make_request("GET", "/sync", access_token=tok)
self.pump(1)
self._assert_metric_value("daily_active_users", 1)
def _assert_metric_value(self, metric_name: str, expected: int) -> None:
"""Compare the given value to the current value of the common usage metric with
the given name.
Args:
metric_name: The metric to look up.
expected: Expected value for this metric.
"""
metrics = self.get_success(self.metrics_manager.get_metrics())
value = getattr(metrics, metric_name)
self.assertEqual(value, expected)