forked from MirrorHub/synapse
replacing portions
This commit is contained in:
parent
c60e0d5e02
commit
df9f72d9e5
23 changed files with 270 additions and 416 deletions
|
@ -57,7 +57,7 @@ class Auth(object):
|
|||
self.TOKEN_NOT_FOUND_HTTP_STATUS = 401
|
||||
|
||||
self.token_cache = LruCache(CACHE_SIZE_FACTOR * 10000)
|
||||
register_cache("token_cache", self.token_cache)
|
||||
register_cache("cache", "token_cache", self.token_cache)
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def check_from_context(self, event, context, do_sig_check=True):
|
||||
|
|
|
@ -32,20 +32,17 @@ from synapse.federation.federation_base import (
|
|||
FederationBase,
|
||||
event_from_pdu_json,
|
||||
)
|
||||
import synapse.metrics
|
||||
from synapse.util import logcontext, unwrapFirstError
|
||||
from synapse.util.caches.expiringcache import ExpiringCache
|
||||
from synapse.util.logcontext import make_deferred_yieldable, run_in_background
|
||||
from synapse.util.logutils import log_function
|
||||
from synapse.util.retryutils import NotRetryingDestination
|
||||
|
||||
from prometheus_client import Counter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# synapse.federation.federation_client is a silly name
|
||||
metrics = synapse.metrics.get_metrics_for("synapse.federation.client")
|
||||
|
||||
sent_queries_counter = metrics.register_counter("sent_queries", labels=["type"])
|
||||
sent_queries_counter = Counter("synapse_federation_client_sent_queries", "", ["type"])
|
||||
|
||||
|
||||
PDU_RETRY_TIME_MS = 1 * 60 * 1000
|
||||
|
@ -108,7 +105,7 @@ class FederationClient(FederationBase):
|
|||
a Deferred which will eventually yield a JSON object from the
|
||||
response
|
||||
"""
|
||||
sent_queries_counter.inc(query_type)
|
||||
sent_queries_counter.labels(query_type).inc()
|
||||
|
||||
return self.transport_layer.make_query(
|
||||
destination, query_type, args, retry_on_dns_fail=retry_on_dns_fail,
|
||||
|
@ -127,7 +124,7 @@ class FederationClient(FederationBase):
|
|||
a Deferred which will eventually yield a JSON object from the
|
||||
response
|
||||
"""
|
||||
sent_queries_counter.inc("client_device_keys")
|
||||
sent_queries_counter.labels("client_device_keys").inc()
|
||||
return self.transport_layer.query_client_keys(
|
||||
destination, content, timeout
|
||||
)
|
||||
|
@ -137,7 +134,7 @@ class FederationClient(FederationBase):
|
|||
"""Query the device keys for a list of user ids hosted on a remote
|
||||
server.
|
||||
"""
|
||||
sent_queries_counter.inc("user_devices")
|
||||
sent_queries_counter.labels("user_devices").inc()
|
||||
return self.transport_layer.query_user_devices(
|
||||
destination, user_id, timeout
|
||||
)
|
||||
|
@ -154,7 +151,7 @@ class FederationClient(FederationBase):
|
|||
a Deferred which will eventually yield a JSON object from the
|
||||
response
|
||||
"""
|
||||
sent_queries_counter.inc("client_one_time_keys")
|
||||
sent_queries_counter.labels("client_one_time_keys").inc()
|
||||
return self.transport_layer.claim_client_keys(
|
||||
destination, content, timeout
|
||||
)
|
||||
|
|
|
@ -27,12 +27,13 @@ from synapse.federation.federation_base import (
|
|||
|
||||
from synapse.federation.persistence import TransactionActions
|
||||
from synapse.federation.units import Edu, Transaction
|
||||
import synapse.metrics
|
||||
from synapse.types import get_domain_from_id
|
||||
from synapse.util import async
|
||||
from synapse.util.caches.response_cache import ResponseCache
|
||||
from synapse.util.logutils import log_function
|
||||
|
||||
from prometheus_client import Counter
|
||||
|
||||
from six import iteritems
|
||||
|
||||
# when processing incoming transactions, we try to handle multiple rooms in
|
||||
|
@ -41,14 +42,11 @@ TRANSACTION_CONCURRENCY_LIMIT = 10
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# synapse.federation.federation_server is a silly name
|
||||
metrics = synapse.metrics.get_metrics_for("synapse.federation.server")
|
||||
received_pdus_counter = Counter("synapse_federation_server_received_pdus", "")
|
||||
|
||||
received_pdus_counter = metrics.register_counter("received_pdus")
|
||||
received_edus_counter = Counter("synapse_federation_server_received_edus", "")
|
||||
|
||||
received_edus_counter = metrics.register_counter("received_edus")
|
||||
|
||||
received_queries_counter = metrics.register_counter("received_queries", labels=["type"])
|
||||
received_queries_counter = Counter("synapse_federation_server_received_queries", "", ["type"])
|
||||
|
||||
|
||||
class FederationServer(FederationBase):
|
||||
|
@ -131,7 +129,7 @@ class FederationServer(FederationBase):
|
|||
|
||||
logger.debug("[%s] Transaction is new", transaction.transaction_id)
|
||||
|
||||
received_pdus_counter.inc_by(len(transaction.pdus))
|
||||
received_pdus_counter.inc(len(transaction.pdus))
|
||||
|
||||
pdus_by_room = {}
|
||||
|
||||
|
@ -292,7 +290,7 @@ class FederationServer(FederationBase):
|
|||
|
||||
@defer.inlineCallbacks
|
||||
def on_query_request(self, query_type, args):
|
||||
received_queries_counter.inc(query_type)
|
||||
received_queries_counter.labels(query_type).inc()
|
||||
resp = yield self.registry.on_query(query_type, args)
|
||||
defer.returnValue((200, resp))
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ from .units import Edu
|
|||
|
||||
from synapse.storage.presence import UserPresenceState
|
||||
from synapse.util.metrics import Measure
|
||||
import synapse.metrics
|
||||
from synapse.metrics import LaterGauge
|
||||
|
||||
from blist import sorteddict
|
||||
from collections import namedtuple
|
||||
|
@ -45,9 +45,6 @@ from six import itervalues, iteritems
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
metrics = synapse.metrics.get_metrics_for(__name__)
|
||||
|
||||
|
||||
class FederationRemoteSendQueue(object):
|
||||
"""A drop in replacement for TransactionQueue"""
|
||||
|
||||
|
@ -77,8 +74,7 @@ class FederationRemoteSendQueue(object):
|
|||
# lambda binds to the queue rather than to the name of the queue which
|
||||
# changes. ARGH.
|
||||
def register(name, queue):
|
||||
metrics.register_callback(
|
||||
queue_name + "_size",
|
||||
LaterGauge("synapse_federation_send_queue_%s_size" % (queue_name,), "",
|
||||
lambda: len(queue),
|
||||
)
|
||||
|
||||
|
|
|
@ -26,23 +26,18 @@ from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter
|
|||
from synapse.util.metrics import measure_func
|
||||
from synapse.handlers.presence import format_user_presence_state, get_interested_remotes
|
||||
import synapse.metrics
|
||||
from synapse.metrics import LaterGauge
|
||||
from synapse.metrics import (
|
||||
sent_edus_counter, sent_transactions_counter, events_processed_counter)
|
||||
|
||||
from prometheus_client import Counter
|
||||
|
||||
import logging
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
metrics = synapse.metrics.get_metrics_for(__name__)
|
||||
|
||||
client_metrics = synapse.metrics.get_metrics_for("synapse.federation.client")
|
||||
sent_pdus_destination_dist = client_metrics.register_distribution(
|
||||
"sent_pdu_destinations"
|
||||
)
|
||||
sent_edus_counter = client_metrics.register_counter("sent_edus")
|
||||
|
||||
sent_transactions_counter = client_metrics.register_counter("sent_transactions")
|
||||
|
||||
events_processed_counter = client_metrics.register_counter("events_processed")
|
||||
sent_pdus_destination_dist = Counter("synapse_federation_client_sent_pdu_destinations", "")
|
||||
|
||||
|
||||
class TransactionQueue(object):
|
||||
|
@ -69,8 +64,7 @@ class TransactionQueue(object):
|
|||
# done
|
||||
self.pending_transactions = {}
|
||||
|
||||
metrics.register_callback(
|
||||
"pending_destinations",
|
||||
LaterGauge("pending_destinations", "", [],
|
||||
lambda: len(self.pending_transactions),
|
||||
)
|
||||
|
||||
|
@ -94,12 +88,12 @@ class TransactionQueue(object):
|
|||
# Map of destination -> (edu_type, key) -> Edu
|
||||
self.pending_edus_keyed_by_dest = edus_keyed = {}
|
||||
|
||||
metrics.register_callback(
|
||||
"pending_pdus",
|
||||
LaterGauge(
|
||||
"pending_pdus", "", [],
|
||||
lambda: sum(map(len, pdus.values())),
|
||||
)
|
||||
metrics.register_callback(
|
||||
"pending_edus",
|
||||
LaterGauge(
|
||||
"pending_edus", "", [],
|
||||
lambda: (
|
||||
sum(map(len, edus.values()))
|
||||
+ sum(map(len, presence.values()))
|
||||
|
@ -241,18 +235,15 @@ class TransactionQueue(object):
|
|||
now = self.clock.time_msec()
|
||||
ts = yield self.store.get_received_ts(events[-1].event_id)
|
||||
|
||||
synapse.metrics.event_processing_lag.set(
|
||||
now - ts, "federation_sender",
|
||||
)
|
||||
synapse.metrics.event_processing_last_ts.set(
|
||||
ts, "federation_sender",
|
||||
)
|
||||
synapse.metrics.event_processing_lag.labels(
|
||||
"federation_sender").set(now - ts)
|
||||
synapse.metrics.event_processing_last_ts.labels(
|
||||
"federation_sender").set(ts)
|
||||
|
||||
events_processed_counter.inc_by(len(events))
|
||||
events_processed_counter.inc(len(events))
|
||||
|
||||
synapse.metrics.event_processing_positions.set(
|
||||
next_token, "federation_sender",
|
||||
)
|
||||
synapse.metrics.event_processing_positions.labels(
|
||||
"federation_sender").set(next_token)
|
||||
|
||||
finally:
|
||||
self._is_processing = False
|
||||
|
@ -275,7 +266,7 @@ class TransactionQueue(object):
|
|||
if not destinations:
|
||||
return
|
||||
|
||||
sent_pdus_destination_dist.inc_by(len(destinations))
|
||||
sent_pdus_destination_dist.inc(len(destinations))
|
||||
|
||||
for destination in destinations:
|
||||
self.pending_pdus_by_dest.setdefault(destination, []).append(
|
||||
|
|
|
@ -21,14 +21,13 @@ from synapse.util.metrics import Measure
|
|||
from synapse.util.logcontext import (
|
||||
make_deferred_yieldable, run_in_background,
|
||||
)
|
||||
from prometheus_client import Counter
|
||||
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
metrics = synapse.metrics.get_metrics_for(__name__)
|
||||
|
||||
events_processed_counter = metrics.register_counter("events_processed")
|
||||
events_processed_counter = Counter("synapse_handlers_appservice_events_processed", "")
|
||||
|
||||
|
||||
def log_failure(failure):
|
||||
|
@ -128,18 +127,12 @@ class ApplicationServicesHandler(object):
|
|||
now = self.clock.time_msec()
|
||||
ts = yield self.store.get_received_ts(events[-1].event_id)
|
||||
|
||||
synapse.metrics.event_processing_positions.set(
|
||||
upper_bound, "appservice_sender",
|
||||
)
|
||||
synapse.metrics.event_processing_positions.labels("appservice_sender").set(upper_bound)
|
||||
|
||||
events_processed_counter.inc_by(len(events))
|
||||
events_processed_counter.inc(len(events))
|
||||
|
||||
synapse.metrics.event_processing_lag.set(
|
||||
now - ts, "appservice_sender",
|
||||
)
|
||||
synapse.metrics.event_processing_last_ts.set(
|
||||
ts, "appservice_sender",
|
||||
)
|
||||
synapse.metrics.event_processing_lag.labels("appservice_sender").set(now - ts)
|
||||
synapse.metrics.event_processing_last_ts.labels("appservice_sender").set(ts)
|
||||
finally:
|
||||
self.is_processing = False
|
||||
|
||||
|
|
|
@ -36,27 +36,27 @@ from synapse.util.logutils import log_function
|
|||
from synapse.util.metrics import Measure
|
||||
from synapse.util.wheel_timer import WheelTimer
|
||||
from synapse.types import UserID, get_domain_from_id
|
||||
import synapse.metrics
|
||||
from synapse.metrics import LaterGauge
|
||||
|
||||
import logging
|
||||
|
||||
from prometheus_client import Counter
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
metrics = synapse.metrics.get_metrics_for(__name__)
|
||||
|
||||
notified_presence_counter = metrics.register_counter("notified_presence")
|
||||
federation_presence_out_counter = metrics.register_counter("federation_presence_out")
|
||||
presence_updates_counter = metrics.register_counter("presence_updates")
|
||||
timers_fired_counter = metrics.register_counter("timers_fired")
|
||||
federation_presence_counter = metrics.register_counter("federation_presence")
|
||||
bump_active_time_counter = metrics.register_counter("bump_active_time")
|
||||
notified_presence_counter = Counter("synapse_handler_presence_notified_presence", "")
|
||||
federation_presence_out_counter = Counter("synapse_handler_presence_federation_presence_out", "")
|
||||
presence_updates_counter = Counter("synapse_handler_presence_presence_updates", "")
|
||||
timers_fired_counter = Counter("synapse_handler_presence_timers_fired", "")
|
||||
federation_presence_counter = Counter("synapse_handler_presence_federation_presence", "")
|
||||
bump_active_time_counter = Counter("synapse_handler_presence_bump_active_time", "")
|
||||
|
||||
get_updates_counter = metrics.register_counter("get_updates", labels=["type"])
|
||||
get_updates_counter = Counter("synapse_handler_presence_get_updates", "", ["type"])
|
||||
|
||||
notify_reason_counter = metrics.register_counter("notify_reason", labels=["reason"])
|
||||
state_transition_counter = metrics.register_counter(
|
||||
"state_transition", labels=["from", "to"]
|
||||
notify_reason_counter = Counter("synapse_handler_presence_notify_reason", "", ["reason"])
|
||||
state_transition_counter = Counter("synapse_handler_presence_state_transition", "", ["from", "to"]
|
||||
)
|
||||
|
||||
|
||||
|
@ -137,9 +137,9 @@ class PresenceHandler(object):
|
|||
for state in active_presence
|
||||
}
|
||||
|
||||
metrics.register_callback(
|
||||
"user_to_current_state_size", lambda: len(self.user_to_current_state)
|
||||
)
|
||||
LaterGauge(
|
||||
"user_to_current_state_size", "", [], lambda: len(self.user_to_current_state)
|
||||
).register()
|
||||
|
||||
now = self.clock.time_msec()
|
||||
for state in active_presence:
|
||||
|
@ -208,7 +208,7 @@ class PresenceHandler(object):
|
|||
60 * 1000,
|
||||
)
|
||||
|
||||
metrics.register_callback("wheel_timer_size", lambda: len(self.wheel_timer))
|
||||
LaterGauge("wheel_timer_size", "", [], lambda: len(self.wheel_timer)).register()
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def _on_shutdown(self):
|
||||
|
@ -311,10 +311,10 @@ class PresenceHandler(object):
|
|||
|
||||
# TODO: We should probably ensure there are no races hereafter
|
||||
|
||||
presence_updates_counter.inc_by(len(new_states))
|
||||
presence_updates_counter.inc(len(new_states))
|
||||
|
||||
if to_notify:
|
||||
notified_presence_counter.inc_by(len(to_notify))
|
||||
notified_presence_counter.inc(len(to_notify))
|
||||
yield self._persist_and_notify(to_notify.values())
|
||||
|
||||
self.unpersisted_users_changes |= set(s.user_id for s in new_states)
|
||||
|
@ -325,7 +325,7 @@ class PresenceHandler(object):
|
|||
if user_id not in to_notify
|
||||
}
|
||||
if to_federation_ping:
|
||||
federation_presence_out_counter.inc_by(len(to_federation_ping))
|
||||
federation_presence_out_counter.inc(len(to_federation_ping))
|
||||
|
||||
self._push_to_remotes(to_federation_ping.values())
|
||||
|
||||
|
@ -363,7 +363,7 @@ class PresenceHandler(object):
|
|||
for user_id in users_to_check
|
||||
]
|
||||
|
||||
timers_fired_counter.inc_by(len(states))
|
||||
timers_fired_counter.inc(len(states))
|
||||
|
||||
changes = handle_timeouts(
|
||||
states,
|
||||
|
@ -707,7 +707,7 @@ class PresenceHandler(object):
|
|||
updates.append(prev_state.copy_and_replace(**new_fields))
|
||||
|
||||
if updates:
|
||||
federation_presence_counter.inc_by(len(updates))
|
||||
federation_presence_counter.inc(len(updates))
|
||||
yield self._update_states(updates)
|
||||
|
||||
@defer.inlineCallbacks
|
||||
|
@ -982,28 +982,28 @@ def should_notify(old_state, new_state):
|
|||
return False
|
||||
|
||||
if old_state.status_msg != new_state.status_msg:
|
||||
notify_reason_counter.inc("status_msg_change")
|
||||
notify_reason_counter.labels("status_msg_change").inc()
|
||||
return True
|
||||
|
||||
if old_state.state != new_state.state:
|
||||
notify_reason_counter.inc("state_change")
|
||||
state_transition_counter.inc(old_state.state, new_state.state)
|
||||
notify_reason_counter.labels("state_change").inc()
|
||||
state_transition_counter.labels(old_state.state, new_state.state).inc()
|
||||
return True
|
||||
|
||||
if old_state.state == PresenceState.ONLINE:
|
||||
if new_state.currently_active != old_state.currently_active:
|
||||
notify_reason_counter.inc("current_active_change")
|
||||
notify_reason_counter.labels("current_active_change").inc()
|
||||
return True
|
||||
|
||||
if new_state.last_active_ts - old_state.last_active_ts > LAST_ACTIVE_GRANULARITY:
|
||||
# Only notify about last active bumps if we're not currently acive
|
||||
if not new_state.currently_active:
|
||||
notify_reason_counter.inc("last_active_change_online")
|
||||
notify_reason_counter.labels("last_active_change_online").inc()
|
||||
return True
|
||||
|
||||
elif new_state.last_active_ts - old_state.last_active_ts > LAST_ACTIVE_GRANULARITY:
|
||||
# Always notify for a transition where last active gets bumped.
|
||||
notify_reason_counter.inc("last_active_change_not_online")
|
||||
notify_reason_counter.labels("last_active_change_not_online").inc()
|
||||
return True
|
||||
|
||||
return False
|
||||
|
@ -1077,14 +1077,14 @@ class PresenceEventSource(object):
|
|||
if changed is not None and len(changed) < 500:
|
||||
# For small deltas, its quicker to get all changes and then
|
||||
# work out if we share a room or they're in our presence list
|
||||
get_updates_counter.inc("stream")
|
||||
get_updates_counter.labels("stream").inc()
|
||||
for other_user_id in changed:
|
||||
if other_user_id in users_interested_in:
|
||||
user_ids_changed.add(other_user_id)
|
||||
else:
|
||||
# Too many possible updates. Find all users we can see and check
|
||||
# if any of them have changed.
|
||||
get_updates_counter.inc("full")
|
||||
get_updates_counter.labels("full").inc()
|
||||
|
||||
if from_key:
|
||||
user_ids_changed = stream_change_cache.get_entities_changed(
|
||||
|
|
|
@ -23,7 +23,6 @@ from synapse.http import cancelled_to_request_timed_out_error
|
|||
from synapse.util.async import add_timeout_to_deferred
|
||||
from synapse.util.caches import CACHE_SIZE_FACTOR
|
||||
from synapse.util.logcontext import make_deferred_yieldable
|
||||
import synapse.metrics
|
||||
from synapse.http.endpoint import SpiderEndpoint
|
||||
|
||||
from canonicaljson import encode_canonical_json
|
||||
|
@ -42,6 +41,7 @@ from twisted.web._newclient import ResponseDone
|
|||
|
||||
from six import StringIO
|
||||
|
||||
from prometheus_client import Counter
|
||||
import simplejson as json
|
||||
import logging
|
||||
import urllib
|
||||
|
@ -49,16 +49,8 @@ import urllib
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
metrics = synapse.metrics.get_metrics_for(__name__)
|
||||
|
||||
outgoing_requests_counter = metrics.register_counter(
|
||||
"requests",
|
||||
labels=["method"],
|
||||
)
|
||||
incoming_responses_counter = metrics.register_counter(
|
||||
"responses",
|
||||
labels=["method", "code"],
|
||||
)
|
||||
outgoing_requests_counter = Counter("synapse_http_client_requests", "", ["method"])
|
||||
incoming_responses_counter = Counter("synapse_http_client_responses", "", ["method", "code"])
|
||||
|
||||
|
||||
class SimpleHttpClient(object):
|
||||
|
@ -95,7 +87,7 @@ class SimpleHttpClient(object):
|
|||
def request(self, method, uri, *args, **kwargs):
|
||||
# A small wrapper around self.agent.request() so we can easily attach
|
||||
# counters to it
|
||||
outgoing_requests_counter.inc(method)
|
||||
outgoing_requests_counter.labels(method).inc()
|
||||
|
||||
logger.info("Sending request %s %s", method, uri)
|
||||
|
||||
|
@ -109,14 +101,14 @@ class SimpleHttpClient(object):
|
|||
)
|
||||
response = yield make_deferred_yieldable(request_deferred)
|
||||
|
||||
incoming_responses_counter.inc(method, response.code)
|
||||
incoming_responses_counter.labels(method, response.code).inc()
|
||||
logger.info(
|
||||
"Received response to %s %s: %s",
|
||||
method, uri, response.code
|
||||
)
|
||||
defer.returnValue(response)
|
||||
except Exception as e:
|
||||
incoming_responses_counter.inc(method, "ERR")
|
||||
incoming_responses_counter.labels(method, "ERR").inc()
|
||||
logger.info(
|
||||
"Error sending request to %s %s: %s %s",
|
||||
method, uri, type(e).__name__, e.message
|
||||
|
|
|
@ -43,19 +43,13 @@ import sys
|
|||
import urllib
|
||||
from six.moves.urllib import parse as urlparse
|
||||
|
||||
from prometheus_client import Counter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
outbound_logger = logging.getLogger("synapse.http.outbound")
|
||||
|
||||
metrics = synapse.metrics.get_metrics_for(__name__)
|
||||
|
||||
outgoing_requests_counter = metrics.register_counter(
|
||||
"requests",
|
||||
labels=["method"],
|
||||
)
|
||||
incoming_responses_counter = metrics.register_counter(
|
||||
"responses",
|
||||
labels=["method", "code"],
|
||||
)
|
||||
outgoing_requests_counter = Counter("synapse_http_matrixfederationclient_requests", "", ["method"])
|
||||
incoming_responses_counter = Counter("synapse_http_matrixfederationclient_responses", "", ["method", "code"])
|
||||
|
||||
|
||||
MAX_LONG_RETRIES = 10
|
||||
|
|
|
@ -18,15 +18,14 @@ import functools
|
|||
import time
|
||||
import gc
|
||||
import platform
|
||||
import attr
|
||||
|
||||
from prometheus_client import Gauge, Histogram, Counter
|
||||
from prometheus_client.core import (
|
||||
GaugeMetricFamily, CounterMetricFamily, REGISTRY)
|
||||
|
||||
from twisted.internet import reactor
|
||||
|
||||
from .metric import (
|
||||
CounterMetric, CallbackMetric, DistributionMetric, CacheMetric,
|
||||
MemoryUsageMetric, GaugeMetric,
|
||||
)
|
||||
from .process_collector import register_process_collector
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -34,149 +33,94 @@ logger = logging.getLogger(__name__)
|
|||
running_on_pypy = platform.python_implementation() == 'PyPy'
|
||||
all_metrics = []
|
||||
all_collectors = []
|
||||
all_gauges = {}
|
||||
|
||||
@attr.s(hash=True)
|
||||
class LaterGauge(object):
|
||||
|
||||
class Metrics(object):
|
||||
""" A single Metrics object gives a (mutable) slice view of the all_metrics
|
||||
dict, allowing callers to easily register new metrics that are namespaced
|
||||
nicely."""
|
||||
name = attr.ib()
|
||||
desc = attr.ib()
|
||||
labels = attr.ib(hash=False)
|
||||
caller = attr.ib()
|
||||
|
||||
def __init__(self, name):
|
||||
self.name_prefix = name
|
||||
def collect(self):
|
||||
|
||||
def make_subspace(self, name):
|
||||
return Metrics("%s_%s" % (self.name_prefix, name))
|
||||
g = GaugeMetricFamily(self.name, self.desc, self.labels)
|
||||
|
||||
def register_collector(self, func):
|
||||
all_collectors.append(func)
|
||||
|
||||
def _register(self, metric_class, name, *args, **kwargs):
|
||||
full_name = "%s_%s" % (self.name_prefix, name)
|
||||
|
||||
metric = metric_class(full_name, *args, **kwargs)
|
||||
|
||||
all_metrics.append(metric)
|
||||
return metric
|
||||
|
||||
def register_counter(self, *args, **kwargs):
|
||||
"""
|
||||
Returns:
|
||||
CounterMetric
|
||||
"""
|
||||
return self._register(CounterMetric, *args, **kwargs)
|
||||
|
||||
def register_gauge(self, *args, **kwargs):
|
||||
"""
|
||||
Returns:
|
||||
GaugeMetric
|
||||
"""
|
||||
return self._register(GaugeMetric, *args, **kwargs)
|
||||
|
||||
def register_callback(self, *args, **kwargs):
|
||||
"""
|
||||
Returns:
|
||||
CallbackMetric
|
||||
"""
|
||||
return self._register(CallbackMetric, *args, **kwargs)
|
||||
|
||||
def register_distribution(self, *args, **kwargs):
|
||||
"""
|
||||
Returns:
|
||||
DistributionMetric
|
||||
"""
|
||||
return self._register(DistributionMetric, *args, **kwargs)
|
||||
|
||||
def register_cache(self, *args, **kwargs):
|
||||
"""
|
||||
Returns:
|
||||
CacheMetric
|
||||
"""
|
||||
return self._register(CacheMetric, *args, **kwargs)
|
||||
|
||||
|
||||
def register_memory_metrics(hs):
|
||||
try:
|
||||
import psutil
|
||||
process = psutil.Process()
|
||||
process.memory_info().rss
|
||||
except (ImportError, AttributeError):
|
||||
logger.warn(
|
||||
"psutil is not installed or incorrect version."
|
||||
" Disabling memory metrics."
|
||||
)
|
||||
return
|
||||
metric = MemoryUsageMetric(hs, psutil)
|
||||
all_metrics.append(metric)
|
||||
|
||||
|
||||
def get_metrics_for(pkg_name):
|
||||
""" Returns a Metrics instance for conveniently creating metrics
|
||||
namespaced with the given name prefix. """
|
||||
|
||||
# Convert a "package.name" to "package_name" because Prometheus doesn't
|
||||
# let us use . in metric names
|
||||
return Metrics(pkg_name.replace(".", "_"))
|
||||
|
||||
|
||||
def render_all():
|
||||
strs = []
|
||||
|
||||
for collector in all_collectors:
|
||||
collector()
|
||||
|
||||
for metric in all_metrics:
|
||||
try:
|
||||
strs += metric.render()
|
||||
except Exception:
|
||||
strs += ["# FAILED to render"]
|
||||
logger.exception("Failed to render metric")
|
||||
calls = self.caller()
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.err()
|
||||
yield g
|
||||
|
||||
strs.append("") # to generate a final CRLF
|
||||
if isinstance(calls, dict):
|
||||
for k, v in calls.items():
|
||||
g.add_metric(k, v)
|
||||
else:
|
||||
g.add_metric([], calls)
|
||||
|
||||
return "\n".join(strs)
|
||||
yield g
|
||||
|
||||
def register(self):
|
||||
if self.name in all_gauges.keys():
|
||||
REGISTRY.unregister(all_gauges.pop(self.name))
|
||||
|
||||
REGISTRY.register(self)
|
||||
all_gauges[self.name] = self
|
||||
|
||||
|
||||
register_process_collector(get_metrics_for("process"))
|
||||
#
|
||||
# Python GC metrics
|
||||
#
|
||||
|
||||
gc_unreachable = Gauge("python_gc_unreachable_total", "Unreachable GC objects", ["gen"])
|
||||
gc_time = Histogram("python_gc_time", "Time taken to GC (ms)", ["gen"], buckets=[1, 2, 5, 10, 25, 50, 100, 250, 500, 1000])
|
||||
|
||||
python_metrics = get_metrics_for("python")
|
||||
class GCCounts(object):
|
||||
def collect(self):
|
||||
gc_counts = gc.get_count()
|
||||
|
||||
gc_time = python_metrics.register_distribution("gc_time", labels=["gen"])
|
||||
gc_unreachable = python_metrics.register_counter("gc_unreachable_total", labels=["gen"])
|
||||
python_metrics.register_callback(
|
||||
"gc_counts", lambda: {(i,): v for i, v in enumerate(gc.get_count())}, labels=["gen"]
|
||||
)
|
||||
cm = GaugeMetricFamily("python_gc_counts", "GC cycle counts", labels=["gen"])
|
||||
for n, m in enumerate(gc.get_count()):
|
||||
cm.add_metric([str(n)], m)
|
||||
|
||||
reactor_metrics = get_metrics_for("python.twisted.reactor")
|
||||
tick_time = reactor_metrics.register_distribution("tick_time")
|
||||
pending_calls_metric = reactor_metrics.register_distribution("pending_calls")
|
||||
yield cm
|
||||
|
||||
synapse_metrics = get_metrics_for("synapse")
|
||||
REGISTRY.register(GCCounts())
|
||||
|
||||
#
|
||||
# Twisted reactor metrics
|
||||
#
|
||||
|
||||
tick_time = Histogram("python_twisted_reactor_tick_time", "Tick time of the Twisted reactor (ms)", buckets=[1, 2, 5, 10, 50, 100, 250, 500, 1000, 2000])
|
||||
pending_calls_metric = Histogram("python_twisted_reactor_pending_calls", "Pending calls", buckets=[1, 2, 5, 10, 25, 50, 100, 250, 500, 1000])
|
||||
|
||||
#
|
||||
# Federation Metrics
|
||||
#
|
||||
|
||||
sent_edus_counter = Counter("synapse_federation_client_sent_edus", "")
|
||||
|
||||
sent_transactions_counter = Counter("synapse_federation_client_sent_transactions", "")
|
||||
|
||||
events_processed_counter = Counter("synapse_federation_client_events_processed", "")
|
||||
|
||||
# Used to track where various components have processed in the event stream,
|
||||
# e.g. federation sending, appservice sending, etc.
|
||||
event_processing_positions = synapse_metrics.register_gauge(
|
||||
"event_processing_positions", labels=["name"],
|
||||
)
|
||||
event_processing_positions = Gauge("synapse_event_processing_positions", "", ["name"])
|
||||
|
||||
# Used to track the current max events stream position
|
||||
event_persisted_position = synapse_metrics.register_gauge(
|
||||
"event_persisted_position",
|
||||
)
|
||||
event_persisted_position = Gauge("synapse_event_persisted_position", "")
|
||||
|
||||
# Used to track the received_ts of the last event processed by various
|
||||
# components
|
||||
event_processing_last_ts = synapse_metrics.register_gauge(
|
||||
"event_processing_last_ts", labels=["name"],
|
||||
)
|
||||
event_processing_last_ts = Gauge("synapse_event_processing_last_ts", "", ["name"])
|
||||
|
||||
# Used to track the lag processing events. This is the time difference
|
||||
# between the last processed event's received_ts and the time it was
|
||||
# finished being processed.
|
||||
event_processing_lag = synapse_metrics.register_gauge(
|
||||
"event_processing_lag", labels=["name"],
|
||||
)
|
||||
|
||||
event_processing_lag = Gauge("synapse_event_processing_lag", "", ["name"])
|
||||
|
||||
def runUntilCurrentTimer(func):
|
||||
|
||||
|
@ -206,8 +150,8 @@ def runUntilCurrentTimer(func):
|
|||
# since about 25% of time is actually spent running things triggered by
|
||||
# I/O events, but that is harder to capture without rewriting half the
|
||||
# reactor.
|
||||
tick_time.inc_by(end - start)
|
||||
pending_calls_metric.inc_by(num_pending)
|
||||
tick_time.observe(end - start)
|
||||
pending_calls_metric.observe(num_pending)
|
||||
|
||||
if running_on_pypy:
|
||||
return ret
|
||||
|
@ -224,8 +168,8 @@ def runUntilCurrentTimer(func):
|
|||
unreachable = gc.collect(i)
|
||||
end = time.time() * 1000
|
||||
|
||||
gc_time.inc_by(end - start, i)
|
||||
gc_unreachable.inc_by(unreachable, i)
|
||||
gc_time.labels(i).observe(end - start)
|
||||
gc_unreachable.labels(i).set(unreachable)
|
||||
|
||||
return ret
|
||||
|
||||
|
|
|
@ -28,22 +28,19 @@ from synapse.util.logcontext import PreserveLoggingContext, run_in_background
|
|||
from synapse.util.metrics import Measure
|
||||
from synapse.types import StreamToken
|
||||
from synapse.visibility import filter_events_for_client
|
||||
import synapse.metrics
|
||||
from synapse.metrics import LaterGauge
|
||||
|
||||
from collections import namedtuple
|
||||
from prometheus_client import Counter
|
||||
|
||||
import logging
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
metrics = synapse.metrics.get_metrics_for(__name__)
|
||||
notified_events_counter = Counter("synapse_notifier_notified_events", "")
|
||||
|
||||
notified_events_counter = metrics.register_counter("notified_events")
|
||||
|
||||
users_woken_by_stream_counter = metrics.register_counter(
|
||||
"users_woken_by_stream", labels=["stream"]
|
||||
)
|
||||
users_woken_by_stream_counter = Counter("synapse_notifier_users_woken_by_stream", "", ["stream"])
|
||||
|
||||
|
||||
# TODO(paul): Should be shared somewhere
|
||||
|
@ -108,7 +105,7 @@ class _NotifierUserStream(object):
|
|||
self.last_notified_ms = time_now_ms
|
||||
noify_deferred = self.notify_deferred
|
||||
|
||||
users_woken_by_stream_counter.inc(stream_key)
|
||||
users_woken_by_stream_counter.labels(stream_key).inc()
|
||||
|
||||
with PreserveLoggingContext():
|
||||
self.notify_deferred = ObservableDeferred(defer.Deferred())
|
||||
|
@ -197,14 +194,14 @@ class Notifier(object):
|
|||
all_user_streams.add(x)
|
||||
|
||||
return sum(stream.count_listeners() for stream in all_user_streams)
|
||||
metrics.register_callback("listeners", count_listeners)
|
||||
LaterGauge("listeners", "", [], count_listeners)
|
||||
|
||||
metrics.register_callback(
|
||||
"rooms",
|
||||
LaterGauge(
|
||||
"rooms", "", [],
|
||||
lambda: count(bool, self.room_to_user_streams.values()),
|
||||
)
|
||||
metrics.register_callback(
|
||||
"users",
|
||||
LaterGauge(
|
||||
"users", "", [],
|
||||
lambda: len(self.user_to_user_stream),
|
||||
)
|
||||
|
||||
|
|
|
@ -20,22 +20,17 @@ from twisted.internet.error import AlreadyCalled, AlreadyCancelled
|
|||
|
||||
from . import push_rule_evaluator
|
||||
from . import push_tools
|
||||
import synapse
|
||||
from synapse.push import PusherConfigException
|
||||
from synapse.util.logcontext import LoggingContext
|
||||
from synapse.util.metrics import Measure
|
||||
|
||||
from prometheus_client import Counter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
metrics = synapse.metrics.get_metrics_for(__name__)
|
||||
http_push_processed_counter = Counter("http_pushes_processed", "")
|
||||
|
||||
http_push_processed_counter = metrics.register_counter(
|
||||
"http_pushes_processed",
|
||||
)
|
||||
|
||||
http_push_failed_counter = metrics.register_counter(
|
||||
"http_pushes_failed",
|
||||
)
|
||||
http_push_failed_counter = Counter("http_pushes_failed", "")
|
||||
|
||||
|
||||
class HttpPusher(object):
|
||||
|
|
|
@ -150,7 +150,7 @@ class PushRuleEvaluatorForEvent(object):
|
|||
|
||||
# Caches (glob, word_boundary) -> regex for push. See _glob_matches
|
||||
regex_cache = LruCache(50000 * CACHE_SIZE_FACTOR)
|
||||
register_cache("regex_push_cache", regex_cache)
|
||||
register_cache("cache", "regex_push_cache", regex_cache)
|
||||
|
||||
|
||||
def _glob_matches(glob, value, word_boundary=False):
|
||||
|
|
|
@ -56,6 +56,7 @@ REQUIREMENTS = {
|
|||
"msgpack-python>=0.3.0": ["msgpack"],
|
||||
"phonenumbers>=8.2.0": ["phonenumbers"],
|
||||
"six": ["six"],
|
||||
"prometheus_client": ["prometheus_client"],
|
||||
}
|
||||
CONDITIONAL_REQUIREMENTS = {
|
||||
"web_client": {
|
||||
|
|
|
@ -60,19 +60,19 @@ from .commands import (
|
|||
)
|
||||
from .streams import STREAMS_MAP
|
||||
|
||||
from synapse.metrics import LaterGauge
|
||||
from synapse.util.stringutils import random_string
|
||||
from synapse.metrics.metric import CounterMetric
|
||||
|
||||
from prometheus_client import Counter
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
import logging
|
||||
import synapse.metrics
|
||||
import struct
|
||||
import fcntl
|
||||
|
||||
|
||||
metrics = synapse.metrics.get_metrics_for(__name__)
|
||||
|
||||
connection_close_counter = metrics.register_counter(
|
||||
"close_reason", labels=["reason_type"],
|
||||
connection_close_counter = Counter(
|
||||
"synapse_replication_tcp_protocol_close_reason", "", ["reason_type"],
|
||||
)
|
||||
|
||||
|
||||
|
@ -136,12 +136,8 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
|
|||
# The LoopingCall for sending pings.
|
||||
self._send_ping_loop = None
|
||||
|
||||
self.inbound_commands_counter = CounterMetric(
|
||||
"inbound_commands", labels=["command"],
|
||||
)
|
||||
self.outbound_commands_counter = CounterMetric(
|
||||
"outbound_commands", labels=["command"],
|
||||
)
|
||||
self.inbound_commands_counter = defaultdict(int)
|
||||
self.outbound_commands_counter = defaultdict(int)
|
||||
|
||||
def connectionMade(self):
|
||||
logger.info("[%s] Connection established", self.id())
|
||||
|
@ -201,7 +197,7 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
|
|||
|
||||
self.last_received_command = self.clock.time_msec()
|
||||
|
||||
self.inbound_commands_counter.inc(cmd_name)
|
||||
self.inbound_commands_counter[cmd_name] = self.inbound_commands_counter[cmd_name] + 1
|
||||
|
||||
cmd_cls = COMMAND_MAP[cmd_name]
|
||||
try:
|
||||
|
@ -251,8 +247,7 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
|
|||
self._queue_command(cmd)
|
||||
return
|
||||
|
||||
self.outbound_commands_counter.inc(cmd.NAME)
|
||||
|
||||
self.outbound_commands_counter[cmd.NAME] = self.outbound_commands_counter[cmd.NAME] + 1
|
||||
string = "%s %s" % (cmd.NAME, cmd.to_line(),)
|
||||
if "\n" in string:
|
||||
raise Exception("Unexpected newline in command: %r", string)
|
||||
|
@ -317,9 +312,9 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
|
|||
def connectionLost(self, reason):
|
||||
logger.info("[%s] Replication connection closed: %r", self.id(), reason)
|
||||
if isinstance(reason, Failure):
|
||||
connection_close_counter.inc(reason.type.__name__)
|
||||
connection_close_counter.labels(reason.type.__name__).inc()
|
||||
else:
|
||||
connection_close_counter.inc(reason.__class__.__name__)
|
||||
connection_close_counter.labels(reason.__class__.__name__).inc()
|
||||
|
||||
try:
|
||||
# Remove us from list of connections to be monitored
|
||||
|
@ -566,14 +561,12 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol):
|
|||
|
||||
# The following simply registers metrics for the replication connections
|
||||
|
||||
metrics.register_callback(
|
||||
"pending_commands",
|
||||
pending_commands = LaterGauge(
|
||||
"pending_commands", "", ["name", "conn_id"],
|
||||
lambda: {
|
||||
(p.name, p.conn_id): len(p.pending_commands)
|
||||
for p in connected_connections
|
||||
},
|
||||
labels=["name", "conn_id"],
|
||||
)
|
||||
})
|
||||
|
||||
|
||||
def transport_buffer_size(protocol):
|
||||
|
@ -583,14 +576,12 @@ def transport_buffer_size(protocol):
|
|||
return 0
|
||||
|
||||
|
||||
metrics.register_callback(
|
||||
"transport_send_buffer",
|
||||
transport_send_buffer = LaterGauge(
|
||||
"synapse_replication_tcp_transport_send_buffer", "", ["name", "conn_id"],
|
||||
lambda: {
|
||||
(p.name, p.conn_id): transport_buffer_size(p)
|
||||
for p in connected_connections
|
||||
},
|
||||
labels=["name", "conn_id"],
|
||||
)
|
||||
})
|
||||
|
||||
|
||||
def transport_kernel_read_buffer_size(protocol, read=True):
|
||||
|
@ -608,48 +599,37 @@ def transport_kernel_read_buffer_size(protocol, read=True):
|
|||
return 0
|
||||
|
||||
|
||||
metrics.register_callback(
|
||||
"transport_kernel_send_buffer",
|
||||
tcp_transport_kernel_send_buffer = LaterGauge(
|
||||
"synapse_replication_tcp_transport_kernel_send_buffer", "", ["name", "conn_id"],
|
||||
lambda: {
|
||||
(p.name, p.conn_id): transport_kernel_read_buffer_size(p, False)
|
||||
for p in connected_connections
|
||||
},
|
||||
labels=["name", "conn_id"],
|
||||
)
|
||||
})
|
||||
|
||||
|
||||
metrics.register_callback(
|
||||
"transport_kernel_read_buffer",
|
||||
tcp_transport_kernel_read_buffer = LaterGauge(
|
||||
"synapse_replication_tcp_transport_kernel_read_buffer", "", ["name", "conn_id"],
|
||||
lambda: {
|
||||
(p.name, p.conn_id): transport_kernel_read_buffer_size(p, True)
|
||||
for p in connected_connections
|
||||
},
|
||||
labels=["name", "conn_id"],
|
||||
)
|
||||
})
|
||||
|
||||
|
||||
metrics.register_callback(
|
||||
"inbound_commands",
|
||||
tcp_inbound_commands = LaterGauge(
|
||||
"synapse_replication_tcp_inbound_commands", "", ["command", "name", "conn_id"],
|
||||
lambda: {
|
||||
(k[0], p.name, p.conn_id): count
|
||||
for p in connected_connections
|
||||
for k, count in p.inbound_commands_counter.counts.iteritems()
|
||||
},
|
||||
labels=["command", "name", "conn_id"],
|
||||
)
|
||||
for k, count in p.inbound_commands_counter.items()
|
||||
})
|
||||
|
||||
metrics.register_callback(
|
||||
"outbound_commands",
|
||||
tcp_outbound_commands = LaterGauge(
|
||||
"synapse_replication_tcp_outbound_commands", "", ["command", "name", "conn_id"],
|
||||
lambda: {
|
||||
(k[0], p.name, p.conn_id): count
|
||||
for p in connected_connections
|
||||
for k, count in p.outbound_commands_counter.counts.iteritems()
|
||||
},
|
||||
labels=["command", "name", "conn_id"],
|
||||
)
|
||||
for k, count in p.outbound_commands_counter.items()
|
||||
})
|
||||
|
||||
# number of updates received for each RDATA stream
|
||||
inbound_rdata_count = metrics.register_counter(
|
||||
"inbound_rdata_count",
|
||||
labels=["stream_name"],
|
||||
)
|
||||
inbound_rdata_count = Counter("synapse_replication_tcp_inbound_rdata_count", "", ["stream_name"])
|
||||
|
|
|
@ -13,29 +13,52 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import synapse.metrics
|
||||
from prometheus_client.core import GaugeMetricFamily, REGISTRY
|
||||
|
||||
import os
|
||||
|
||||
CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.5))
|
||||
|
||||
metrics = synapse.metrics.get_metrics_for("synapse.util.caches")
|
||||
|
||||
caches_by_name = {}
|
||||
# cache_counter = metrics.register_cache(
|
||||
# "cache",
|
||||
# lambda: {(name,): len(caches_by_name[name]) for name in caches_by_name.keys()},
|
||||
# labels=["name"],
|
||||
# )
|
||||
collectors_by_name = {}
|
||||
|
||||
def register_cache(name, cache_name, cache):
|
||||
|
||||
def register_cache(name, cache):
|
||||
caches_by_name[name] = cache
|
||||
return metrics.register_cache(
|
||||
"cache",
|
||||
lambda: len(cache),
|
||||
name,
|
||||
)
|
||||
# Check if the metric is already registered. Unregister it, if so.
|
||||
metric_name = "synapse_util_caches_%s:%s" % (name, cache_name,)
|
||||
if metric_name in collectors_by_name.keys():
|
||||
REGISTRY.unregister(collectors_by_name[metric_name])
|
||||
|
||||
class CacheMetric(object):
|
||||
|
||||
hits = 0
|
||||
misses = 0
|
||||
evicted_size = 0
|
||||
|
||||
def inc_hits(self):
|
||||
self.hits += 1
|
||||
|
||||
def inc_misses(self):
|
||||
self.misses += 1
|
||||
|
||||
def inc_evictions(self, size=1):
|
||||
self.evicted_size += size
|
||||
|
||||
def collect(self):
|
||||
cache_size = len(cache)
|
||||
|
||||
gm = GaugeMetricFamily(metric_name, "", labels=["size", "hits", "misses", "total"])
|
||||
gm.add_metric(["size"], cache_size)
|
||||
gm.add_metric(["hits"], self.hits)
|
||||
gm.add_metric(["misses"], self.misses)
|
||||
gm.add_metric(["total"], self.hits + self.misses)
|
||||
yield gm
|
||||
|
||||
metric = CacheMetric()
|
||||
REGISTRY.register(metric)
|
||||
caches_by_name[cache_name] = cache
|
||||
collectors_by_name[metric_name] = metric
|
||||
return metric
|
||||
|
||||
KNOWN_KEYS = {
|
||||
key: key for key in
|
||||
|
|
|
@ -80,7 +80,7 @@ class Cache(object):
|
|||
self.name = name
|
||||
self.keylen = keylen
|
||||
self.thread = None
|
||||
self.metrics = register_cache(name, self.cache)
|
||||
self.metrics = register_cache("descriptor", name, self.cache)
|
||||
|
||||
def _on_evicted(self, evicted_count):
|
||||
self.metrics.inc_evictions(evicted_count)
|
||||
|
|
|
@ -55,7 +55,7 @@ class DictionaryCache(object):
|
|||
__slots__ = []
|
||||
|
||||
self.sentinel = Sentinel()
|
||||
self.metrics = register_cache(name, self.cache)
|
||||
self.metrics = register_cache("dictionary", name, self.cache)
|
||||
|
||||
def check_thread(self):
|
||||
expected_thread = self.thread
|
||||
|
|
|
@ -52,12 +52,12 @@ class ExpiringCache(object):
|
|||
|
||||
self._cache = OrderedDict()
|
||||
|
||||
self.metrics = register_cache(cache_name, self)
|
||||
|
||||
self.iterable = iterable
|
||||
|
||||
self._size_estimate = 0
|
||||
|
||||
self.metrics = register_cache("expiring", cache_name, self)
|
||||
|
||||
def start(self):
|
||||
if not self._expiry_ms:
|
||||
# Don't bother starting the loop if things never expire
|
||||
|
|
|
@ -17,7 +17,7 @@ import logging
|
|||
from twisted.internet import defer
|
||||
|
||||
from synapse.util.async import ObservableDeferred
|
||||
from synapse.util.caches import metrics as cache_metrics
|
||||
from synapse.util.caches import register_cache
|
||||
from synapse.util.logcontext import make_deferred_yieldable, run_in_background
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -38,15 +38,16 @@ class ResponseCache(object):
|
|||
self.timeout_sec = timeout_ms / 1000.
|
||||
|
||||
self._name = name
|
||||
self._metrics = cache_metrics.register_cache(
|
||||
"response_cache",
|
||||
size_callback=lambda: self.size(),
|
||||
cache_name=name,
|
||||
self._metrics = register_cache(
|
||||
"response_cache", name, self
|
||||
)
|
||||
|
||||
def size(self):
|
||||
return len(self.pending_result_cache)
|
||||
|
||||
def __len__(self):
|
||||
return self.size()
|
||||
|
||||
def get(self, key):
|
||||
"""Look up the given key.
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@ class StreamChangeCache(object):
|
|||
self._cache = sorteddict()
|
||||
self._earliest_known_stream_pos = current_stream_pos
|
||||
self.name = name
|
||||
self.metrics = register_cache(self.name, self._cache)
|
||||
self.metrics = register_cache("cache", self.name, self._cache)
|
||||
|
||||
for entity, stream_pos in prefilled_cache.items():
|
||||
self.entity_has_changed(entity, stream_pos)
|
||||
|
|
|
@ -15,8 +15,8 @@
|
|||
|
||||
from twisted.internet import defer
|
||||
|
||||
from prometheus_client import Counter
|
||||
from synapse.util.logcontext import LoggingContext
|
||||
import synapse.metrics
|
||||
|
||||
from functools import wraps
|
||||
import logging
|
||||
|
@ -24,66 +24,21 @@ import logging
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
block_counter = Counter("synapse_util_metrics_block_count", "", ["block_name"])
|
||||
|
||||
metrics = synapse.metrics.get_metrics_for(__name__)
|
||||
block_timer = Counter("synapse_util_metrics_block_time_seconds", "", ["block_name"])
|
||||
|
||||
# total number of times we have hit this block
|
||||
block_counter = metrics.register_counter(
|
||||
"block_count",
|
||||
labels=["block_name"],
|
||||
alternative_names=(
|
||||
# the following are all deprecated aliases for the same metric
|
||||
metrics.name_prefix + x for x in (
|
||||
"_block_timer:count",
|
||||
"_block_ru_utime:count",
|
||||
"_block_ru_stime:count",
|
||||
"_block_db_txn_count:count",
|
||||
"_block_db_txn_duration:count",
|
||||
)
|
||||
)
|
||||
)
|
||||
block_ru_utime = Counter("synapse_util_metrics_block_ru_utime_seconds", "", ["block_name"])
|
||||
|
||||
block_timer = metrics.register_counter(
|
||||
"block_time_seconds",
|
||||
labels=["block_name"],
|
||||
alternative_names=(
|
||||
metrics.name_prefix + "_block_timer:total",
|
||||
),
|
||||
)
|
||||
block_ru_stime = Counter("synapse_util_metrics_block_ru_stime_seconds", "", ["block_name"])
|
||||
|
||||
block_ru_utime = metrics.register_counter(
|
||||
"block_ru_utime_seconds", labels=["block_name"],
|
||||
alternative_names=(
|
||||
metrics.name_prefix + "_block_ru_utime:total",
|
||||
),
|
||||
)
|
||||
|
||||
block_ru_stime = metrics.register_counter(
|
||||
"block_ru_stime_seconds", labels=["block_name"],
|
||||
alternative_names=(
|
||||
metrics.name_prefix + "_block_ru_stime:total",
|
||||
),
|
||||
)
|
||||
|
||||
block_db_txn_count = metrics.register_counter(
|
||||
"block_db_txn_count", labels=["block_name"],
|
||||
alternative_names=(
|
||||
metrics.name_prefix + "_block_db_txn_count:total",
|
||||
),
|
||||
)
|
||||
block_db_txn_count = Counter("synapse_util_metrics_block_db_txn_count", "", ["block_name"])
|
||||
|
||||
# seconds spent waiting for db txns, excluding scheduling time, in this block
|
||||
block_db_txn_duration = metrics.register_counter(
|
||||
"block_db_txn_duration_seconds", labels=["block_name"],
|
||||
alternative_names=(
|
||||
metrics.name_prefix + "_block_db_txn_duration:total",
|
||||
),
|
||||
)
|
||||
block_db_txn_duration = Counter("synapse_util_metrics_block_db_txn_duration_seconds", "", ["block_name"])
|
||||
|
||||
# seconds spent waiting for a db connection, in this block
|
||||
block_db_sched_duration = metrics.register_counter(
|
||||
"block_db_sched_duration_seconds", labels=["block_name"],
|
||||
)
|
||||
block_db_sched_duration = Counter("synapse_util_metrics_block_db_sched_duration_seconds", "", ["block_name"])
|
||||
|
||||
|
||||
def measure_func(name):
|
||||
|
@ -132,8 +87,8 @@ class Measure(object):
|
|||
|
||||
duration = self.clock.time_msec() - self.start
|
||||
|
||||
block_counter.inc(self.name)
|
||||
block_timer.inc_by(duration, self.name)
|
||||
block_counter.labels(self.name).inc()
|
||||
block_timer.labels(self.name).inc(duration)
|
||||
|
||||
context = LoggingContext.current_context()
|
||||
|
||||
|
@ -150,19 +105,13 @@ class Measure(object):
|
|||
|
||||
ru_utime, ru_stime = context.get_resource_usage()
|
||||
|
||||
block_ru_utime.inc_by(ru_utime - self.ru_utime, self.name)
|
||||
block_ru_stime.inc_by(ru_stime - self.ru_stime, self.name)
|
||||
block_db_txn_count.inc_by(
|
||||
context.db_txn_count - self.db_txn_count, self.name
|
||||
)
|
||||
block_db_txn_duration.inc_by(
|
||||
(context.db_txn_duration_ms - self.db_txn_duration_ms) / 1000.,
|
||||
self.name
|
||||
)
|
||||
block_db_sched_duration.inc_by(
|
||||
(context.db_sched_duration_ms - self.db_sched_duration_ms) / 1000.,
|
||||
self.name
|
||||
)
|
||||
block_ru_utime.labels(self.name).inc(ru_utime - self.ru_utime)
|
||||
block_ru_stime.labels(self.name).inc(ru_stime - self.ru_stime)
|
||||
block_db_txn_count.labels(self.name).inc(context.db_txn_count - self.db_txn_count)
|
||||
block_db_txn_duration.labels(self.name).inc(
|
||||
(context.db_txn_duration_ms - self.db_txn_duration_ms) / 1000.)
|
||||
block_db_sched_duration.labels(self.name).inc(
|
||||
(context.db_sched_duration_ms - self.db_sched_duration_ms) / 1000.)
|
||||
|
||||
if self.created_context:
|
||||
self.start_context.__exit__(exc_type, exc_val, exc_tb)
|
||||
|
|
|
@ -12,3 +12,6 @@
|
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from twisted.trial import util
|
||||
util.DEFAULT_TIMEOUT_DURATION = 10
|
||||
|
|
Loading…
Reference in a new issue