Run Prometheus on a different port, optionally. (#3274)

This commit is contained in:
Amber Brown 2018-05-31 19:04:50 +10:00 committed by GitHub
parent c936a52a9e
commit febe0ec8fd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 192 additions and 26 deletions

View file

@ -1,25 +1,47 @@
How to monitor Synapse metrics using Prometheus
===============================================
1. Install prometheus:
1. Install Prometheus:
Follow instructions at http://prometheus.io/docs/introduction/install/
2. Enable synapse metrics:
2. Enable Synapse metrics:
Simply setting a (local) port number will enable it. Pick a port.
prometheus itself defaults to 9090, so starting just above that for
locally monitored services seems reasonable. E.g. 9092:
There are two methods of enabling metrics in Synapse.
Add to homeserver.yaml::
The first serves the metrics as a part of the usual web server and can be
enabled by adding the "metrics" resource to the existing listener as such::
metrics_port: 9092
resources:
- names:
- client
- metrics
Also ensure that ``enable_metrics`` is set to ``True``.
This provides a simple way of adding metrics to your Synapse installation,
and serves under ``/_synapse/metrics``. If you do not wish your metrics be
publicly exposed, you will need to either filter it out at your load
balancer, or use the second method.
Restart synapse.
The second method runs the metrics server on a different port, in a
different thread to Synapse. This can make it more resilient to heavy load
meaning metrics cannot be retrieved, and can be exposed to just internal
networks easier. The served metrics are available over HTTP only, and will
be available at ``/``.
3. Add a prometheus target for synapse.
Add a new listener to homeserver.yaml::
listeners:
- type: metrics
port: 9000
bind_addresses:
- '0.0.0.0'
For both options, you will need to ensure that ``enable_metrics`` is set to
``True``.
Restart Synapse.
3. Add a Prometheus target for Synapse.
It needs to set the ``metrics_path`` to a non-default value (under ``scrape_configs``)::
@ -31,7 +53,40 @@ How to monitor Synapse metrics using Prometheus
If your prometheus is older than 1.5.2, you will need to replace
``static_configs`` in the above with ``target_groups``.
Restart prometheus.
Restart Prometheus.
Removal of deprecated metrics & time based counters becoming histograms in 0.31.0
---------------------------------------------------------------------------------
The duplicated metrics deprecated in Synapse 0.27.0 have been removed.
All time duration-based metrics have been changed to be seconds. This affects:
================================
msec -> sec metrics
================================
python_gc_time
python_twisted_reactor_tick_time
synapse_storage_query_time
synapse_storage_schedule_time
synapse_storage_transaction_time
================================
Several metrics have been changed to be histograms, which sort entries into
buckets and allow better analysis. The following metrics are now histograms:
=========================================
Altered metrics
=========================================
python_gc_time
python_twisted_reactor_pending_calls
python_twisted_reactor_tick_time
synapse_http_server_response_time_seconds
synapse_storage_query_time
synapse_storage_schedule_time
synapse_storage_transaction_time
=========================================
Block and response metrics renamed for 0.27.0

View file

@ -124,6 +124,19 @@ def quit_with_error(error_string):
sys.exit(1)
def listen_metrics(bind_addresses, port):
"""
Start Prometheus metrics server.
"""
from synapse.metrics import RegistryProxy
from prometheus_client import start_http_server
for host in bind_addresses:
reactor.callInThread(start_http_server, int(port),
addr=host, registry=RegistryProxy)
logger.info("Metrics now reporting on %s:%d", host, port)
def listen_tcp(bind_addresses, port, factory, backlog=50):
"""
Create a TCP socket for a port and several addresses

View file

@ -94,6 +94,13 @@ class AppserviceServer(HomeServer):
globals={"hs": self},
)
)
elif listener["type"] == "metrics":
if not self.get_config().enable_metrics:
logger.warn(("Metrics listener configured, but "
"collect_metrics is not enabled!"))
else:
_base.listen_metrics(listener["bind_addresses"],
listener["port"])
else:
logger.warn("Unrecognized listener type: %s", listener["type"])

View file

@ -25,6 +25,7 @@ from synapse.config.logger import setup_logging
from synapse.crypto import context_factory
from synapse.http.server import JsonResource
from synapse.http.site import SynapseSite
from synapse.metrics import RegistryProxy
from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
from synapse.replication.slave.storage._base import BaseSlavedStore
from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore
@ -77,7 +78,7 @@ class ClientReaderServer(HomeServer):
for res in listener_config["resources"]:
for name in res["names"]:
if name == "metrics":
resources[METRICS_PREFIX] = MetricsResource(self)
resources[METRICS_PREFIX] = MetricsResource(RegistryProxy)
elif name == "client":
resource = JsonResource(self, canonical_json=False)
PublicRoomListRestServlet(self).register(resource)
@ -118,7 +119,13 @@ class ClientReaderServer(HomeServer):
globals={"hs": self},
)
)
elif listener["type"] == "metrics":
if not self.get_config().enable_metrics:
logger.warn(("Metrics listener configured, but "
"collect_metrics is not enabled!"))
else:
_base.listen_metrics(listener["bind_addresses"],
listener["port"])
else:
logger.warn("Unrecognized listener type: %s", listener["type"])

View file

@ -25,6 +25,7 @@ from synapse.config.logger import setup_logging
from synapse.crypto import context_factory
from synapse.http.server import JsonResource
from synapse.http.site import SynapseSite
from synapse.metrics import RegistryProxy
from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
from synapse.replication.slave.storage._base import BaseSlavedStore
from synapse.replication.slave.storage.account_data import SlavedAccountDataStore
@ -90,7 +91,7 @@ class EventCreatorServer(HomeServer):
for res in listener_config["resources"]:
for name in res["names"]:
if name == "metrics":
resources[METRICS_PREFIX] = MetricsResource(self)
resources[METRICS_PREFIX] = MetricsResource(RegistryProxy)
elif name == "client":
resource = JsonResource(self, canonical_json=False)
RoomSendEventRestServlet(self).register(resource)
@ -134,6 +135,13 @@ class EventCreatorServer(HomeServer):
globals={"hs": self},
)
)
elif listener["type"] == "metrics":
if not self.get_config().enable_metrics:
logger.warn(("Metrics listener configured, but "
"collect_metrics is not enabled!"))
else:
_base.listen_metrics(listener["bind_addresses"],
listener["port"])
else:
logger.warn("Unrecognized listener type: %s", listener["type"])

View file

@ -26,6 +26,7 @@ from synapse.config.logger import setup_logging
from synapse.crypto import context_factory
from synapse.federation.transport.server import TransportLayerServer
from synapse.http.site import SynapseSite
from synapse.metrics import RegistryProxy
from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
from synapse.replication.slave.storage._base import BaseSlavedStore
from synapse.replication.slave.storage.directory import DirectoryStore
@ -71,7 +72,7 @@ class FederationReaderServer(HomeServer):
for res in listener_config["resources"]:
for name in res["names"]:
if name == "metrics":
resources[METRICS_PREFIX] = MetricsResource(self)
resources[METRICS_PREFIX] = MetricsResource(RegistryProxy)
elif name == "federation":
resources.update({
FEDERATION_PREFIX: TransportLayerServer(self),
@ -107,6 +108,13 @@ class FederationReaderServer(HomeServer):
globals={"hs": self},
)
)
elif listener["type"] == "metrics":
if not self.get_config().enable_metrics:
logger.warn(("Metrics listener configured, but "
"collect_metrics is not enabled!"))
else:
_base.listen_metrics(listener["bind_addresses"],
listener["port"])
else:
logger.warn("Unrecognized listener type: %s", listener["type"])

View file

@ -25,6 +25,7 @@ from synapse.config.logger import setup_logging
from synapse.crypto import context_factory
from synapse.federation import send_queue
from synapse.http.site import SynapseSite
from synapse.metrics import RegistryProxy
from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore
from synapse.replication.slave.storage.devices import SlavedDeviceStore
@ -89,7 +90,7 @@ class FederationSenderServer(HomeServer):
for res in listener_config["resources"]:
for name in res["names"]:
if name == "metrics":
resources[METRICS_PREFIX] = MetricsResource(self)
resources[METRICS_PREFIX] = MetricsResource(RegistryProxy)
root_resource = create_resource_tree(resources, NoResource())
@ -121,6 +122,13 @@ class FederationSenderServer(HomeServer):
globals={"hs": self},
)
)
elif listener["type"] == "metrics":
if not self.get_config().enable_metrics:
logger.warn(("Metrics listener configured, but "
"collect_metrics is not enabled!"))
else:
_base.listen_metrics(listener["bind_addresses"],
listener["port"])
else:
logger.warn("Unrecognized listener type: %s", listener["type"])

View file

@ -29,6 +29,7 @@ from synapse.http.servlet import (
RestServlet, parse_json_object_from_request,
)
from synapse.http.site import SynapseSite
from synapse.metrics import RegistryProxy
from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
from synapse.replication.slave.storage._base import BaseSlavedStore
from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore
@ -131,7 +132,7 @@ class FrontendProxyServer(HomeServer):
for res in listener_config["resources"]:
for name in res["names"]:
if name == "metrics":
resources[METRICS_PREFIX] = MetricsResource(self)
resources[METRICS_PREFIX] = MetricsResource(RegistryProxy)
elif name == "client":
resource = JsonResource(self, canonical_json=False)
KeyUploadServlet(self).register(resource)
@ -172,6 +173,13 @@ class FrontendProxyServer(HomeServer):
globals={"hs": self},
)
)
elif listener["type"] == "metrics":
if not self.get_config().enable_metrics:
logger.warn(("Metrics listener configured, but "
"collect_metrics is not enabled!"))
else:
_base.listen_metrics(listener["bind_addresses"],
listener["port"])
else:
logger.warn("Unrecognized listener type: %s", listener["type"])

View file

@ -35,7 +35,7 @@ from synapse.http.additional_resource import AdditionalResource
from synapse.http.server import RootRedirect
from synapse.http.site import SynapseSite
from synapse.metrics import RegistryProxy
from synapse.metrics.resource import METRICS_PREFIX
from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
from synapse.python_dependencies import CONDITIONAL_REQUIREMENTS, \
check_requirements
from synapse.replication.http import ReplicationRestResource, REPLICATION_PREFIX
@ -61,8 +61,6 @@ from twisted.web.resource import EncodingResourceWrapper, NoResource
from twisted.web.server import GzipEncoderFactory
from twisted.web.static import File
from prometheus_client.twisted import MetricsResource
logger = logging.getLogger("synapse.app.homeserver")
@ -232,7 +230,7 @@ class SynapseHomeServer(HomeServer):
resources[WEB_CLIENT_PREFIX] = build_resource_for_web_client(self)
if name == "metrics" and self.get_config().enable_metrics:
resources[METRICS_PREFIX] = MetricsResource(RegistryProxy())
resources[METRICS_PREFIX] = MetricsResource(RegistryProxy)
if name == "replication":
resources[REPLICATION_PREFIX] = ReplicationRestResource(self)
@ -265,6 +263,13 @@ class SynapseHomeServer(HomeServer):
reactor.addSystemEventTrigger(
"before", "shutdown", server_listener.stopListening,
)
elif listener["type"] == "metrics":
if not self.get_config().enable_metrics:
logger.warn(("Metrics listener configured, but "
"collect_metrics is not enabled!"))
else:
_base.listen_metrics(listener["bind_addresses"],
listener["port"])
else:
logger.warn("Unrecognized listener type: %s", listener["type"])

View file

@ -27,6 +27,7 @@ from synapse.config.homeserver import HomeServerConfig
from synapse.config.logger import setup_logging
from synapse.crypto import context_factory
from synapse.http.site import SynapseSite
from synapse.metrics import RegistryProxy
from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
from synapse.replication.slave.storage._base import BaseSlavedStore
from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore
@ -73,7 +74,7 @@ class MediaRepositoryServer(HomeServer):
for res in listener_config["resources"]:
for name in res["names"]:
if name == "metrics":
resources[METRICS_PREFIX] = MetricsResource(self)
resources[METRICS_PREFIX] = MetricsResource(RegistryProxy)
elif name == "media":
media_repo = self.get_media_repository_resource()
resources.update({
@ -114,6 +115,13 @@ class MediaRepositoryServer(HomeServer):
globals={"hs": self},
)
)
elif listener["type"] == "metrics":
if not self.get_config().enable_metrics:
logger.warn(("Metrics listener configured, but "
"collect_metrics is not enabled!"))
else:
_base.listen_metrics(listener["bind_addresses"],
listener["port"])
else:
logger.warn("Unrecognized listener type: %s", listener["type"])

View file

@ -23,6 +23,7 @@ from synapse.config._base import ConfigError
from synapse.config.homeserver import HomeServerConfig
from synapse.config.logger import setup_logging
from synapse.http.site import SynapseSite
from synapse.metrics import RegistryProxy
from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
from synapse.replication.slave.storage.account_data import SlavedAccountDataStore
from synapse.replication.slave.storage.events import SlavedEventStore
@ -92,7 +93,7 @@ class PusherServer(HomeServer):
for res in listener_config["resources"]:
for name in res["names"]:
if name == "metrics":
resources[METRICS_PREFIX] = MetricsResource(self)
resources[METRICS_PREFIX] = MetricsResource(RegistryProxy)
root_resource = create_resource_tree(resources, NoResource())
@ -124,6 +125,13 @@ class PusherServer(HomeServer):
globals={"hs": self},
)
)
elif listener["type"] == "metrics":
if not self.get_config().enable_metrics:
logger.warn(("Metrics listener configured, but "
"collect_metrics is not enabled!"))
else:
_base.listen_metrics(listener["bind_addresses"],
listener["port"])
else:
logger.warn("Unrecognized listener type: %s", listener["type"])

View file

@ -26,6 +26,7 @@ from synapse.config.logger import setup_logging
from synapse.handlers.presence import PresenceHandler, get_interested_parties
from synapse.http.server import JsonResource
from synapse.http.site import SynapseSite
from synapse.metrics import RegistryProxy
from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
from synapse.replication.slave.storage._base import BaseSlavedStore
from synapse.replication.slave.storage.account_data import SlavedAccountDataStore
@ -257,7 +258,7 @@ class SynchrotronServer(HomeServer):
for res in listener_config["resources"]:
for name in res["names"]:
if name == "metrics":
resources[METRICS_PREFIX] = MetricsResource(self)
resources[METRICS_PREFIX] = MetricsResource(RegistryProxy)
elif name == "client":
resource = JsonResource(self, canonical_json=False)
sync.register_servlets(self, resource)
@ -301,6 +302,13 @@ class SynchrotronServer(HomeServer):
globals={"hs": self},
)
)
elif listener["type"] == "metrics":
if not self.get_config().enable_metrics:
logger.warn(("Metrics listener configured, but "
"collect_metrics is not enabled!"))
else:
_base.listen_metrics(listener["bind_addresses"],
listener["port"])
else:
logger.warn("Unrecognized listener type: %s", listener["type"])

View file

@ -26,6 +26,7 @@ from synapse.config.logger import setup_logging
from synapse.crypto import context_factory
from synapse.http.server import JsonResource
from synapse.http.site import SynapseSite
from synapse.metrics import RegistryProxy
from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
from synapse.replication.slave.storage._base import BaseSlavedStore
from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore
@ -105,7 +106,7 @@ class UserDirectoryServer(HomeServer):
for res in listener_config["resources"]:
for name in res["names"]:
if name == "metrics":
resources[METRICS_PREFIX] = MetricsResource(self)
resources[METRICS_PREFIX] = MetricsResource(RegistryProxy)
elif name == "client":
resource = JsonResource(self, canonical_json=False)
user_directory.register_servlets(self, resource)
@ -146,6 +147,13 @@ class UserDirectoryServer(HomeServer):
globals={"hs": self},
)
)
elif listener["type"] == "metrics":
if not self.get_config().enable_metrics:
logger.warn(("Metrics listener configured, but "
"collect_metrics is not enabled!"))
else:
_base.listen_metrics(listener["bind_addresses"],
listener["port"])
else:
logger.warn("Unrecognized listener type: %s", listener["type"])

View file

@ -14,8 +14,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from ._base import Config, ConfigError
logger = logging.Logger(__name__)
class ServerConfig(Config):
@ -138,6 +142,12 @@ class ServerConfig(Config):
metrics_port = config.get("metrics_port")
if metrics_port:
logger.warn(
("The metrics_port configuration option is deprecated in Synapse 0.31 "
"in favour of a listener. Please see "
"http://github.com/matrix-org/synapse/blob/master/docs/metrics-howto.rst"
" on how to configure the new listener."))
self.listeners.append({
"port": metrics_port,
"bind_addresses": [config.get("metrics_bind_host", "127.0.0.1")],

View file

@ -39,7 +39,8 @@ HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat")
class RegistryProxy(object):
def collect(self):
@staticmethod
def collect():
for metric in REGISTRY.collect():
if not metric.name.startswith("__"):
yield metric

View file

@ -13,4 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from prometheus_client.twisted import MetricsResource
METRICS_PREFIX = "/_synapse/metrics"
__all__ = ["MetricsResource", "METRICS_PREFIX"]