mirror of
https://mau.dev/maunium/synapse.git
synced 2024-12-14 19:13:51 +01:00
Add OpenTracing for database activity. (#10113)
This adds quite a lot of OpenTracing decoration for database activity. Specifically it adds tracing at four different levels: * emit a span for each "interaction" - ie, the top level database function that we tend to call "transaction", but isn't really, because it can end up as multiple transactions. * emit a span while we hold a database connection open * emit a span for each database transaction - actual actual transaction. * emit a span for each database query. I'm aware this might be quite a lot of overhead, but even just running it on a local Synapse it looks really interesting, and I hope the overhead can be offset just by turning down the sampling frequency and finding other ways of tracing requests of interest (eg, the `force_tracing_for_users` setting).
This commit is contained in:
parent
1d143074c5
commit
9eea4646be
3 changed files with 57 additions and 30 deletions
1
changelog.d/10113.feature
Normal file
1
changelog.d/10113.feature
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Report OpenTracing spans for database activity.
|
|
@ -271,6 +271,12 @@ class SynapseTags:
|
||||||
# HTTP request tag (used to distinguish full vs incremental syncs, etc)
|
# HTTP request tag (used to distinguish full vs incremental syncs, etc)
|
||||||
REQUEST_TAG = "request_tag"
|
REQUEST_TAG = "request_tag"
|
||||||
|
|
||||||
|
# Text description of a database transaction
|
||||||
|
DB_TXN_DESC = "db.txn_desc"
|
||||||
|
|
||||||
|
# Uniqueish ID of a database transaction
|
||||||
|
DB_TXN_ID = "db.txn_id"
|
||||||
|
|
||||||
|
|
||||||
# Block everything by default
|
# Block everything by default
|
||||||
# A regex which matches the server_names to expose traces for.
|
# A regex which matches the server_names to expose traces for.
|
||||||
|
|
|
@ -40,6 +40,7 @@ from twisted.enterprise import adbapi
|
||||||
|
|
||||||
from synapse.api.errors import StoreError
|
from synapse.api.errors import StoreError
|
||||||
from synapse.config.database import DatabaseConnectionConfig
|
from synapse.config.database import DatabaseConnectionConfig
|
||||||
|
from synapse.logging import opentracing
|
||||||
from synapse.logging.context import (
|
from synapse.logging.context import (
|
||||||
LoggingContext,
|
LoggingContext,
|
||||||
current_context,
|
current_context,
|
||||||
|
@ -313,7 +314,14 @@ class LoggingTransaction:
|
||||||
start = time.time()
|
start = time.time()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return func(sql, *args)
|
with opentracing.start_active_span(
|
||||||
|
"db.query",
|
||||||
|
tags={
|
||||||
|
opentracing.tags.DATABASE_TYPE: "sql",
|
||||||
|
opentracing.tags.DATABASE_STATEMENT: sql,
|
||||||
|
},
|
||||||
|
):
|
||||||
|
return func(sql, *args)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
sql_logger.debug("[SQL FAIL] {%s} %s", self.name, e)
|
sql_logger.debug("[SQL FAIL] {%s} %s", self.name, e)
|
||||||
raise
|
raise
|
||||||
|
@ -525,9 +533,16 @@ class DatabasePool:
|
||||||
exception_callbacks=exception_callbacks,
|
exception_callbacks=exception_callbacks,
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
r = func(cursor, *args, **kwargs)
|
with opentracing.start_active_span(
|
||||||
conn.commit()
|
"db.txn",
|
||||||
return r
|
tags={
|
||||||
|
opentracing.SynapseTags.DB_TXN_DESC: desc,
|
||||||
|
opentracing.SynapseTags.DB_TXN_ID: name,
|
||||||
|
},
|
||||||
|
):
|
||||||
|
r = func(cursor, *args, **kwargs)
|
||||||
|
conn.commit()
|
||||||
|
return r
|
||||||
except self.engine.module.OperationalError as e:
|
except self.engine.module.OperationalError as e:
|
||||||
# This can happen if the database disappears mid
|
# This can happen if the database disappears mid
|
||||||
# transaction.
|
# transaction.
|
||||||
|
@ -653,16 +668,17 @@ class DatabasePool:
|
||||||
logger.warning("Starting db txn '%s' from sentinel context", desc)
|
logger.warning("Starting db txn '%s' from sentinel context", desc)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = await self.runWithConnection(
|
with opentracing.start_active_span(f"db.{desc}"):
|
||||||
self.new_transaction,
|
result = await self.runWithConnection(
|
||||||
desc,
|
self.new_transaction,
|
||||||
after_callbacks,
|
desc,
|
||||||
exception_callbacks,
|
after_callbacks,
|
||||||
func,
|
exception_callbacks,
|
||||||
*args,
|
func,
|
||||||
db_autocommit=db_autocommit,
|
*args,
|
||||||
**kwargs,
|
db_autocommit=db_autocommit,
|
||||||
)
|
**kwargs,
|
||||||
|
)
|
||||||
|
|
||||||
for after_callback, after_args, after_kwargs in after_callbacks:
|
for after_callback, after_args, after_kwargs in after_callbacks:
|
||||||
after_callback(*after_args, **after_kwargs)
|
after_callback(*after_args, **after_kwargs)
|
||||||
|
@ -718,25 +734,29 @@ class DatabasePool:
|
||||||
with LoggingContext(
|
with LoggingContext(
|
||||||
str(curr_context), parent_context=parent_context
|
str(curr_context), parent_context=parent_context
|
||||||
) as context:
|
) as context:
|
||||||
sched_duration_sec = monotonic_time() - start_time
|
with opentracing.start_active_span(
|
||||||
sql_scheduling_timer.observe(sched_duration_sec)
|
operation_name="db.connection",
|
||||||
context.add_database_scheduled(sched_duration_sec)
|
):
|
||||||
|
sched_duration_sec = monotonic_time() - start_time
|
||||||
|
sql_scheduling_timer.observe(sched_duration_sec)
|
||||||
|
context.add_database_scheduled(sched_duration_sec)
|
||||||
|
|
||||||
if self.engine.is_connection_closed(conn):
|
if self.engine.is_connection_closed(conn):
|
||||||
logger.debug("Reconnecting closed database connection")
|
logger.debug("Reconnecting closed database connection")
|
||||||
conn.reconnect()
|
conn.reconnect()
|
||||||
|
opentracing.log_kv({"message": "reconnected"})
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if db_autocommit:
|
if db_autocommit:
|
||||||
self.engine.attempt_to_set_autocommit(conn, True)
|
self.engine.attempt_to_set_autocommit(conn, True)
|
||||||
|
|
||||||
db_conn = LoggingDatabaseConnection(
|
db_conn = LoggingDatabaseConnection(
|
||||||
conn, self.engine, "runWithConnection"
|
conn, self.engine, "runWithConnection"
|
||||||
)
|
)
|
||||||
return func(db_conn, *args, **kwargs)
|
return func(db_conn, *args, **kwargs)
|
||||||
finally:
|
finally:
|
||||||
if db_autocommit:
|
if db_autocommit:
|
||||||
self.engine.attempt_to_set_autocommit(conn, False)
|
self.engine.attempt_to_set_autocommit(conn, False)
|
||||||
|
|
||||||
return await make_deferred_yieldable(
|
return await make_deferred_yieldable(
|
||||||
self._db_pool.runWithConnection(inner_func, *args, **kwargs)
|
self._db_pool.runWithConnection(inner_func, *args, **kwargs)
|
||||||
|
|
Loading…
Reference in a new issue