forked from MirrorHub/synapse
Add metrics to track how often events are soft_failed
(#10156)
Spawned from missing messages we were seeing on `matrix.org` from a federated Gitter bridged room, https://gitlab.com/gitterHQ/webapp/-/issues/2770. The underlying issue in Synapse is tracked by https://github.com/matrix-org/synapse/issues/10066, where the message and join events race and the message is `soft_failed` before the `join` event reaches the remote federated server. Fewer soft_failed events = better; usually this should only trigger for events where people are doing bad things and trying to fuzz and fake everything.
This commit is contained in:
parent
e21c347332
commit
b31daac01c
2 changed files with 8 additions and 0 deletions
1
changelog.d/10156.misc
Normal file
1
changelog.d/10156.misc
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Add `synapse_federation_soft_failed_events_total` metric to track how often events are soft failed.
|
|
@ -33,6 +33,7 @@ from typing import (
|
||||||
)
|
)
|
||||||
|
|
||||||
import attr
|
import attr
|
||||||
|
from prometheus_client import Counter
|
||||||
from signedjson.key import decode_verify_key_bytes
|
from signedjson.key import decode_verify_key_bytes
|
||||||
from signedjson.sign import verify_signed_json
|
from signedjson.sign import verify_signed_json
|
||||||
from unpaddedbase64 import decode_base64
|
from unpaddedbase64 import decode_base64
|
||||||
|
@ -101,6 +102,11 @@ if TYPE_CHECKING:
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
soft_failed_event_counter = Counter(
|
||||||
|
"synapse_federation_soft_failed_events_total",
|
||||||
|
"Events received over federation that we marked as soft_failed",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@attr.s(slots=True)
|
@attr.s(slots=True)
|
||||||
class _NewEventInfo:
|
class _NewEventInfo:
|
||||||
|
@ -2498,6 +2504,7 @@ class FederationHandler(BaseHandler):
|
||||||
event_auth.check(room_version_obj, event, auth_events=current_auth_events)
|
event_auth.check(room_version_obj, event, auth_events=current_auth_events)
|
||||||
except AuthError as e:
|
except AuthError as e:
|
||||||
logger.warning("Soft-failing %r because %s", event, e)
|
logger.warning("Soft-failing %r because %s", event, e)
|
||||||
|
soft_failed_event_counter.inc()
|
||||||
event.internal_metadata.soft_failed = True
|
event.internal_metadata.soft_failed = True
|
||||||
|
|
||||||
async def on_get_missing_events(
|
async def on_get_missing_events(
|
||||||
|
|
Loading…
Reference in a new issue