0
0
Fork 1
mirror of https://mau.dev/maunium/synapse.git synced 2024-12-14 16:53:53 +01:00

Improve logging when processing incoming transactions (#9596)

Put the room id in the logcontext, to make it easier to understand what's going on.
This commit is contained in:
Richard van der Hoff 2021-03-12 15:08:03 +00:00 committed by GitHub
parent 464e5da7b2
commit 2b328d7e02
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 51 additions and 73 deletions

1
changelog.d/9596.misc Normal file
View file

@ -0,0 +1 @@
Improve logging when processing incoming transactions.

View file

@ -335,34 +335,41 @@ class FederationServer(FederationBase):
# impose a limit to avoid going too crazy with ram/cpu. # impose a limit to avoid going too crazy with ram/cpu.
async def process_pdus_for_room(room_id: str): async def process_pdus_for_room(room_id: str):
logger.debug("Processing PDUs for %s", room_id) with nested_logging_context(room_id):
try: logger.debug("Processing PDUs for %s", room_id)
await self.check_server_matches_acl(origin_host, room_id)
except AuthError as e:
logger.warning("Ignoring PDUs for room %s from banned server", room_id)
for pdu in pdus_by_room[room_id]:
event_id = pdu.event_id
pdu_results[event_id] = e.error_dict()
return
for pdu in pdus_by_room[room_id]: try:
event_id = pdu.event_id await self.check_server_matches_acl(origin_host, room_id)
with pdu_process_time.time(): except AuthError as e:
with nested_logging_context(event_id): logger.warning(
try: "Ignoring PDUs for room %s from banned server", room_id
await self._handle_received_pdu(origin, pdu) )
pdu_results[event_id] = {} for pdu in pdus_by_room[room_id]:
except FederationError as e: event_id = pdu.event_id
logger.warning("Error handling PDU %s: %s", event_id, e) pdu_results[event_id] = e.error_dict()
pdu_results[event_id] = {"error": str(e)} return
except Exception as e:
f = failure.Failure() for pdu in pdus_by_room[room_id]:
pdu_results[event_id] = {"error": str(e)} pdu_results[pdu.event_id] = await process_pdu(pdu)
logger.error(
"Failed to handle PDU %s", async def process_pdu(pdu: EventBase) -> JsonDict:
event_id, event_id = pdu.event_id
exc_info=(f.type, f.value, f.getTracebackObject()), # type: ignore with pdu_process_time.time():
) with nested_logging_context(event_id):
try:
await self._handle_received_pdu(origin, pdu)
return {}
except FederationError as e:
logger.warning("Error handling PDU %s: %s", event_id, e)
return {"error": str(e)}
except Exception as e:
f = failure.Failure()
logger.error(
"Failed to handle PDU %s",
event_id,
exc_info=(f.type, f.value, f.getTracebackObject()), # type: ignore
)
return {"error": str(e)}
await concurrently_execute( await concurrently_execute(
process_pdus_for_room, pdus_by_room.keys(), TRANSACTION_CONCURRENCY_LIMIT process_pdus_for_room, pdus_by_room.keys(), TRANSACTION_CONCURRENCY_LIMIT

View file

@ -201,7 +201,7 @@ class FederationHandler(BaseHandler):
or pdu.internal_metadata.is_outlier() or pdu.internal_metadata.is_outlier()
) )
if already_seen: if already_seen:
logger.debug("[%s %s]: Already seen pdu", room_id, event_id) logger.debug("Already seen pdu")
return return
# do some initial sanity-checking of the event. In particular, make # do some initial sanity-checking of the event. In particular, make
@ -210,18 +210,14 @@ class FederationHandler(BaseHandler):
try: try:
self._sanity_check_event(pdu) self._sanity_check_event(pdu)
except SynapseError as err: except SynapseError as err:
logger.warning( logger.warning("Received event failed sanity checks")
"[%s %s] Received event failed sanity checks", room_id, event_id
)
raise FederationError("ERROR", err.code, err.msg, affected=pdu.event_id) raise FederationError("ERROR", err.code, err.msg, affected=pdu.event_id)
# If we are currently in the process of joining this room, then we # If we are currently in the process of joining this room, then we
# queue up events for later processing. # queue up events for later processing.
if room_id in self.room_queues: if room_id in self.room_queues:
logger.info( logger.info(
"[%s %s] Queuing PDU from %s for now: join in progress", "Queuing PDU from %s for now: join in progress",
room_id,
event_id,
origin, origin,
) )
self.room_queues[room_id].append((pdu, origin)) self.room_queues[room_id].append((pdu, origin))
@ -236,9 +232,7 @@ class FederationHandler(BaseHandler):
is_in_room = await self.auth.check_host_in_room(room_id, self.server_name) is_in_room = await self.auth.check_host_in_room(room_id, self.server_name)
if not is_in_room: if not is_in_room:
logger.info( logger.info(
"[%s %s] Ignoring PDU from %s as we're not in the room", "Ignoring PDU from %s as we're not in the room",
room_id,
event_id,
origin, origin,
) )
return None return None
@ -250,7 +244,7 @@ class FederationHandler(BaseHandler):
# We only backfill backwards to the min depth. # We only backfill backwards to the min depth.
min_depth = await self.get_min_depth_for_context(pdu.room_id) min_depth = await self.get_min_depth_for_context(pdu.room_id)
logger.debug("[%s %s] min_depth: %d", room_id, event_id, min_depth) logger.debug("min_depth: %d", min_depth)
prevs = set(pdu.prev_event_ids()) prevs = set(pdu.prev_event_ids())
seen = await self.store.have_events_in_timeline(prevs) seen = await self.store.have_events_in_timeline(prevs)
@ -267,17 +261,13 @@ class FederationHandler(BaseHandler):
# If we're missing stuff, ensure we only fetch stuff one # If we're missing stuff, ensure we only fetch stuff one
# at a time. # at a time.
logger.info( logger.info(
"[%s %s] Acquiring room lock to fetch %d missing prev_events: %s", "Acquiring room lock to fetch %d missing prev_events: %s",
room_id,
event_id,
len(missing_prevs), len(missing_prevs),
shortstr(missing_prevs), shortstr(missing_prevs),
) )
with (await self._room_pdu_linearizer.queue(pdu.room_id)): with (await self._room_pdu_linearizer.queue(pdu.room_id)):
logger.info( logger.info(
"[%s %s] Acquired room lock to fetch %d missing prev_events", "Acquired room lock to fetch %d missing prev_events",
room_id,
event_id,
len(missing_prevs), len(missing_prevs),
) )
@ -297,9 +287,7 @@ class FederationHandler(BaseHandler):
if not prevs - seen: if not prevs - seen:
logger.info( logger.info(
"[%s %s] Found all missing prev_events", "Found all missing prev_events",
room_id,
event_id,
) )
if prevs - seen: if prevs - seen:
@ -329,9 +317,7 @@ class FederationHandler(BaseHandler):
if sent_to_us_directly: if sent_to_us_directly:
logger.warning( logger.warning(
"[%s %s] Rejecting: failed to fetch %d prev events: %s", "Rejecting: failed to fetch %d prev events: %s",
room_id,
event_id,
len(prevs - seen), len(prevs - seen),
shortstr(prevs - seen), shortstr(prevs - seen),
) )
@ -414,10 +400,7 @@ class FederationHandler(BaseHandler):
state = [event_map[e] for e in state_map.values()] state = [event_map[e] for e in state_map.values()]
except Exception: except Exception:
logger.warning( logger.warning(
"[%s %s] Error attempting to resolve state at missing " "Error attempting to resolve state at missing " "prev_events",
"prev_events",
room_id,
event_id,
exc_info=True, exc_info=True,
) )
raise FederationError( raise FederationError(
@ -454,9 +437,7 @@ class FederationHandler(BaseHandler):
latest |= seen latest |= seen
logger.info( logger.info(
"[%s %s]: Requesting missing events between %s and %s", "Requesting missing events between %s and %s",
room_id,
event_id,
shortstr(latest), shortstr(latest),
event_id, event_id,
) )
@ -523,15 +504,11 @@ class FederationHandler(BaseHandler):
# We failed to get the missing events, but since we need to handle # We failed to get the missing events, but since we need to handle
# the case of `get_missing_events` not returning the necessary # the case of `get_missing_events` not returning the necessary
# events anyway, it is safe to simply log the error and continue. # events anyway, it is safe to simply log the error and continue.
logger.warning( logger.warning("Failed to get prev_events: %s", e)
"[%s %s]: Failed to get prev_events: %s", room_id, event_id, e
)
return return
logger.info( logger.info(
"[%s %s]: Got %d prev_events: %s", "Got %d prev_events: %s",
room_id,
event_id,
len(missing_events), len(missing_events),
shortstr(missing_events), shortstr(missing_events),
) )
@ -542,9 +519,7 @@ class FederationHandler(BaseHandler):
for ev in missing_events: for ev in missing_events:
logger.info( logger.info(
"[%s %s] Handling received prev_event %s", "Handling received prev_event %s",
room_id,
event_id,
ev.event_id, ev.event_id,
) )
with nested_logging_context(ev.event_id): with nested_logging_context(ev.event_id):
@ -553,9 +528,7 @@ class FederationHandler(BaseHandler):
except FederationError as e: except FederationError as e:
if e.code == 403: if e.code == 403:
logger.warning( logger.warning(
"[%s %s] Received prev_event %s failed history check.", "Received prev_event %s failed history check.",
room_id,
event_id,
ev.event_id, ev.event_id,
) )
else: else:
@ -707,10 +680,7 @@ class FederationHandler(BaseHandler):
(ie, we are missing one or more prev_events), the resolved state at the (ie, we are missing one or more prev_events), the resolved state at the
event event
""" """
room_id = event.room_id logger.debug("Processing event: %s", event)
event_id = event.event_id
logger.debug("[%s %s] Processing event: %s", room_id, event_id, event)
try: try:
await self._handle_new_event(origin, event, state=state) await self._handle_new_event(origin, event, state=state)