Improve logging when processing incoming transactions (#9596)
Put the room id in the logcontext, to make it easier to understand what's going on.
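For illustration, the pattern this change leans on looks roughly like the following minimal, self-contained sketch. It is not Synapse's actual LoggingContext machinery: the `nested_logging_context` stand-in, the `ContextFilter` class and the `%(context)s` format field are all invented for the example. A context manager pushes an identifier such as the room id into the ambient logging context, and a logging filter stamps it onto every record emitted underneath it:

import contextlib
import contextvars
import logging

_context = contextvars.ContextVar("log_context", default="root")


class ContextFilter(logging.Filter):
    """Copy the current context name onto every log record."""

    def filter(self, record: logging.LogRecord) -> bool:
        record.context = _context.get()
        return True


@contextlib.contextmanager
def nested_logging_context(suffix: str):
    """Append ``suffix`` (e.g. a room id) to the current context name."""
    token = _context.set(f"{_context.get()}-{suffix}")
    try:
        yield
    finally:
        _context.reset(token)


logging.basicConfig(format="%(levelname)s [%(context)s] %(message)s")
logger = logging.getLogger(__name__)
logger.addFilter(ContextFilter())
logger.setLevel(logging.DEBUG)

with nested_logging_context("!room:example.org"):
    # Emits something like: DEBUG [root-!room:example.org] Processing PDUs for ...
    logger.debug("Processing PDUs for %s", "!room:example.org")
    with nested_logging_context("$event:example.org"):
        # Nested contexts stack, so per-event lines carry both ids.
        logger.debug("Already seen pdu")

With the room (and event) ids available from the surrounding context, call sites no longer need to repeat them in every message, which is exactly the clean-up the handler-side hunks below perform.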
commit 2b328d7e02
parent 464e5da7b2

3 changed files with 51 additions and 73 deletions
changelog.d/9596.misc (new file)

@@ -0,0 +1 @@
+Improve logging when processing incoming transactions.
synapse/federation/federation_server.py

@@ -335,34 +335,41 @@ class FederationServer(FederationBase):
         # impose a limit to avoid going too crazy with ram/cpu.
 
         async def process_pdus_for_room(room_id: str):
-            logger.debug("Processing PDUs for %s", room_id)
-            try:
-                await self.check_server_matches_acl(origin_host, room_id)
-            except AuthError as e:
-                logger.warning("Ignoring PDUs for room %s from banned server", room_id)
-                for pdu in pdus_by_room[room_id]:
-                    event_id = pdu.event_id
-                    pdu_results[event_id] = e.error_dict()
-                return
-
-            for pdu in pdus_by_room[room_id]:
-                event_id = pdu.event_id
-                with pdu_process_time.time():
-                    with nested_logging_context(event_id):
-                        try:
-                            await self._handle_received_pdu(origin, pdu)
-                            pdu_results[event_id] = {}
-                        except FederationError as e:
-                            logger.warning("Error handling PDU %s: %s", event_id, e)
-                            pdu_results[event_id] = {"error": str(e)}
-                        except Exception as e:
-                            f = failure.Failure()
-                            pdu_results[event_id] = {"error": str(e)}
-                            logger.error(
-                                "Failed to handle PDU %s",
-                                event_id,
-                                exc_info=(f.type, f.value, f.getTracebackObject()),  # type: ignore
-                            )
+            with nested_logging_context(room_id):
+                logger.debug("Processing PDUs for %s", room_id)
+
+                try:
+                    await self.check_server_matches_acl(origin_host, room_id)
+                except AuthError as e:
+                    logger.warning(
+                        "Ignoring PDUs for room %s from banned server", room_id
+                    )
+                    for pdu in pdus_by_room[room_id]:
+                        event_id = pdu.event_id
+                        pdu_results[event_id] = e.error_dict()
+                    return
+
+                for pdu in pdus_by_room[room_id]:
+                    pdu_results[pdu.event_id] = await process_pdu(pdu)
+
+        async def process_pdu(pdu: EventBase) -> JsonDict:
+            event_id = pdu.event_id
+            with pdu_process_time.time():
+                with nested_logging_context(event_id):
+                    try:
+                        await self._handle_received_pdu(origin, pdu)
+                        return {}
+                    except FederationError as e:
+                        logger.warning("Error handling PDU %s: %s", event_id, e)
+                        return {"error": str(e)}
+                    except Exception as e:
+                        f = failure.Failure()
+                        logger.error(
+                            "Failed to handle PDU %s",
+                            event_id,
+                            exc_info=(f.type, f.value, f.getTracebackObject()),  # type: ignore
+                        )
+                        return {"error": str(e)}
 
         await concurrently_execute(
             process_pdus_for_room, pdus_by_room.keys(), TRANSACTION_CONCURRENCY_LIMIT
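The rooms are still fanned out via `concurrently_execute(process_pdus_for_room, pdus_by_room.keys(), TRANSACTION_CONCURRENCY_LIMIT)`, i.e. the per-room coroutine runs over every room with a cap on how many run at once. A rough asyncio-only approximation of that helper follows; the real one in Synapse also takes care of logcontexts, and `worker`, `main` and the sample room ids here are made up for the sketch:

import asyncio
from typing import Awaitable, Callable, Iterable, TypeVar

T = TypeVar("T")


async def concurrently_execute(
    func: Callable[[T], Awaitable[None]], args: Iterable[T], limit: int
) -> None:
    """Run ``func`` over every item in ``args``, at most ``limit`` at a time."""
    it = iter(args)

    async def worker() -> None:
        # Each worker keeps pulling the next unclaimed item until none remain;
        # next() is synchronous, so two workers never grab the same item.
        for item in it:
            await func(item)

    await asyncio.gather(*(worker() for _ in range(limit)))


async def main() -> None:
    async def process_pdus_for_room(room_id: str) -> None:
        print("processing PDUs for", room_id)
        await asyncio.sleep(0.1)

    await concurrently_execute(
        process_pdus_for_room, ["!a:example.org", "!b:example.org", "!c:example.org"], 2
    )


asyncio.run(main())

Because the workers share one iterator and only pull the next room when they finish the current one, at most `limit` rooms are being processed at any time.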
synapse/handlers/federation.py

@@ -201,7 +201,7 @@ class FederationHandler(BaseHandler):
             or pdu.internal_metadata.is_outlier()
         )
         if already_seen:
-            logger.debug("[%s %s]: Already seen pdu", room_id, event_id)
+            logger.debug("Already seen pdu")
             return
 
         # do some initial sanity-checking of the event. In particular, make
@@ -210,18 +210,14 @@ class FederationHandler(BaseHandler):
         try:
             self._sanity_check_event(pdu)
         except SynapseError as err:
-            logger.warning(
-                "[%s %s] Received event failed sanity checks", room_id, event_id
-            )
+            logger.warning("Received event failed sanity checks")
             raise FederationError("ERROR", err.code, err.msg, affected=pdu.event_id)
 
         # If we are currently in the process of joining this room, then we
         # queue up events for later processing.
         if room_id in self.room_queues:
             logger.info(
-                "[%s %s] Queuing PDU from %s for now: join in progress",
-                room_id,
-                event_id,
+                "Queuing PDU from %s for now: join in progress",
                 origin,
             )
             self.room_queues[room_id].append((pdu, origin))
@@ -236,9 +232,7 @@ class FederationHandler(BaseHandler):
         is_in_room = await self.auth.check_host_in_room(room_id, self.server_name)
         if not is_in_room:
             logger.info(
-                "[%s %s] Ignoring PDU from %s as we're not in the room",
-                room_id,
-                event_id,
+                "Ignoring PDU from %s as we're not in the room",
                 origin,
             )
             return None
@@ -250,7 +244,7 @@ class FederationHandler(BaseHandler):
         # We only backfill backwards to the min depth.
         min_depth = await self.get_min_depth_for_context(pdu.room_id)
 
-        logger.debug("[%s %s] min_depth: %d", room_id, event_id, min_depth)
+        logger.debug("min_depth: %d", min_depth)
 
         prevs = set(pdu.prev_event_ids())
         seen = await self.store.have_events_in_timeline(prevs)
@@ -267,17 +261,13 @@ class FederationHandler(BaseHandler):
                 # If we're missing stuff, ensure we only fetch stuff one
                 # at a time.
                 logger.info(
-                    "[%s %s] Acquiring room lock to fetch %d missing prev_events: %s",
-                    room_id,
-                    event_id,
+                    "Acquiring room lock to fetch %d missing prev_events: %s",
                     len(missing_prevs),
                     shortstr(missing_prevs),
                 )
                 with (await self._room_pdu_linearizer.queue(pdu.room_id)):
                     logger.info(
-                        "[%s %s] Acquired room lock to fetch %d missing prev_events",
-                        room_id,
-                        event_id,
+                        "Acquired room lock to fetch %d missing prev_events",
                         len(missing_prevs),
                     )
 
@@ -297,9 +287,7 @@ class FederationHandler(BaseHandler):
 
                     if not prevs - seen:
                         logger.info(
-                            "[%s %s] Found all missing prev_events",
-                            room_id,
-                            event_id,
+                            "Found all missing prev_events",
                         )
 
         if prevs - seen:
@@ -329,9 +317,7 @@ class FederationHandler(BaseHandler):
 
             if sent_to_us_directly:
                 logger.warning(
-                    "[%s %s] Rejecting: failed to fetch %d prev events: %s",
-                    room_id,
-                    event_id,
+                    "Rejecting: failed to fetch %d prev events: %s",
                     len(prevs - seen),
                     shortstr(prevs - seen),
                 )
@@ -414,10 +400,7 @@ class FederationHandler(BaseHandler):
                 state = [event_map[e] for e in state_map.values()]
             except Exception:
                 logger.warning(
-                    "[%s %s] Error attempting to resolve state at missing "
-                    "prev_events",
-                    room_id,
-                    event_id,
+                    "Error attempting to resolve state at missing " "prev_events",
                     exc_info=True,
                 )
                 raise FederationError(
@@ -454,9 +437,7 @@ class FederationHandler(BaseHandler):
         latest |= seen
 
         logger.info(
-            "[%s %s]: Requesting missing events between %s and %s",
-            room_id,
-            event_id,
+            "Requesting missing events between %s and %s",
             shortstr(latest),
             event_id,
         )
@@ -523,15 +504,11 @@ class FederationHandler(BaseHandler):
             # We failed to get the missing events, but since we need to handle
             # the case of `get_missing_events` not returning the necessary
             # events anyway, it is safe to simply log the error and continue.
-            logger.warning(
-                "[%s %s]: Failed to get prev_events: %s", room_id, event_id, e
-            )
+            logger.warning("Failed to get prev_events: %s", e)
            return
 
         logger.info(
-            "[%s %s]: Got %d prev_events: %s",
-            room_id,
-            event_id,
+            "Got %d prev_events: %s",
             len(missing_events),
             shortstr(missing_events),
         )
@@ -542,9 +519,7 @@ class FederationHandler(BaseHandler):
 
         for ev in missing_events:
             logger.info(
-                "[%s %s] Handling received prev_event %s",
-                room_id,
-                event_id,
+                "Handling received prev_event %s",
                 ev.event_id,
             )
             with nested_logging_context(ev.event_id):
@@ -553,9 +528,7 @@ class FederationHandler(BaseHandler):
                 except FederationError as e:
                     if e.code == 403:
                         logger.warning(
-                            "[%s %s] Received prev_event %s failed history check.",
-                            room_id,
-                            event_id,
+                            "Received prev_event %s failed history check.",
                             ev.event_id,
                         )
                     else:
@@ -707,10 +680,7 @@ class FederationHandler(BaseHandler):
                 (ie, we are missing one or more prev_events), the resolved state at the
                 event
         """
-        room_id = event.room_id
-        event_id = event.event_id
-
-        logger.debug("[%s %s] Processing event: %s", room_id, event_id, event)
+        logger.debug("Processing event: %s", event)
 
         try:
             await self._handle_new_event(origin, event, state=state)
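The net effect of the handler-side hunks: the `[room_id event_id]` prefixes disappear from the messages because the same identifiers now arrive via the logging context established in federation_server.py. Assuming a log format that includes the context name (Synapse's default config exposes it as `%(request)s`; the exact field name, layout and the ids below are illustrative and may differ per deployment), the before/after of a single line looks roughly like this sketch:

import logging

logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(request)s - %(message)s"
)
logger = logging.getLogger("synapse.handlers.federation")
logger.setLevel(logging.INFO)

# Before this change: the ids were repeated inside the message itself.
logger.info(
    "[%s %s] Found all missing prev_events",
    "!room:example.org",
    "$event:example.org",
    extra={"request": "PUT-12"},
)

# After: the message is plain; the ids ride along in the context name pushed
# via nested_logging_context() while the transaction is being processed.
logger.info(
    "Found all missing prev_events",
    extra={"request": "PUT-12-!room:example.org-$event:example.org"},
)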