Fix 500 error on /messages when we accumulate more than 5 backward extremities (#11027)

Found while working on the Gitter backfill script and noticed
it only happened after we sent 7 batches, https://gitlab.com/gitterHQ/webapp/-/merge_requests/2229#note_665906390

When there are more than 5 backward extremities for a given depth,
backfill will throw an error because we sliced the extremity list
to 5 but then try to iterate over the full list. This causes
us to look for state that we never fetched and we get a `KeyError`.

Before when calling `/messages` when there are more than 5 backward extremities:
```
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/site-packages/synapse/http/server.py", line 258, in _async_render_wrapper
    callback_return = await self._async_render(request)
  File "/usr/local/lib/python3.8/site-packages/synapse/http/server.py", line 446, in _async_render
    callback_return = await raw_callback_return
  File "/usr/local/lib/python3.8/site-packages/synapse/rest/client/room.py", line 580, in on_GET
    msgs = await self.pagination_handler.get_messages(
  File "/usr/local/lib/python3.8/site-packages/synapse/handlers/pagination.py", line 396, in get_messages
    await self.hs.get_federation_handler().maybe_backfill(
  File "/usr/local/lib/python3.8/site-packages/synapse/handlers/federation.py", line 133, in maybe_backfill
    return await self._maybe_backfill_inner(room_id, current_depth, limit)
  File "/usr/local/lib/python3.8/site-packages/synapse/handlers/federation.py", line 386, in _maybe_backfill_inner
    likely_extremeties_domains = get_domains_from_state(states[e_id])
KeyError: '$zpFflMEBtZdgcMQWTakaVItTLMjLFdKcRWUPHbbSZJl'
```
This commit is contained in:
Eric Eastwood 2021-10-14 18:53:45 -05:00 committed by GitHub
parent efd0074ab7
commit daf498e099
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 79 additions and 12 deletions

1
changelog.d/11027.bugfix Normal file
View file

@ -0,0 +1 @@
Fix 500 error on `/messages` when the server accumulates more than 5 backwards extremities at a given depth for a room.

View file

@ -238,18 +238,10 @@ class FederationHandler:
)
return False
logger.debug(
"room_id: %s, backfill: current_depth: %s, max_depth: %s, extrems: %s",
room_id,
current_depth,
max_depth,
sorted_extremeties_tuple,
)
# We ignore extremities that have a greater depth than our current depth
# as:
# 1. we don't really care about getting events that have happened
# before our current position; and
# after our current position; and
# 2. we have likely previously tried and failed to backfill from that
# extremity, so to avoid getting "stuck" requesting the same
# backfill repeatedly we drop those extremities.
@ -257,9 +249,19 @@ class FederationHandler:
t for t in sorted_extremeties_tuple if int(t[1]) <= current_depth
]
logger.debug(
"room_id: %s, backfill: current_depth: %s, limit: %s, max_depth: %s, extrems: %s filtered_sorted_extremeties_tuple: %s",
room_id,
current_depth,
limit,
max_depth,
sorted_extremeties_tuple,
filtered_sorted_extremeties_tuple,
)
# However, we need to check that the filtered extremities are non-empty.
# If they are empty then either we can a) bail or b) still attempt to
# backill. We opt to try backfilling anyway just in case we do get
# backfill. We opt to try backfilling anyway just in case we do get
# relevant events.
if filtered_sorted_extremeties_tuple:
sorted_extremeties_tuple = filtered_sorted_extremeties_tuple
@ -389,7 +391,7 @@ class FederationHandler:
for key, state_dict in states.items()
}
for e_id, _ in sorted_extremeties_tuple:
for e_id in event_ids:
likely_extremeties_domains = get_domains_from_state(states[e_id])
success = await try_backfill(

View file

@ -392,7 +392,7 @@ class FederationEventHandler:
@log_function
async def backfill(
self, dest: str, room_id: str, limit: int, extremities: List[str]
self, dest: str, room_id: str, limit: int, extremities: Iterable[str]
) -> None:
"""Trigger a backfill request to `dest` for the given `room_id`

View file

@ -23,6 +23,7 @@ from synapse.federation.federation_base import event_from_pdu_json
from synapse.logging.context import LoggingContext, run_in_background
from synapse.rest import admin
from synapse.rest.client import login, room
from synapse.types import create_requester
from synapse.util.stringutils import random_string
from tests import unittest
@ -30,6 +31,10 @@ from tests import unittest
logger = logging.getLogger(__name__)
def generate_fake_event_id() -> str:
return "$fake_" + random_string(43)
class FederationTestCase(unittest.HomeserverTestCase):
servlets = [
admin.register_servlets,
@ -198,6 +203,65 @@ class FederationTestCase(unittest.HomeserverTestCase):
self.assertEqual(sg, sg2)
def test_backfill_with_many_backward_extremities(self):
"""
Check that we can backfill with many backward extremities.
The goal is to make sure that when we only use a portion
of backwards extremities(the magic number is more than 5),
no errors are thrown.
Regression test, see #11027
"""
# create the room
user_id = self.register_user("kermit", "test")
tok = self.login("kermit", "test")
requester = create_requester(user_id)
room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
ev1 = self.helper.send(room_id, "first message", tok=tok)
# Create "many" backward extremities. The magic number we're trying to
# create more than is 5 which corresponds to the number of backward
# extremities we slice off in `_maybe_backfill_inner`
for _ in range(0, 8):
event_handler = self.hs.get_event_creation_handler()
event, context = self.get_success(
event_handler.create_event(
requester,
{
"type": "m.room.message",
"content": {
"msgtype": "m.text",
"body": "message connected to fake event",
},
"room_id": room_id,
"sender": user_id,
},
prev_event_ids=[
ev1["event_id"],
# We're creating an backward extremity each time thanks
# to this fake event
generate_fake_event_id(),
],
)
)
self.get_success(
event_handler.handle_new_client_event(requester, event, context)
)
current_depth = 1
limit = 100
with LoggingContext("receive_pdu"):
# Make sure backfill still works
d = run_in_background(
self.hs.get_federation_handler().maybe_backfill,
room_id,
current_depth,
limit,
)
self.get_success(d)
def test_backfill_floating_outlier_membership_auth(self):
"""
As the local homeserver, check that we can properly process a federated