Faster joins: add issue links to the TODOs (#13004)

... to help us keep track of these things
2024-12-14 18:33:51 +01:00 · 2022-06-09 11:13:03 +01:00 · 2022-06-09 11:13:03 +01:00 · 7c6b2204d1
commit 7c6b2204d1
parent 97053c9406
9 changed files with 27 additions and 2 deletions
--- a/changelog.d/13004.misc
+++ b/changelog.d/13004.misc
@ -0,0 +1 @@
+Faster joins: add issue links to the TODO comments in the code.
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@ -545,6 +545,7 @@ class FederationHandler:
            if ret.partial_state:
                # TODO(faster_joins): roll this back if we don't manage to start the
                #   background resync (eg process_remote_join fails)
+                #   https://github.com/matrix-org/synapse/issues/12998
                await self.store.store_partial_state_room(room_id, ret.servers_in_room)

            max_stream_id = await self._federation_event_handler.process_remote_join(
@ -1506,14 +1507,17 @@ class FederationHandler:
        # TODO(faster_joins): do we need to lock to avoid races? What happens if other
        #   worker processes kick off a resync in parallel? Perhaps we should just elect
        #   a single worker to do the resync.
+        #   https://github.com/matrix-org/synapse/issues/12994
        #
        # TODO(faster_joins): what happens if we leave the room during a resync? if we
        #   really leave, that might mean we have difficulty getting the room state over
        #   federation.
+        #   https://github.com/matrix-org/synapse/issues/12802
        #
        # TODO(faster_joins): we need some way of prioritising which homeservers in
        #   `other_destinations` to try first, otherwise we'll spend ages trying dead
        #   homeservers for large rooms.
+        #   https://github.com/matrix-org/synapse/issues/12999

        if initial_destination is None and len(other_destinations) == 0:
            raise ValueError(
@ -1543,9 +1547,11 @@ class FederationHandler:
                # all the events are updated, so we can update current state and
                # clear the lazy-loading flag.
                logger.info("Updating current state for %s", room_id)
+                # TODO(faster_joins): support workers
+                #   https://github.com/matrix-org/synapse/issues/12994
                assert (
                    self._storage_controllers.persistence is not None
-                ), "TODO(faster_joins): support for workers"
+                ), "worker-mode deployments not currently supported here"
                await self._storage_controllers.persistence.update_current_state(
                    room_id
                )
@ -1559,6 +1565,8 @@ class FederationHandler:
                    )

                    # TODO(faster_joins) update room stats and user directory?
+                    #   https://github.com/matrix-org/synapse/issues/12814
+                    #   https://github.com/matrix-org/synapse/issues/12815
                    return

                # we raced against more events arriving with partial state. Go round
@ -1566,6 +1574,8 @@ class FederationHandler:
                # TODO(faster_joins): there is still a race here, whereby incoming events which raced
                #   with us will fail to be persisted after the call to `clear_partial_state_room` due to
                #   having partial state.
+                #   https://github.com/matrix-org/synapse/issues/12988
+                #
                continue

            events = await self.store.get_events_as_list(
@ -1588,6 +1598,7 @@ class FederationHandler:
                            #   indefinitely is also not the right thing to do if we can
                            #   reach all homeservers and they all claim they don't have
                            #   the state we want.
+                            #   https://github.com/matrix-org/synapse/issues/13000
                            logger.error(
                                "Failed to get state for %s at %s from %s because %s, "
                                "giving up!",
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@ -532,6 +532,7 @@ class FederationEventHandler:
                #
                # TODO(faster_joins): we probably need to be more intelligent, and
                #    exclude partial-state prev_events from consideration
+                #    https://github.com/matrix-org/synapse/issues/13001
                logger.warning(
                    "%s still has partial state: can't de-partial-state it yet",
                    event.event_id,
@ -777,6 +778,7 @@ class FederationEventHandler:
            state_ids = await self._resolve_state_at_missing_prevs(origin, event)
            # TODO(faster_joins): make sure that _resolve_state_at_missing_prevs does
            #   not return partial state
+            #   https://github.com/matrix-org/synapse/issues/13002

            await self._process_received_pdu(
                origin, event, state_ids=state_ids, backfilled=backfilled
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@ -1102,6 +1102,7 @@ class EventCreationHandler:
            #
            # TODO(faster_joins): figure out how this works, and make sure that the
            #   old state is complete.
+            #   https://github.com/matrix-org/synapse/issues/13003
            metadata = await self.store.get_metadata_for_events(state_event_ids)

            state_map_for_event: MutableStateMap[str] = {}
--- a/synapse/storage/controllers/persist_events.py
+++ b/synapse/storage/controllers/persist_events.py
@ -388,10 +388,13 @@ class EventsPersistenceStorageController:

        # TODO(faster_joins): get a real stream ordering, to make this work correctly
        #    across workers.
+        #    https://github.com/matrix-org/synapse/issues/12994
        #
        # TODO(faster_joins): this can race against event persistence, in which case we
        #    will end up with incorrect state. Perhaps we should make this a job we
-        #    farm out to the event persister, somehow.
+        #    farm out to the event persister thread, somehow.
+        #    https://github.com/matrix-org/synapse/issues/13007
+        #
        stream_id = self.main_store.get_room_max_stream_ordering()
        await self.persist_events_store.update_current_state(room_id, delta, stream_id)

--- a/synapse/storage/controllers/state.py
+++ b/synapse/storage/controllers/state.py
@ -452,6 +452,9 @@ class StateStorageController:
                 up to date.
        """
        # FIXME(faster_joins): what do we do here?
+        #   https://github.com/matrix-org/synapse/issues/12814
+        #   https://github.com/matrix-org/synapse/issues/12815
+        #   https://github.com/matrix-org/synapse/issues/13008

        return await self.stores.main.get_partial_current_state_deltas(
            prev_stream_id, max_stream_id
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@ -1112,6 +1112,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
        # this can race with incoming events, so we watch out for FK errors.
        # TODO(faster_joins): this still doesn't completely fix the race, since the persist process
        #   is not atomic. I fear we need an application-level lock.
+        #   https://github.com/matrix-org/synapse/issues/12988
        try:
            await self.db_pool.runInteraction(
                "clear_partial_state_room", self._clear_partial_state_room_txn, room_id
@ -1119,6 +1120,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
            return True
        except self.db_pool.engine.module.DatabaseError as e:
            # TODO(faster_joins): how do we distinguish between FK errors and other errors?
+            #   https://github.com/matrix-org/synapse/issues/12988
            logger.warning(
                "Exception while clearing lazy partial-state-room %s, retrying: %s",
                room_id,
--- a/synapse/storage/databases/main/state.py
+++ b/synapse/storage/databases/main/state.py
@ -435,6 +435,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
        )

        # TODO(faster_joins): need to do something about workers here
+        #   https://github.com/matrix-org/synapse/issues/12994
        txn.call_after(self.is_partial_state_event.invalidate, (event.event_id,))
        txn.call_after(
            self._get_state_group_for_event.prefill,
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@ -546,6 +546,7 @@ class StateFilter:
        #  the sender of a piece of state wasn't actually in the room, then clearly that
        #  state shouldn't have been returned.
        #  We should at least add some tests around this to see what happens.
+        #  https://github.com/matrix-org/synapse/issues/13006

        # if we haven't requested membership events, then it depends on the value of
        # 'include_others'
				`@ -0,0 +1 @@`
				`Faster joins: add issue links to the TODO comments in the code.`