Increase perf of handling concurrent use of StreamIDGenerators. (#9190)

We have seen a failure mode here where, if there are many in-flight
unfinished IDs, marking an ID as finished takes a lot of CPU, because
`deque.remove` has to iterate over the whole deque to find the entry.
This commit replaces the deque of unfinished IDs with an `OrderedDict`
used as an ordered set, so finishing an ID becomes a constant-time pop
while the oldest unfinished ID stays cheap to look up.
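
As an illustration (not part of the commit; the names below are made up), this is roughly the complexity difference the change targets: removing an arbitrary element from a deque scans the whole collection, while popping a key from an OrderedDict is an average-case constant-time hash removal.

    from collections import OrderedDict, deque

    # Before: unfinished stream IDs kept in a deque. Finishing an arbitrary
    # ID means a linear scan, so the cost grows with the number of in-flight IDs.
    unfinished_deque = deque(range(1, 100_001))
    unfinished_deque.remove(50_000)          # O(n) scan of the deque

    # After: an OrderedDict used as an ordered set (keys == values, values unused).
    # Insertion order is preserved, and removing a key is O(1) on average.
    unfinished = OrderedDict((i, i) for i in range(1, 100_001))
    unfinished.pop(50_000)                   # hash lookup, no scan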
Erik Johnston 2021-01-21 16:31:51 +00:00 committed by GitHub
parent 939ef657ce
commit 12ec55bfaa
2 changed files with 14 additions and 8 deletions

changelog.d/9190.misc (new file)

@@ -0,0 +1 @@
+Improve performance of concurrent use of `StreamIDGenerators`.

synapse/storage/util/id_generators.py

@@ -15,12 +15,11 @@
 import heapq
 import logging
 import threading
-from collections import deque
+from collections import OrderedDict
 from contextlib import contextmanager
 from typing import Dict, List, Optional, Set, Tuple, Union
 
 import attr
-from typing_extensions import Deque
 
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage.database import DatabasePool, LoggingTransaction
@@ -101,7 +100,13 @@ class StreamIdGenerator:
         self._current = (max if step > 0 else min)(
             self._current, _load_current_id(db_conn, table, column, step)
         )
-        self._unfinished_ids = deque()  # type: Deque[int]
+
+        # We use this as an ordered set, as we want to efficiently append items,
+        # remove items and get the first item. Since we insert IDs in order, the
+        # insertion ordering will ensure its in the correct ordering.
+        #
+        # The key and values are the same, but we never look at the values.
+        self._unfinished_ids = OrderedDict()  # type: OrderedDict[int, int]
 
     def get_next(self):
         """
@@ -113,7 +118,7 @@ class StreamIdGenerator:
             self._current += self._step
             next_id = self._current
 
-            self._unfinished_ids.append(next_id)
+            self._unfinished_ids[next_id] = next_id
 
         @contextmanager
         def manager():
@@ -121,7 +126,7 @@ class StreamIdGenerator:
                 yield next_id
             finally:
                 with self._lock:
-                    self._unfinished_ids.remove(next_id)
+                    self._unfinished_ids.pop(next_id)
 
         return _AsyncCtxManagerWrapper(manager())
@@ -140,7 +145,7 @@ class StreamIdGenerator:
             self._current += n * self._step
 
             for next_id in next_ids:
-                self._unfinished_ids.append(next_id)
+                self._unfinished_ids[next_id] = next_id
 
         @contextmanager
         def manager():
@@ -149,7 +154,7 @@ class StreamIdGenerator:
             finally:
                 with self._lock:
                     for next_id in next_ids:
-                        self._unfinished_ids.remove(next_id)
+                        self._unfinished_ids.pop(next_id)
 
         return _AsyncCtxManagerWrapper(manager())
@@ -162,7 +167,7 @@ class StreamIdGenerator:
         """
         with self._lock:
             if self._unfinished_ids:
-                return self._unfinished_ids[0] - self._step
+                return next(iter(self._unfinished_ids)) - self._step
 
             return self._current
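
For context, here is a stripped-down sketch of the pattern the diff adopts (a hypothetical class, not the Synapse implementation; it assumes a single positive step and omits the async wrapper): the OrderedDict acts as an ordered set of in-flight IDs, giving cheap append, cheap removal, and access to the oldest unfinished ID via next(iter(...)).

    import threading
    from collections import OrderedDict
    from contextlib import contextmanager


    class TinyIdGen:
        """Hypothetical, minimal ID generator mirroring the pattern above."""

        def __init__(self, current: int = 0, step: int = 1):
            self._lock = threading.Lock()
            self._current = current
            self._step = step
            # Ordered set of in-flight IDs: keys are inserted in ascending
            # order and the values are never looked at.
            self._unfinished = OrderedDict()  # type: OrderedDict[int, int]

        @contextmanager
        def get_next(self):
            with self._lock:
                self._current += self._step
                next_id = self._current
                self._unfinished[next_id] = next_id
            try:
                yield next_id
            finally:
                with self._lock:
                    # O(1) removal; with a deque this would be a linear scan.
                    self._unfinished.pop(next_id)

        def get_current_token(self) -> int:
            with self._lock:
                if self._unfinished:
                    # The first key is the oldest in-flight ID, so everything
                    # strictly below it has already been finished.
                    return next(iter(self._unfinished)) - self._step
                return self._current


    gen = TinyIdGen()
    with gen.get_next() as stream_id:
        print(stream_id, gen.get_current_token())  # 1 0 (ID 1 still in flight)
    print(gen.get_current_token())                  # 1 (all IDs finished)

Because IDs are allocated in ascending order while holding the lock, insertion order matches numeric order, which is why peeking at the first key is enough to report the highest stream ID below which every write has completed.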