Remove slaved id tracker (#14376)

This matches the multi instance writer ID generator class which can
both handle advancing the current token over replication and by calling
the database.
This commit is contained in:
Nick Mills-Barrett 2022-11-14 17:31:36 +00:00 committed by GitHub
parent e226513c0f
commit 36097e88c4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 74 additions and 176 deletions

1
changelog.d/14376.misc Normal file
View file

@ -0,0 +1 @@
Remove old stream ID tracking code. Contributed by Nick @Beeper (@fizzadar).

View file

@ -1,13 +0,0 @@
# Copyright 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View file

@ -1,13 +0,0 @@
# Copyright 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View file

@ -1,50 +0,0 @@
# Copyright 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List, Optional, Tuple
from synapse.storage.database import LoggingDatabaseConnection
from synapse.storage.util.id_generators import AbstractStreamIdTracker, _load_current_id
class SlavedIdTracker(AbstractStreamIdTracker):
"""Tracks the "current" stream ID of a stream with a single writer.
See `AbstractStreamIdTracker` for more details.
Note that this class does not work correctly when there are multiple
writers.
"""
def __init__(
self,
db_conn: LoggingDatabaseConnection,
table: str,
column: str,
extra_tables: Optional[List[Tuple[str, str]]] = None,
step: int = 1,
):
self.step = step
self._current = _load_current_id(db_conn, table, column, step)
if extra_tables:
for table, column in extra_tables:
self.advance(None, _load_current_id(db_conn, table, column))
def advance(self, instance_name: Optional[str], new_id: int) -> None:
self._current = (max if self.step > 0 else min)(self._current, new_id)
def get_current_token(self) -> int:
return self._current
def get_current_token_for_writer(self, instance_name: str) -> int:
return self.get_current_token()

View file

@ -27,7 +27,6 @@ from typing import (
) )
from synapse.api.constants import AccountDataTypes from synapse.api.constants import AccountDataTypes
from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
from synapse.replication.tcp.streams import AccountDataStream, TagAccountDataStream from synapse.replication.tcp.streams import AccountDataStream, TagAccountDataStream
from synapse.storage._base import db_to_json from synapse.storage._base import db_to_json
from synapse.storage.database import ( from synapse.storage.database import (
@ -68,12 +67,11 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
# to write account data. A value of `True` implies that `_account_data_id_gen` # to write account data. A value of `True` implies that `_account_data_id_gen`
# is an `AbstractStreamIdGenerator` and not just a tracker. # is an `AbstractStreamIdGenerator` and not just a tracker.
self._account_data_id_gen: AbstractStreamIdTracker self._account_data_id_gen: AbstractStreamIdTracker
self._can_write_to_account_data = (
self._instance_name in hs.config.worker.writers.account_data
)
if isinstance(database.engine, PostgresEngine): if isinstance(database.engine, PostgresEngine):
self._can_write_to_account_data = (
self._instance_name in hs.config.worker.writers.account_data
)
self._account_data_id_gen = MultiWriterIdGenerator( self._account_data_id_gen = MultiWriterIdGenerator(
db_conn=db_conn, db_conn=db_conn,
db=database, db=database,
@ -95,21 +93,13 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
# `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets
# updated over replication. (Multiple writers are not supported for # updated over replication. (Multiple writers are not supported for
# SQLite). # SQLite).
if self._instance_name in hs.config.worker.writers.account_data: self._account_data_id_gen = StreamIdGenerator(
self._can_write_to_account_data = True db_conn,
self._account_data_id_gen = StreamIdGenerator( "room_account_data",
db_conn, "stream_id",
"room_account_data", extra_tables=[("room_tags_revisions", "stream_id")],
"stream_id", is_writer=self._instance_name in hs.config.worker.writers.account_data,
extra_tables=[("room_tags_revisions", "stream_id")], )
)
else:
self._account_data_id_gen = SlavedIdTracker(
db_conn,
"room_account_data",
"stream_id",
extra_tables=[("room_tags_revisions", "stream_id")],
)
account_max = self.get_max_account_data_stream_id() account_max = self.get_max_account_data_stream_id()
self._account_data_stream_cache = StreamChangeCache( self._account_data_stream_cache = StreamChangeCache(

View file

@ -38,7 +38,6 @@ from synapse.logging.opentracing import (
whitelisted_homeserver, whitelisted_homeserver,
) )
from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.metrics.background_process_metrics import wrap_as_background_process
from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
from synapse.replication.tcp.streams._base import DeviceListsStream, UserSignatureStream from synapse.replication.tcp.streams._base import DeviceListsStream, UserSignatureStream
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
from synapse.storage.database import ( from synapse.storage.database import (
@ -86,28 +85,19 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
): ):
super().__init__(database, db_conn, hs) super().__init__(database, db_conn, hs)
if hs.config.worker.worker_app is None: # In the worker store this is an ID tracker which we overwrite in the non-worker
self._device_list_id_gen: AbstractStreamIdTracker = StreamIdGenerator( # class below that is used on the main process.
db_conn, self._device_list_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
"device_lists_stream", db_conn,
"stream_id", "device_lists_stream",
extra_tables=[ "stream_id",
("user_signature_stream", "stream_id"), extra_tables=[
("device_lists_outbound_pokes", "stream_id"), ("user_signature_stream", "stream_id"),
("device_lists_changes_in_room", "stream_id"), ("device_lists_outbound_pokes", "stream_id"),
], ("device_lists_changes_in_room", "stream_id"),
) ],
else: is_writer=hs.config.worker.worker_app is None,
self._device_list_id_gen = SlavedIdTracker( )
db_conn,
"device_lists_stream",
"stream_id",
extra_tables=[
("user_signature_stream", "stream_id"),
("device_lists_outbound_pokes", "stream_id"),
("device_lists_changes_in_room", "stream_id"),
],
)
# Type-ignore: _device_list_id_gen is mixed in from either DataStore (as a # Type-ignore: _device_list_id_gen is mixed in from either DataStore (as a
# StreamIdGenerator) or SlavedDataStore (as a SlavedIdTracker). # StreamIdGenerator) or SlavedDataStore (as a SlavedIdTracker).

View file

@ -59,7 +59,6 @@ from synapse.metrics.background_process_metrics import (
run_as_background_process, run_as_background_process,
wrap_as_background_process, wrap_as_background_process,
) )
from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
from synapse.replication.tcp.streams import BackfillStream from synapse.replication.tcp.streams import BackfillStream
from synapse.replication.tcp.streams.events import EventsStream from synapse.replication.tcp.streams.events import EventsStream
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
@ -213,26 +212,20 @@ class EventsWorkerStore(SQLBaseStore):
# `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets
# updated over replication. (Multiple writers are not supported for # updated over replication. (Multiple writers are not supported for
# SQLite). # SQLite).
if hs.get_instance_name() in hs.config.worker.writers.events: self._stream_id_gen = StreamIdGenerator(
self._stream_id_gen = StreamIdGenerator( db_conn,
db_conn, "events",
"events", "stream_ordering",
"stream_ordering", is_writer=hs.get_instance_name() in hs.config.worker.writers.events,
) )
self._backfill_id_gen = StreamIdGenerator( self._backfill_id_gen = StreamIdGenerator(
db_conn, db_conn,
"events", "events",
"stream_ordering", "stream_ordering",
step=-1, step=-1,
extra_tables=[("ex_outlier_stream", "event_stream_ordering")], extra_tables=[("ex_outlier_stream", "event_stream_ordering")],
) is_writer=hs.get_instance_name() in hs.config.worker.writers.events,
else: )
self._stream_id_gen = SlavedIdTracker(
db_conn, "events", "stream_ordering"
)
self._backfill_id_gen = SlavedIdTracker(
db_conn, "events", "stream_ordering", step=-1
)
events_max = self._stream_id_gen.get_current_token() events_max = self._stream_id_gen.get_current_token()
curr_state_delta_prefill, min_curr_state_delta_id = self.db_pool.get_cache_dict( curr_state_delta_prefill, min_curr_state_delta_id = self.db_pool.get_cache_dict(

View file

@ -30,7 +30,6 @@ from typing import (
from synapse.api.errors import StoreError from synapse.api.errors import StoreError
from synapse.config.homeserver import ExperimentalConfig from synapse.config.homeserver import ExperimentalConfig
from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
from synapse.replication.tcp.streams import PushRulesStream from synapse.replication.tcp.streams import PushRulesStream
from synapse.storage._base import SQLBaseStore from synapse.storage._base import SQLBaseStore
from synapse.storage.database import ( from synapse.storage.database import (
@ -111,14 +110,14 @@ class PushRulesWorkerStore(
): ):
super().__init__(database, db_conn, hs) super().__init__(database, db_conn, hs)
if hs.config.worker.worker_app is None: # In the worker store this is an ID tracker which we overwrite in the non-worker
self._push_rules_stream_id_gen: AbstractStreamIdTracker = StreamIdGenerator( # class below that is used on the main process.
db_conn, "push_rules_stream", "stream_id" self._push_rules_stream_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
) db_conn,
else: "push_rules_stream",
self._push_rules_stream_id_gen = SlavedIdTracker( "stream_id",
db_conn, "push_rules_stream", "stream_id" is_writer=hs.config.worker.worker_app is None,
) )
push_rules_prefill, push_rules_id = self.db_pool.get_cache_dict( push_rules_prefill, push_rules_id = self.db_pool.get_cache_dict(
db_conn, db_conn,

View file

@ -27,7 +27,6 @@ from typing import (
) )
from synapse.push import PusherConfig, ThrottleParams from synapse.push import PusherConfig, ThrottleParams
from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
from synapse.replication.tcp.streams import PushersStream from synapse.replication.tcp.streams import PushersStream
from synapse.storage._base import SQLBaseStore, db_to_json from synapse.storage._base import SQLBaseStore, db_to_json
from synapse.storage.database import ( from synapse.storage.database import (
@ -59,20 +58,15 @@ class PusherWorkerStore(SQLBaseStore):
): ):
super().__init__(database, db_conn, hs) super().__init__(database, db_conn, hs)
if hs.config.worker.worker_app is None: # In the worker store this is an ID tracker which we overwrite in the non-worker
self._pushers_id_gen: AbstractStreamIdTracker = StreamIdGenerator( # class below that is used on the main process.
db_conn, self._pushers_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
"pushers", db_conn,
"id", "pushers",
extra_tables=[("deleted_pushers", "stream_id")], "id",
) extra_tables=[("deleted_pushers", "stream_id")],
else: is_writer=hs.config.worker.worker_app is None,
self._pushers_id_gen = SlavedIdTracker( )
db_conn,
"pushers",
"id",
extra_tables=[("deleted_pushers", "stream_id")],
)
self.db_pool.updates.register_background_update_handler( self.db_pool.updates.register_background_update_handler(
"remove_deactivated_pushers", "remove_deactivated_pushers",

View file

@ -27,7 +27,6 @@ from typing import (
) )
from synapse.api.constants import EduTypes from synapse.api.constants import EduTypes
from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
from synapse.replication.tcp.streams import ReceiptsStream from synapse.replication.tcp.streams import ReceiptsStream
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
from synapse.storage.database import ( from synapse.storage.database import (
@ -61,6 +60,9 @@ class ReceiptsWorkerStore(SQLBaseStore):
hs: "HomeServer", hs: "HomeServer",
): ):
self._instance_name = hs.get_instance_name() self._instance_name = hs.get_instance_name()
# In the worker store this is an ID tracker which we overwrite in the non-worker
# class below that is used on the main process.
self._receipts_id_gen: AbstractStreamIdTracker self._receipts_id_gen: AbstractStreamIdTracker
if isinstance(database.engine, PostgresEngine): if isinstance(database.engine, PostgresEngine):
@ -87,14 +89,12 @@ class ReceiptsWorkerStore(SQLBaseStore):
# `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets
# updated over replication. (Multiple writers are not supported for # updated over replication. (Multiple writers are not supported for
# SQLite). # SQLite).
if hs.get_instance_name() in hs.config.worker.writers.receipts: self._receipts_id_gen = StreamIdGenerator(
self._receipts_id_gen = StreamIdGenerator( db_conn,
db_conn, "receipts_linearized", "stream_id" "receipts_linearized",
) "stream_id",
else: is_writer=hs.get_instance_name() in hs.config.worker.writers.receipts,
self._receipts_id_gen = SlavedIdTracker( )
db_conn, "receipts_linearized", "stream_id"
)
super().__init__(database, db_conn, hs) super().__init__(database, db_conn, hs)

View file

@ -186,11 +186,13 @@ class StreamIdGenerator(AbstractStreamIdGenerator):
column: str, column: str,
extra_tables: Iterable[Tuple[str, str]] = (), extra_tables: Iterable[Tuple[str, str]] = (),
step: int = 1, step: int = 1,
is_writer: bool = True,
) -> None: ) -> None:
assert step != 0 assert step != 0
self._lock = threading.Lock() self._lock = threading.Lock()
self._step: int = step self._step: int = step
self._current: int = _load_current_id(db_conn, table, column, step) self._current: int = _load_current_id(db_conn, table, column, step)
self._is_writer = is_writer
for table, column in extra_tables: for table, column in extra_tables:
self._current = (max if step > 0 else min)( self._current = (max if step > 0 else min)(
self._current, _load_current_id(db_conn, table, column, step) self._current, _load_current_id(db_conn, table, column, step)
@ -204,9 +206,11 @@ class StreamIdGenerator(AbstractStreamIdGenerator):
self._unfinished_ids: OrderedDict[int, int] = OrderedDict() self._unfinished_ids: OrderedDict[int, int] = OrderedDict()
def advance(self, instance_name: str, new_id: int) -> None: def advance(self, instance_name: str, new_id: int) -> None:
# `StreamIdGenerator` should only be used when there is a single writer, # Advance should never be called on a writer instance, only over replication
# so replication should never happen. if self._is_writer:
raise Exception("Replication is not supported by StreamIdGenerator") raise Exception("Replication is not supported by writer StreamIdGenerator")
self._current = (max if self._step > 0 else min)(self._current, new_id)
def get_next(self) -> AsyncContextManager[int]: def get_next(self) -> AsyncContextManager[int]:
with self._lock: with self._lock:
@ -249,6 +253,9 @@ class StreamIdGenerator(AbstractStreamIdGenerator):
return _AsyncCtxManagerWrapper(manager()) return _AsyncCtxManagerWrapper(manager())
def get_current_token(self) -> int: def get_current_token(self) -> int:
if self._is_writer:
return self._current
with self._lock: with self._lock:
if self._unfinished_ids: if self._unfinished_ids:
return next(iter(self._unfinished_ids)) - self._step return next(iter(self._unfinished_ids)) - self._step