0
0
Fork 1
mirror of https://mau.dev/maunium/synapse.git synced 2024-06-01 02:08:56 +02:00
synapse/synapse/types/__init__.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1224 lines
40 KiB
Python
Raw Normal View History

2014-08-12 16:10:52 +02:00
#
2023-11-21 21:29:58 +01:00
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2019 The Matrix.org Foundation C.I.C.
# Copyright 2014-2016 OpenMarket Ltd
2023-11-21 21:29:58 +01:00
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
2014-08-12 16:10:52 +02:00
#
#
2020-08-12 15:03:08 +02:00
import abc
import re
import string
from enum import Enum
from typing import (
TYPE_CHECKING,
Faster joins: omit partial rooms from eager syncs until the resync completes (#14870) * Allow `AbstractSet` in `StrCollection` Or else frozensets are excluded. This will be useful in an upcoming commit where I plan to change a function that accepts `List[str]` to accept `StrCollection` instead. * `rooms_to_exclude` -> `rooms_to_exclude_globally` I am about to make use of this exclusion mechanism to exclude rooms for a specific user and a specific sync. This rename helps to clarify the distinction between the global config and the rooms to exclude for a specific sync. * Better function names for internal sync methods * Track a list of excluded rooms on SyncResultBuilder I plan to feed a list of partially stated rooms for this sync to ignore * Exclude partial state rooms during eager sync using the mechanism established in the previous commit * Track un-partial-state stream in sync tokens So that we can work out which rooms have become fully-stated during a given sync period. * Fix mutation of `@cached` return value This was fouling up a complement test added alongside this PR. Excluding a room would mean the set of forgotten rooms in the cache would be extended. This means that room could be erroneously considered forgotten in the future. Introduced in #12310, Synapse 1.57.0. I don't think this had any user-visible side effects (until now). * SyncResultBuilder: track rooms to force as newly joined Similar plan as before. We've omitted rooms from certain sync responses; now we establish the mechanism to reintroduce them into future syncs. * Read new field, to present rooms as newly joined * Force un-partial-stated rooms to be newly-joined for eager incremental syncs only, provided they're still fully stated * Notify user stream listeners to wake up long polling syncs * Changelog * Typo fix Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Unnecessary list cast Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Rephrase comment Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Another comment Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Fixup merge(?) * Poke notifier when receiving un-partial-stated msg over replication * Fixup merge whoops Thanks MV :) Co-authored-by: Mathieu Velen <mathieuv@matrix.org> Co-authored-by: Mathieu Velten <mathieuv@matrix.org> Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
2023-01-23 16:44:39 +01:00
AbstractSet,
Any,
ClassVar,
Dict,
List,
Literal,
Mapping,
Match,
MutableMapping,
NoReturn,
Optional,
Set,
Tuple,
Type,
TypeVar,
Union,
overload,
)
import attr
from immutabledict import immutabledict
2019-07-25 17:08:24 +02:00
from signedjson.key import decode_verify_key_bytes
from signedjson.types import VerifyKey
from typing_extensions import TypedDict
2019-07-25 17:08:24 +02:00
from unpaddedbase64 import decode_base64
from zope.interface import Interface
from twisted.internet.defer import CancelledError
from twisted.internet.interfaces import (
IReactorCore,
IReactorPluggableNameResolver,
IReactorSSL,
IReactorTCP,
2021-09-10 18:03:18 +02:00
IReactorThreads,
IReactorTime,
Experimental Unix socket support (#15353) * Add IReactorUNIX to ISynapseReactor type hint. * Create listen_unix(). Two options, 'path' to the file and 'mode' of permissions(not umask, recommend 666 as default as nginx/other reverse proxies write to it and it's setup as user www-data) For the moment, leave the option to always create a PID lockfile turned on by default * Create UnixListenerConfig and wire it up. Rename ListenerConfig to TCPListenerConfig, then Union them together into ListenerConfig. This spidered around a bit, but I think I got it all. Metrics and manhole have been placed behind a conditional in case of accidental putting them onto a unix socket. Use new helpers to get if a listener is configured for TLS, and to help create a site tag for logging. There are 2 TODO things in parse_listener_def() to finish up at a later point. * Refactor SynapseRequest to handle logging correctly when using a unix socket. This prevents an exception when an IP address can not be retrieved for a request. * Make the 'Synapse now listening on Unix socket' log line a little prettier. * No silent failures on generic workers when trying to use a unix socket with metrics or manhole. * Inline variables in app/_base.py * Update docstring for listen_unix() to remove reference to a hardcoded permission of 0o666 and add a few comments saying where the default IS declared. * Disallow both a unix socket and a ip/port combo on the same listener resource * Linting * Changelog * review: simplify how listen_unix returns(and get rid of a type: ignore) * review: fix typo from ConfigError in app/homeserver.py * review: roll conditional for http_options.tag into get_site_tag() helper(and add docstring) * review: enhance the conditionals for checking if a port or path is valid, remove a TODO line * review: Try updating comment in get_client_ip_if_available to clarify what is being retrieved and why * Pretty up how 'Synapse now listening on Unix Socket' looks by decoding the byte string. * review: In parse_listener_def(), raise ConfigError if neither socket_path nor port is declared(and fix a typo)
2023-04-03 11:27:51 +02:00
IReactorUNIX,
)
from synapse.api.errors import Codes, SynapseError
from synapse.util.cancellation import cancellable
from synapse.util.stringutils import parse_and_validate_server_name
2014-08-12 16:10:52 +02:00
if TYPE_CHECKING:
from typing_extensions import Self
from synapse.appservice.api import ApplicationService
from synapse.storage.databases.main import DataStore, PurgeEventsStore
from synapse.storage.databases.main.appservice import ApplicationServiceWorkerStore
2020-01-16 14:31:22 +01:00
# Define a state map type from type/state_key to T (usually an event ID or
# event)
T = TypeVar("T")
StateKey = Tuple[str, str]
StateMap = Mapping[StateKey, T]
MutableStateMap = MutableMapping[StateKey, T]
2020-01-16 14:31:22 +01:00
# JSON types. These could be made stronger, but will do for now.
# A "simple" (canonical) JSON value.
SimpleJsonValue = Optional[Union[str, int, bool]]
JsonValue = Union[List[SimpleJsonValue], Tuple[SimpleJsonValue, ...], SimpleJsonValue]
# A JSON-serialisable dict.
JsonDict = Dict[str, Any]
# A JSON-serialisable mapping; roughly speaking an immutable JSONDict.
# Useful when you have a TypedDict which isn't going to be mutated and you don't want
# to cast to JsonDict everywhere.
JsonMapping = Mapping[str, Any]
# A JSON-serialisable object.
JsonSerializable = object
# Collection[str] that does not include str itself; str being a Sequence[str]
# is very misleading and results in bugs.
#
# StrCollection is an unordered collection of strings. If ordering is important,
# StrSequence can be used instead.
Faster joins: omit partial rooms from eager syncs until the resync completes (#14870) * Allow `AbstractSet` in `StrCollection` Or else frozensets are excluded. This will be useful in an upcoming commit where I plan to change a function that accepts `List[str]` to accept `StrCollection` instead. * `rooms_to_exclude` -> `rooms_to_exclude_globally` I am about to make use of this exclusion mechanism to exclude rooms for a specific user and a specific sync. This rename helps to clarify the distinction between the global config and the rooms to exclude for a specific sync. * Better function names for internal sync methods * Track a list of excluded rooms on SyncResultBuilder I plan to feed a list of partially stated rooms for this sync to ignore * Exclude partial state rooms during eager sync using the mechanism established in the previous commit * Track un-partial-state stream in sync tokens So that we can work out which rooms have become fully-stated during a given sync period. * Fix mutation of `@cached` return value This was fouling up a complement test added alongside this PR. Excluding a room would mean the set of forgotten rooms in the cache would be extended. This means that room could be erroneously considered forgotten in the future. Introduced in #12310, Synapse 1.57.0. I don't think this had any user-visible side effects (until now). * SyncResultBuilder: track rooms to force as newly joined Similar plan as before. We've omitted rooms from certain sync responses; now we establish the mechanism to reintroduce them into future syncs. * Read new field, to present rooms as newly joined * Force un-partial-stated rooms to be newly-joined for eager incremental syncs only, provided they're still fully stated * Notify user stream listeners to wake up long polling syncs * Changelog * Typo fix Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Unnecessary list cast Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Rephrase comment Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Another comment Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Fixup merge(?) * Poke notifier when receiving un-partial-stated msg over replication * Fixup merge whoops Thanks MV :) Co-authored-by: Mathieu Velen <mathieuv@matrix.org> Co-authored-by: Mathieu Velten <mathieuv@matrix.org> Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
2023-01-23 16:44:39 +01:00
StrCollection = Union[Tuple[str, ...], List[str], AbstractSet[str]]
# Sequence[str] that does not include str itself; str being a Sequence[str]
# is very misleading and results in bugs.
#
# Unlike StrCollection, StrSequence is an ordered collection of strings.
StrSequence = Union[Tuple[str, ...], List[str]]
# Note that this seems to require inheriting *directly* from Interface in order
# for mypy-zope to realize it is an interface.
class ISynapseReactor(
2021-09-10 18:03:18 +02:00
IReactorTCP,
IReactorSSL,
Experimental Unix socket support (#15353) * Add IReactorUNIX to ISynapseReactor type hint. * Create listen_unix(). Two options, 'path' to the file and 'mode' of permissions(not umask, recommend 666 as default as nginx/other reverse proxies write to it and it's setup as user www-data) For the moment, leave the option to always create a PID lockfile turned on by default * Create UnixListenerConfig and wire it up. Rename ListenerConfig to TCPListenerConfig, then Union them together into ListenerConfig. This spidered around a bit, but I think I got it all. Metrics and manhole have been placed behind a conditional in case of accidental putting them onto a unix socket. Use new helpers to get if a listener is configured for TLS, and to help create a site tag for logging. There are 2 TODO things in parse_listener_def() to finish up at a later point. * Refactor SynapseRequest to handle logging correctly when using a unix socket. This prevents an exception when an IP address can not be retrieved for a request. * Make the 'Synapse now listening on Unix socket' log line a little prettier. * No silent failures on generic workers when trying to use a unix socket with metrics or manhole. * Inline variables in app/_base.py * Update docstring for listen_unix() to remove reference to a hardcoded permission of 0o666 and add a few comments saying where the default IS declared. * Disallow both a unix socket and a ip/port combo on the same listener resource * Linting * Changelog * review: simplify how listen_unix returns(and get rid of a type: ignore) * review: fix typo from ConfigError in app/homeserver.py * review: roll conditional for http_options.tag into get_site_tag() helper(and add docstring) * review: enhance the conditionals for checking if a port or path is valid, remove a TODO line * review: Try updating comment in get_client_ip_if_available to clarify what is being retrieved and why * Pretty up how 'Synapse now listening on Unix Socket' looks by decoding the byte string. * review: In parse_listener_def(), raise ConfigError if neither socket_path nor port is declared(and fix a typo)
2023-04-03 11:27:51 +02:00
IReactorUNIX,
2021-09-10 18:03:18 +02:00
IReactorPluggableNameResolver,
IReactorTime,
IReactorCore,
IReactorThreads,
Interface,
):
"""The interfaces necessary for Synapse to function."""
@attr.s(frozen=True, slots=True, auto_attribs=True)
class Requester:
"""
Represents the user making a request
Attributes:
user: id of the user making the request
access_token_id: *ID* of the access token used for this request, or
None for appservices, guests, and tokens generated by the admin API
is_guest: True if the user making this request is a guest user
shadow_banned: True if the user making this request has been shadow-banned.
device_id: device_id which was set at authentication time, or
None for appservices, guests, and tokens generated by the admin API
app_service: the AS requesting on behalf of the user
authenticated_entity: The entity that authenticated when making the request.
This is different to the user_id when an admin user or the server is
"puppeting" the user.
"""
user: "UserID"
access_token_id: Optional[int]
is_guest: bool
2022-06-17 16:58:05 +02:00
scope: Set[str]
shadow_banned: bool
device_id: Optional[str]
app_service: Optional["ApplicationService"]
authenticated_entity: str
def serialize(self) -> Dict[str, Any]:
"""Converts self to a type that can be serialized as JSON, and then
deserialized by `deserialize`
Returns:
dict
"""
return {
"user_id": self.user.to_string(),
"access_token_id": self.access_token_id,
"is_guest": self.is_guest,
2022-06-17 16:58:05 +02:00
"scope": list(self.scope),
"shadow_banned": self.shadow_banned,
"device_id": self.device_id,
"app_server_id": self.app_service.id if self.app_service else None,
"authenticated_entity": self.authenticated_entity,
}
@staticmethod
def deserialize(
store: "ApplicationServiceWorkerStore", input: Dict[str, Any]
) -> "Requester":
"""Converts a dict that was produced by `serialize` back into a
Requester.
Args:
store: Used to convert AS ID to AS object
input: A dict produced by `serialize`
Returns:
Requester
"""
appservice = None
if input["app_server_id"]:
appservice = store.get_app_service_by_id(input["app_server_id"])
return Requester(
user=UserID.from_string(input["user_id"]),
access_token_id=input["access_token_id"],
is_guest=input["is_guest"],
scope=set(input.get("scope", [])),
shadow_banned=input["shadow_banned"],
device_id=input["device_id"],
app_service=appservice,
authenticated_entity=input["authenticated_entity"],
)
def create_requester(
user_id: Union[str, "UserID"],
access_token_id: Optional[int] = None,
is_guest: bool = False,
2022-06-17 16:58:05 +02:00
scope: StrCollection = (),
shadow_banned: bool = False,
device_id: Optional[str] = None,
app_service: Optional["ApplicationService"] = None,
authenticated_entity: Optional[str] = None,
) -> Requester:
"""
Create a new ``Requester`` object
Args:
user_id: id of the user making the request
access_token_id: *ID* of the access token used for this
request, or None if it came via the appservice API or similar
is_guest: True if the user making this request is a guest user
2022-06-17 16:58:05 +02:00
scope: the scope of the access token used for this request, if any
shadow_banned: True if the user making this request is shadow-banned.
device_id: device_id which was set at authentication time
app_service: the AS requesting on behalf of the user
authenticated_entity: The entity that authenticated when making the request.
This is different to the user_id when an admin user or the server is
"puppeting" the user.
Returns:
Requester
"""
if not isinstance(user_id, UserID):
user_id = UserID.from_string(user_id)
if authenticated_entity is None:
authenticated_entity = user_id.to_string()
2022-06-17 16:58:05 +02:00
scope = set(scope)
return Requester(
user_id,
access_token_id,
is_guest,
2022-06-17 16:58:05 +02:00
scope,
shadow_banned,
device_id,
app_service,
authenticated_entity,
)
def get_domain_from_id(string: str) -> str:
2017-05-16 15:07:08 +02:00
idx = string.find(":")
if idx == -1:
raise SynapseError(400, "Invalid ID: %r" % (string,))
2017-05-16 15:07:08 +02:00
return string[idx + 1 :]
2016-05-09 11:36:03 +02:00
def get_localpart_from_id(string: str) -> str:
2017-05-31 15:29:32 +02:00
idx = string.find(":")
if idx == -1:
raise SynapseError(400, "Invalid ID: %r" % (string,))
return string[1:idx]
2020-08-12 15:03:08 +02:00
DS = TypeVar("DS", bound="DomainSpecificString")
@attr.s(slots=True, frozen=True, repr=False, auto_attribs=True)
class DomainSpecificString(metaclass=abc.ABCMeta):
2014-08-12 16:10:52 +02:00
"""Common base class among ID/name strings that have a local part and a
domain name, prefixed with a sigil.
Has the fields:
'localpart' : The local part of the name (without the leading sigil)
'domain' : The domain part of the name
"""
SIGIL: ClassVar[str] = abc.abstractproperty() # type: ignore
2020-08-12 15:03:08 +02:00
localpart: str
domain: str
# Because this is a frozen class, it is deeply immutable.
def __copy__(self: DS) -> DS:
return self
def __deepcopy__(self: DS, memo: Dict[str, object]) -> DS:
return self
2014-08-12 16:10:52 +02:00
@classmethod
2020-08-12 15:03:08 +02:00
def from_string(cls: Type[DS], s: str) -> DS:
2014-08-12 16:10:52 +02:00
"""Parse the string given by 's' into a structure object."""
if len(s) < 1 or s[0:1] != cls.SIGIL:
raise SynapseError(
400,
"Expected %s string to start with '%s'" % (cls.__name__, cls.SIGIL),
Codes.INVALID_PARAM,
2014-08-12 16:10:52 +02:00
)
parts = s[1:].split(":", 1)
if len(parts) != 2:
raise SynapseError(
2014-08-12 16:10:52 +02:00
400,
"Expected %s of the form '%slocalname:domain'"
% (cls.__name__, cls.SIGIL),
Codes.INVALID_PARAM,
2014-08-12 16:10:52 +02:00
)
domain = parts[1]
# This code will need changing if we want to support multiple domain
# names on one HS
return cls(localpart=parts[0], domain=domain)
2014-08-12 16:10:52 +02:00
2020-08-12 15:03:08 +02:00
def to_string(self) -> str:
2014-08-12 16:10:52 +02:00
"""Return a string encoding the fields of the structure object."""
return "%s%s:%s" % (self.SIGIL, self.localpart, self.domain)
@classmethod
2020-08-12 15:03:08 +02:00
def is_valid(cls: Type[DS], s: str) -> bool:
"""Parses the input string and attempts to ensure it is valid."""
# TODO: this does not reject an empty localpart or an overly-long string.
# See https://spec.matrix.org/v1.2/appendices/#identifier-grammar
try:
obj = cls.from_string(s)
# Apply additional validation to the domain. This is only done
# during is_valid (and not part of from_string) since it is
# possible for invalid data to exist in room-state, etc.
parse_and_validate_server_name(obj.domain)
return True
except Exception:
return False
__repr__ = to_string
2014-08-12 16:10:52 +02:00
@attr.s(slots=True, frozen=True, repr=False)
2014-08-12 16:10:52 +02:00
class UserID(DomainSpecificString):
"""Structure representing a user ID."""
2019-06-20 11:32:02 +02:00
2014-08-12 16:10:52 +02:00
SIGIL = "@"
@attr.s(slots=True, frozen=True, repr=False)
2014-08-12 16:10:52 +02:00
class RoomAlias(DomainSpecificString):
"""Structure representing a room name."""
2019-06-20 11:32:02 +02:00
2014-08-12 16:10:52 +02:00
SIGIL = "#"
@attr.s(slots=True, frozen=True, repr=False)
2014-08-12 16:10:52 +02:00
class RoomID(DomainSpecificString):
"""Structure representing a room id."""
2019-06-20 11:32:02 +02:00
2014-08-12 16:10:52 +02:00
SIGIL = "!"
2014-08-21 11:55:54 +02:00
@attr.s(slots=True, frozen=True, repr=False)
class EventID(DomainSpecificString):
"""Structure representing an event id."""
2019-06-20 11:32:02 +02:00
SIGIL = "$"
MXID_LOCALPART_ALLOWED_CHARACTERS = set(
"_-./=+" + string.ascii_lowercase + string.digits
)
# Guest user IDs are purely numeric.
GUEST_USER_ID_PATTERN = re.compile(r"^\d+$")
def contains_invalid_mxid_characters(localpart: str) -> bool:
"""Check for characters not allowed in an mxid or groupid localpart
Args:
localpart: the localpart to be checked
use_extended_character_set: True to use the extended allowed characters
from MSC4009.
Returns:
True if there are any naughty characters
"""
return any(c not in MXID_LOCALPART_ALLOWED_CHARACTERS for c in localpart)
2017-07-18 10:47:25 +02:00
UPPER_CASE_PATTERN = re.compile(b"[A-Z_]")
# the following is a pattern which matches '=', and bytes which are not allowed in a mxid
# localpart.
#
# It works by:
# * building a string containing the allowed characters (excluding '=')
# * escaping every special character with a backslash (to stop '-' being interpreted as a
# range operator)
# * wrapping it in a '[^...]' regex
# * converting the whole lot to a 'bytes' sequence, so that we can use it to match
# bytes rather than strings
#
NON_MXID_CHARACTER_PATTERN = re.compile(
("[^%s]" % (re.escape("".join(MXID_LOCALPART_ALLOWED_CHARACTERS - {"="})),)).encode(
"ascii"
)
)
def map_username_to_mxid_localpart(
username: Union[str, bytes], case_sensitive: bool = False
) -> str:
"""Map a username onto a string suitable for a MXID
This follows the algorithm laid out at
https://matrix.org/docs/spec/appendices.html#mapping-from-other-character-sets.
Args:
username: username to be mapped
case_sensitive: true if TEST and test should be mapped
onto different mxids
Returns:
string suitable for a mxid localpart
"""
if not isinstance(username, bytes):
username = username.encode("utf-8")
# first we sort out upper-case characters
if case_sensitive:
2019-06-20 11:32:02 +02:00
def f1(m: Match[bytes]) -> bytes:
return b"_" + m.group().lower()
username = UPPER_CASE_PATTERN.sub(f1, username)
else:
username = username.lower()
# then we sort out non-ascii characters by converting to the hex equivalent.
def f2(m: Match[bytes]) -> bytes:
return b"=%02x" % (m.group()[0],)
username = NON_MXID_CHARACTER_PATTERN.sub(f2, username)
# we also do the =-escaping to mxids starting with an underscore.
username = re.sub(b"^_", b"=5f", username)
# we should now only have ascii bytes left, so can decode back to a string.
return username.decode("ascii")
@attr.s(frozen=True, slots=True, order=False)
class AbstractMultiWriterStreamToken(metaclass=abc.ABCMeta):
"""An abstract stream token class for streams that supports multiple
writers.
This works by keeping track of the stream position of each writer,
represented by a default `stream` attribute and a map of instance name to
stream position of any writers that are ahead of the default stream
position.
"""
stream: int = attr.ib(validator=attr.validators.instance_of(int), kw_only=True)
instance_map: "immutabledict[str, int]" = attr.ib(
factory=immutabledict,
validator=attr.validators.deep_mapping(
key_validator=attr.validators.instance_of(str),
value_validator=attr.validators.instance_of(int),
mapping_validator=attr.validators.instance_of(immutabledict),
),
kw_only=True,
)
@classmethod
@abc.abstractmethod
async def parse(cls, store: "DataStore", string: str) -> "Self":
"""Parse the string representation of the token."""
...
@abc.abstractmethod
async def to_string(self, store: "DataStore") -> str:
"""Serialize the token into its string representation."""
...
def copy_and_advance(self, other: "Self") -> "Self":
"""Return a new token such that if an event is after both this token and
the other token, then its after the returned token too.
"""
max_stream = max(self.stream, other.stream)
instance_map = {
instance: max(
self.instance_map.get(instance, self.stream),
other.instance_map.get(instance, other.stream),
)
for instance in set(self.instance_map).union(other.instance_map)
}
return attr.evolve(
self, stream=max_stream, instance_map=immutabledict(instance_map)
)
def get_max_stream_pos(self) -> int:
"""Get the maximum stream position referenced in this token.
The corresponding "min" position is, by definition just `self.stream`.
This is used to handle tokens that have non-empty `instance_map`, and so
reference stream positions after the `self.stream` position.
"""
return max(self.instance_map.values(), default=self.stream)
def get_stream_pos_for_instance(self, instance_name: str) -> int:
"""Get the stream position that the given writer was at at this token."""
# If we don't have an entry for the instance we can assume that it was
# at `self.stream`.
return self.instance_map.get(instance_name, self.stream)
@attr.s(frozen=True, slots=True, order=False)
class RoomStreamToken(AbstractMultiWriterStreamToken):
"""Tokens are positions between events. The token "s1" comes after event 1.
s0 s1
| |
[0] [1] [2]
Tokens can either be a point in the live event stream or a cursor going
through historic events.
When traversing the live event stream, events are ordered by
`stream_ordering` (when they arrived at the homeserver).
When traversing historic events, events are first ordered by their `depth`
(`topological_ordering` in the event graph) and tie-broken by
`stream_ordering` (when the event arrived at the homeserver).
If you're looking for more info about what a token with all of the
underscores means, ex.
`s2633508_17_338_6732159_1082514_541479_274711_265584_1`, see the docstring
for `StreamToken` below.
---
Live tokens start with an "s" followed by the `stream_ordering` of the event
that comes before the position of the token. Said another way:
`stream_ordering` uniquely identifies a persisted event. The live token
means "the position just after the event identified by `stream_ordering`".
An example token is:
s2633508
---
Historic tokens start with a "t" followed by the `depth`
(`topological_ordering` in the event graph) of the event that comes before
the position of the token, followed by "-", followed by the
`stream_ordering` of the event that comes before the position of the token.
An example token is:
t426-2633508
---
There is also a third mode for live tokens where the token starts with "m",
which is sometimes used when using sharded event persisters. In this case
the events stream is considered to be a set of streams (one for each writer)
and the token encodes the vector clock of positions of each writer in their
respective streams.
The format of the token in such case is an initial integer min position,
followed by the mapping of instance ID to position separated by '.' and '~':
m{min_pos}~{writer1}.{pos1}~{writer2}.{pos2}. ...
The `min_pos` corresponds to the minimum position all writers have persisted
up to, and then only writers that are ahead of that position need to be
encoded. An example token is:
m56~2.58~3.59
Which corresponds to a set of three (or more writers) where instances 2 and
3 (these are instance IDs that can be looked up in the DB to fetch the more
commonly used instance names) are at positions 58 and 59 respectively, and
all other instances are at position 56.
Note: The `RoomStreamToken` cannot have both a topological part and an
instance map.
---
For caching purposes, `RoomStreamToken`s and by extension, all their
attributes, must be hashable.
"""
topological: Optional[int] = attr.ib(
validator=attr.validators.optional(attr.validators.instance_of(int)),
kw_only=True,
default=None,
)
def __attrs_post_init__(self) -> None:
"""Validates that both `topological` and `instance_map` aren't set."""
if self.instance_map and self.topological:
raise ValueError(
"Cannot set both 'topological' and 'instance_map' on 'RoomStreamToken'."
)
@classmethod
async def parse(cls, store: "PurgeEventsStore", string: str) -> "RoomStreamToken":
try:
if string[0] == "s":
return cls(topological=None, stream=int(string[1:]))
if string[0] == "t":
parts = string[1:].split("-", 1)
return cls(topological=int(parts[0]), stream=int(parts[1]))
if string[0] == "m":
parts = string[1:].split("~")
stream = int(parts[0])
instance_map = {}
for part in parts[1:]:
key, value = part.split(".")
instance_id = int(key)
pos = int(value)
instance_name = await store.get_name_from_instance_id(instance_id) # type: ignore[attr-defined]
instance_map[instance_name] = pos
return cls(
topological=None,
stream=stream,
instance_map=immutabledict(instance_map),
)
except CancelledError:
raise
except Exception:
pass
raise SynapseError(400, "Invalid room stream token %r" % (string,))
@classmethod
def parse_stream_token(cls, string: str) -> "RoomStreamToken":
try:
if string[0] == "s":
return cls(topological=None, stream=int(string[1:]))
except Exception:
pass
raise SynapseError(400, "Invalid room stream token %r" % (string,))
def copy_and_advance(self, other: "RoomStreamToken") -> "RoomStreamToken":
"""Return a new token such that if an event is after both this token and
the other token, then its after the returned token too.
"""
if self.topological or other.topological:
raise Exception("Can't advance topological tokens")
return super().copy_and_advance(other)
def as_historical_tuple(self) -> Tuple[int, int]:
"""Returns a tuple of `(topological, stream)` for historical tokens.
Raises if not an historical token (i.e. doesn't have a topological part).
"""
if self.topological is None:
raise Exception(
"Cannot call `RoomStreamToken.as_historical_tuple` on live token"
)
return self.topological, self.stream
def get_stream_pos_for_instance(self, instance_name: str) -> int:
"""Get the stream position that the given writer was at at this token.
This only makes sense for "live" tokens that may have a vector clock
component, and so asserts that this is a "live" token.
"""
assert self.topological is None
# If we don't have an entry for the instance we can assume that it was
# at `self.stream`.
return self.instance_map.get(instance_name, self.stream)
async def to_string(self, store: "DataStore") -> str:
if self.topological is not None:
return "t%d-%d" % (self.topological, self.stream)
elif self.instance_map:
entries = []
for name, pos in self.instance_map.items():
if pos <= self.stream:
# Ignore instances who are below the minimum stream position
# (we might know they've advanced without seeing a recent
# write from them).
continue
instance_id = await store.get_id_for_instance(name)
entries.append(f"{instance_id}.{pos}")
encoded_map = "~".join(entries)
return f"m{self.stream}~{encoded_map}"
else:
return "s%d" % (self.stream,)
@attr.s(frozen=True, slots=True, order=False)
class MultiWriterStreamToken(AbstractMultiWriterStreamToken):
"""A basic stream token class for streams that supports multiple writers."""
@classmethod
async def parse(cls, store: "DataStore", string: str) -> "MultiWriterStreamToken":
try:
if string[0].isdigit():
return cls(stream=int(string))
if string[0] == "m":
parts = string[1:].split("~")
stream = int(parts[0])
instance_map = {}
for part in parts[1:]:
key, value = part.split(".")
instance_id = int(key)
pos = int(value)
instance_name = await store.get_name_from_instance_id(instance_id)
instance_map[instance_name] = pos
return cls(
stream=stream,
instance_map=immutabledict(instance_map),
)
except CancelledError:
raise
except Exception:
pass
raise SynapseError(400, "Invalid stream token %r" % (string,))
async def to_string(self, store: "DataStore") -> str:
if self.instance_map:
entries = []
for name, pos in self.instance_map.items():
if pos <= self.stream:
# Ignore instances who are below the minimum stream position
# (we might know they've advanced without seeing a recent
# write from them).
continue
instance_id = await store.get_id_for_instance(name)
entries.append(f"{instance_id}.{pos}")
encoded_map = "~".join(entries)
return f"m{self.stream}~{encoded_map}"
else:
return str(self.stream)
@staticmethod
def is_stream_position_in_range(
low: Optional["AbstractMultiWriterStreamToken"],
high: Optional["AbstractMultiWriterStreamToken"],
instance_name: Optional[str],
pos: int,
) -> bool:
"""Checks if a given persisted position is between the two given tokens.
If `instance_name` is None then the row was persisted before multi
writer support.
"""
if low:
if instance_name:
low_stream = low.instance_map.get(instance_name, low.stream)
else:
low_stream = low.stream
if pos <= low_stream:
return False
if high:
if instance_name:
high_stream = high.instance_map.get(instance_name, high.stream)
else:
high_stream = high.stream
if high_stream < pos:
return False
return True
class StreamKeyType(Enum):
"""Known stream types.
A stream is a list of entities ordered by an incrementing "stream token".
"""
ROOM = "room_key"
PRESENCE = "presence_key"
TYPING = "typing_key"
RECEIPT = "receipt_key"
ACCOUNT_DATA = "account_data_key"
PUSH_RULES = "push_rules_key"
TO_DEVICE = "to_device_key"
DEVICE_LIST = "device_list_key"
Faster joins: omit partial rooms from eager syncs until the resync completes (#14870) * Allow `AbstractSet` in `StrCollection` Or else frozensets are excluded. This will be useful in an upcoming commit where I plan to change a function that accepts `List[str]` to accept `StrCollection` instead. * `rooms_to_exclude` -> `rooms_to_exclude_globally` I am about to make use of this exclusion mechanism to exclude rooms for a specific user and a specific sync. This rename helps to clarify the distinction between the global config and the rooms to exclude for a specific sync. * Better function names for internal sync methods * Track a list of excluded rooms on SyncResultBuilder I plan to feed a list of partially stated rooms for this sync to ignore * Exclude partial state rooms during eager sync using the mechanism established in the previous commit * Track un-partial-state stream in sync tokens So that we can work out which rooms have become fully-stated during a given sync period. * Fix mutation of `@cached` return value This was fouling up a complement test added alongside this PR. Excluding a room would mean the set of forgotten rooms in the cache would be extended. This means that room could be erroneously considered forgotten in the future. Introduced in #12310, Synapse 1.57.0. I don't think this had any user-visible side effects (until now). * SyncResultBuilder: track rooms to force as newly joined Similar plan as before. We've omitted rooms from certain sync responses; now we establish the mechanism to reintroduce them into future syncs. * Read new field, to present rooms as newly joined * Force un-partial-stated rooms to be newly-joined for eager incremental syncs only, provided they're still fully stated * Notify user stream listeners to wake up long polling syncs * Changelog * Typo fix Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Unnecessary list cast Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Rephrase comment Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Another comment Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Fixup merge(?) * Poke notifier when receiving un-partial-stated msg over replication * Fixup merge whoops Thanks MV :) Co-authored-by: Mathieu Velen <mathieuv@matrix.org> Co-authored-by: Mathieu Velten <mathieuv@matrix.org> Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
2023-01-23 16:44:39 +01:00
UN_PARTIAL_STATED_ROOMS = "un_partial_stated_rooms_key"
@attr.s(slots=True, frozen=True, auto_attribs=True)
class StreamToken:
"""A collection of keys joined together by underscores in the following
order and which represent the position in their respective streams.
Faster joins: omit partial rooms from eager syncs until the resync completes (#14870) * Allow `AbstractSet` in `StrCollection` Or else frozensets are excluded. This will be useful in an upcoming commit where I plan to change a function that accepts `List[str]` to accept `StrCollection` instead. * `rooms_to_exclude` -> `rooms_to_exclude_globally` I am about to make use of this exclusion mechanism to exclude rooms for a specific user and a specific sync. This rename helps to clarify the distinction between the global config and the rooms to exclude for a specific sync. * Better function names for internal sync methods * Track a list of excluded rooms on SyncResultBuilder I plan to feed a list of partially stated rooms for this sync to ignore * Exclude partial state rooms during eager sync using the mechanism established in the previous commit * Track un-partial-state stream in sync tokens So that we can work out which rooms have become fully-stated during a given sync period. * Fix mutation of `@cached` return value This was fouling up a complement test added alongside this PR. Excluding a room would mean the set of forgotten rooms in the cache would be extended. This means that room could be erroneously considered forgotten in the future. Introduced in #12310, Synapse 1.57.0. I don't think this had any user-visible side effects (until now). * SyncResultBuilder: track rooms to force as newly joined Similar plan as before. We've omitted rooms from certain sync responses; now we establish the mechanism to reintroduce them into future syncs. * Read new field, to present rooms as newly joined * Force un-partial-stated rooms to be newly-joined for eager incremental syncs only, provided they're still fully stated * Notify user stream listeners to wake up long polling syncs * Changelog * Typo fix Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Unnecessary list cast Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Rephrase comment Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Another comment Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Fixup merge(?) * Poke notifier when receiving un-partial-stated msg over replication * Fixup merge whoops Thanks MV :) Co-authored-by: Mathieu Velen <mathieuv@matrix.org> Co-authored-by: Mathieu Velten <mathieuv@matrix.org> Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
2023-01-23 16:44:39 +01:00
ex. `s2633508_17_338_6732159_1082514_541479_274711_265584_1_379`
1. `room_key`: `s2633508` which is a `RoomStreamToken`
- `RoomStreamToken`'s can also look like `t426-2633508` or `m56~2.58~3.59`
- See the docstring for `RoomStreamToken` for more details.
2. `presence_key`: `17`
3. `typing_key`: `338`
4. `receipt_key`: `6732159`
5. `account_data_key`: `1082514`
6. `push_rules_key`: `541479`
7. `to_device_key`: `274711`
8. `device_list_key`: `265584`
9. `groups_key`: `1` (note that this key is now unused)
Faster joins: omit partial rooms from eager syncs until the resync completes (#14870) * Allow `AbstractSet` in `StrCollection` Or else frozensets are excluded. This will be useful in an upcoming commit where I plan to change a function that accepts `List[str]` to accept `StrCollection` instead. * `rooms_to_exclude` -> `rooms_to_exclude_globally` I am about to make use of this exclusion mechanism to exclude rooms for a specific user and a specific sync. This rename helps to clarify the distinction between the global config and the rooms to exclude for a specific sync. * Better function names for internal sync methods * Track a list of excluded rooms on SyncResultBuilder I plan to feed a list of partially stated rooms for this sync to ignore * Exclude partial state rooms during eager sync using the mechanism established in the previous commit * Track un-partial-state stream in sync tokens So that we can work out which rooms have become fully-stated during a given sync period. * Fix mutation of `@cached` return value This was fouling up a complement test added alongside this PR. Excluding a room would mean the set of forgotten rooms in the cache would be extended. This means that room could be erroneously considered forgotten in the future. Introduced in #12310, Synapse 1.57.0. I don't think this had any user-visible side effects (until now). * SyncResultBuilder: track rooms to force as newly joined Similar plan as before. We've omitted rooms from certain sync responses; now we establish the mechanism to reintroduce them into future syncs. * Read new field, to present rooms as newly joined * Force un-partial-stated rooms to be newly-joined for eager incremental syncs only, provided they're still fully stated * Notify user stream listeners to wake up long polling syncs * Changelog * Typo fix Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Unnecessary list cast Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Rephrase comment Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Another comment Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Fixup merge(?) * Poke notifier when receiving un-partial-stated msg over replication * Fixup merge whoops Thanks MV :) Co-authored-by: Mathieu Velen <mathieuv@matrix.org> Co-authored-by: Mathieu Velten <mathieuv@matrix.org> Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
2023-01-23 16:44:39 +01:00
10. `un_partial_stated_rooms_key`: `379`
You can see how many of these keys correspond to the various
fields in a "/sync" response:
```json
{
Faster joins: omit partial rooms from eager syncs until the resync completes (#14870) * Allow `AbstractSet` in `StrCollection` Or else frozensets are excluded. This will be useful in an upcoming commit where I plan to change a function that accepts `List[str]` to accept `StrCollection` instead. * `rooms_to_exclude` -> `rooms_to_exclude_globally` I am about to make use of this exclusion mechanism to exclude rooms for a specific user and a specific sync. This rename helps to clarify the distinction between the global config and the rooms to exclude for a specific sync. * Better function names for internal sync methods * Track a list of excluded rooms on SyncResultBuilder I plan to feed a list of partially stated rooms for this sync to ignore * Exclude partial state rooms during eager sync using the mechanism established in the previous commit * Track un-partial-state stream in sync tokens So that we can work out which rooms have become fully-stated during a given sync period. * Fix mutation of `@cached` return value This was fouling up a complement test added alongside this PR. Excluding a room would mean the set of forgotten rooms in the cache would be extended. This means that room could be erroneously considered forgotten in the future. Introduced in #12310, Synapse 1.57.0. I don't think this had any user-visible side effects (until now). * SyncResultBuilder: track rooms to force as newly joined Similar plan as before. We've omitted rooms from certain sync responses; now we establish the mechanism to reintroduce them into future syncs. * Read new field, to present rooms as newly joined * Force un-partial-stated rooms to be newly-joined for eager incremental syncs only, provided they're still fully stated * Notify user stream listeners to wake up long polling syncs * Changelog * Typo fix Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Unnecessary list cast Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Rephrase comment Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Another comment Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Fixup merge(?) * Poke notifier when receiving un-partial-stated msg over replication * Fixup merge whoops Thanks MV :) Co-authored-by: Mathieu Velen <mathieuv@matrix.org> Co-authored-by: Mathieu Velten <mathieuv@matrix.org> Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
2023-01-23 16:44:39 +01:00
"next_batch": "s12_4_0_1_1_1_1_4_1_1",
"presence": {
"events": []
},
"device_lists": {
"changed": []
},
"rooms": {
"join": {
"!QrZlfIDQLNLdZHqTnt:hs1": {
"timeline": {
"events": [],
Faster joins: omit partial rooms from eager syncs until the resync completes (#14870) * Allow `AbstractSet` in `StrCollection` Or else frozensets are excluded. This will be useful in an upcoming commit where I plan to change a function that accepts `List[str]` to accept `StrCollection` instead. * `rooms_to_exclude` -> `rooms_to_exclude_globally` I am about to make use of this exclusion mechanism to exclude rooms for a specific user and a specific sync. This rename helps to clarify the distinction between the global config and the rooms to exclude for a specific sync. * Better function names for internal sync methods * Track a list of excluded rooms on SyncResultBuilder I plan to feed a list of partially stated rooms for this sync to ignore * Exclude partial state rooms during eager sync using the mechanism established in the previous commit * Track un-partial-state stream in sync tokens So that we can work out which rooms have become fully-stated during a given sync period. * Fix mutation of `@cached` return value This was fouling up a complement test added alongside this PR. Excluding a room would mean the set of forgotten rooms in the cache would be extended. This means that room could be erroneously considered forgotten in the future. Introduced in #12310, Synapse 1.57.0. I don't think this had any user-visible side effects (until now). * SyncResultBuilder: track rooms to force as newly joined Similar plan as before. We've omitted rooms from certain sync responses; now we establish the mechanism to reintroduce them into future syncs. * Read new field, to present rooms as newly joined * Force un-partial-stated rooms to be newly-joined for eager incremental syncs only, provided they're still fully stated * Notify user stream listeners to wake up long polling syncs * Changelog * Typo fix Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Unnecessary list cast Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Rephrase comment Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Another comment Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Fixup merge(?) * Poke notifier when receiving un-partial-stated msg over replication * Fixup merge whoops Thanks MV :) Co-authored-by: Mathieu Velen <mathieuv@matrix.org> Co-authored-by: Mathieu Velten <mathieuv@matrix.org> Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
2023-01-23 16:44:39 +01:00
"prev_batch": "s10_4_0_1_1_1_1_4_1_1",
"limited": false
},
"state": {
"events": []
},
"account_data": {
"events": []
},
"ephemeral": {
"events": []
}
}
}
}
}
```
---
For caching purposes, `StreamToken`s and by extension, all their attributes,
must be hashable.
"""
room_key: RoomStreamToken = attr.ib(
validator=attr.validators.instance_of(RoomStreamToken)
)
presence_key: int
typing_key: int
receipt_key: MultiWriterStreamToken = attr.ib(
validator=attr.validators.instance_of(MultiWriterStreamToken)
)
account_data_key: int
push_rules_key: int
to_device_key: int
device_list_key: int
# Note that the groups key is no longer used and may have bogus values.
groups_key: int
Faster joins: omit partial rooms from eager syncs until the resync completes (#14870) * Allow `AbstractSet` in `StrCollection` Or else frozensets are excluded. This will be useful in an upcoming commit where I plan to change a function that accepts `List[str]` to accept `StrCollection` instead. * `rooms_to_exclude` -> `rooms_to_exclude_globally` I am about to make use of this exclusion mechanism to exclude rooms for a specific user and a specific sync. This rename helps to clarify the distinction between the global config and the rooms to exclude for a specific sync. * Better function names for internal sync methods * Track a list of excluded rooms on SyncResultBuilder I plan to feed a list of partially stated rooms for this sync to ignore * Exclude partial state rooms during eager sync using the mechanism established in the previous commit * Track un-partial-state stream in sync tokens So that we can work out which rooms have become fully-stated during a given sync period. * Fix mutation of `@cached` return value This was fouling up a complement test added alongside this PR. Excluding a room would mean the set of forgotten rooms in the cache would be extended. This means that room could be erroneously considered forgotten in the future. Introduced in #12310, Synapse 1.57.0. I don't think this had any user-visible side effects (until now). * SyncResultBuilder: track rooms to force as newly joined Similar plan as before. We've omitted rooms from certain sync responses; now we establish the mechanism to reintroduce them into future syncs. * Read new field, to present rooms as newly joined * Force un-partial-stated rooms to be newly-joined for eager incremental syncs only, provided they're still fully stated * Notify user stream listeners to wake up long polling syncs * Changelog * Typo fix Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Unnecessary list cast Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Rephrase comment Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Another comment Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Fixup merge(?) * Poke notifier when receiving un-partial-stated msg over replication * Fixup merge whoops Thanks MV :) Co-authored-by: Mathieu Velen <mathieuv@matrix.org> Co-authored-by: Mathieu Velten <mathieuv@matrix.org> Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
2023-01-23 16:44:39 +01:00
un_partial_stated_rooms_key: int
2014-08-21 11:55:54 +02:00
_SEPARATOR = "_"
START: ClassVar["StreamToken"]
2014-08-21 11:55:54 +02:00
@classmethod
@cancellable
async def from_string(cls, store: "DataStore", string: str) -> "StreamToken":
"""
Creates a RoomStreamToken from its textual representation.
"""
2014-08-21 11:55:54 +02:00
try:
keys = string.split(cls._SEPARATOR)
while len(keys) < len(attr.fields(cls)):
2015-07-02 12:40:22 +02:00
# i.e. old token from before receipt_key
keys.append("0")
(
room_key,
presence_key,
typing_key,
receipt_key,
account_data_key,
push_rules_key,
to_device_key,
device_list_key,
groups_key,
un_partial_stated_rooms_key,
) = keys
return cls(
room_key=await RoomStreamToken.parse(store, room_key),
presence_key=int(presence_key),
typing_key=int(typing_key),
receipt_key=await MultiWriterStreamToken.parse(store, receipt_key),
account_data_key=int(account_data_key),
push_rules_key=int(push_rules_key),
to_device_key=int(to_device_key),
device_list_key=int(device_list_key),
groups_key=int(groups_key),
un_partial_stated_rooms_key=int(un_partial_stated_rooms_key),
)
except CancelledError:
raise
except Exception:
raise SynapseError(400, "Invalid stream token")
2014-08-21 11:55:54 +02:00
async def to_string(self, store: "DataStore") -> str:
return self._SEPARATOR.join(
[
await self.room_key.to_string(store),
str(self.presence_key),
str(self.typing_key),
await self.receipt_key.to_string(store),
str(self.account_data_key),
str(self.push_rules_key),
str(self.to_device_key),
str(self.device_list_key),
# Note that the groups key is no longer used, but it is still
# serialized so that there will not be confusion in the future
# if additional tokens are added.
str(self.groups_key),
Faster joins: omit partial rooms from eager syncs until the resync completes (#14870) * Allow `AbstractSet` in `StrCollection` Or else frozensets are excluded. This will be useful in an upcoming commit where I plan to change a function that accepts `List[str]` to accept `StrCollection` instead. * `rooms_to_exclude` -> `rooms_to_exclude_globally` I am about to make use of this exclusion mechanism to exclude rooms for a specific user and a specific sync. This rename helps to clarify the distinction between the global config and the rooms to exclude for a specific sync. * Better function names for internal sync methods * Track a list of excluded rooms on SyncResultBuilder I plan to feed a list of partially stated rooms for this sync to ignore * Exclude partial state rooms during eager sync using the mechanism established in the previous commit * Track un-partial-state stream in sync tokens So that we can work out which rooms have become fully-stated during a given sync period. * Fix mutation of `@cached` return value This was fouling up a complement test added alongside this PR. Excluding a room would mean the set of forgotten rooms in the cache would be extended. This means that room could be erroneously considered forgotten in the future. Introduced in #12310, Synapse 1.57.0. I don't think this had any user-visible side effects (until now). * SyncResultBuilder: track rooms to force as newly joined Similar plan as before. We've omitted rooms from certain sync responses; now we establish the mechanism to reintroduce them into future syncs. * Read new field, to present rooms as newly joined * Force un-partial-stated rooms to be newly-joined for eager incremental syncs only, provided they're still fully stated * Notify user stream listeners to wake up long polling syncs * Changelog * Typo fix Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Unnecessary list cast Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Rephrase comment Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Another comment Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Fixup merge(?) * Poke notifier when receiving un-partial-stated msg over replication * Fixup merge whoops Thanks MV :) Co-authored-by: Mathieu Velen <mathieuv@matrix.org> Co-authored-by: Mathieu Velten <mathieuv@matrix.org> Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
2023-01-23 16:44:39 +01:00
str(self.un_partial_stated_rooms_key),
]
)
@property
def room_stream_id(self) -> int:
return self.room_key.stream
def copy_and_advance(self, key: StreamKeyType, new_value: Any) -> "StreamToken":
"""Advance the given key in the token to a new value if and only if the
new value is after the old value.
:raises TypeError: if `key` is not the one of the keys tracked by a StreamToken.
"""
if key == StreamKeyType.ROOM:
new_token = self.copy_and_replace(
StreamKeyType.ROOM, self.room_key.copy_and_advance(new_value)
)
return new_token
elif key == StreamKeyType.RECEIPT:
new_token = self.copy_and_replace(
StreamKeyType.RECEIPT, self.receipt_key.copy_and_advance(new_value)
)
return new_token
new_token = self.copy_and_replace(key, new_value)
new_id = new_token.get_field(key)
old_id = self.get_field(key)
if old_id < new_id:
return new_token
else:
return self
def copy_and_replace(self, key: StreamKeyType, new_value: Any) -> "StreamToken":
return attr.evolve(self, **{key.value: new_value})
@overload
def get_field(self, key: Literal[StreamKeyType.ROOM]) -> RoomStreamToken: ...
@overload
def get_field(
self, key: Literal[StreamKeyType.RECEIPT]
) -> MultiWriterStreamToken: ...
@overload
def get_field(
self,
key: Literal[
StreamKeyType.ACCOUNT_DATA,
StreamKeyType.DEVICE_LIST,
StreamKeyType.PRESENCE,
StreamKeyType.PUSH_RULES,
StreamKeyType.TO_DEVICE,
StreamKeyType.TYPING,
StreamKeyType.UN_PARTIAL_STATED_ROOMS,
],
) -> int: ...
@overload
def get_field(
self, key: StreamKeyType
) -> Union[int, RoomStreamToken, MultiWriterStreamToken]: ...
def get_field(
self, key: StreamKeyType
) -> Union[int, RoomStreamToken, MultiWriterStreamToken]:
"""Returns the stream ID for the given key."""
return getattr(self, key.value)
2019-06-20 11:32:02 +02:00
StreamToken.START = StreamToken(
RoomStreamToken(stream=0), 0, 0, MultiWriterStreamToken(stream=0), 0, 0, 0, 0, 0, 0
)
@attr.s(slots=True, frozen=True, auto_attribs=True)
class PersistedPosition:
"""Position of a newly persisted row with instance that persisted it."""
instance_name: str
stream: int
def persisted_after(self, token: AbstractMultiWriterStreamToken) -> bool:
return token.get_stream_pos_for_instance(self.instance_name) < self.stream
@attr.s(slots=True, frozen=True, auto_attribs=True)
class PersistedEventPosition(PersistedPosition):
"""Position of a newly persisted event with instance that persisted it.
This can be used to test whether the event is persisted before or after a
RoomStreamToken.
"""
def to_room_stream_token(self) -> RoomStreamToken:
"""Converts the position to a room stream token such that events
persisted in the same room after this position will be after the
returned `RoomStreamToken`.
2021-02-12 17:01:48 +01:00
Note: no guarantees are made about ordering w.r.t. events in other
rooms.
"""
# Doing the naive thing satisfies the desired properties described in
# the docstring.
return RoomStreamToken(stream=self.stream)
@attr.s(slots=True, frozen=True, auto_attribs=True)
class ThirdPartyInstanceID:
appservice_id: Optional[str]
network_id: Optional[str]
# Deny iteration because it will bite you if you try to create a singleton
# set by:
# users = set(user)
def __iter__(self) -> NoReturn:
raise ValueError("Attempted to iterate a %s" % (type(self).__name__,))
# Because this class is a frozen class, it is deeply immutable.
def __copy__(self) -> "ThirdPartyInstanceID":
return self
def __deepcopy__(self, memo: Dict[str, object]) -> "ThirdPartyInstanceID":
return self
@classmethod
def from_string(cls, s: str) -> "ThirdPartyInstanceID":
bits = s.split("|", 2)
if len(bits) != 2:
raise SynapseError(400, "Invalid ID %r" % (s,))
return cls(appservice_id=bits[0], network_id=bits[1])
def to_string(self) -> str:
return "%s|%s" % (self.appservice_id, self.network_id)
__str__ = to_string
@attr.s(slots=True, frozen=True, auto_attribs=True)
2020-09-04 12:54:56 +02:00
class ReadReceipt:
"""Information about a read-receipt"""
2019-06-20 11:32:02 +02:00
room_id: str
receipt_type: str
user_id: str
event_ids: List[str]
thread_id: Optional[str]
data: JsonDict
2019-07-25 17:08:24 +02:00
@attr.s(slots=True, frozen=True, auto_attribs=True)
class DeviceListUpdates:
"""
An object containing a diff of information regarding other users' device lists, intended for
a recipient to carry out device list tracking.
Attributes:
changed: A set of users whose device lists have changed recently.
left: A set of users who the recipient no longer needs to track the device lists of.
Typically when those users no longer share any end-to-end encryption enabled rooms.
"""
# We need to use a factory here, otherwise `set` is not evaluated at
# object instantiation, but instead at class definition instantiation.
# The latter happening only once, thus always giving you the same sets
# across multiple DeviceListUpdates instances.
# Also see: don't define mutable default arguments.
changed: Set[str] = attr.ib(factory=set)
left: Set[str] = attr.ib(factory=set)
def __bool__(self) -> bool:
return bool(self.changed or self.left)
def get_verify_key_from_cross_signing_key(
key_info: Mapping[str, Any]
) -> Tuple[str, VerifyKey]:
2019-07-25 17:08:24 +02:00
"""Get the key ID and signedjson verify key from a cross-signing key dict
Args:
key_info: a cross-signing key dict, which must have a "keys"
2019-07-25 17:08:24 +02:00
property that has exactly one item in it
Returns:
the key ID and verify key for the cross-signing key
2019-07-25 17:08:24 +02:00
"""
# make sure that a `keys` field is provided
2019-07-25 17:08:24 +02:00
if "keys" not in key_info:
2019-08-02 03:51:19 +02:00
raise ValueError("Invalid key")
2019-07-25 17:08:24 +02:00
keys = key_info["keys"]
# and that it contains exactly one key
if len(keys) == 1:
key_id, key_data = next(iter(keys.items()))
return key_id, decode_verify_key_bytes(key_id, decode_base64(key_data))
else:
raise ValueError("Invalid key")
@attr.s(auto_attribs=True, frozen=True, slots=True)
class UserInfo:
2023-09-14 13:46:30 +02:00
"""Holds information about a user. Result of get_user_by_id.
Attributes:
user_id: ID of the user.
appservice_id: Application service ID that created this user.
consent_server_notice_sent: Version of policy documents the user has been sent.
consent_version: Version of policy documents the user has consented to.
2023-09-14 13:46:30 +02:00
consent_ts: Time the user consented
creation_ts: Creation timestamp of the user.
is_admin: True if the user is an admin.
is_deactivated: True if the user has been deactivated.
is_guest: True if the user is a guest user.
is_shadow_banned: True if the user has been shadow-banned.
user_type: User type (None for normal user, 'support' and 'bot' other options).
2023-09-14 13:46:30 +02:00
approved: If the user has been "approved" to register on the server.
locked: Whether the user's account has been locked
"""
user_id: UserID
appservice_id: Optional[int]
consent_server_notice_sent: Optional[str]
consent_version: Optional[str]
2023-09-14 13:46:30 +02:00
consent_ts: Optional[int]
user_type: Optional[str]
creation_ts: int
is_admin: bool
is_deactivated: bool
is_guest: bool
is_shadow_banned: bool
2023-09-14 13:46:30 +02:00
approved: bool
locked: bool
class UserProfile(TypedDict):
user_id: str
display_name: Optional[str]
avatar_url: Optional[str]
@attr.s(auto_attribs=True, frozen=True, slots=True)
class RetentionPolicy:
min_lifetime: Optional[int] = None
max_lifetime: Optional[int] = None
class TaskStatus(str, Enum):
"""Status of a scheduled task"""
# Task is scheduled but not active
SCHEDULED = "scheduled"
# Task is active and probably running, and if not
# will be run on next scheduler loop run
ACTIVE = "active"
# Task has completed successfully
COMPLETE = "complete"
# Task is over and either returned a failed status, or had an exception
FAILED = "failed"
@attr.s(auto_attribs=True, frozen=True, slots=True)
class ScheduledTask:
"""Description of a scheduled task"""
# Id used to identify the task
id: str
# Name of the action to be run by this task
action: str
# Current status of this task
status: TaskStatus
# If the status is SCHEDULED then this represents when it should be launched,
# otherwise it represents the last time this task got a change of state.
# In milliseconds since epoch in system time timezone, usually UTC.
timestamp: int
# Optionally bind a task to some resource id for easy retrieval
resource_id: Optional[str]
# Optional parameters that will be passed to the function ran by the task
params: Optional[JsonMapping]
# Optional result that can be updated by the running task
result: Optional[JsonMapping]
# Optional error that should be assigned a value when the status is FAILED
error: Optional[str]