Implement MSC4009 to widen the allowed Matrix ID grammar (#15536)

Behind a configuration flag this adds + to the list of allowed
characters in Matrix IDs. The main feature this enables is
using full E.164 phone numbers as Matrix IDs.
This commit is contained in:
Patrick Cloke 2023-05-05 12:13:50 -04:00 committed by GitHub
parent a0f53afd62
commit 36df9c5e36
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 54 additions and 17 deletions

View file

@ -0,0 +1 @@
Implement [MSC4009](https://github.com/matrix-org/matrix-spec-proposals/pull/4009) to expand the supported characters in Matrix IDs.

View file

@ -199,3 +199,6 @@ class ExperimentalConfig(Config):
# MSC3970: Scope transaction IDs to devices
self.msc3970_enabled = experimental.get("msc3970_enabled", False)
# MSC4009: E.164 Matrix IDs
self.msc4009_e164_mxids = experimental.get("msc4009_e164_mxids", False)

View file

@ -46,7 +46,7 @@ from synapse.replication.http.register import (
ReplicationRegisterServlet,
)
from synapse.spam_checker_api import RegistrationBehaviour
from synapse.types import RoomAlias, UserID, create_requester
from synapse.types import GUEST_USER_ID_PATTERN, RoomAlias, UserID, create_requester
from synapse.types.state import StateFilter
if TYPE_CHECKING:
@ -143,10 +143,15 @@ class RegistrationHandler:
assigned_user_id: Optional[str] = None,
inhibit_user_in_use_error: bool = False,
) -> None:
if types.contains_invalid_mxid_characters(localpart):
if types.contains_invalid_mxid_characters(
localpart, self.hs.config.experimental.msc4009_e164_mxids
):
extra_chars = (
"=_-./+" if self.hs.config.experimental.msc4009_e164_mxids else "=_-./"
)
raise SynapseError(
400,
"User ID can only contain characters a-z, 0-9, or '=_-./'",
f"User ID can only contain characters a-z, 0-9, or '{extra_chars}'",
Codes.INVALID_USERNAME,
)
@ -195,16 +200,12 @@ class RegistrationHandler:
errcode=Codes.FORBIDDEN,
)
if guest_access_token is None:
try:
int(localpart)
if guest_access_token is None and GUEST_USER_ID_PATTERN.fullmatch(localpart):
raise SynapseError(
400,
"Numeric user IDs are reserved for guest users.",
errcode=Codes.INVALID_USERNAME,
)
except ValueError:
pass
async def register_user(
self,

View file

@ -225,6 +225,8 @@ class SsoHandler:
self._consent_at_registration = hs.config.consent.user_consent_at_registration
self._e164_mxids = hs.config.experimental.msc4009_e164_mxids
def register_identity_provider(self, p: SsoIdentityProvider) -> None:
p_id = p.idp_id
assert p_id not in self._identity_providers
@ -711,7 +713,7 @@ class SsoHandler:
# Since the localpart is provided via a potentially untrusted module,
# ensure the MXID is valid before registering.
if not attributes.localpart or contains_invalid_mxid_characters(
attributes.localpart
attributes.localpart, self._e164_mxids
):
raise MappingException("localpart is invalid: %s" % (attributes.localpart,))
@ -944,7 +946,7 @@ class SsoHandler:
localpart,
)
if contains_invalid_mxid_characters(localpart):
if contains_invalid_mxid_characters(localpart, self._e164_mxids):
raise SynapseError(400, "localpart is invalid: %s" % (localpart,))
user_id = UserID(localpart, self._server_name).to_string()
user_infos = await self._store.get_users_by_id_case_insensitive(user_id)

View file

@ -335,18 +335,35 @@ class EventID(DomainSpecificString):
mxid_localpart_allowed_characters = set(
"_-./=" + string.ascii_lowercase + string.digits
)
# MSC4007 adds the + to the allowed characters.
#
# TODO If this was accepted, update the SSO code to support this, see the callers
# of map_username_to_mxid_localpart.
extended_mxid_localpart_allowed_characters = mxid_localpart_allowed_characters | {"+"}
# Guest user IDs are purely numeric.
GUEST_USER_ID_PATTERN = re.compile(r"^\d+$")
def contains_invalid_mxid_characters(localpart: str) -> bool:
def contains_invalid_mxid_characters(
localpart: str, use_extended_character_set: bool
) -> bool:
"""Check for characters not allowed in an mxid or groupid localpart
Args:
localpart: the localpart to be checked
use_extended_character_set: True to use the extended allowed characters
from MSC4009.
Returns:
True if there are any naughty characters
"""
return any(c not in mxid_localpart_allowed_characters for c in localpart)
allowed_characters = (
extended_mxid_localpart_allowed_characters
if use_extended_character_set
else mxid_localpart_allowed_characters
)
return any(c not in allowed_characters for c in localpart)
UPPER_CASE_PATTERN = re.compile(b"[A-Z_]")

View file

@ -586,6 +586,19 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
d = self.store.is_support_user(user_id)
self.assertFalse(self.get_success(d))
def test_invalid_user_id(self) -> None:
invalid_user_id = "+abcd"
self.get_failure(
self.handler.register_user(localpart=invalid_user_id), SynapseError
)
@override_config({"experimental_features": {"msc4009_e164_mxids": True}})
def text_extended_user_ids(self) -> None:
"""+ should be allowed according to MSC4009."""
valid_user_id = "+1234"
user_id = self.get_success(self.handler.register_user(localpart=valid_user_id))
self.assertEqual(user_id, valid_user_id)
def test_invalid_user_id_length(self) -> None:
invalid_user_id = "x" * 256
self.get_failure(