Add admin API to reset connection timeouts for remote server (#11639)

* Fix get federation status of destination if no error occured
This commit is contained in:
Dirk Klimpel 2022-01-25 13:06:29 +01:00 committed by GitHub
parent 15c2a6a106
commit 0d6cfea9b8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 183 additions and 33 deletions

View file

@ -0,0 +1 @@
Add admin API to reset connection timeouts for remote server.

View file

@ -86,7 +86,7 @@ The following fields are returned in the JSON response body:
- `next_token`: string representing a positive integer - Indication for pagination. See above.
- `total` - integer - Total number of destinations.
# Destination Details API
## Destination Details API
This API gets the retry timing info for a specific remote server.
@ -108,7 +108,45 @@ A response body like the following is returned:
}
```
**Parameters**
The following parameters should be set in the URL:
- `destination` - Name of the remote server.
**Response**
The response fields are the same like in the `destinations` array in
[List of destinations](#list-of-destinations) response.
## Reset connection timeout
Synapse makes federation requests to other homeservers. If a federation request fails,
Synapse will mark the destination homeserver as offline, preventing any future requests
to that server for a "cooldown" period. This period grows over time if the server
continues to fail its responses
([exponential backoff](https://en.wikipedia.org/wiki/Exponential_backoff)).
Admins can cancel the cooldown period with this API.
This API resets the retry timing for a specific remote server and tries to connect to
the remote server again. It does not wait for the next `retry_interval`.
The connection must have previously run into an error and `retry_last_ts`
([Destination Details API](#destination-details-api)) must not be equal to `0`.
The connection attempt is carried out in the background and can take a while
even if the API already returns the http status 200.
The API is:
```
POST /_synapse/admin/v1/federation/destinations/<destination>/reset_connection
{}
```
**Parameters**
The following parameters should be set in the URL:
- `destination` - Name of the remote server.

View file

@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import Dict, Iterable, List, Optional, Tuple, Type
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Type
from typing_extensions import Literal
@ -36,17 +36,19 @@ from synapse.http.servlet import (
parse_integer_from_args,
parse_string_from_args,
)
from synapse.server import HomeServer
from synapse.types import JsonDict, ThirdPartyInstanceID
from synapse.util.ratelimitutils import FederationRateLimiter
if TYPE_CHECKING:
from synapse.server import HomeServer
logger = logging.getLogger(__name__)
class TransportLayerServer(JsonResource):
"""Handles incoming federation HTTP requests"""
def __init__(self, hs: HomeServer, servlet_groups: Optional[List[str]] = None):
def __init__(self, hs: "HomeServer", servlet_groups: Optional[List[str]] = None):
"""Initialize the TransportLayerServer
Will by default register all servlets. For custom behaviour, pass in
@ -113,7 +115,7 @@ class PublicRoomList(BaseFederationServlet):
def __init__(
self,
hs: HomeServer,
hs: "HomeServer",
authenticator: Authenticator,
ratelimiter: FederationRateLimiter,
server_name: str,
@ -203,7 +205,7 @@ class FederationGroupsRenewAttestaionServlet(BaseFederationServlet):
def __init__(
self,
hs: HomeServer,
hs: "HomeServer",
authenticator: Authenticator,
ratelimiter: FederationRateLimiter,
server_name: str,
@ -251,7 +253,7 @@ class OpenIdUserInfo(BaseFederationServlet):
def __init__(
self,
hs: HomeServer,
hs: "HomeServer",
authenticator: Authenticator,
ratelimiter: FederationRateLimiter,
server_name: str,
@ -297,7 +299,7 @@ DEFAULT_SERVLET_GROUPS: Dict[str, Iterable[Type[BaseFederationServlet]]] = {
def register_servlets(
hs: HomeServer,
hs: "HomeServer",
resource: HttpServer,
authenticator: Authenticator,
ratelimiter: FederationRateLimiter,

View file

@ -15,7 +15,7 @@
import functools
import logging
import re
from typing import Any, Awaitable, Callable, Optional, Tuple, cast
from typing import TYPE_CHECKING, Any, Awaitable, Callable, Optional, Tuple, cast
from synapse.api.errors import Codes, FederationDeniedError, SynapseError
from synapse.api.urls import FEDERATION_V1_PREFIX
@ -29,11 +29,13 @@ from synapse.logging.opentracing import (
start_active_span_follows_from,
whitelisted_homeserver,
)
from synapse.server import HomeServer
from synapse.types import JsonDict
from synapse.util.ratelimitutils import FederationRateLimiter
from synapse.util.stringutils import parse_and_validate_server_name
if TYPE_CHECKING:
from synapse.server import HomeServer
logger = logging.getLogger(__name__)
@ -46,7 +48,7 @@ class NoAuthenticationError(AuthenticationError):
class Authenticator:
def __init__(self, hs: HomeServer):
def __init__(self, hs: "HomeServer"):
self._clock = hs.get_clock()
self.keyring = hs.get_keyring()
self.server_name = hs.hostname
@ -114,11 +116,11 @@ class Authenticator:
# alive
retry_timings = await self.store.get_destination_retry_timings(origin)
if retry_timings and retry_timings.retry_last_ts:
run_in_background(self._reset_retry_timings, origin)
run_in_background(self.reset_retry_timings, origin)
return origin
async def _reset_retry_timings(self, origin: str) -> None:
async def reset_retry_timings(self, origin: str) -> None:
try:
logger.info("Marking origin %r as up", origin)
await self.store.set_destination_retry_timings(origin, None, 0, 0)
@ -227,7 +229,7 @@ class BaseFederationServlet:
def __init__(
self,
hs: HomeServer,
hs: "HomeServer",
authenticator: Authenticator,
ratelimiter: FederationRateLimiter,
server_name: str,

View file

@ -12,7 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Type, Union
from typing import (
TYPE_CHECKING,
Dict,
List,
Mapping,
Optional,
Sequence,
Tuple,
Type,
Union,
)
from typing_extensions import Literal
@ -30,11 +40,13 @@ from synapse.http.servlet import (
parse_string_from_args,
parse_strings_from_args,
)
from synapse.server import HomeServer
from synapse.types import JsonDict
from synapse.util.ratelimitutils import FederationRateLimiter
from synapse.util.versionstring import get_version_string
if TYPE_CHECKING:
from synapse.server import HomeServer
logger = logging.getLogger(__name__)
issue_8631_logger = logging.getLogger("synapse.8631_debug")
@ -47,7 +59,7 @@ class BaseFederationServerServlet(BaseFederationServlet):
def __init__(
self,
hs: HomeServer,
hs: "HomeServer",
authenticator: Authenticator,
ratelimiter: FederationRateLimiter,
server_name: str,
@ -596,7 +608,7 @@ class FederationSpaceSummaryServlet(BaseFederationServlet):
def __init__(
self,
hs: HomeServer,
hs: "HomeServer",
authenticator: Authenticator,
ratelimiter: FederationRateLimiter,
server_name: str,
@ -670,7 +682,7 @@ class FederationRoomHierarchyServlet(BaseFederationServlet):
def __init__(
self,
hs: HomeServer,
hs: "HomeServer",
authenticator: Authenticator,
ratelimiter: FederationRateLimiter,
server_name: str,
@ -706,7 +718,7 @@ class RoomComplexityServlet(BaseFederationServlet):
def __init__(
self,
hs: HomeServer,
hs: "HomeServer",
authenticator: Authenticator,
ratelimiter: FederationRateLimiter,
server_name: str,

View file

@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict, List, Tuple, Type
from typing import TYPE_CHECKING, Dict, List, Tuple, Type
from synapse.api.errors import SynapseError
from synapse.federation.transport.server._base import (
@ -19,10 +19,12 @@ from synapse.federation.transport.server._base import (
BaseFederationServlet,
)
from synapse.handlers.groups_local import GroupsLocalHandler
from synapse.server import HomeServer
from synapse.types import JsonDict, get_domain_from_id
from synapse.util.ratelimitutils import FederationRateLimiter
if TYPE_CHECKING:
from synapse.server import HomeServer
class BaseGroupsLocalServlet(BaseFederationServlet):
"""Abstract base class for federation servlet classes which provides a groups local handler.
@ -32,7 +34,7 @@ class BaseGroupsLocalServlet(BaseFederationServlet):
def __init__(
self,
hs: HomeServer,
hs: "HomeServer",
authenticator: Authenticator,
ratelimiter: FederationRateLimiter,
server_name: str,

View file

@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict, List, Tuple, Type
from typing import TYPE_CHECKING, Dict, List, Tuple, Type
from typing_extensions import Literal
@ -22,10 +22,12 @@ from synapse.federation.transport.server._base import (
BaseFederationServlet,
)
from synapse.http.servlet import parse_string_from_args
from synapse.server import HomeServer
from synapse.types import JsonDict, get_domain_from_id
from synapse.util.ratelimitutils import FederationRateLimiter
if TYPE_CHECKING:
from synapse.server import HomeServer
class BaseGroupsServerServlet(BaseFederationServlet):
"""Abstract base class for federation servlet classes which provides a groups server handler.
@ -35,7 +37,7 @@ class BaseGroupsServerServlet(BaseFederationServlet):
def __init__(
self,
hs: HomeServer,
hs: "HomeServer",
authenticator: Authenticator,
ratelimiter: FederationRateLimiter,
server_name: str,

View file

@ -41,7 +41,8 @@ from synapse.rest.admin.event_reports import (
EventReportsRestServlet,
)
from synapse.rest.admin.federation import (
DestinationsRestServlet,
DestinationResetConnectionRestServlet,
DestinationRestServlet,
ListDestinationsRestServlet,
)
from synapse.rest.admin.groups import DeleteGroupAdminRestServlet
@ -267,7 +268,8 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
ListRegistrationTokensRestServlet(hs).register(http_server)
NewRegistrationTokenRestServlet(hs).register(http_server)
RegistrationTokenRestServlet(hs).register(http_server)
DestinationsRestServlet(hs).register(http_server)
DestinationResetConnectionRestServlet(hs).register(http_server)
DestinationRestServlet(hs).register(http_server)
ListDestinationsRestServlet(hs).register(http_server)
# Some servlets only get registered for the main process.

View file

@ -16,6 +16,7 @@ from http import HTTPStatus
from typing import TYPE_CHECKING, Tuple
from synapse.api.errors import Codes, NotFoundError, SynapseError
from synapse.federation.transport.server import Authenticator
from synapse.http.servlet import RestServlet, parse_integer, parse_string
from synapse.http.site import SynapseRequest
from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin
@ -90,7 +91,7 @@ class ListDestinationsRestServlet(RestServlet):
return HTTPStatus.OK, response
class DestinationsRestServlet(RestServlet):
class DestinationRestServlet(RestServlet):
"""Get details of a destination.
This needs user to have administrator access in Synapse.
@ -145,3 +146,44 @@ class DestinationsRestServlet(RestServlet):
}
return HTTPStatus.OK, response
class DestinationResetConnectionRestServlet(RestServlet):
"""Reset destinations' connection timeouts and wake it up.
This needs user to have administrator access in Synapse.
POST /_synapse/admin/v1/federation/destinations/<destination>/reset_connection
{}
returns:
200 OK otherwise an error.
"""
PATTERNS = admin_patterns(
"/federation/destinations/(?P<destination>[^/]+)/reset_connection$"
)
def __init__(self, hs: "HomeServer"):
self._auth = hs.get_auth()
self._store = hs.get_datastore()
self._authenticator = Authenticator(hs)
async def on_POST(
self, request: SynapseRequest, destination: str
) -> Tuple[int, JsonDict]:
await assert_requester_is_admin(self._auth, request)
if not await self._store.is_destination_known(destination):
raise NotFoundError("Unknown destination")
retry_timings = await self._store.get_destination_retry_timings(destination)
if not (retry_timings and retry_timings.retry_last_ts):
raise SynapseError(
HTTPStatus.BAD_REQUEST,
"The retry timing does not need to be reset for this destination.",
)
# reset timings and wake up
await self._authenticator.reset_retry_timings(destination)
return HTTPStatus.OK, {}

View file

@ -43,11 +43,15 @@ class FederationTestCase(unittest.HomeserverTestCase):
@parameterized.expand(
[
("/_synapse/admin/v1/federation/destinations",),
("/_synapse/admin/v1/federation/destinations/dummy",),
("GET", "/_synapse/admin/v1/federation/destinations"),
("GET", "/_synapse/admin/v1/federation/destinations/dummy"),
(
"POST",
"/_synapse/admin/v1/federation/destinations/dummy/reset_connection",
),
]
)
def test_requester_is_no_admin(self, url: str) -> None:
def test_requester_is_no_admin(self, method: str, url: str) -> None:
"""
If the user is not a server admin, an error 403 is returned.
"""
@ -56,7 +60,7 @@ class FederationTestCase(unittest.HomeserverTestCase):
other_user_tok = self.login("user", "pass")
channel = self.make_request(
"GET",
method,
url,
content={},
access_token=other_user_tok,
@ -120,6 +124,16 @@ class FederationTestCase(unittest.HomeserverTestCase):
self.assertEqual(HTTPStatus.NOT_FOUND, channel.code, msg=channel.json_body)
self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"])
# invalid destination
channel = self.make_request(
"POST",
self.url + "/dummy/reset_connection",
access_token=self.admin_user_tok,
)
self.assertEqual(HTTPStatus.NOT_FOUND, channel.code, msg=channel.json_body)
self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"])
def test_limit(self) -> None:
"""
Testing list of destinations with limit
@ -444,6 +458,39 @@ class FederationTestCase(unittest.HomeserverTestCase):
self.assertIsNone(channel.json_body["failure_ts"])
self.assertIsNone(channel.json_body["last_successful_stream_ordering"])
def test_destination_reset_connection(self) -> None:
"""Reset timeouts and wake up destination."""
self._create_destination("sub0.example.com", 100, 100, 100)
channel = self.make_request(
"POST",
self.url + "/sub0.example.com/reset_connection",
access_token=self.admin_user_tok,
)
self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body)
retry_timings = self.get_success(
self.store.get_destination_retry_timings("sub0.example.com")
)
self.assertIsNone(retry_timings)
def test_destination_reset_connection_not_required(self) -> None:
"""Try to reset timeouts of a destination with no timeouts and get an error."""
self._create_destination("sub0.example.com", None, 0, 0)
channel = self.make_request(
"POST",
self.url + "/sub0.example.com/reset_connection",
access_token=self.admin_user_tok,
)
self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body)
self.assertEqual(
"The retry timing does not need to be reset for this destination.",
channel.json_body["error"],
)
def _create_destination(
self,
destination: str,