Add HTTP status code to FederationClientError (#2699)

Also ensures we back off before retrying on more HTTP status codes (any status >= 300, instead of only >= 500).
This commit is contained in:
Till 2022-09-07 16:14:09 +02:00 committed by GitHub
parent 7e8c605f98
commit 0d697f6754
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 27 additions and 12 deletions

View File

@ -5,9 +5,10 @@ import (
"fmt"
"time"
"github.com/matrix-org/dendrite/federationapi/types"
"github.com/matrix-org/gomatrix"
"github.com/matrix-org/gomatrixserverlib"
"github.com/matrix-org/dendrite/federationapi/types"
)
// FederationInternalAPI is used to query information from the federation sender.
@ -108,6 +109,7 @@ type FederationClientError struct {
Err string
RetryAfter time.Duration
Blacklisted bool
Code int // HTTP Status code from the remote server
}
func (e FederationClientError) Error() string {

View File

@ -6,10 +6,12 @@ import (
"net/http"
"github.com/gorilla/mux"
"github.com/matrix-org/dendrite/federationapi/api"
"github.com/matrix-org/dendrite/internal/httputil"
"github.com/matrix-org/gomatrix"
"github.com/matrix-org/gomatrixserverlib"
"github.com/matrix-org/util"
"github.com/matrix-org/dendrite/federationapi/api"
"github.com/matrix-org/dendrite/internal/httputil"
)
// AddRoutes adds the FederationInternalAPI handlers to the http.ServeMux.
@ -229,6 +231,10 @@ func federationClientError(err error) error {
return &ferr
case *api.FederationClientError:
return ferr
case gomatrix.HTTPError:
return &api.FederationClientError{
Code: ferr.Code,
}
default:
return &api.FederationClientError{
Err: err.Error(),

View File

@ -407,10 +407,13 @@ userLoop:
waitTime = e.RetryAfter
} else if e.Blacklisted {
waitTime = time.Hour * 8
} else {
break userLoop
} else if e.Code >= 300 {
// We didn't get a real FederationClientError (e.g. in polylith mode, where a gomatrix.HTTPError
// is "converted" to a FederationClientError), but we probably shouldn't hit the remote server every $waitTime seconds.
waitTime = time.Hour
break userLoop
}
break userLoop
case net.Error:
// Use the default waitTime, if it's a timeout.
// It probably doesn't make sense to try further users.
@ -420,9 +423,10 @@ userLoop:
break userLoop
}
case gomatrix.HTTPError:
// The remote server returned an error, give it some time to recover
if e.Code >= 500 {
waitTime = time.Minute * 10
// The remote server returned an error, give it some time to recover.
// This is to avoid spamming remote servers, which may not be Matrix servers anymore.
if e.Code >= 300 {
waitTime = time.Hour
logrus.WithError(e).Error("GetUserDevices returned gomatrix.HTTPError")
break userLoop
}
@ -459,9 +463,10 @@ userLoop:
}
if failCount > 0 {
logger.WithFields(logrus.Fields{
"total": len(userIDs),
"failed": failCount,
"skipped": len(userIDs) - failCount,
"total": len(userIDs),
"failed": failCount,
"skipped": len(userIDs) - failCount,
"waittime": waitTime,
}).Warn("Failed to query device keys for some users")
}
for _, userID := range userIDs {

View File

@ -49,3 +49,6 @@ Notifications can be viewed with GET /notifications
If remote user leaves room we no longer receive device updates
Guest users can join guest_access rooms
# This will fail in HTTP API mode, so blacklisted for now
If a device list update goes missing, the server resyncs on the next one

View File

@ -742,4 +742,3 @@ User in private room doesn't appear in user directory
User joining then leaving public room appears and dissappears from directory
User in remote room doesn't appear in user directory after server left room
User in shared private room does appear in user directory until leave
If a device list update goes missing, the server resyncs on the next one