mirror of
https://github.com/matrix-org/dendrite
synced 2024-12-14 06:33:50 +01:00
Make 'Device list doesn't change if remote server is down' pass (#1268)
- As a last resort, query the DB when exhausting all possible remote query endpoints, but keep the field in `failures` so clients can detect that this is stale data. - Unblock `DeviceListUpdater.Update` on failures rather than timing out. - Use a mutex when writing directly to `res`, not just for failures.
This commit is contained in:
parent
4c4732a9c9
commit
20c8f252a7
3 changed files with 78 additions and 58 deletions
|
@ -342,10 +342,12 @@ func (u *DeviceListUpdater) processServer(serverName gomatrixserverlib.ServerNam
|
|||
if err != nil {
|
||||
logger.WithError(err).WithField("user_id", userID).Error("fetched device list but failed to store/emit it")
|
||||
hasFailures = true
|
||||
} else {
|
||||
u.clearChannel(userID)
|
||||
}
|
||||
}
|
||||
for _, userID := range userIDs {
|
||||
// always clear the channel to unblock Update calls regardless of success/failure
|
||||
u.clearChannel(userID)
|
||||
}
|
||||
return hasFailures
|
||||
}
|
||||
|
||||
|
|
|
@ -318,12 +318,39 @@ func (a *KeyInternalAPI) queryRemoteKeys(
|
|||
// allows us to wait until all federation servers have been poked
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(domainToDeviceKeys))
|
||||
// mutex for failures
|
||||
var failMu sync.Mutex
|
||||
// mutex for writing directly to res (e.g failures)
|
||||
var respMu sync.Mutex
|
||||
|
||||
// fan out
|
||||
for domain, deviceKeys := range domainToDeviceKeys {
|
||||
go func(serverName string, devKeys map[string][]string) {
|
||||
go a.queryRemoteKeysOnServer(ctx, domain, deviceKeys, &wg, &respMu, timeout, resultCh, res)
|
||||
}
|
||||
|
||||
// Close the result channel when the goroutines have quit so the for .. range exits
|
||||
go func() {
|
||||
wg.Wait()
|
||||
close(resultCh)
|
||||
}()
|
||||
|
||||
for result := range resultCh {
|
||||
for userID, nest := range result.DeviceKeys {
|
||||
res.DeviceKeys[userID] = make(map[string]json.RawMessage)
|
||||
for deviceID, deviceKey := range nest {
|
||||
keyJSON, err := json.Marshal(deviceKey)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
res.DeviceKeys[userID][deviceID] = keyJSON
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (a *KeyInternalAPI) queryRemoteKeysOnServer(
|
||||
ctx context.Context, serverName string, devKeys map[string][]string, wg *sync.WaitGroup,
|
||||
respMu *sync.Mutex, timeout time.Duration, resultCh chan<- *gomatrixserverlib.RespQueryKeys,
|
||||
res *api.QueryKeysResponse,
|
||||
) {
|
||||
defer wg.Done()
|
||||
fedCtx, cancel := context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
|
@ -351,7 +378,9 @@ func (a *KeyInternalAPI) queryRemoteKeys(
|
|||
}
|
||||
// refresh entries from DB: unlike remoteKeysFromDatabase we know we previously had no device info for this
|
||||
// user so the fact that we're populating all devices here isn't a problem so long as we have devices.
|
||||
respMu.Lock()
|
||||
err = a.populateResponseWithDeviceKeysFromDatabase(ctx, res, userID, nil)
|
||||
respMu.Unlock()
|
||||
if err != nil {
|
||||
logrus.WithFields(logrus.Fields{
|
||||
logrus.ErrorKey: err,
|
||||
|
@ -367,36 +396,24 @@ func (a *KeyInternalAPI) queryRemoteKeys(
|
|||
return
|
||||
}
|
||||
queryKeysResp, err := a.FedClient.QueryKeys(fedCtx, gomatrixserverlib.ServerName(serverName), devKeys)
|
||||
if err != nil {
|
||||
failMu.Lock()
|
||||
if err == nil {
|
||||
resultCh <- &queryKeysResp
|
||||
return
|
||||
}
|
||||
respMu.Lock()
|
||||
res.Failures[serverName] = map[string]interface{}{
|
||||
"message": err.Error(),
|
||||
}
|
||||
failMu.Unlock()
|
||||
return
|
||||
}
|
||||
resultCh <- &queryKeysResp
|
||||
}(domain, deviceKeys)
|
||||
}
|
||||
|
||||
// Close the result channel when the goroutines have quit so the for .. range exits
|
||||
go func() {
|
||||
wg.Wait()
|
||||
close(resultCh)
|
||||
}()
|
||||
// last ditch, use the cache only. This is good for when clients hit /keys/query and the remote server
|
||||
// is down, better to return something than nothing at all. Clients can know about the failure by
|
||||
// inspecting the failures map though so they can know it's a cached response.
|
||||
for userID, dkeys := range devKeys {
|
||||
// drop the error as it's already a failure at this point
|
||||
_ = a.populateResponseWithDeviceKeysFromDatabase(ctx, res, userID, dkeys)
|
||||
}
|
||||
respMu.Unlock()
|
||||
|
||||
for result := range resultCh {
|
||||
for userID, nest := range result.DeviceKeys {
|
||||
res.DeviceKeys[userID] = make(map[string]json.RawMessage)
|
||||
for deviceID, deviceKey := range nest {
|
||||
keyJSON, err := json.Marshal(deviceKey)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
res.DeviceKeys[userID][deviceID] = keyJSON
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (a *KeyInternalAPI) populateResponseWithDeviceKeysFromDatabase(
|
||||
|
|
|
@ -148,6 +148,7 @@ Get left notifs in sync and /keys/changes when other user leaves
|
|||
Can query remote device keys using POST after notification
|
||||
Server correctly resyncs when client query keys and there is no remote cache
|
||||
Server correctly resyncs when server leaves and rejoins a room
|
||||
Device list doesn't change if remote server is down
|
||||
Can add account data
|
||||
Can add account data to room
|
||||
Can get account data without syncing
|
||||
|
|
Loading…
Reference in a new issue