mirror of https://github.com/matrix-org/dendrite

Make 'Device list doesn't change if remote server is down' pass (#1268)

- As a last resort, query the DB once all possible remote query endpoints have
  been exhausted, but keep an entry in `failures` so clients can detect that
  this is stale data.
- Unblock `DeviceListUpdater.Update` on failures rather than timing out.
- Use a mutex when writing directly to `res`, not just for failures.
Kegsay 2020-08-13 16:43:27 +01:00 committed by GitHub
parent 4c4732a9c9
commit 20c8f252a7
3 changed files with 78 additions and 58 deletions
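
The first bullet above is the client-visible contract: cached device keys are still returned when a remote server is unreachable, and the server's presence in the `failures` map is the signal that they may be stale. A minimal sketch of how a client could use that signal; the response field names follow the Matrix `/keys/query` format, but the `queryKeysResponse` type and `staleUsers` helper are hypothetical, not Dendrite code:

package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

// queryKeysResponse mirrors the relevant fields of a /keys/query response.
type queryKeysResponse struct {
	Failures   map[string]interface{}                `json:"failures"`
	DeviceKeys map[string]map[string]json.RawMessage `json:"device_keys"`
}

// staleUsers returns users whose keys were returned even though their origin
// server is listed in failures, i.e. the keys are likely served from cache.
func staleUsers(res *queryKeysResponse) []string {
	var stale []string
	for userID := range res.DeviceKeys {
		// a user ID looks like @alice:remote.example; the origin server is
		// everything after the first ':'
		if i := strings.Index(userID, ":"); i >= 0 {
			if _, failed := res.Failures[userID[i+1:]]; failed {
				stale = append(stale, userID)
			}
		}
	}
	return stale
}

func main() {
	res := &queryKeysResponse{
		Failures: map[string]interface{}{
			"remote.example": map[string]interface{}{"message": "connection refused"},
		},
		DeviceKeys: map[string]map[string]json.RawMessage{
			"@alice:remote.example": {"DEVICE1": json.RawMessage(`{}`)},
		},
	}
	fmt.Println(staleUsers(res)) // [@alice:remote.example]
}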

@@ -342,10 +342,12 @@ func (u *DeviceListUpdater) processServer(serverName gomatrixserverlib.ServerName
 		if err != nil {
 			logger.WithError(err).WithField("user_id", userID).Error("fetched device list but failed to store/emit it")
 			hasFailures = true
-		} else {
-			u.clearChannel(userID)
 		}
 	}
+	for _, userID := range userIDs {
+		// always clear the channel to unblock Update calls regardless of success/failure
+		u.clearChannel(userID)
+	}
 	return hasFailures
 }
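
The hunk above moves `clearChannel` out of the success branch so that `Update` callers are unblocked even when the remote fetch fails. A simplified sketch of that pattern, using a one-element buffered channel per user as the "dirty" marker; the `updater` type and its methods are hypothetical stand-ins, not Dendrite's actual `DeviceListUpdater`:

package main

import (
	"fmt"
	"sync"
)

// updater tracks a one-element "dirty" channel per user.
type updater struct {
	mu       sync.Mutex
	userChan map[string]chan struct{}
}

func (u *updater) channelFor(userID string) chan struct{} {
	u.mu.Lock()
	defer u.mu.Unlock()
	if u.userChan == nil {
		u.userChan = make(map[string]chan struct{})
	}
	ch, ok := u.userChan[userID]
	if !ok {
		ch = make(chan struct{}, 1)
		u.userChan[userID] = ch
	}
	return ch
}

// Update marks the user as needing a device-list resync. It blocks if an
// earlier mark for the same user has not been cleared yet.
func (u *updater) Update(userID string) {
	u.channelFor(userID) <- struct{}{}
}

// clearChannel drains the user's dirty marker so pending Update calls can
// proceed, regardless of whether the resync succeeded.
func (u *updater) clearChannel(userID string) {
	select {
	case <-u.channelFor(userID):
	default:
	}
}

// processUsers resyncs each user and, crucially, clears every channel
// afterwards even if a resync failed; otherwise later Update calls hang.
func (u *updater) processUsers(userIDs []string) {
	for _, userID := range userIDs {
		_ = userID // fetch the user's devices from the remote server here; may fail
	}
	for _, userID := range userIDs {
		u.clearChannel(userID) // always clear, success or failure
	}
}

func main() {
	u := &updater{}
	u.Update("@alice:remote.example")
	u.processUsers([]string{"@alice:remote.example"})
	u.Update("@alice:remote.example") // does not block: the marker was cleared
	fmt.Println("both updates accepted")
}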

@@ -318,12 +318,39 @@ func (a *KeyInternalAPI) queryRemoteKeys(
 	// allows us to wait until all federation servers have been poked
 	var wg sync.WaitGroup
 	wg.Add(len(domainToDeviceKeys))
-	// mutex for failures
-	var failMu sync.Mutex
+	// mutex for writing directly to res (e.g failures)
+	var respMu sync.Mutex
 	// fan out
 	for domain, deviceKeys := range domainToDeviceKeys {
-		go func(serverName string, devKeys map[string][]string) {
+		go a.queryRemoteKeysOnServer(ctx, domain, deviceKeys, &wg, &respMu, timeout, resultCh, res)
+	}
+
+	// Close the result channel when the goroutines have quit so the for .. range exits
+	go func() {
+		wg.Wait()
+		close(resultCh)
+	}()
+
+	for result := range resultCh {
+		for userID, nest := range result.DeviceKeys {
+			res.DeviceKeys[userID] = make(map[string]json.RawMessage)
+			for deviceID, deviceKey := range nest {
+				keyJSON, err := json.Marshal(deviceKey)
+				if err != nil {
+					continue
+				}
+				res.DeviceKeys[userID][deviceID] = keyJSON
+			}
+		}
+	}
+}
+
+func (a *KeyInternalAPI) queryRemoteKeysOnServer(
+	ctx context.Context, serverName string, devKeys map[string][]string, wg *sync.WaitGroup,
+	respMu *sync.Mutex, timeout time.Duration, resultCh chan<- *gomatrixserverlib.RespQueryKeys,
+	res *api.QueryKeysResponse,
+) {
 	defer wg.Done()
 	fedCtx, cancel := context.WithTimeout(ctx, timeout)
 	defer cancel()
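
This hunk restructures the fan-out so the per-server work lives in `queryRemoteKeysOnServer` while the collection loop stays in the caller. The essential shape is: one goroutine per server writing to a shared result channel, a separate goroutine that closes the channel once the `sync.WaitGroup` drains, and a plain `for ... range` over the channel to merge results. A minimal standalone sketch of that shape, with made-up server names and a made-up `result` type:

package main

import (
	"fmt"
	"sync"
)

// result carries one remote server's answer.
type result struct {
	server string
	keys   map[string]string
}

// queryServer stands in for the federation /keys/query call.
func queryServer(server string) (*result, error) {
	return &result{server: server, keys: map[string]string{"DEVICE1": "base64-key"}}, nil
}

func fanOut(servers []string) map[string]map[string]string {
	resultCh := make(chan *result)
	var wg sync.WaitGroup
	wg.Add(len(servers))
	for _, server := range servers {
		go func(server string) {
			defer wg.Done()
			res, err := queryServer(server)
			if err != nil {
				return // failures are recorded elsewhere, see the following hunks
			}
			resultCh <- res
		}(server)
	}
	// Close the channel once every worker is done so the range below terminates.
	go func() {
		wg.Wait()
		close(resultCh)
	}()
	merged := make(map[string]map[string]string)
	for res := range resultCh {
		merged[res.server] = res.keys
	}
	return merged
}

func main() {
	fmt.Println(fanOut([]string{"remote-a.example", "remote-b.example"}))
}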
@@ -351,7 +378,9 @@ func (a *KeyInternalAPI) queryRemoteKeys(
 		}
 		// refresh entries from DB: unlike remoteKeysFromDatabase we know we previously had no device info for this
 		// user so the fact that we're populating all devices here isn't a problem so long as we have devices.
+		respMu.Lock()
 		err = a.populateResponseWithDeviceKeysFromDatabase(ctx, res, userID, nil)
+		respMu.Unlock()
 		if err != nil {
 			logrus.WithFields(logrus.Fields{
 				logrus.ErrorKey: err,
@@ -367,36 +396,24 @@ func (a *KeyInternalAPI) queryRemoteKeys(
 		return
 	}
 	queryKeysResp, err := a.FedClient.QueryKeys(fedCtx, gomatrixserverlib.ServerName(serverName), devKeys)
-	if err != nil {
-		failMu.Lock()
-		res.Failures[serverName] = map[string]interface{}{
-			"message": err.Error(),
-		}
-		failMu.Unlock()
-		return
-	}
-	resultCh <- &queryKeysResp
-	}(domain, deviceKeys)
-	}
-	// Close the result channel when the goroutines have quit so the for .. range exits
-	go func() {
-		wg.Wait()
-		close(resultCh)
-	}()
-	for result := range resultCh {
-		for userID, nest := range result.DeviceKeys {
-			res.DeviceKeys[userID] = make(map[string]json.RawMessage)
-			for deviceID, deviceKey := range nest {
-				keyJSON, err := json.Marshal(deviceKey)
-				if err != nil {
-					continue
-				}
-				res.DeviceKeys[userID][deviceID] = keyJSON
-			}
-		}
-	}
+	if err == nil {
+		resultCh <- &queryKeysResp
+		return
+	}
+	respMu.Lock()
+	res.Failures[serverName] = map[string]interface{}{
+		"message": err.Error(),
+	}
+	// last ditch, use the cache only. This is good for when clients hit /keys/query and the remote server
+	// is down, better to return something than nothing at all. Clients can know about the failure by
+	// inspecting the failures map though so they can know it's a cached response.
+	for userID, dkeys := range devKeys {
+		// drop the error as it's already a failure at this point
+		_ = a.populateResponseWithDeviceKeysFromDatabase(ctx, res, userID, dkeys)
+	}
+	respMu.Unlock()
 }
 
 func (a *KeyInternalAPI) populateResponseWithDeviceKeysFromDatabase(
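
The last two hunks are why `failMu` becomes `respMu`: the per-server goroutines now write cached device keys into `res` as well as failure entries, so every direct write to the shared response has to hold the same lock. A standalone sketch of that combination, recording the failure and then falling back to cached keys under one mutex; all types and helpers here are hypothetical stand-ins, not the keyserver's real API:

package main

import (
	"fmt"
	"sync"
)

// queryKeysResponse is a stand-in for the keyserver's shared response object.
type queryKeysResponse struct {
	Failures   map[string]interface{}
	DeviceKeys map[string]map[string]string
}

// queryRemoteServer stands in for the federation call; here it always fails.
func queryRemoteServer(serverName string) error {
	return fmt.Errorf("%s unreachable", serverName)
}

// cachedKeysFromDB stands in for populateResponseWithDeviceKeysFromDatabase.
func cachedKeysFromDB(userID string) map[string]string {
	return map[string]string{"CACHEDDEVICE": "cached key for " + userID}
}

// queryOneServer writes to the shared response only while holding respMu.
func queryOneServer(serverName string, userIDs []string, respMu *sync.Mutex, res *queryKeysResponse) {
	err := queryRemoteServer(serverName)
	if err == nil {
		return // fresh keys would be sent over a result channel instead
	}
	respMu.Lock()
	defer respMu.Unlock()
	// record the failure so clients can tell the keys below are cached/stale
	res.Failures[serverName] = map[string]interface{}{"message": err.Error()}
	// last ditch: better to return cached keys than nothing at all
	for _, userID := range userIDs {
		res.DeviceKeys[userID] = cachedKeysFromDB(userID)
	}
}

func main() {
	res := &queryKeysResponse{
		Failures:   map[string]interface{}{},
		DeviceKeys: map[string]map[string]string{},
	}
	var respMu sync.Mutex
	var wg sync.WaitGroup
	perServer := map[string][]string{"remote.example": {"@alice:remote.example"}}
	for server, users := range perServer {
		wg.Add(1)
		go func(server string, users []string) {
			defer wg.Done()
			queryOneServer(server, users, &respMu, res)
		}(server, users)
	}
	wg.Wait()
	fmt.Printf("failures=%v\nkeys=%v\n", res.Failures, res.DeviceKeys)
}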

@@ -148,6 +148,7 @@ Get left notifs in sync and /keys/changes when other user leaves
 Can query remote device keys using POST after notification
 Server correctly resyncs when client query keys and there is no remote cache
 Server correctly resyncs when server leaves and rejoins a room
+Device list doesn't change if remote server is down
 Can add account data
 Can add account data to room
 Can get account data without syncing