Fix potential autoreconnect problem and add exponential backoff
This commit is contained in:
parent
498c0e4130
commit
7f0c67168c
3 changed files with 25 additions and 11 deletions
|
@ -35,6 +35,7 @@ type BridgeConfig struct {
|
|||
|
||||
ConnectionTimeout int `yaml:"connection_timeout"`
|
||||
MaxConnectionAttempts int `yaml:"max_connection_attempts"`
|
||||
ConnectionRetryDelay int `yaml:"connection_retry_delay"`
|
||||
ReportConnectionRetry bool `yaml:"report_connection_retry"`
|
||||
|
||||
InitialChatSync int `yaml:"initial_chat_sync_count"`
|
||||
|
@ -56,6 +57,7 @@ type BridgeConfig struct {
|
|||
func (bc *BridgeConfig) setDefaults() {
|
||||
bc.ConnectionTimeout = 20
|
||||
bc.MaxConnectionAttempts = 3
|
||||
bc.ConnectionRetryDelay = -1
|
||||
bc.ReportConnectionRetry = true
|
||||
|
||||
bc.InitialChatSync = 10
|
||||
|
|
|
@ -62,6 +62,9 @@ bridge:
|
|||
connection_timeout: 20
|
||||
# Maximum number of times to retry connecting on connection error.
|
||||
max_connection_attempts: 3
|
||||
# Number of seconds to wait between connection attempts.
|
||||
# A negative value enables exponential backoff: the delay in seconds is -connection_retry_delay + 1 + 2^attempts
|
||||
connection_retry_delay: -1
|
||||
# Whether or not the bridge should send a notice to the user's management room when it retries connecting.
|
||||
# If false, it will only report when it stops retrying.
|
||||
report_connection_retry: true
|
||||
|
|
31
user.go
31
user.go
|
@ -348,23 +348,22 @@ func (user *User) HandleError(err error) {
|
|||
if errors.Cause(err) != whatsapp.ErrInvalidWsData {
|
||||
user.log.Errorln("WhatsApp error:", err)
|
||||
}
|
||||
var msg string
|
||||
if closed, ok := err.(*whatsapp.ErrConnectionClosed); ok {
|
||||
user.Connected = false
|
||||
if closed.Code == 1000 {
|
||||
// Normal closure
|
||||
return
|
||||
}
|
||||
user.ConnectionErrors++
|
||||
msg = fmt.Sprintf("Your WhatsApp connection was closed with websocket status code %d", closed.Code)
|
||||
go user.tryReconnect(fmt.Sprintf("Your WhatsApp connection was closed with websocket status code %d", closed.Code))
|
||||
} else if failed, ok := err.(*whatsapp.ErrConnectionFailed); ok {
|
||||
user.Connected = false
|
||||
user.ConnectionErrors++
|
||||
msg = fmt.Sprintf("Your WhatsApp connection failed: %v", failed.Err)
|
||||
} else {
|
||||
// Unknown error, probably mostly harmless
|
||||
return
|
||||
go user.tryReconnect(fmt.Sprintf("Your WhatsApp connection failed: %v", failed.Err))
|
||||
}
|
||||
// Otherwise unknown error, probably mostly harmless
|
||||
}
|
||||
|
||||
func (user *User) tryReconnect(msg string) {
|
||||
if user.ConnectionErrors > user.bridge.Config.Bridge.MaxConnectionAttempts {
|
||||
content := format.RenderMarkdown(fmt.Sprintf("%s. Use the `reconnect` command to reconnect.", msg))
|
||||
_, _ = user.bridge.Bot.SendMessageEvent(user.ManagementRoom, mautrix.EventMessage, content)
|
||||
|
@ -375,9 +374,16 @@ func (user *User) HandleError(err error) {
|
|||
// Don't want the same error to be repeated
|
||||
msg = ""
|
||||
}
|
||||
tries := 0
|
||||
var tries uint
|
||||
var exponentialBackoff bool
|
||||
baseDelay := time.Duration(user.bridge.Config.Bridge.ConnectionRetryDelay)
|
||||
if baseDelay < 0 {
|
||||
exponentialBackoff = true
|
||||
baseDelay = -baseDelay + 1
|
||||
}
|
||||
delay := baseDelay
|
||||
for user.ConnectionErrors <= user.bridge.Config.Bridge.MaxConnectionAttempts {
|
||||
err = user.Conn.Restore()
|
||||
err := user.Conn.Restore()
|
||||
if err == nil {
|
||||
user.ConnectionErrors = 0
|
||||
user.Connected = true
|
||||
|
@ -389,11 +395,14 @@ func (user *User) HandleError(err error) {
|
|||
tries++
|
||||
user.ConnectionErrors++
|
||||
if user.ConnectionErrors <= user.bridge.Config.Bridge.MaxConnectionAttempts {
|
||||
if exponentialBackoff {
|
||||
delay = (1 << tries) + baseDelay
|
||||
}
|
||||
if user.bridge.Config.Bridge.ReportConnectionRetry {
|
||||
_, _ = user.bridge.Bot.SendNotice(user.ManagementRoom,
|
||||
fmt.Sprintf("Reconnection attempt failed: %v. Retrying in 10 seconds...", err))
|
||||
fmt.Sprintf("Reconnection attempt failed: %v. Retrying in %d seconds...", err, delay))
|
||||
}
|
||||
time.Sleep(10 * time.Second)
|
||||
time.Sleep(delay * time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue