From 7f0c67168c72fa9dc3a8153c40db61d3cec878a2 Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Tue, 28 May 2019 14:09:49 +0300 Subject: [PATCH] Fix potential autoreconnect problem and add exponential backoff --- config/bridge.go | 2 ++ example-config.yaml | 3 +++ user.go | 31 ++++++++++++++++++++----------- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/config/bridge.go b/config/bridge.go index b4a800a..3dd679f 100644 --- a/config/bridge.go +++ b/config/bridge.go @@ -35,6 +35,7 @@ type BridgeConfig struct { ConnectionTimeout int `yaml:"connection_timeout"` MaxConnectionAttempts int `yaml:"max_connection_attempts"` + ConnectionRetryDelay int `yaml:"connection_retry_delay"` ReportConnectionRetry bool `yaml:"report_connection_retry"` InitialChatSync int `yaml:"initial_chat_sync_count"` @@ -56,6 +57,7 @@ type BridgeConfig struct { func (bc *BridgeConfig) setDefaults() { bc.ConnectionTimeout = 20 bc.MaxConnectionAttempts = 3 + bc.ConnectionRetryDelay = -1 bc.ReportConnectionRetry = true bc.InitialChatSync = 10 diff --git a/example-config.yaml b/example-config.yaml index c5546a9..e1b3f97 100644 --- a/example-config.yaml +++ b/example-config.yaml @@ -62,6 +62,9 @@ bridge: connection_timeout: 20 # Maximum number of times to retry connecting on connection error. max_connection_attempts: 3 + # Number of seconds to wait between connection attempts. + # Negative numbers are exponential backoff: -connection_retry_delay + 1 + 2^attempts + connection_retry_delay: -1 # Whether or not the bridge should send a notice to the user's management room when it retries connecting. # If false, it will only report when it stops retrying. report_connection_retry: true diff --git a/user.go b/user.go index b97bd8a..8713c3a 100644 --- a/user.go +++ b/user.go @@ -348,23 +348,22 @@ func (user *User) HandleError(err error) { if errors.Cause(err) != whatsapp.ErrInvalidWsData { user.log.Errorln("WhatsApp error:", err) } - var msg string if closed, ok := err.(*whatsapp.ErrConnectionClosed); ok { user.Connected = false if closed.Code == 1000 { // Normal closure return } - user.ConnectionErrors++ - msg = fmt.Sprintf("Your WhatsApp connection was closed with websocket status code %d", closed.Code) + go user.tryReconnect(fmt.Sprintf("Your WhatsApp connection was closed with websocket status code %d", closed.Code)) } else if failed, ok := err.(*whatsapp.ErrConnectionFailed); ok { user.Connected = false user.ConnectionErrors++ - msg = fmt.Sprintf("Your WhatsApp connection failed: %v", failed.Err) - } else { - // Unknown error, probably mostly harmless - return + go user.tryReconnect(fmt.Sprintf("Your WhatsApp connection failed: %v", failed.Err)) } + // Otherwise unknown error, probably mostly harmless +} + +func (user *User) tryReconnect(msg string) { if user.ConnectionErrors > user.bridge.Config.Bridge.MaxConnectionAttempts { content := format.RenderMarkdown(fmt.Sprintf("%s. Use the `reconnect` command to reconnect.", msg)) _, _ = user.bridge.Bot.SendMessageEvent(user.ManagementRoom, mautrix.EventMessage, content) @@ -375,9 +374,16 @@ func (user *User) HandleError(err error) { // Don't want the same error to be repeated msg = "" } - tries := 0 + var tries uint + var exponentialBackoff bool + baseDelay := time.Duration(user.bridge.Config.Bridge.ConnectionRetryDelay) + if baseDelay < 0 { + exponentialBackoff = true + baseDelay = -baseDelay + 1 + } + delay := baseDelay for user.ConnectionErrors <= user.bridge.Config.Bridge.MaxConnectionAttempts { - err = user.Conn.Restore() + err := user.Conn.Restore() if err == nil { user.ConnectionErrors = 0 user.Connected = true @@ -389,11 +395,14 @@ func (user *User) HandleError(err error) { tries++ user.ConnectionErrors++ if user.ConnectionErrors <= user.bridge.Config.Bridge.MaxConnectionAttempts { + if exponentialBackoff { + delay = (1 << tries) + baseDelay + } if user.bridge.Config.Bridge.ReportConnectionRetry { _, _ = user.bridge.Bot.SendNotice(user.ManagementRoom, - fmt.Sprintf("Reconnection attempt failed: %v. Retrying in 10 seconds...", err)) + fmt.Sprintf("Reconnection attempt failed: %v. Retrying in %d seconds...", err, delay)) } - time.Sleep(10 * time.Second) + time.Sleep(delay * time.Second) } }