forked from MirrorHub/mautrix-whatsapp
Merge pull request #620 from vector-im/hs/add-connectionFailures-metric
Add a metric for connection failures
This commit is contained in:
commit
90ff645122
2 changed files with 15 additions and 0 deletions
12
metrics.go
12
metrics.go
|
@ -52,6 +52,7 @@ type MetricsHandler struct {
|
||||||
countCollection prometheus.Histogram
|
countCollection prometheus.Histogram
|
||||||
disconnections *prometheus.CounterVec
|
disconnections *prometheus.CounterVec
|
||||||
incomingRetryReceipts *prometheus.CounterVec
|
incomingRetryReceipts *prometheus.CounterVec
|
||||||
|
connectionFailures *prometheus.CounterVec
|
||||||
puppetCount prometheus.Gauge
|
puppetCount prometheus.Gauge
|
||||||
userCount prometheus.Gauge
|
userCount prometheus.Gauge
|
||||||
messageCount prometheus.Gauge
|
messageCount prometheus.Gauge
|
||||||
|
@ -101,6 +102,10 @@ func NewMetricsHandler(address string, log log.Logger, db *database.Database) *M
|
||||||
Name: "whatsapp_disconnections",
|
Name: "whatsapp_disconnections",
|
||||||
Help: "Number of times a Matrix user has been disconnected from WhatsApp",
|
Help: "Number of times a Matrix user has been disconnected from WhatsApp",
|
||||||
}, []string{"user_id"}),
|
}, []string{"user_id"}),
|
||||||
|
connectionFailures: promauto.NewCounterVec(prometheus.CounterOpts{
|
||||||
|
Name: "whatsapp_connection_failures",
|
||||||
|
Help: "Number of times a connection has failed to whatsapp",
|
||||||
|
}, []string{"reason"}),
|
||||||
incomingRetryReceipts: promauto.NewCounterVec(prometheus.CounterOpts{
|
incomingRetryReceipts: promauto.NewCounterVec(prometheus.CounterOpts{
|
||||||
Name: "whatsapp_incoming_retry_receipts",
|
Name: "whatsapp_incoming_retry_receipts",
|
||||||
Help: "Number of times a remote WhatsApp user has requested a retry from the bridge. retry_count = 5 is usually the last attempt (and very likely means a failed message)",
|
Help: "Number of times a remote WhatsApp user has requested a retry from the bridge. retry_count = 5 is usually the last attempt (and very likely means a failed message)",
|
||||||
|
@ -173,6 +178,13 @@ func (mh *MetricsHandler) TrackDisconnection(userID id.UserID) {
|
||||||
mh.disconnections.With(prometheus.Labels{"user_id": string(userID)}).Inc()
|
mh.disconnections.With(prometheus.Labels{"user_id": string(userID)}).Inc()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TrackConnectionFailure increments the connectionFailures counter vector
// (registered as "whatsapp_connection_failures") for the series whose
// "reason" label equals the given reason string.
func (mh *MetricsHandler) TrackConnectionFailure(reason string) {
|
||||||
|
// Do nothing when the handler is not marked as running.
if !mh.running {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// One time series per distinct reason value; callers visible in this diff
// pass "status-<n>", "client-outdated" and "temporary-ban".
mh.connectionFailures.With(prometheus.Labels{"reason": reason}).Inc()
|
||||||
|
}
|
||||||
|
|
||||||
func (mh *MetricsHandler) TrackRetryReceipt(count int, found bool) {
|
func (mh *MetricsHandler) TrackRetryReceipt(count int, found bool) {
|
||||||
if !mh.running {
|
if !mh.running {
|
||||||
return
|
return
|
||||||
|
|
3
user.go
3
user.go
|
@ -844,13 +844,16 @@ func (user *User) HandleEvent(event interface{}) {
|
||||||
case *events.ConnectFailure:
|
case *events.ConnectFailure:
|
||||||
user.BridgeState.Send(status.BridgeState{StateEvent: status.StateUnknownError, Message: fmt.Sprintf("Unknown connection failure: %s", v.Reason)})
|
user.BridgeState.Send(status.BridgeState{StateEvent: status.StateUnknownError, Message: fmt.Sprintf("Unknown connection failure: %s", v.Reason)})
|
||||||
user.bridge.Metrics.TrackConnectionState(user.JID, false)
|
user.bridge.Metrics.TrackConnectionState(user.JID, false)
|
||||||
|
user.bridge.Metrics.TrackConnectionFailure(fmt.Sprintf("status-%d", v.Reason))
|
||||||
case *events.ClientOutdated:
|
case *events.ClientOutdated:
|
||||||
user.log.Errorfln("Got a client outdated connect failure. The bridge is likely out of date, please update immediately.")
|
user.log.Errorfln("Got a client outdated connect failure. The bridge is likely out of date, please update immediately.")
|
||||||
user.BridgeState.Send(status.BridgeState{StateEvent: status.StateUnknownError, Message: "Connect failure: 405 client outdated"})
|
user.BridgeState.Send(status.BridgeState{StateEvent: status.StateUnknownError, Message: "Connect failure: 405 client outdated"})
|
||||||
user.bridge.Metrics.TrackConnectionState(user.JID, false)
|
user.bridge.Metrics.TrackConnectionState(user.JID, false)
|
||||||
|
user.bridge.Metrics.TrackConnectionFailure("client-outdated")
|
||||||
case *events.TemporaryBan:
|
case *events.TemporaryBan:
|
||||||
user.BridgeState.Send(status.BridgeState{StateEvent: status.StateBadCredentials, Message: v.String()})
|
user.BridgeState.Send(status.BridgeState{StateEvent: status.StateBadCredentials, Message: v.String()})
|
||||||
user.bridge.Metrics.TrackConnectionState(user.JID, false)
|
user.bridge.Metrics.TrackConnectionState(user.JID, false)
|
||||||
|
user.bridge.Metrics.TrackConnectionFailure("temporary-ban")
|
||||||
case *events.Disconnected:
|
case *events.Disconnected:
|
||||||
// Don't send the normal transient disconnect state if we're already in a different transient disconnect state.
|
// Don't send the normal transient disconnect state if we're already in a different transient disconnect state.
|
||||||
// TODO remove this if/when the phone offline state is moved to a sub-state of CONNECTED
|
// TODO remove this if/when the phone offline state is moved to a sub-state of CONNECTED
|
||||||
|
|
Loading…
Reference in a new issue