mirror of
https://github.com/tulir/mautrix-whatsapp
synced 2024-12-13 17:13:11 +01:00
Merge pull request #620 from vector-im/hs/add-connectionFailures-metric
Add a metric for connection failures
This commit is contained in:
commit
90ff645122
2 changed files with 15 additions and 0 deletions
12
metrics.go
12
metrics.go
|
@ -52,6 +52,7 @@ type MetricsHandler struct {
|
|||
countCollection prometheus.Histogram
|
||||
disconnections *prometheus.CounterVec
|
||||
incomingRetryReceipts *prometheus.CounterVec
|
||||
connectionFailures *prometheus.CounterVec
|
||||
puppetCount prometheus.Gauge
|
||||
userCount prometheus.Gauge
|
||||
messageCount prometheus.Gauge
|
||||
|
@ -101,6 +102,10 @@ func NewMetricsHandler(address string, log log.Logger, db *database.Database) *M
|
|||
Name: "whatsapp_disconnections",
|
||||
Help: "Number of times a Matrix user has been disconnected from WhatsApp",
|
||||
}, []string{"user_id"}),
|
||||
connectionFailures: promauto.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "whatsapp_connection_failures",
|
||||
Help: "Number of times a connection has failed to whatsapp",
|
||||
}, []string{"reason"}),
|
||||
incomingRetryReceipts: promauto.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "whatsapp_incoming_retry_receipts",
|
||||
Help: "Number of times a remote WhatsApp user has requested a retry from the bridge. retry_count = 5 is usually the last attempt (and very likely means a failed message)",
|
||||
|
@ -173,6 +178,13 @@ func (mh *MetricsHandler) TrackDisconnection(userID id.UserID) {
|
|||
mh.disconnections.With(prometheus.Labels{"user_id": string(userID)}).Inc()
|
||||
}
|
||||
|
||||
func (mh *MetricsHandler) TrackConnectionFailure(reason string) {
|
||||
if !mh.running {
|
||||
return
|
||||
}
|
||||
mh.connectionFailures.With(prometheus.Labels{"reason": reason}).Inc()
|
||||
}
|
||||
|
||||
func (mh *MetricsHandler) TrackRetryReceipt(count int, found bool) {
|
||||
if !mh.running {
|
||||
return
|
||||
|
|
3
user.go
3
user.go
|
@ -844,13 +844,16 @@ func (user *User) HandleEvent(event interface{}) {
|
|||
case *events.ConnectFailure:
|
||||
user.BridgeState.Send(status.BridgeState{StateEvent: status.StateUnknownError, Message: fmt.Sprintf("Unknown connection failure: %s", v.Reason)})
|
||||
user.bridge.Metrics.TrackConnectionState(user.JID, false)
|
||||
user.bridge.Metrics.TrackConnectionFailure(fmt.Sprintf("status-%d", v.Reason))
|
||||
case *events.ClientOutdated:
|
||||
user.log.Errorfln("Got a client outdated connect failure. The bridge is likely out of date, please update immediately.")
|
||||
user.BridgeState.Send(status.BridgeState{StateEvent: status.StateUnknownError, Message: "Connect failure: 405 client outdated"})
|
||||
user.bridge.Metrics.TrackConnectionState(user.JID, false)
|
||||
user.bridge.Metrics.TrackConnectionFailure("client-outdated")
|
||||
case *events.TemporaryBan:
|
||||
user.BridgeState.Send(status.BridgeState{StateEvent: status.StateBadCredentials, Message: v.String()})
|
||||
user.bridge.Metrics.TrackConnectionState(user.JID, false)
|
||||
user.bridge.Metrics.TrackConnectionFailure("temporary-ban")
|
||||
case *events.Disconnected:
|
||||
// Don't send the normal transient disconnect state if we're already in a different transient disconnect state.
|
||||
// TODO remove this if/when the phone offline state is moved to a sub-state of CONNECTED
|
||||
|
|
Loading…
Reference in a new issue