mirror of
https://github.com/matrix-org/dendrite
synced 2024-11-04 23:19:03 +01:00
161f145176
* Add NATS JetStream support Update shopify/sarama * Fix addresses * Don't change Addresses in Defaults * Update saramajetstream * Add missing error check Keep typing events for at least one minute * Use all configured NATS addresses * Update saramajetstream * Try setting up with NATS * Make sure NATS uses own persistent directory (TODO: make this configurable) * Update go.mod/go.sum * Jetstream package * Various other refactoring * Build fixes * Config tweaks, make random jetstream storage path for CI * Disable interest policies * Try to sane default on jetstream base path * Try to use in-memory for CI * Restore storage/retention * Update nats.go dependency * Adapt changes to config * Remove unneeded TopicFor * Dep update * Revert "Remove unneeded TopicFor" This reverts commitf5a4e4a339
. * Revert changes made to streams * Fix build problems * Update nats-server * Update go.mod/go.sum * Roomserver input API queuing using NATS * Fix topic naming * Prometheus metrics * More refactoring to remove saramajetstream * Add missing topic * Don't try to populate map that doesn't exist * Roomserver output topic * Update go.mod/go.sum * Message acknowledgements * Ack tweaks * Try to resume transaction re-sends * Try to resume transaction re-sends * Update to matrix-org/gomatrixserverlib@91dadfb * Remove internal.PartitionStorer from components that don't consume keychanges * Try to reduce re-allocations a bit in resolveConflictsV2 * Tweak delivery options on RS input * Publish send-to-device messages into correct JetStream subject * Async and sync roomserver input * Update dendrite-config.yaml * Remove roomserver tests for now (they need rewriting) * Remove roomserver test again (was merged back in) * Update documentation * Docker updates * More Docker updates * Update Docker readme again * Fix lint issues * Send final event in `processEvent` synchronously (since this might stop Sytest from being so upset) * Don't report event rejection errors via `/send`, since apparently this is upsetting tests that don't expect that * Go 1.16 instead of Go 1.13 for upgrade tests and Complement * Revert "Don't report event rejection errors via `/send`, since apparently this is upsetting tests that don't expect that" This reverts commit368675283f
. * Don't report any errors on `/send` to see what fun that creates * Fix panics on closed channel sends * Enforce state key matches sender * Do the same for leave * Various tweaks to make tests happier Squashed commit of the following: commit13f9028e7a
Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 15:47:14 2022 +0000 Do the same for leave commite6be7f05c3
Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 15:33:42 2022 +0000 Enforce state key matches sender commit85ede6d64b
Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 14:07:04 2022 +0000 Fix panics on closed channel sends commit9755494a98
Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 13:38:22 2022 +0000 Don't report any errors on `/send` to see what fun that creates commit3bb4f87b5d
Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 13:00:26 2022 +0000 Revert "Don't report event rejection errors via `/send`, since apparently this is upsetting tests that don't expect that" This reverts commit368675283f
. commitfe2673ed7b
Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 12:09:34 2022 +0000 Go 1.16 instead of Go 1.13 for upgrade tests and Complement commit368675283f
Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 11:51:45 2022 +0000 Don't report event rejection errors via `/send`, since apparently this is upsetting tests that don't expect that commitb028dfc085
Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 10:29:08 2022 +0000 Send final event in `processEvent` synchronously (since this might stop Sytest from being so upset) * Merge in NATS Server v2.6.6 and nats.go v1.13 into the in-process connection fork * Add `jetstream.WithJetStreamMessage` to make ack/nak-ing less messy, use process context in consumers * Fix consumer component name in federation API * Add comment explaining where streams are defined * Tweaks to roomserver input with comments * Finish that sentence that I apparently forgot to finish in INSTALL.md * Bump version number of config to 2 * Add comments around asynchronous sends to roomserver in processEventWithMissingState * More useful error message when the config version does not match * Set version in generate-config * Fix version in config.Defaults Co-authored-by: Neil Alexander <neilalexander@users.noreply.github.com>
214 lines
6.8 KiB
Go
214 lines
6.8 KiB
Go
// Copyright 2017 Vector Creations Ltd
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// Package input contains the code that processes new room events.
|
|
package input
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"sync"
|
|
|
|
"github.com/Arceliar/phony"
|
|
"github.com/getsentry/sentry-go"
|
|
"github.com/matrix-org/dendrite/internal/hooks"
|
|
"github.com/matrix-org/dendrite/roomserver/acls"
|
|
"github.com/matrix-org/dendrite/roomserver/api"
|
|
"github.com/matrix-org/dendrite/roomserver/storage"
|
|
"github.com/matrix-org/dendrite/setup/jetstream"
|
|
"github.com/matrix-org/gomatrixserverlib"
|
|
"github.com/nats-io/nats.go"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
log "github.com/sirupsen/logrus"
|
|
"github.com/tidwall/gjson"
|
|
)
|
|
|
|
// keyContentFields maps an event type to the name of the content field whose
// value WriteOutputEvents adds to its log line (as "content_value") when it
// produces an event of that type.
var keyContentFields = map[string]string{
	"m.room.member":             "membership",
	"m.room.history_visibility": "history_visibility",
	"m.room.join_rules":         "join_rule",
}
|
|
|
|
type Inputer struct {
|
|
DB storage.Database
|
|
JetStream nats.JetStreamContext
|
|
ServerName gomatrixserverlib.ServerName
|
|
ACLs *acls.ServerACLs
|
|
InputRoomEventTopic string
|
|
OutputRoomEventTopic string
|
|
workers sync.Map // room ID -> *phony.Inbox
|
|
}
|
|
|
|
// onMessage is called when a new event arrives in the roomserver input stream.
|
|
func (r *Inputer) Start() error {
|
|
_, err := r.JetStream.Subscribe(
|
|
r.InputRoomEventTopic,
|
|
// We specifically don't use jetstream.WithJetStreamMessage here because we
|
|
// queue the task off to a room-specific queue and the ACK needs to be sent
|
|
// later, possibly with an error response to the inputter if synchronous.
|
|
func(msg *nats.Msg) {
|
|
roomID := msg.Header.Get("room_id")
|
|
defer roomserverInputBackpressure.With(prometheus.Labels{"room_id": roomID}).Dec()
|
|
var inputRoomEvent api.InputRoomEvent
|
|
if err := json.Unmarshal(msg.Data, &inputRoomEvent); err != nil {
|
|
_ = msg.Term()
|
|
return
|
|
}
|
|
inbox, _ := r.workers.LoadOrStore(roomID, &phony.Inbox{})
|
|
inbox.(*phony.Inbox).Act(nil, func() {
|
|
if err := r.processRoomEvent(context.TODO(), &inputRoomEvent); err != nil {
|
|
sentry.CaptureException(err)
|
|
} else {
|
|
hooks.Run(hooks.KindNewEventPersisted, inputRoomEvent.Event)
|
|
}
|
|
_ = msg.Ack()
|
|
})
|
|
},
|
|
// NATS wants to acknowledge automatically by default when the message is
|
|
// read from the stream, but we want to override that behaviour by making
|
|
// sure that we only acknowledge when we're happy we've done everything we
|
|
// can. This ensures we retry things when it makes sense to do so.
|
|
nats.ManualAck(),
|
|
// NATS will try to redeliver things to us automatically if we don't ack
|
|
// or nak them within a certain amount of time. This stops that from
|
|
// happening, so we don't end up doing a lot of unnecessary duplicate work.
|
|
nats.MaxDeliver(0),
|
|
)
|
|
return err
|
|
}
|
|
|
|
// InputRoomEvents implements api.RoomserverInternalAPI
|
|
func (r *Inputer) InputRoomEvents(
|
|
ctx context.Context,
|
|
request *api.InputRoomEventsRequest,
|
|
response *api.InputRoomEventsResponse,
|
|
) {
|
|
if request.Asynchronous {
|
|
var err error
|
|
for _, e := range request.InputRoomEvents {
|
|
msg := &nats.Msg{
|
|
Subject: r.InputRoomEventTopic,
|
|
Header: nats.Header{},
|
|
}
|
|
roomID := e.Event.RoomID()
|
|
msg.Header.Set("room_id", roomID)
|
|
msg.Data, err = json.Marshal(e)
|
|
if err != nil {
|
|
response.ErrMsg = err.Error()
|
|
return
|
|
}
|
|
if _, err = r.JetStream.PublishMsg(msg); err != nil {
|
|
return
|
|
}
|
|
roomserverInputBackpressure.With(prometheus.Labels{"room_id": roomID}).Inc()
|
|
}
|
|
} else {
|
|
responses := make(chan error, len(request.InputRoomEvents))
|
|
defer close(responses)
|
|
for _, e := range request.InputRoomEvents {
|
|
inputRoomEvent := e
|
|
inbox, _ := r.workers.LoadOrStore(inputRoomEvent.Event.RoomID(), &phony.Inbox{})
|
|
inbox.(*phony.Inbox).Act(nil, func() {
|
|
err := r.processRoomEvent(context.TODO(), &inputRoomEvent)
|
|
if err != nil {
|
|
sentry.CaptureException(err)
|
|
} else {
|
|
hooks.Run(hooks.KindNewEventPersisted, inputRoomEvent.Event)
|
|
}
|
|
select {
|
|
case <-ctx.Done():
|
|
default:
|
|
responses <- err
|
|
}
|
|
})
|
|
}
|
|
for i := 0; i < len(request.InputRoomEvents); i++ {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case err := <-responses:
|
|
if err != nil {
|
|
response.ErrMsg = err.Error()
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// WriteOutputEvents implements OutputRoomEventWriter
|
|
func (r *Inputer) WriteOutputEvents(roomID string, updates []api.OutputEvent) error {
|
|
var err error
|
|
for _, update := range updates {
|
|
msg := &nats.Msg{
|
|
Subject: r.OutputRoomEventTopic,
|
|
Header: nats.Header{},
|
|
}
|
|
msg.Header.Set(jetstream.RoomID, roomID)
|
|
msg.Data, err = json.Marshal(update)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
logger := log.WithFields(log.Fields{
|
|
"room_id": roomID,
|
|
"type": update.Type,
|
|
})
|
|
if update.NewRoomEvent != nil {
|
|
eventType := update.NewRoomEvent.Event.Type()
|
|
logger = logger.WithFields(log.Fields{
|
|
"event_type": eventType,
|
|
"event_id": update.NewRoomEvent.Event.EventID(),
|
|
"adds_state": len(update.NewRoomEvent.AddsStateEventIDs),
|
|
"removes_state": len(update.NewRoomEvent.RemovesStateEventIDs),
|
|
"send_as_server": update.NewRoomEvent.SendAsServer,
|
|
"sender": update.NewRoomEvent.Event.Sender(),
|
|
})
|
|
if update.NewRoomEvent.Event.StateKey() != nil {
|
|
logger = logger.WithField("state_key", *update.NewRoomEvent.Event.StateKey())
|
|
}
|
|
contentKey := keyContentFields[eventType]
|
|
if contentKey != "" {
|
|
value := gjson.GetBytes(update.NewRoomEvent.Event.Content(), contentKey)
|
|
if value.Exists() {
|
|
logger = logger.WithField("content_value", value.String())
|
|
}
|
|
}
|
|
|
|
if eventType == "m.room.server_acl" && update.NewRoomEvent.Event.StateKeyEquals("") {
|
|
ev := update.NewRoomEvent.Event.Unwrap()
|
|
defer r.ACLs.OnServerACLUpdate(ev)
|
|
}
|
|
}
|
|
logger.Tracef("Producing to topic '%s'", r.OutputRoomEventTopic)
|
|
if _, err := r.JetStream.PublishMsg(msg); err != nil {
|
|
logger.WithError(err).Errorf("Failed to produce to topic '%s': %s", r.OutputRoomEventTopic, err)
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func init() {
|
|
prometheus.MustRegister(roomserverInputBackpressure)
|
|
}
|
|
|
|
var roomserverInputBackpressure = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Namespace: "dendrite",
|
|
Subsystem: "roomserver",
|
|
Name: "input_backpressure",
|
|
Help: "How many events are queued for input for a given room",
|
|
},
|
|
[]string{"room_id"},
|
|
)
|