From d5876abbe9f5484768f603ec91a567b8650e6e73 Mon Sep 17 00:00:00 2001 From: Till <2353100+S7evinK@users.noreply.github.com> Date: Wed, 7 Sep 2022 18:15:54 +0200 Subject: [PATCH] Fulltext implementation incl. config (#2480) This adds the main component of the fulltext search. This PR doesn't do anything yet, besides creating an empty fulltextindex folder if enabled. Indexing events is done in a separate PR. --- dendrite-sample.monolith.yaml | 4 + dendrite-sample.polylith.yaml | 4 + docs/installation/7_configuration.md | 13 ++ go.mod | 27 ++- go.sum | 75 ++++++++ internal/fulltext/bleve.go | 164 ++++++++++++++++++ internal/fulltext/bleve_test.go | 250 +++++++++++++++++++++++++++ internal/fulltext/bleve_wasm.go | 65 +++++++ setup/base/base.go | 12 ++ setup/config/config_syncapi.go | 26 +++ setup/jetstream/streams.go | 1 + test/testrig/base.go | 8 +- 12 files changed, 647 insertions(+), 2 deletions(-) create mode 100644 internal/fulltext/bleve.go create mode 100644 internal/fulltext/bleve_test.go create mode 100644 internal/fulltext/bleve_wasm.go diff --git a/dendrite-sample.monolith.yaml b/dendrite-sample.monolith.yaml index f753c3d9b..195d11610 100644 --- a/dendrite-sample.monolith.yaml +++ b/dendrite-sample.monolith.yaml @@ -268,6 +268,10 @@ sync_api: # address of the client. This is likely required if Dendrite is running behind # a reverse proxy server. # real_ip_header: X-Real-IP + fulltext: + enabled: false + index_path: "./fulltextindex" + language: "en" # more possible languages can be found at https://github.com/blevesearch/bleve/tree/master/analysis/lang # Configuration for the User API. user_api: diff --git a/dendrite-sample.polylith.yaml b/dendrite-sample.polylith.yaml index 3caf91434..65d60e150 100644 --- a/dendrite-sample.polylith.yaml +++ b/dendrite-sample.polylith.yaml @@ -319,6 +319,10 @@ sync_api: max_open_conns: 10 max_idle_conns: 2 conn_max_lifetime: -1 + fulltext: + enabled: false + index_path: "./fulltextindex" + language: "en" # more possible languages can be found at https://github.com/blevesearch/bleve/tree/master/analysis/lang # This option controls which HTTP header to inspect to find the real remote IP # address of the client. This is likely required if Dendrite is running behind diff --git a/docs/installation/7_configuration.md b/docs/installation/7_configuration.md index b1c747414..8fbe71c40 100644 --- a/docs/installation/7_configuration.md +++ b/docs/installation/7_configuration.md @@ -138,6 +138,19 @@ room_server: conn_max_lifetime: -1 ``` +## Fulltext search + +Dendrite supports experimental fulltext indexing using [Bleve](https://github.com/blevesearch/bleve), it is configured in the `sync_api` section as follows. Depending on the language most likely to be used on the server, it might make sense to change the `language` used when indexing, to ensure the returned results match the expections. A full list of possible languages can be found [here](https://github.com/blevesearch/bleve/tree/master/analysis/lang). + +```yaml +sync_api: + # ... + fulltext: + enabled: false + index_path: "./fulltextindex" + language: "en" +``` + ## Other sections There are other options which may be useful so review them all. In particular, if you are diff --git a/go.mod b/go.mod index c737e467c..572453974 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/DATA-DOG/go-sqlmock v1.5.0 github.com/MFAshby/stdemuxerhook v1.0.0 github.com/Masterminds/semver/v3 v3.1.1 + github.com/blevesearch/bleve/v2 v2.3.4 github.com/codeclysm/extract v2.2.0+incompatible github.com/dgraph-io/ristretto v0.1.1-0.20220403145359-8e850b710d6d github.com/docker/docker v20.10.16+incompatible @@ -16,7 +17,6 @@ require ( github.com/google/uuid v1.3.0 github.com/gorilla/mux v1.8.0 github.com/gorilla/websocket v1.5.0 - github.com/kardianos/minwinsvc v1.0.0 github.com/lib/pq v1.10.5 github.com/matrix-org/dugong v0.0.0-20210921133753-66e6b1c67e2e github.com/matrix-org/go-sqlite3-js v0.0.0-20220419092513-28aa791a1c91 @@ -56,7 +56,24 @@ require ( github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect github.com/HdrHistogram/hdrhistogram-go v1.1.2 // indirect github.com/Microsoft/go-winio v0.5.1 // indirect + github.com/RoaringBitmap/roaring v0.9.4 // indirect github.com/beorn7/perks v1.0.1 // indirect + github.com/bits-and-blooms/bitset v1.2.0 // indirect + github.com/blevesearch/bleve_index_api v1.0.3 // indirect + github.com/blevesearch/geo v0.1.13 // indirect + github.com/blevesearch/go-porterstemmer v1.0.3 // indirect + github.com/blevesearch/gtreap v0.1.1 // indirect + github.com/blevesearch/mmap-go v1.0.4 // indirect + github.com/blevesearch/scorch_segment_api/v2 v2.1.2 // indirect + github.com/blevesearch/segment v0.9.0 // indirect + github.com/blevesearch/snowballstem v0.9.0 // indirect + github.com/blevesearch/upsidedown_store_api v1.0.1 // indirect + github.com/blevesearch/vellum v1.0.8 // indirect + github.com/blevesearch/zapx/v11 v11.3.5 // indirect + github.com/blevesearch/zapx/v12 v12.3.5 // indirect + github.com/blevesearch/zapx/v13 v13.3.5 // indirect + github.com/blevesearch/zapx/v14 v14.3.5 // indirect + github.com/blevesearch/zapx/v15 v15.3.5 // indirect github.com/cespare/xxhash/v2 v2.1.2 // indirect github.com/cheekybits/genny v1.0.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect @@ -67,11 +84,15 @@ require ( github.com/fsnotify/fsnotify v1.5.4 // indirect github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0 // indirect github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 // indirect github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b // indirect github.com/golang/protobuf v1.5.2 // indirect + github.com/golang/snappy v0.0.1 // indirect github.com/h2non/filetype v1.1.3 // indirect + github.com/json-iterator/go v1.1.12 // indirect github.com/juju/errors v0.0.0-20220203013757-bd733f3c86b9 // indirect github.com/juju/testing v0.0.0-20220203020004-a0ff61f03494 // indirect + github.com/kardianos/minwinsvc v1.0.0 // indirect github.com/klauspost/compress v1.15.9 // indirect github.com/lucas-clemente/quic-go v0.28.1 // indirect github.com/marten-seemann/qtls-go1-16 v0.1.5 // indirect @@ -82,7 +103,10 @@ require ( github.com/miekg/dns v1.1.49 // indirect github.com/minio/highwayhash v1.0.2 // indirect github.com/moby/term v0.0.0-20210610120745-9d4ed1856297 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect github.com/morikuni/aec v1.0.0 // indirect + github.com/mschoch/smat v0.2.0 // indirect github.com/nats-io/jwt/v2 v2.3.0 // indirect github.com/nats-io/nkeys v0.3.0 // indirect github.com/nats-io/nuid v1.0.1 // indirect @@ -98,6 +122,7 @@ require ( github.com/stretchr/objx v0.2.0 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.0 // indirect + go.etcd.io/bbolt v1.3.5 // indirect golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect golang.org/x/sys v0.0.0-20220731174439-a90be440212d // indirect golang.org/x/text v0.3.8-0.20211004125949-5bd84dd9b33b // indirect diff --git a/go.sum b/go.sum index 0b331befe..00392cb65 100644 --- a/go.sum +++ b/go.sum @@ -58,6 +58,8 @@ github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0 github.com/Microsoft/go-winio v0.5.1 h1:aPJp2QD7OOrhO5tQXqQoGSJc+DjDtWTGLOmNyAm6FgY= github.com/Microsoft/go-winio v0.5.1/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= github.com/RoaringBitmap/roaring v0.4.7/go.mod h1:8khRDP4HmeXns4xIj9oGrKSz7XTQiJx2zgh7AcNke4w= +github.com/RoaringBitmap/roaring v0.9.4 h1:ckvZSX5gwCRaJYBNe7syNawCU5oruY9gQmjXlp4riwo= +github.com/RoaringBitmap/roaring v0.9.4/go.mod h1:icnadbWcNyfEHlYdr+tDlOTih1Bf/h+rzPpv4sbomAA= github.com/RyanCarrier/dijkstra v1.1.0/go.mod h1:5agGUBNEtUAGIANmbw09fuO3a2htPEkc1jNH01qxCWA= github.com/RyanCarrier/dijkstra-1 v0.0.0-20170512020943-0e5801a26345/go.mod h1:OK4EvWJ441LQqGzed5NGB6vKBAE34n3z7iayPcEwr30= github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= @@ -79,10 +81,49 @@ github.com/anacrolix/missinggo v1.2.1/go.mod h1:J5cMhif8jPmFoC3+Uvob3OXXNIhOUikz github.com/anacrolix/missinggo/perf v1.0.0/go.mod h1:ljAFWkBuzkO12MQclXzZrosP5urunoLS0Cbvb4V0uMQ= github.com/anacrolix/tagflag v0.0.0-20180109131632-2146c8d41bf0/go.mod h1:1m2U/K6ZT+JZG0+bdMK6qauP49QT4wE5pmhJXOKKCHw= github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c= +github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA= +github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= +github.com/blevesearch/bleve/v2 v2.3.4 h1:SSb7/cwGzo85LWX1jchIsXM8ZiNNMX3shT5lROM63ew= +github.com/blevesearch/bleve/v2 v2.3.4/go.mod h1:Ot0zYum8XQRfPcwhae8bZmNyYubynsoMjVvl1jPqL30= +github.com/blevesearch/bleve_index_api v1.0.3 h1:DDSWaPXOZZJ2BB73ZTWjKxydAugjwywcqU+91AAqcAg= +github.com/blevesearch/bleve_index_api v1.0.3/go.mod h1:fiwKS0xLEm+gBRgv5mumf0dhgFr2mDgZah1pqv1c1M4= +github.com/blevesearch/geo v0.1.13 h1:RsY1vfFm81iv1g+uoCQtsOFvKAhZnpOdTOK8JRA6pqw= +github.com/blevesearch/geo v0.1.13/go.mod h1:cRIvqCdk3cgMhGeHNNe6yPzb+w56otxbfo1FBJfR2Pc= +github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:9eJDeqxJ3E7WnLebQUlPD7ZjSce7AnDb9vjGmMCbD0A= +github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo= +github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M= +github.com/blevesearch/goleveldb v1.0.1/go.mod h1:WrU8ltZbIp0wAoig/MHbrPCXSOLpe79nz5lv5nqfYrQ= +github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZGW8Y= +github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk= +github.com/blevesearch/mmap-go v1.0.2/go.mod h1:ol2qBqYaOUsGdm7aRMRrYGgPvnwLe6Y+7LMvAB5IbSA= +github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc= +github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs= +github.com/blevesearch/scorch_segment_api/v2 v2.1.2 h1:TAte9VZLWda5WAVlZTTZ+GCzEHqGJb4iB2aiZSA6Iv8= +github.com/blevesearch/scorch_segment_api/v2 v2.1.2/go.mod h1:rvoQXZGq8drq7vXbNeyiRzdEOwZkjkiYGf1822i6CRA= +github.com/blevesearch/segment v0.9.0 h1:5lG7yBCx98or7gK2cHMKPukPZ/31Kag7nONpoBt22Ac= +github.com/blevesearch/segment v0.9.0/go.mod h1:9PfHYUdQCgHktBgvtUOF4x+pc4/l8rdH0u5spnW85UQ= +github.com/blevesearch/snowball v0.6.1/go.mod h1:ZF0IBg5vgpeoUhnMza2v0A/z8m1cWPlwhke08LpNusg= +github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s= +github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs= +github.com/blevesearch/upsidedown_store_api v1.0.1 h1:1SYRwyoFLwG3sj0ed89RLtM15amfX2pXlYbFOnF8zNU= +github.com/blevesearch/upsidedown_store_api v1.0.1/go.mod h1:MQDVGpHZrpe3Uy26zJBf/a8h0FZY6xJbthIMm8myH2Q= +github.com/blevesearch/vellum v1.0.8 h1:iMGh4lfxza4BnWO/UJTMPlI3HsK9YawjPv+TteVa9ck= +github.com/blevesearch/vellum v1.0.8/go.mod h1:+cpRi/tqq49xUYSQN2P7A5zNSNrS+MscLeeaZ3J46UA= +github.com/blevesearch/zapx/v11 v11.3.5 h1:eBQWQ7huA+mzm0sAGnZDwgGGli7S45EO+N+ObFWssbI= +github.com/blevesearch/zapx/v11 v11.3.5/go.mod h1:5UdIa/HRMdeRCiLQOyFESsnqBGiip7vQmYReA9toevU= +github.com/blevesearch/zapx/v12 v12.3.5 h1:5pX2hU+R1aZihT7ac1dNWh1n4wqkIM9pZzWp0ANED9s= +github.com/blevesearch/zapx/v12 v12.3.5/go.mod h1:ANcthYRZQycpbRut/6ArF5gP5HxQyJqiFcuJCBju/ss= +github.com/blevesearch/zapx/v13 v13.3.5 h1:eJ3gbD+Nu8p36/O6lhfdvWQ4pxsGYSuTOBrLLPVWJ74= +github.com/blevesearch/zapx/v13 v13.3.5/go.mod h1:FV+dRnScFgKnRDIp08RQL4JhVXt1x2HE3AOzqYa6fjo= +github.com/blevesearch/zapx/v14 v14.3.5 h1:hEvVjZaagFCvOUJrlFQ6/Z6Jjy0opM3g7TMEo58TwP4= +github.com/blevesearch/zapx/v14 v14.3.5/go.mod h1:954A/eKFb+pg/ncIYWLWCKY+mIjReM9FGTGIO2Wu1cU= +github.com/blevesearch/zapx/v15 v15.3.5 h1:NVD0qq8vRk66ImJn1KloXT5ckqPDUZT7VbVJs9jKlac= +github.com/blevesearch/zapx/v15 v15.3.5/go.mod h1:QMUh2hXCaYIWFKPYGavq/Iga2zbHWZ9DZAa9uFbWyvg= github.com/bradfitz/go-smtpd v0.0.0-20170404230938-deb6d6237625/go.mod h1:HYsPBTaaSFSlLx/70C2HPIMNZpVV8+vt/A+FMnYP11g= github.com/bradfitz/iter v0.0.0-20140124041915-454541ec3da2/go.mod h1:PyRFw1Lt2wKX4ZVSQ2mk+PeDa1rxyObEDlApuIsUKuo= github.com/bradfitz/iter v0.0.0-20190303215204-33e6a9893b0c/go.mod h1:PyRFw1Lt2wKX4ZVSQ2mk+PeDa1rxyObEDlApuIsUKuo= @@ -102,7 +143,13 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/codeclysm/extract v2.2.0+incompatible h1:q3wyckoA30bhUSiwdQezMqVhwd8+WGE64/GL//LtUhI= github.com/codeclysm/extract v2.2.0+incompatible/go.mod h1:2nhFMPHiU9At61hz+12bfrlpXSUrOnK+wR+KlGO4Uks= +github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= +github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= +github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/coreos/go-systemd v0.0.0-20181012123002-c6f51f82210d/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k= +github.com/couchbase/moss v0.2.0/go.mod h1:9MaHIaRuy9pvLPUJxB8sh8OrLfyDczECVL37grCIubs= +github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -180,6 +227,8 @@ github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7a github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= +github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 h1:gtexQ/VGyN+VVFRXSFiguSNcXmS6rkKT+X7FdIrTtfo= +github.com/golang/geo v0.0.0-20210211234256-740aa86cb551/go.mod h1:QZ0nwyI2jOfgRAoBvP+ab5aRr7c9x7lhGEJrKvBwjWI= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -213,6 +262,8 @@ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaS github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= +github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/gologme/log v1.3.0 h1:l781G4dE+pbigClDSDzSaaYKtiueHCILUa/qSDsmHAo= github.com/gologme/log v1.3.0/go.mod h1:yKT+DvIPdDdDoPtqFrFxheooyVmoqi0BAsw+erN3wA4= github.com/google/btree v0.0.0-20180124185431-e89373fe6b4a/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= @@ -233,6 +284,7 @@ github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ= github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= @@ -264,12 +316,15 @@ github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542 h1:2VTzZjLZBgl62/EtslC github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542/go.mod h1:Ow0tF8D4Kplbc8s8sSb3V2oUCygFHVp8gC3Dn6U4MNI= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/huandu/xstrings v1.0.0 h1:pO2K/gKgKaat5LdpAhxhluX2GPQMaI3W5FUz/I/UnWk= github.com/huandu/xstrings v1.0.0/go.mod h1:4qWG/gcEcfX4z/mBDHJ++3ReCw9ibxbsNJbcucJdbSo= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jellevandenhooff/dkim v0.0.0-20150330215556-f50fe3d243e1/go.mod h1:E0B/fFc00Y+Rasa88328GlI/XbtyysCtTHZS8h7IrBU= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= +github.com/json-iterator/go v0.0.0-20171115153421-f7279a603ede/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= @@ -315,6 +370,7 @@ github.com/lib/pq v1.10.5/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/lucas-clemente/quic-go v0.28.1 h1:Uo0lvVxWg5la9gflIF9lwa39ONq85Xq2D91YNEIslzU= github.com/lucas-clemente/quic-go v0.28.1/go.mod h1:oGz5DKK41cJt5+773+BSO9BXDsREY4HLf7+0odGAPO0= github.com/lunixbochs/vtclean v1.0.0/go.mod h1:pHhQNgMf3btfWnGBVipUOjRYhoOsdGqdm/+2c2E2WMI= +github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/mailru/easyjson v0.0.0-20190312143242-1de009706dbe/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/marten-seemann/qpack v0.2.1/go.mod h1:F7Gl5L1jIgN1D11ucXefiuJS9UMVP2opoCp2jDKb7wc= github.com/marten-seemann/qtls-go1-16 v0.1.5 h1:o9JrYPPco/Nukd/HpOHMHZoBDXQqoNtUCmny98/1uqQ= @@ -353,6 +409,8 @@ github.com/miekg/dns v1.1.49 h1:qe0mQU3Z/XpFeE+AEBo2rqaS1IPBJ3anmqZ4XiZJVG8= github.com/miekg/dns v1.1.49/go.mod h1:e3IlAVfNqAllflbibAZEWOXOQ+Ynzk/dDozDxY7XnME= github.com/minio/highwayhash v1.0.2 h1:Aak5U0nElisjDCfPSG79Tgzkn2gl66NxOMspRrKnA/g= github.com/minio/highwayhash v1.0.2/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY= +github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/moby/term v0.0.0-20210610120745-9d4ed1856297 h1:yH0SvLzcbZxcJXho2yh7CqdENGMQe73Cw3woZBpPli0= github.com/moby/term v0.0.0-20210610120745-9d4ed1856297/go.mod h1:vgPCkQMyxTZ7IDy8SXRufE172gr8+K/JE/7hHFxHW3A= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -365,6 +423,8 @@ github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjY github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae/go.mod h1:qAyveg+e4CE+eKJXWVjKXM4ck2QobLqTDytGJbLLhJg= +github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM= +github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/nats-io/jwt/v2 v2.3.0 h1:z2mA1a7tIf5ShggOFlR1oBPgd6hGqcDYsISxZByUzdI= @@ -391,12 +451,14 @@ github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY= github.com/onsi/ginkgo v1.16.2/go.mod h1:CObGmKUOKaSC0RjmoAK7tKyn4Azo5P2IWuoMnvwxz1E= github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0= github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= +github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= github.com/onsi/gomega v1.13.0/go.mod h1:lRk9szgn8TxENtWd0Tp4c3wjlRfMTMH27I+3Je41yGY= @@ -411,6 +473,7 @@ github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYr github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8= github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= +github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -482,7 +545,12 @@ github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1 github.com/smartystreets/goconvey v0.0.0-20181108003508-044398e4856c/go.mod h1:XDJAKZRPZ1CvBcN2aX5YOUTYGHki24fSF0Iv48Ibg0s= github.com/sourcegraph/annotate v0.0.0-20160123013949-f4cad6c6324d/go.mod h1:UdhH50NIW0fCiwBSr0co2m7BnFLdv4fQTgdqdJTHFeE= github.com/sourcegraph/syntaxhighlight v0.0.0-20170531221838-bd320f5d308e/go.mod h1:HuIsMU8RRBOtsCgI77wP899iHVBQpCmg4ErYMZB+2IA= +github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= +github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= +github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= +github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.2.0 h1:Hbg2NidpLE8veEBkEZTL3CvlkUIVzuU9jDplZO54c48= @@ -512,6 +580,7 @@ github.com/uber/jaeger-lib v2.4.1+incompatible h1:td4jdvLcExb4cBISKIpHuGoVXh+dVK github.com/uber/jaeger-lib v2.4.1+incompatible/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U= github.com/ugorji/go v1.1.7 h1:/68gy2h+1mWMrwZFeD1kQialdSzAb432dtpeJ42ovdo= github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= +github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/ugorji/go/codec v1.1.7 h1:2SvQaVZ1ouYrrKKwoSk2pzd4A9evlKJb9oTL+OaLUSs= github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= github.com/viant/assertly v0.4.8/go.mod h1:aGifi++jvCrUaklKEKT0BU95igDNaqkvz+49uaYMPRU= @@ -520,6 +589,7 @@ github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYp github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= github.com/willf/bitset v1.1.9/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= +github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= github.com/yggdrasil-network/yggdrasil-go v0.4.5-0.20220901155642-4f2abece817c h1:/cTmA6pV2Z20BT/FGSmnb5BmJ8eRbDP0HbCB5IO1aKw= github.com/yggdrasil-network/yggdrasil-go v0.4.5-0.20220901155642-4f2abece817c/go.mod h1:cIwhYwX9yT9Bcei59O0oOBSaj+kQP+9aVQUMWHh5R00= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -529,6 +599,8 @@ github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9dec github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +go.etcd.io/bbolt v1.3.5 h1:XAzx9gjCb0Rxj7EoqcClPD1d5ZBxZJk0jbuoPHenBt0= +go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ= go.opencensus.io v0.18.0/go.mod h1:vKdFvxhtzZ9onBp9VKHK8z/sRpBMnKAsufL7wlDrCOA= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= @@ -542,6 +614,7 @@ golang.org/x/build v0.0.0-20190111050920-041ab4dc3f9d/go.mod h1:OWs+y06UdEOHN4y+ golang.org/x/crypto v0.0.0-20180723164146-c126467f60eb/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181030102418-4d3f4d9ffa16/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190313024323-a1f597ede03a/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= @@ -672,6 +745,8 @@ golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181029174526-d69651ed3497/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181221143128-b4a75ba826a6/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190130150945-aca44879d564/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/internal/fulltext/bleve.go b/internal/fulltext/bleve.go new file mode 100644 index 000000000..b07c0e51d --- /dev/null +++ b/internal/fulltext/bleve.go @@ -0,0 +1,164 @@ +// Copyright 2022 The Matrix.org Foundation C.I.C. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !wasm +// +build !wasm + +package fulltext + +import ( + "strings" + + "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/mapping" + "github.com/matrix-org/dendrite/setup/config" + "github.com/matrix-org/gomatrixserverlib" +) + +// Search contains all existing bleve.Index +type Search struct { + FulltextIndex bleve.Index +} + +// IndexElement describes the layout of an element to index +type IndexElement struct { + EventID string + RoomID string + Content string + ContentType string + StreamPosition int64 +} + +// SetContentType sets i.ContentType given an identifier +func (i *IndexElement) SetContentType(v string) { + switch v { + case "m.room.message": + i.ContentType = "content.body" + case gomatrixserverlib.MRoomName: + i.ContentType = "content.name" + case gomatrixserverlib.MRoomTopic: + i.ContentType = "content.topic" + } +} + +// New opens a new/existing fulltext index +func New(cfg config.Fulltext) (fts *Search, err error) { + fts = &Search{} + fts.FulltextIndex, err = openIndex(cfg) + if err != nil { + return nil, err + } + return fts, nil +} + +// Close closes the fulltext index +func (f *Search) Close() error { + return f.FulltextIndex.Close() +} + +// Index indexes the given elements +func (f *Search) Index(elements ...IndexElement) error { + batch := f.FulltextIndex.NewBatch() + + for _, element := range elements { + err := batch.Index(element.EventID, element) + if err != nil { + return err + } + } + return f.FulltextIndex.Batch(batch) +} + +// Delete deletes an indexed element by the eventID +func (f *Search) Delete(eventID string) error { + return f.FulltextIndex.Delete(eventID) +} + +// Search searches the index given a search term, roomIDs and keys. +func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error) { + qry := bleve.NewConjunctionQuery() + termQuery := bleve.NewBooleanQuery() + + terms := strings.Split(term, " ") + for _, term := range terms { + matchQuery := bleve.NewMatchQuery(term) + matchQuery.SetField("Content") + termQuery.AddMust(matchQuery) + } + qry.AddQuery(termQuery) + + roomQuery := bleve.NewBooleanQuery() + for _, roomID := range roomIDs { + roomSearch := bleve.NewMatchQuery(roomID) + roomSearch.SetField("RoomID") + roomQuery.AddShould(roomSearch) + } + if len(roomIDs) > 0 { + qry.AddQuery(roomQuery) + } + keyQuery := bleve.NewBooleanQuery() + for _, key := range keys { + keySearch := bleve.NewMatchQuery(key) + keySearch.SetField("ContentType") + keyQuery.AddShould(keySearch) + } + if len(keys) > 0 { + qry.AddQuery(keyQuery) + } + + s := bleve.NewSearchRequestOptions(qry, limit, from, false) + s.Fields = []string{"*"} + s.SortBy([]string{"_score"}) + if orderByStreamPos { + s.SortBy([]string{"-StreamPosition"}) + } + + return f.FulltextIndex.Search(s) +} + +func openIndex(cfg config.Fulltext) (bleve.Index, error) { + m := getMapping(cfg) + if cfg.InMemory { + return bleve.NewMemOnly(m) + } + if index, err := bleve.Open(string(cfg.IndexPath)); err == nil { + return index, nil + } + + index, err := bleve.New(string(cfg.IndexPath), m) + if err != nil { + return nil, err + } + return index, nil +} + +func getMapping(cfg config.Fulltext) *mapping.IndexMappingImpl { + enFieldMapping := bleve.NewTextFieldMapping() + enFieldMapping.Analyzer = cfg.Language + + eventMapping := bleve.NewDocumentMapping() + eventMapping.AddFieldMappingsAt("Content", enFieldMapping) + eventMapping.AddFieldMappingsAt("StreamPosition", bleve.NewNumericFieldMapping()) + + // Index entries as is + idFieldMapping := bleve.NewKeywordFieldMapping() + eventMapping.AddFieldMappingsAt("ContentType", idFieldMapping) + eventMapping.AddFieldMappingsAt("RoomID", idFieldMapping) + eventMapping.AddFieldMappingsAt("EventID", idFieldMapping) + + indexMapping := bleve.NewIndexMapping() + indexMapping.AddDocumentMapping("Event", eventMapping) + indexMapping.DefaultType = "Event" + return indexMapping +} diff --git a/internal/fulltext/bleve_test.go b/internal/fulltext/bleve_test.go new file mode 100644 index 000000000..84a282423 --- /dev/null +++ b/internal/fulltext/bleve_test.go @@ -0,0 +1,250 @@ +// Copyright 2022 The Matrix.org Foundation C.I.C. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fulltext_test + +import ( + "reflect" + "testing" + + "github.com/matrix-org/gomatrixserverlib" + "github.com/matrix-org/util" + + "github.com/matrix-org/dendrite/internal/fulltext" + "github.com/matrix-org/dendrite/setup/config" +) + +func mustOpenIndex(t *testing.T, tempDir string) *fulltext.Search { + t.Helper() + cfg := config.Fulltext{} + cfg.Defaults(config.DefaultOpts{ + Generate: true, + Monolithic: true, + }) + if tempDir != "" { + cfg.IndexPath = config.Path(tempDir) + cfg.InMemory = false + } + fts, err := fulltext.New(cfg) + if err != nil { + t.Fatal("failed to open fulltext index:", err) + } + return fts +} + +func mustAddTestData(t *testing.T, fts *fulltext.Search, firstStreamPos int64) (eventIDs, roomIDs []string) { + t.Helper() + // create some more random data + var batchItems []fulltext.IndexElement + streamPos := firstStreamPos + + wantRoomID := util.RandomString(16) + + for i := 0; i < 30; i++ { + streamPos++ + eventID := util.RandomString(16) + // Create more data for the first room + if i > 15 { + wantRoomID = util.RandomString(16) + } + e := fulltext.IndexElement{ + EventID: eventID, + RoomID: wantRoomID, + Content: "lorem ipsum", + StreamPosition: streamPos, + } + e.SetContentType("m.room.message") + batchItems = append(batchItems, e) + roomIDs = append(roomIDs, wantRoomID) + eventIDs = append(eventIDs, eventID) + } + e := fulltext.IndexElement{ + EventID: util.RandomString(16), + RoomID: wantRoomID, + Content: "Roomname testing", + StreamPosition: streamPos, + } + e.SetContentType(gomatrixserverlib.MRoomName) + batchItems = append(batchItems, e) + e = fulltext.IndexElement{ + EventID: util.RandomString(16), + RoomID: wantRoomID, + Content: "Room topic fulltext", + StreamPosition: streamPos, + } + e.SetContentType(gomatrixserverlib.MRoomTopic) + batchItems = append(batchItems, e) + if err := fts.Index(batchItems...); err != nil { + t.Fatalf("failed to batch insert elements: %v", err) + } + return eventIDs, roomIDs +} + +func TestOpen(t *testing.T) { + dataDir := t.TempDir() + fts := mustOpenIndex(t, dataDir) + if err := fts.Close(); err != nil { + t.Fatal("unable to close fulltext index", err) + } + + // open existing index + fts = mustOpenIndex(t, dataDir) + defer fts.Close() +} + +func TestIndex(t *testing.T) { + fts := mustOpenIndex(t, "") + defer fts.Close() + + // add some data + var streamPos int64 = 1 + roomID := util.RandomString(8) + eventID := util.RandomString(16) + e := fulltext.IndexElement{ + EventID: eventID, + RoomID: roomID, + Content: "lorem ipsum", + StreamPosition: streamPos, + } + e.SetContentType("m.room.message") + + if err := fts.Index(e); err != nil { + t.Fatal("failed to index element", err) + } + + // create some more random data + mustAddTestData(t, fts, streamPos) +} + +func TestDelete(t *testing.T) { + fts := mustOpenIndex(t, "") + defer fts.Close() + eventIDs, roomIDs := mustAddTestData(t, fts, 0) + res1, err := fts.Search("lorem", roomIDs[:1], nil, 50, 0, false) + if err != nil { + t.Fatal(err) + } + + if err = fts.Delete(eventIDs[0]); err != nil { + t.Fatal(err) + } + + res2, err := fts.Search("lorem", roomIDs[:1], nil, 50, 0, false) + if err != nil { + t.Fatal(err) + } + + if res1.Total <= res2.Total { + t.Fatalf("got unexpected result: %d <= %d", res1.Total, res2.Total) + } +} + +func TestSearch(t *testing.T) { + type args struct { + term string + keys []string + limit int + from int + orderByStreamPos bool + roomIndex []int + } + tests := []struct { + name string + args args + wantCount int + wantErr bool + }{ + { + name: "Can search for many results in one room", + wantCount: 16, + args: args{ + term: "lorem", + roomIndex: []int{0}, + limit: 20, + }, + }, + { + name: "Can search for one result in one room", + wantCount: 1, + args: args{ + term: "lorem", + roomIndex: []int{16}, + limit: 20, + }, + }, + { + name: "Can search for many results in multiple rooms", + wantCount: 17, + args: args{ + term: "lorem", + roomIndex: []int{0, 16}, + limit: 20, + }, + }, + { + name: "Can search for many results in all rooms, reversed", + wantCount: 30, + args: args{ + term: "lorem", + limit: 30, + orderByStreamPos: true, + }, + }, + { + name: "Can search for specific search room name", + wantCount: 1, + args: args{ + term: "testing", + roomIndex: []int{}, + limit: 20, + keys: []string{"content.name"}, + }, + }, + { + name: "Can search for specific search room topic", + wantCount: 1, + args: args{ + term: "fulltext", + roomIndex: []int{}, + limit: 20, + keys: []string{"content.topic"}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := mustOpenIndex(t, "") + eventIDs, roomIDs := mustAddTestData(t, f, 0) + var searchRooms []string + for _, x := range tt.args.roomIndex { + searchRooms = append(searchRooms, roomIDs[x]) + } + t.Logf("searching in rooms: %v - %v\n", searchRooms, tt.args.keys) + + got, err := f.Search(tt.args.term, searchRooms, tt.args.keys, tt.args.limit, tt.args.from, tt.args.orderByStreamPos) + if (err != nil) != tt.wantErr { + t.Errorf("Search() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(len(got.Hits), tt.wantCount) { + t.Errorf("Search() got = %v, want %v", len(got.Hits), tt.wantCount) + } + if tt.args.orderByStreamPos { + if got.Hits[0].ID != eventIDs[29] { + t.Fatalf("expected ID %s, got %s", eventIDs[29], got.Hits[0].ID) + } + } + }) + } +} diff --git a/internal/fulltext/bleve_wasm.go b/internal/fulltext/bleve_wasm.go new file mode 100644 index 000000000..a69a8926e --- /dev/null +++ b/internal/fulltext/bleve_wasm.go @@ -0,0 +1,65 @@ +// Copyright 2022 The Matrix.org Foundation C.I.C. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fulltext + +import ( + "github.com/matrix-org/dendrite/setup/config" + "time" +) + +type Search struct{} +type IndexElement struct { + EventID string + RoomID string + Content string + ContentType string + StreamPosition int64 +} + +type SearchResult struct { + Status interface{} `json:"status"` + Request *interface{} `json:"request"` + Hits []interface{} `json:"hits"` + Total uint64 `json:"total_hits"` + MaxScore float64 `json:"max_score"` + Took time.Duration `json:"took"` + Facets interface{} `json:"facets"` +} + +func (i *IndexElement) SetContentType(v string) {} + +func New(cfg config.Fulltext) (fts *Search, err error) { + return &Search{}, nil +} + +func (f *Search) Close() error { + return nil +} + +func (f *Search) Index(e IndexElement) error { + return nil +} + +func (f *Search) BatchIndex(elements []IndexElement) error { + return nil +} + +func (f *Search) Delete(eventID string) error { + return nil +} + +func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error) { + return SearchResult{}, nil +} diff --git a/setup/base/base.go b/setup/base/base.go index 87f415764..3070bed5f 100644 --- a/setup/base/base.go +++ b/setup/base/base.go @@ -38,6 +38,7 @@ import ( "golang.org/x/net/http2/h2c" "github.com/matrix-org/dendrite/internal/caching" + "github.com/matrix-org/dendrite/internal/fulltext" "github.com/matrix-org/dendrite/internal/httputil" "github.com/matrix-org/dendrite/internal/pushgateway" "github.com/matrix-org/dendrite/internal/sqlutil" @@ -90,6 +91,7 @@ type BaseDendrite struct { Database *sql.DB DatabaseWriter sqlutil.Writer EnableMetrics bool + Fulltext *fulltext.Search startupLock sync.Mutex } @@ -150,6 +152,15 @@ func NewBaseDendrite(cfg *config.Dendrite, componentName string, options ...Base logrus.WithError(err).Panicf("failed to start opentracing") } + var fts *fulltext.Search + isSyncOrMonolith := componentName == "syncapi" || isMonolith + if cfg.SyncAPI.Fulltext.Enabled && isSyncOrMonolith { + fts, err = fulltext.New(cfg.SyncAPI.Fulltext) + if err != nil { + logrus.WithError(err).Panicf("failed to create full text") + } + } + if cfg.Global.Sentry.Enabled { logrus.Info("Setting up Sentry for debugging...") err = sentry.Init(sentry.ClientOptions{ @@ -247,6 +258,7 @@ func NewBaseDendrite(cfg *config.Dendrite, componentName string, options ...Base Database: db, // set if monolith with global connection pool only DatabaseWriter: writer, // set if monolith with global connection pool only EnableMetrics: enableMetrics, + Fulltext: fts, } } diff --git a/setup/config/config_syncapi.go b/setup/config/config_syncapi.go index 4e302ae09..196afb82c 100644 --- a/setup/config/config_syncapi.go +++ b/setup/config/config_syncapi.go @@ -9,6 +9,8 @@ type SyncAPI struct { Database DatabaseOptions `yaml:"database,omitempty"` RealIPHeader string `yaml:"real_ip_header"` + + Fulltext Fulltext `yaml:"fulltext"` } func (c *SyncAPI) Defaults(opts DefaultOpts) { @@ -18,6 +20,7 @@ func (c *SyncAPI) Defaults(opts DefaultOpts) { c.ExternalAPI.Listen = "http://localhost:8073" c.Database.Defaults(20) } + c.Fulltext.Defaults(opts) if opts.Generate { if !opts.Monolithic { c.Database.ConnectionString = "file:syncapi.db" @@ -26,6 +29,7 @@ func (c *SyncAPI) Defaults(opts DefaultOpts) { } func (c *SyncAPI) Verify(configErrs *ConfigErrors, isMonolith bool) { + c.Fulltext.Verify(configErrs, isMonolith) if isMonolith { // polylith required configs below return } @@ -36,3 +40,25 @@ func (c *SyncAPI) Verify(configErrs *ConfigErrors, isMonolith bool) { checkURL(configErrs, "sync_api.internal_api.connect", string(c.InternalAPI.Connect)) checkURL(configErrs, "sync_api.external_api.listen", string(c.ExternalAPI.Listen)) } + +type Fulltext struct { + Enabled bool `yaml:"enabled"` + IndexPath Path `yaml:"index_path"` + InMemory bool `yaml:"in_memory"` // only useful in tests + Language string `yaml:"language"` // the language to use when analysing content +} + +func (f *Fulltext) Defaults(opts DefaultOpts) { + f.Enabled = false + f.IndexPath = "./fulltextindex" + f.Language = "en" + if opts.Generate { + f.Enabled = true + f.InMemory = true + } +} + +func (f *Fulltext) Verify(configErrs *ConfigErrors, isMonolith bool) { + checkNotEmpty(configErrs, "syncapi.fulltext.index_path", string(f.IndexPath)) + checkNotEmpty(configErrs, "syncapi.fulltext.language", f.Language) +} diff --git a/setup/jetstream/streams.go b/setup/jetstream/streams.go index a06a12662..c07d3a0b4 100644 --- a/setup/jetstream/streams.go +++ b/setup/jetstream/streams.go @@ -29,6 +29,7 @@ var ( OutputReadUpdate = "OutputReadUpdate" RequestPresence = "GetPresence" OutputPresenceEvent = "OutputPresenceEvent" + InputFulltextReindex = "InputFulltextReindex" ) var safeCharacters = regexp.MustCompile("[^A-Za-z0-9$]+") diff --git a/test/testrig/base.go b/test/testrig/base.go index 9747ea600..10cc2407b 100644 --- a/test/testrig/base.go +++ b/test/testrig/base.go @@ -22,10 +22,11 @@ import ( "strings" "testing" + "github.com/nats-io/nats.go" + "github.com/matrix-org/dendrite/setup/base" "github.com/matrix-org/dendrite/setup/config" "github.com/matrix-org/dendrite/test" - "github.com/nats-io/nats.go" ) func CreateBaseDendrite(t *testing.T, dbType test.DBType) (*base.BaseDendrite, func()) { @@ -45,6 +46,10 @@ func CreateBaseDendrite(t *testing.T, dbType test.DBType) (*base.BaseDendrite, f Generate: true, Monolithic: true, }) + cfg.SyncAPI.Fulltext.Defaults(config.DefaultOpts{ // use in memory fts + Generate: true, + Monolithic: true, + }) cfg.Global.ServerName = "test" // use a distinct prefix else concurrent postgres/sqlite runs will clash since NATS will use // the file system event with InMemory=true :( @@ -100,6 +105,7 @@ func Base(cfg *config.Dendrite) (*base.BaseDendrite, nats.JetStreamContext, *nat }) } cfg.Global.JetStream.InMemory = true + cfg.SyncAPI.Fulltext.InMemory = true base := base.NewBaseDendrite(cfg, "Tests") js, jc := base.NATS.Prepare(base.ProcessContext, &cfg.Global.JetStream) return base, js, jc