forked from MirrorHub/synapse
Treat "\u0000" as "\u0020" for the purposes of message search (message indexing) (#10820)
* add test to check if null code points are being inserted * add logic to detect and replace null code points before insertion into db * lints * add license to test * change approach to null substitution * add type hint for SearchEntry * Add changelog entry Signed-off-by: H.Shay <shaysquared@gmail.com> * updated changelog * update chanelog message * remove duplicate changelog * Update synapse/storage/databases/main/events.py remove extra space Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com> * rename and move test file, update tests, delete old test file * fix typo in comments * update _find_highlights_in_postgres to replace null byte with space * replace null byte in sqlite search insertion * beef up and reorganize test for this pr * update changelog * add type hints and update docstring * check db engine directly vs using env variable * refactor tests to be less repetetive * move rplace logic into seperate function * requested changes * Fix typo. * Update synapse/storage/databases/main/search.py Co-authored-by: reivilibre <olivier@librepush.net> * Update changelog.d/10820.misc Co-authored-by: Aaron Raimist <aaron@raim.ist> Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com> Co-authored-by: reivilibre <olivier@librepush.net> Co-authored-by: Aaron Raimist <aaron@raim.ist>
This commit is contained in:
parent
03db6701d5
commit
f78b68a96b
3 changed files with 100 additions and 9 deletions
1
changelog.d/10820.misc
Normal file
1
changelog.d/10820.misc
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Fix a long-standing bug where an `m.room.message` event containing a null byte would cause an internal server error.
|
|
@ -15,12 +15,12 @@
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from typing import Collection, List, Optional, Set
|
from typing import Collection, Iterable, List, Optional, Set
|
||||||
|
|
||||||
from synapse.api.errors import SynapseError
|
from synapse.api.errors import SynapseError
|
||||||
from synapse.events import EventBase
|
from synapse.events import EventBase
|
||||||
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
|
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
|
||||||
from synapse.storage.database import DatabasePool
|
from synapse.storage.database import DatabasePool, LoggingTransaction
|
||||||
from synapse.storage.databases.main.events_worker import EventRedactBehaviour
|
from synapse.storage.databases.main.events_worker import EventRedactBehaviour
|
||||||
from synapse.storage.engines import PostgresEngine, Sqlite3Engine
|
from synapse.storage.engines import PostgresEngine, Sqlite3Engine
|
||||||
|
|
||||||
|
@ -32,14 +32,24 @@ SearchEntry = namedtuple(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _clean_value_for_search(value: str) -> str:
|
||||||
|
"""
|
||||||
|
Replaces any null code points in the string with spaces as
|
||||||
|
Postgres and SQLite do not like the insertion of strings with
|
||||||
|
null code points into the full-text search tables.
|
||||||
|
"""
|
||||||
|
return value.replace("\u0000", " ")
|
||||||
|
|
||||||
|
|
||||||
class SearchWorkerStore(SQLBaseStore):
|
class SearchWorkerStore(SQLBaseStore):
|
||||||
def store_search_entries_txn(self, txn, entries):
|
def store_search_entries_txn(
|
||||||
|
self, txn: LoggingTransaction, entries: Iterable[SearchEntry]
|
||||||
|
) -> None:
|
||||||
"""Add entries to the search table
|
"""Add entries to the search table
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
txn (cursor):
|
txn:
|
||||||
entries (iterable[SearchEntry]):
|
entries: entries to be added to the table
|
||||||
entries to be added to the table
|
|
||||||
"""
|
"""
|
||||||
if not self.hs.config.enable_search:
|
if not self.hs.config.enable_search:
|
||||||
return
|
return
|
||||||
|
@ -55,7 +65,7 @@ class SearchWorkerStore(SQLBaseStore):
|
||||||
entry.event_id,
|
entry.event_id,
|
||||||
entry.room_id,
|
entry.room_id,
|
||||||
entry.key,
|
entry.key,
|
||||||
entry.value,
|
_clean_value_for_search(entry.value),
|
||||||
entry.stream_ordering,
|
entry.stream_ordering,
|
||||||
entry.origin_server_ts,
|
entry.origin_server_ts,
|
||||||
)
|
)
|
||||||
|
@ -70,11 +80,16 @@ class SearchWorkerStore(SQLBaseStore):
|
||||||
" VALUES (?,?,?,?)"
|
" VALUES (?,?,?,?)"
|
||||||
)
|
)
|
||||||
args = (
|
args = (
|
||||||
(entry.event_id, entry.room_id, entry.key, entry.value)
|
(
|
||||||
|
entry.event_id,
|
||||||
|
entry.room_id,
|
||||||
|
entry.key,
|
||||||
|
_clean_value_for_search(entry.value),
|
||||||
|
)
|
||||||
for entry in entries
|
for entry in entries
|
||||||
)
|
)
|
||||||
|
|
||||||
txn.execute_batch(sql, args)
|
txn.execute_batch(sql, args)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# This should be unreachable.
|
# This should be unreachable.
|
||||||
raise Exception("Unrecognized database engine")
|
raise Exception("Unrecognized database engine")
|
||||||
|
@ -646,6 +661,7 @@ class SearchStore(SearchBackgroundUpdateStore):
|
||||||
for key in ("body", "name", "topic"):
|
for key in ("body", "name", "topic"):
|
||||||
v = event.content.get(key, None)
|
v = event.content.get(key, None)
|
||||||
if v:
|
if v:
|
||||||
|
v = _clean_value_for_search(v)
|
||||||
values.append(v)
|
values.append(v)
|
||||||
|
|
||||||
if not values:
|
if not values:
|
||||||
|
|
74
tests/storage/test_room_search.py
Normal file
74
tests/storage/test_room_search.py
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
# Copyright 2021 The Matrix.org Foundation C.I.C.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import synapse.rest.admin
|
||||||
|
from synapse.rest.client import login, room
|
||||||
|
from synapse.storage.engines import PostgresEngine
|
||||||
|
|
||||||
|
from tests.unittest import HomeserverTestCase
|
||||||
|
|
||||||
|
|
||||||
|
class NullByteInsertionTest(HomeserverTestCase):
|
||||||
|
servlets = [
|
||||||
|
synapse.rest.admin.register_servlets_for_client_rest_resource,
|
||||||
|
login.register_servlets,
|
||||||
|
room.register_servlets,
|
||||||
|
]
|
||||||
|
|
||||||
|
def test_null_byte(self):
|
||||||
|
"""
|
||||||
|
Postgres/SQLite don't like null bytes going into the search tables. Internally
|
||||||
|
we replace those with a space.
|
||||||
|
|
||||||
|
Ensure this doesn't break anything.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Register a user and create a room, create some messages
|
||||||
|
self.register_user("alice", "password")
|
||||||
|
access_token = self.login("alice", "password")
|
||||||
|
room_id = self.helper.create_room_as("alice", tok=access_token)
|
||||||
|
|
||||||
|
# Send messages and ensure they don't cause an internal server
|
||||||
|
# error
|
||||||
|
for body in ["hi\u0000bob", "another message", "hi alice"]:
|
||||||
|
response = self.helper.send(room_id, body, tok=access_token)
|
||||||
|
self.assertIn("event_id", response)
|
||||||
|
|
||||||
|
# Check that search works for the message where the null byte was replaced
|
||||||
|
store = self.hs.get_datastore()
|
||||||
|
result = self.get_success(
|
||||||
|
store.search_msgs([room_id], "hi bob", ["content.body"])
|
||||||
|
)
|
||||||
|
self.assertEquals(result.get("count"), 1)
|
||||||
|
if isinstance(store.database_engine, PostgresEngine):
|
||||||
|
self.assertIn("hi", result.get("highlights"))
|
||||||
|
self.assertIn("bob", result.get("highlights"))
|
||||||
|
|
||||||
|
# Check that search works for an unrelated message
|
||||||
|
result = self.get_success(
|
||||||
|
store.search_msgs([room_id], "another", ["content.body"])
|
||||||
|
)
|
||||||
|
self.assertEquals(result.get("count"), 1)
|
||||||
|
if isinstance(store.database_engine, PostgresEngine):
|
||||||
|
self.assertIn("another", result.get("highlights"))
|
||||||
|
|
||||||
|
# Check that search works for a search term that overlaps with the message
|
||||||
|
# containing a null byte and an unrelated message.
|
||||||
|
result = self.get_success(store.search_msgs([room_id], "hi", ["content.body"]))
|
||||||
|
self.assertEquals(result.get("count"), 2)
|
||||||
|
result = self.get_success(
|
||||||
|
store.search_msgs([room_id], "hi alice", ["content.body"])
|
||||||
|
)
|
||||||
|
if isinstance(store.database_engine, PostgresEngine):
|
||||||
|
self.assertIn("alice", result.get("highlights"))
|
Loading…
Reference in a new issue