From c8baada94a6539cfcd1ec1316892302ae2271f4c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Oct 2015 17:09:53 +0100 Subject: [PATCH 1/9] Filter search results --- synapse/handlers/search.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 22808b9c07..473aab53f0 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -18,6 +18,7 @@ from twisted.internet import defer from ._base import BaseHandler from synapse.api.constants import Membership +from synapse.api.filtering import Filter from synapse.api.errors import SynapseError from synapse.events.utils import serialize_event @@ -49,9 +50,12 @@ class SearchHandler(BaseHandler): keys = content["search_categories"]["room_events"].get("keys", [ "content.body", "content.name", "content.topic", ]) + filter_dict = content["search_categories"]["room_events"].get("filter", {}) except KeyError: raise SynapseError(400, "Invalid search query") + filtr = Filter(filter_dict) + # TODO: Search through left rooms too rooms = yield self.store.get_rooms_for_user_where_membership_is( user.to_string(), @@ -64,11 +68,12 @@ class SearchHandler(BaseHandler): rank_map, event_map = yield self.store.search_msgs(room_ids, search_term, keys) + filtered_events = filtr.filter(event_map.values()) + allowed_events = yield self._filter_events_for_client( - user.to_string(), event_map.values() + user.to_string(), filtered_events ) - # TODO: Filter allowed_events # TODO: Add a limit time_now = self.clock.time_msec() From 5c41224a89a9ceedeb5db10f972c10344382faf2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Oct 2015 10:09:26 +0100 Subject: [PATCH 2/9] Filter room ids before hitting the database --- synapse/api/filtering.py | 20 ++++++++++++++++++++ synapse/handlers/search.py | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 60b6648e0d..ab14b47281 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -202,6 +202,26 @@ class Filter(object): return True + def filter_rooms(self, room_ids): + """Apply the 'rooms' filter to a given list of rooms. + + Args: + room_ids (list): A list of room_ids. + + Returns: + list: A list of room_ids that match the filter + """ + room_ids = set(room_ids) + + disallowed_rooms = set(self.filter_json.get("not_rooms", [])) + room_ids -= disallowed_rooms + + allowed_rooms = self.filter_json.get("rooms", None) + if allowed_rooms is not None: + room_ids &= set(allowed_rooms) + + return room_ids + def filter(self, events): return filter(self.check, events) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 473aab53f0..f53e5d35ac 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -64,7 +64,7 @@ class SearchHandler(BaseHandler): ) room_ids = set(r.room_id for r in rooms) - # TODO: Apply room filter to rooms list + room_ids = filtr.filter_rooms(room_ids) rank_map, event_map = yield self.store.search_msgs(room_ids, search_term, keys) From 4d25bc6c92c3d219786892c5d510e0c4c4eb8b96 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 11:12:28 +0100 Subject: [PATCH 3/9] Move FTS to delta 25 --- synapse/storage/prepare_database.py | 2 +- synapse/storage/schema/delta/{24 => 25}/fts.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename synapse/storage/schema/delta/{24 => 25}/fts.py (96%) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 1ddf55be4d..1a74d6e360 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 24 +SCHEMA_VERSION = 25 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/24/fts.py b/synapse/storage/schema/delta/25/fts.py similarity index 96% rename from synapse/storage/schema/delta/24/fts.py rename to synapse/storage/schema/delta/25/fts.py index 0c752d8426..fd181fc630 100644 --- a/synapse/storage/schema/delta/24/fts.py +++ b/synapse/storage/schema/delta/25/fts.py @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) POSTGRES_SQL = """ -CREATE TABLE event_search ( +CREATE TABLE IF NOT EXISTS event_search ( event_id TEXT, room_id TEXT, key TEXT, @@ -53,7 +53,7 @@ CREATE INDEX event_search_ev_ridx ON event_search(room_id); SQLITE_TABLE = ( - "CREATE VIRTUAL TABLE event_search USING fts3 ( event_id, room_id, key, value)" + "CREATE VIRTUAL TABLE IF NOT EXISTS event_search USING fts3 ( event_id, room_id, key, value)" ) From f142898f529f89c5bd90710db2d0771894b0b33f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 11:18:01 +0100 Subject: [PATCH 4/9] PEP8 --- synapse/storage/schema/delta/25/fts.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py index fd181fc630..ed3cc06557 100644 --- a/synapse/storage/schema/delta/25/fts.py +++ b/synapse/storage/schema/delta/25/fts.py @@ -53,7 +53,8 @@ CREATE INDEX event_search_ev_ridx ON event_search(room_id); SQLITE_TABLE = ( - "CREATE VIRTUAL TABLE IF NOT EXISTS event_search USING fts3 ( event_id, room_id, key, value)" + "CREATE VIRTUAL TABLE IF NOT EXISTS event_search" + " USING fts3 ( event_id, room_id, key, value)" ) From ba02bba88c6895bc9196d226a2bbc8047b93e28b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 13:25:22 +0100 Subject: [PATCH 5/9] Limit max number of SQL vars --- synapse/storage/search.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index a3c69c5ab3..810b5406ad 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -36,10 +36,12 @@ class SearchStore(SQLBaseStore): clauses = [] args = [] - clauses.append( - "room_id IN (%s)" % (",".join(["?"] * len(room_ids)),) - ) - args.extend(room_ids) + # Make sure we don't explode because the person is in too many rooms. + if len(room_ids) > 500: + clauses.append( + "room_id IN (%s)" % (",".join(["?"] * len(room_ids)),) + ) + args.extend(room_ids) local_clauses = [] for key in keys: From 232beb3a3c28ccdc5388daa9396d5054b7768b12 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 15:02:35 +0100 Subject: [PATCH 6/9] Use namedtuple as return value --- synapse/handlers/search.py | 4 +++- synapse/storage/search.py | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index f53e5d35ac..bdc79ffc55 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -66,7 +66,9 @@ class SearchHandler(BaseHandler): room_ids = filtr.filter_rooms(room_ids) - rank_map, event_map = yield self.store.search_msgs(room_ids, search_term, keys) + rank_map, event_map, _ = yield self.store.search_msgs( + room_ids, search_term, keys + ) filtered_events = filtr.filter(event_map.values()) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 810b5406ad..41451ade57 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -18,6 +18,17 @@ from twisted.internet import defer from _base import SQLBaseStore from synapse.storage.engines import PostgresEngine, Sqlite3Engine +from collections import namedtuple + +"""The result of a search. + +Fields: + rank_map (dict): Mapping event_id -> rank + event_map (dict): Mapping event_id -> event + pagination_token (str): Pagination token +""" +SearchResult = namedtuple("SearchResult", ("rank_map", "event_map", "pagination_token")) + class SearchStore(SQLBaseStore): @defer.inlineCallbacks @@ -31,7 +42,7 @@ class SearchStore(SQLBaseStore): "content.body", "content.name", "content.topic" Returns: - 2-tuple of (dict event_id -> rank, dict event_id -> event) + SearchResult """ clauses = [] args = [] @@ -85,11 +96,12 @@ class SearchStore(SQLBaseStore): for ev in events } - defer.returnValue(( + defer.returnValue(SearchResult( { r["event_id"]: r["rank"] for r in results if r["event_id"] in event_map }, - event_map + event_map, + None )) From fb0fecd0b9f430670ca6de1f4746601dd6cc24f8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 16:18:35 +0100 Subject: [PATCH 7/9] LESS THAN --- synapse/storage/search.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 41451ade57..e658e07dc7 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -48,7 +48,8 @@ class SearchStore(SQLBaseStore): args = [] # Make sure we don't explode because the person is in too many rooms. - if len(room_ids) > 500: + # We filter the results regardless. + if len(room_ids) < 500: clauses.append( "room_id IN (%s)" % (",".join(["?"] * len(room_ids)),) ) From 2980136d7535077f0513b8a12fd7f224700ca140 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 16:19:53 +0100 Subject: [PATCH 8/9] Rename --- synapse/handlers/search.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index bdc79ffc55..bbe82b1425 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -54,7 +54,7 @@ class SearchHandler(BaseHandler): except KeyError: raise SynapseError(400, "Invalid search query") - filtr = Filter(filter_dict) + search_filter = Filter(filter_dict) # TODO: Search through left rooms too rooms = yield self.store.get_rooms_for_user_where_membership_is( @@ -64,13 +64,13 @@ class SearchHandler(BaseHandler): ) room_ids = set(r.room_id for r in rooms) - room_ids = filtr.filter_rooms(room_ids) + room_ids = search_filter.filter_rooms(room_ids) rank_map, event_map, _ = yield self.store.search_msgs( room_ids, search_term, keys ) - filtered_events = filtr.filter(event_map.values()) + filtered_events = search_filter.filter(event_map.values()) allowed_events = yield self._filter_events_for_client( user.to_string(), filtered_events From 671ac699f1d4672bed3817b3cafb7498df99c030 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 16:54:56 +0100 Subject: [PATCH 9/9] Actually filter results --- synapse/storage/search.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index e658e07dc7..9608b5d6a7 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -48,7 +48,7 @@ class SearchStore(SQLBaseStore): args = [] # Make sure we don't explode because the person is in too many rooms. - # We filter the results regardless. + # We filter the results below regardless. if len(room_ids) < 500: clauses.append( "room_id IN (%s)" % (",".join(["?"] * len(room_ids)),) @@ -66,13 +66,13 @@ class SearchStore(SQLBaseStore): if isinstance(self.database_engine, PostgresEngine): sql = ( - "SELECT ts_rank_cd(vector, query) AS rank, event_id" + "SELECT ts_rank_cd(vector, query) AS rank, room_id, event_id" " FROM plainto_tsquery('english', ?) as query, event_search" " WHERE vector @@ query" ) elif isinstance(self.database_engine, Sqlite3Engine): sql = ( - "SELECT 0 as rank, event_id FROM event_search" + "SELECT 0 as rank, room_id, event_id FROM event_search" " WHERE value MATCH ?" ) else: @@ -90,6 +90,8 @@ class SearchStore(SQLBaseStore): "search_msgs", self.cursor_to_dict, sql, *([search_term] + args) ) + results = filter(lambda row: row["room_id"] in room_ids, results) + events = yield self._get_events([r["event_id"] for r in results]) event_map = {