Merge pull request #399 from matrix-org/erikj/search
Return words to highlight in search results
Commit d0f28b46cd

2 changed files with 120 additions and 22 deletions
synapse/handlers/search.py

@@ -139,11 +139,18 @@ class SearchHandler(BaseHandler):
         # Holds the next_batch for the entire result set if one of those exists
         global_next_batch = None
 
+        highlights = set()
+
         if order_by == "rank":
-            results = yield self.store.search_msgs(
+            search_result = yield self.store.search_msgs(
                 room_ids, search_term, keys
             )
 
+            if search_result["highlights"]:
+                highlights.update(search_result["highlights"])
+
+            results = search_result["results"]
+
             results_map = {r["event"].event_id: r for r in results}
 
             rank_map.update({r["event"].event_id: r["rank"] for r in results})
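The store's search methods now hand back a dict rather than a bare list of ranked results, and the handler accumulates highlight words across calls. A minimal sketch of the new contract as consumed above (the literal values are illustrative only, not taken from the diff):

    # Shape now returned by store.search_msgs / store.search_room:
    #   {"results": [{"event": ..., "rank": ...}, ...], "highlights": [...]}
    # "highlights" is None when the server is not running on Postgres.
    search_result = {
        "results": [],                  # illustrative: the ranked event entries
        "highlights": ["cake", "lie"],  # illustrative highlight words
    }

    highlights = set()
    if search_result["highlights"]:
        highlights.update(search_result["highlights"])
    results = search_result["results"]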
@@ -187,11 +194,16 @@ class SearchHandler(BaseHandler):
                 # But only go around 5 times since otherwise synapse will be sad.
                 while len(room_events) < search_filter.limit() and i < 5:
                     i += 1
-                    results = yield self.store.search_room(
+                    search_result = yield self.store.search_room(
                         room_id, search_term, keys, search_filter.limit() * 2,
                         pagination_token=pagination_token,
                     )
 
+                    if search_result["highlights"]:
+                        highlights.update(search_result["highlights"])
+
+                    results = search_result["results"]
+
                     results_map = {r["event"].event_id: r for r in results}
 
                     rank_map.update({r["event"].event_id: r["rank"] for r in results})
@@ -347,7 +359,8 @@ class SearchHandler(BaseHandler):
 
         rooms_cat_res = {
             "results": results,
-            "count": len(results)
+            "count": len(results),
+            "highlights": list(highlights),
         }
 
         if state_results:
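This dict is what ends up under the room_events search category in the /search response, so clients now receive a flat list of words to highlight alongside the ranked events. A hedged sketch of the resulting response shape, with the wrapping keys assumed from the client-server API and illustrative values, event bodies elided:

    # Assumed /search response wrapping; only the "highlights" key is new
    # in this change, and the values here are illustrative.
    response = {
        "search_categories": {
            "room_events": {
                "count": 1,
                "results": [{"rank": 0.42, "result": {"type": "m.room.message"}}],
                "highlights": ["cake"],
            }
        }
    }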
synapse/storage/search.py

@@ -20,6 +20,7 @@ from synapse.api.errors import SynapseError
 from synapse.storage.engines import PostgresEngine, Sqlite3Engine
 
 import logging
+import re
 
 
 logger = logging.getLogger(__name__)
@@ -194,14 +195,21 @@ class SearchStore(BackgroundUpdateStore):
             for ev in events
         }
 
-        defer.returnValue([
-            {
-                "event": event_map[r["event_id"]],
-                "rank": r["rank"],
-            }
-            for r in results
-            if r["event_id"] in event_map
-        ])
+        highlights = None
+        if isinstance(self.database_engine, PostgresEngine):
+            highlights = yield self._find_highlights_in_postgres(search_term, events)
+
+        defer.returnValue({
+            "results": [
+                {
+                    "event": event_map[r["event_id"]],
+                    "rank": r["rank"],
+                }
+                for r in results
+                if r["event_id"] in event_map
+            ],
+            "highlights": highlights,
+        })
 
     @defer.inlineCallbacks
     def search_room(self, room_id, search_term, keys, limit, pagination_token=None):
@@ -294,14 +302,91 @@ class SearchStore(BackgroundUpdateStore):
             for ev in events
         }
 
-        defer.returnValue([
-            {
-                "event": event_map[r["event_id"]],
-                "rank": r["rank"],
-                "pagination_token": "%s,%s" % (
-                    r["topological_ordering"], r["stream_ordering"]
-                ),
-            }
-            for r in results
-            if r["event_id"] in event_map
-        ])
+        highlights = None
+        if isinstance(self.database_engine, PostgresEngine):
+            highlights = yield self._find_highlights_in_postgres(search_term, events)
+
+        defer.returnValue({
+            "results": [
+                {
+                    "event": event_map[r["event_id"]],
+                    "rank": r["rank"],
+                    "pagination_token": "%s,%s" % (
+                        r["topological_ordering"], r["stream_ordering"]
+                    ),
+                }
+                for r in results
+                if r["event_id"] in event_map
+            ],
+            "highlights": highlights,
+        })
+
+    def _find_highlights_in_postgres(self, search_term, events):
+        """Given a list of events and a search term, return a list of words
+        that match from the content of the events.
+
+        This is used to give a list of words that clients can match against to
+        highlight the matching parts.
+
+        Args:
+            search_term (str)
+            events (list): A list of events
+
+        Returns:
+            deferred: A set of strings.
+        """
+        def f(txn):
+            highlight_words = set()
+            for event in events:
+                # As a hack we simply join values of all possible keys. This is
+                # fine since we're only using them to find possible highlights.
+                values = []
+                for key in ("body", "name", "topic"):
+                    v = event.content.get(key, None)
+                    if v:
+                        values.append(v)
+
+                if not values:
+                    continue
+
+                value = " ".join(values)
+
+                # We need to find some values for StartSel and StopSel that
+                # aren't in the value so that we can pick results out.
+                start_sel = "<"
+                stop_sel = ">"
+
+                while start_sel in value:
+                    start_sel += "<"
+                while stop_sel in value:
+                    stop_sel += ">"
+
+                query = "SELECT ts_headline(?, plainto_tsquery('english', ?), %s)" % (
+                    _to_postgres_options({
+                        "StartSel": start_sel,
+                        "StopSel": stop_sel,
+                        "MaxFragments": "50",
+                    })
+                )
+                txn.execute(query, (value, search_term,))
+                headline, = txn.fetchall()[0]
+
+                # Now we need to pick the possible highlights out of the headline
+                # result.
+                matcher_regex = "%s(.*?)%s" % (
+                    re.escape(start_sel),
+                    re.escape(stop_sel),
+                )
+
+                res = re.findall(matcher_regex, headline)
+                highlight_words.update([r.lower() for r in res])
+
+            return highlight_words
+
+        return self.runInteraction("_find_highlights", f)
+
+
+def _to_postgres_options(options_dict):
+    return "'%s'" % (
+        ",".join("%s=%s" % (k, v) for k, v in options_dict.items()),
+    )
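The highlight extraction relies on Postgres ts_headline(), which wraps every matched word in the configured StartSel/StopSel markers; the regex then pulls those words back out of the returned fragment. A self-contained sketch of that extraction step, using a hard-coded string in place of a real ts_headline() result so no database is needed:

    import re

    def _to_postgres_options(options_dict):
        # Same helper as in the diff above: renders the options dict as a
        # quoted "key=value,key=value" string for ts_headline().
        return "'%s'" % (
            ",".join("%s=%s" % (k, v) for k, v in options_dict.items()),
        )

    start_sel, stop_sel = "<", ">"

    # The options fragment interpolated into the SQL, e.g.
    # 'StartSel=<,StopSel=>,MaxFragments=50'
    print(_to_postgres_options({
        "StartSel": start_sel,
        "StopSel": stop_sel,
        "MaxFragments": "50",
    }))

    # Stand-in for a ts_headline() result: matched words come back wrapped
    # in the chosen selector characters.
    headline = "the <cake> is a lie, but more <Cake> was promised"

    matcher_regex = "%s(.*?)%s" % (re.escape(start_sel), re.escape(stop_sel))
    highlight_words = set(m.lower() for m in re.findall(matcher_regex, headline))
    print(highlight_words)  # {'cake'}

Picking selector characters that never occur in the source text is what makes the regex extraction unambiguous, which is why the real code keeps appending "<" and ">" until that holds.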