This commit is contained in:
Sumner Evans 2021-11-09 16:03:10 -05:00 committed by GitHub
commit 7b0c6e8206
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 119 additions and 80 deletions

1
changelog.d/11216.misc Normal file
View file

@ -0,0 +1 @@
Optimize `UserDirectoryStore.search_user_dir` when search term is empty. Contributed by @sumnerevans at Beeper.

View file

@ -811,94 +811,132 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
if isinstance(self.database_engine, PostgresEngine):
full_query, exact_query, prefix_query = _parse_query_postgres(search_term)
# If enabled, this config option will rank local users higher than those on
# remote instances.
if self._prefer_local_users_in_search:
# This statement checks whether a given user's user ID contains a server name
# that matches the local server
statement = "* (CASE WHEN user_id LIKE ? THEN 2.0 ELSE 1.0 END)"
additional_ordering_statements.append(statement)
if not search_term:
logger.debug(
"search_user_dir() No search term provided to user_directory, doing plain list"
)
sql = """
SELECT d.user_id AS user_id, display_name, avatar_url
FROM user_directory_search as t
INNER JOIN user_directory AS d USING (user_id)
WHERE
%s
LIMIT ?
""" % (
where_clause,
)
else:
logger.debug(
"search_user_dir() Search term was provided '%s'" % (search_term)
)
ordering_arguments += ("%:" + self._server_name,)
# If enabled, this config option will rank local users higher than those on
# remote instances.
if self._prefer_local_users_in_search:
# This statement checks whether a given user's user ID contains a server name
# that matches the local server
statement = "* (CASE WHEN user_id LIKE ? THEN 2.0 ELSE 1.0 END)"
additional_ordering_statements.append(statement)
# We order by rank and then by whether they have profile info.
# The ranking algorithm is hand tweaked for "best" results. Broadly
# the idea is we give a higher weight to exact matches.
# The array of numbers holds the weights for the various parts of the
# search: (domain, _, display name, localpart)
sql = """
SELECT d.user_id AS user_id, display_name, avatar_url
FROM user_directory_search as t
INNER JOIN user_directory AS d USING (user_id)
WHERE
%(where_clause)s
AND vector @@ to_tsquery('simple', ?)
ORDER BY
(CASE WHEN d.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END)
* (CASE WHEN display_name IS NOT NULL THEN 1.2 ELSE 1.0 END)
* (CASE WHEN avatar_url IS NOT NULL THEN 1.2 ELSE 1.0 END)
* (
3 * ts_rank_cd(
'{0.1, 0.1, 0.9, 1.0}',
vector,
to_tsquery('simple', ?),
8
ordering_arguments += ("%:" + self._server_name,)
# We order by rank and then by whether they have profile info.
# The ranking algorithm is hand tweaked for "best" results. Broadly
# the idea is we give a higher weight to exact matches.
# The array of numbers holds the weights for the various parts of the
# search: (domain, _, display name, localpart)
sql = """
SELECT d.user_id AS user_id, display_name, avatar_url
FROM user_directory_search as t
INNER JOIN user_directory AS d USING (user_id)
WHERE
%(where_clause)s
AND vector @@ to_tsquery('simple', ?)
ORDER BY
(CASE WHEN d.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END)
* (CASE WHEN display_name IS NOT NULL THEN 1.2 ELSE 1.0 END)
* (CASE WHEN avatar_url IS NOT NULL THEN 1.2 ELSE 1.0 END)
* (
3 * ts_rank_cd(
'{0.1, 0.1, 0.9, 1.0}',
vector,
to_tsquery('simple', ?),
8
)
+ ts_rank_cd(
'{0.1, 0.1, 0.9, 1.0}',
vector,
to_tsquery('simple', ?),
8
)
)
+ ts_rank_cd(
'{0.1, 0.1, 0.9, 1.0}',
vector,
to_tsquery('simple', ?),
8
)
)
%(order_case_statements)s
DESC,
display_name IS NULL,
avatar_url IS NULL
LIMIT ?
""" % {
"where_clause": where_clause,
"order_case_statements": " ".join(additional_ordering_statements),
}
args = (
join_args
+ (full_query, exact_query, prefix_query)
+ ordering_arguments
+ (limit + 1,)
)
%(order_case_statements)s
DESC,
display_name IS NULL,
avatar_url IS NULL
LIMIT ?
""" % {
"where_clause": where_clause,
"order_case_statements": " ".join(additional_ordering_statements),
}
args = (
join_args
+ (full_query, exact_query, prefix_query)
+ ordering_arguments
+ (limit + 1,)
)
elif isinstance(self.database_engine, Sqlite3Engine):
search_query = _parse_query_sqlite(search_term)
# If enabled, this config option will rank local users higher than those on
# remote instances.
if self._prefer_local_users_in_search:
# This statement checks whether a given user's user ID contains a server name
# that matches the local server
#
# Note that we need to include a comma at the end for valid SQL
statement = "user_id LIKE ? DESC,"
additional_ordering_statements.append(statement)
if not search_term:
logger.debug(
"search_user_dir() No search term provided to user_directory, doing plain list"
)
sql = """
SELECT d.user_id AS user_id, display_name, avatar_url
FROM user_directory_search as t
INNER JOIN user_directory AS d USING (user_id)
WHERE
%s
LIMIT ?
""" % (
where_clause,
)
else:
logger.debug(
"search_user_dir() Search term was provided '%s'" % (search_term)
)
ordering_arguments += ("%:" + self._server_name,)
# If enabled, this config option will rank local users higher than those on
# remote instances.
if self._prefer_local_users_in_search:
# This statement checks whether a given user's user ID contains a server name
# that matches the local server
#
# Note that we need to include a comma at the end for valid SQL
statement = "user_id LIKE ? DESC,"
additional_ordering_statements.append(statement)
sql = """
SELECT d.user_id AS user_id, display_name, avatar_url
FROM user_directory_search as t
INNER JOIN user_directory AS d USING (user_id)
WHERE
%(where_clause)s
AND value MATCH ?
ORDER BY
rank(matchinfo(user_directory_search)) DESC,
%(order_statements)s
display_name IS NULL,
avatar_url IS NULL
LIMIT ?
""" % {
"where_clause": where_clause,
"order_statements": " ".join(additional_ordering_statements),
}
args = join_args + (search_query,) + ordering_arguments + (limit + 1,)
ordering_arguments += ("%:" + self._server_name,)
sql = """
SELECT d.user_id AS user_id, display_name, avatar_url
FROM user_directory_search as t
INNER JOIN user_directory AS d USING (user_id)
WHERE
%(where_clause)s
AND value MATCH ?
ORDER BY
rank(matchinfo(user_directory_search)) DESC,
%(order_statements)s
display_name IS NULL,
avatar_url IS NULL
LIMIT ?
""" % {
"where_clause": where_clause,
"order_statements": " ".join(additional_ordering_statements),
}
args = join_args + (search_query,) + ordering_arguments + (limit + 1,)
else:
# This should be unreachable.
raise Exception("Unrecognized database engine")