Filter the results of user directory searching via the spam checker (#6888)

Add a method to the spam checker to filter the user directory results.
This commit is contained in:
Patrick Cloke 2020-02-14 07:17:54 -05:00 committed by GitHub
parent df1c98c22a
commit 49f877d32e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 135 additions and 2 deletions

1
changelog.d/6888.feature Normal file
View file

@ -0,0 +1 @@
The result of a user directory search can now be filtered via the spam checker.

View file

@ -54,6 +54,9 @@ class ExampleSpamChecker:
def user_may_publish_room(self, userid, room_id):
    """Say whether ``userid`` may publish ``room_id``; this example always allows it."""
    return True
def check_username_for_spam(self, user_profile):
    """Say whether ``user_profile`` is spammy; this example allows all usernames."""
    return False
```
## Configuration

View file

@ -15,6 +15,7 @@
# limitations under the License.
import inspect
from typing import Dict
from synapse.spam_checker_api import SpamCheckerApi
@ -125,3 +126,29 @@ class SpamChecker(object):
return True
return self.spam_checker.user_may_publish_room(userid, room_id)
def check_username_for_spam(self, user_profile: Dict[str, str]) -> bool:
    """Ask the configured spam checker whether a user profile is "spammy".

    Users reported as spammy are not included in user directory results.

    Args:
        user_profile: The user information to check. It contains the keys:
            * user_id
            * display_name
            * avatar_url

    Returns:
        True if the user is spammy. False if no spam checker is configured
        or the spam checker does not provide ``check_username_for_spam``.
    """
    module = self.spam_checker
    if module is None:
        # No spam checker is configured, so nothing is filtered.
        return False

    # For backwards compatibility, a spam checker without this method is
    # treated as not interfering.
    hook = getattr(module, "check_username_for_spam", None)
    if hook:
        # Hand over a copy so the spam checker cannot modify the caller's
        # profile object.
        return hook(user_profile.copy())
    return False

View file

@ -52,6 +52,7 @@ class UserDirectoryHandler(StateDeltasHandler):
self.is_mine_id = hs.is_mine_id
self.update_user_directory = hs.config.update_user_directory
self.search_all_users = hs.config.user_directory_search_all_users
self.spam_checker = hs.get_spam_checker()
# The current position in the current_state_delta stream
self.pos = None
@ -65,7 +66,7 @@ class UserDirectoryHandler(StateDeltasHandler):
# we start populating the user directory
self.clock.call_later(0, self.notify_new_event)
def search_users(self, user_id, search_term, limit):
async def search_users(self, user_id, search_term, limit):
"""Searches for users in directory
Returns:
@ -82,7 +83,16 @@ class UserDirectoryHandler(StateDeltasHandler):
]
}
"""
return self.store.search_user_dir(user_id, search_term, limit)
results = await self.store.search_user_dir(user_id, search_term, limit)
# Remove any spammy users from the results.
results["results"] = [
user
for user in results["results"]
if not self.spam_checker.check_username_for_spam(user)
]
return results
def notify_new_event(self):
"""Called when there may be more deltas to process

View file

@ -147,6 +147,98 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
s = self.get_success(self.handler.search_users(u1, "user3", 10))
self.assertEqual(len(s["results"]), 0)
def test_spam_checker(self):
    """
    A user who fails the spam check will not appear in search results.
    """
    searcher = self.register_user("user1", "pass")
    searcher_token = self.login(searcher, "pass")
    target = self.register_user("user2", "pass")
    target_token = self.login(target, "pass")

    def count_matches():
        # Number of directory hits when user1 searches for "user2".
        found = self.get_success(self.handler.search_users(searcher, "user2", 10))
        return len(found["results"])

    # Users are not added to the directory until they join a room, so the
    # search starts out empty.
    self.assertEqual(count_matches(), 0)

    room_id = self.helper.create_room_as(
        searcher, is_public=False, tok=searcher_token
    )
    self.helper.invite(room_id, src=searcher, targ=target, tok=searcher_token)
    self.helper.join(room_id, user=target, tok=target_token)

    # Check we have populated the database correctly.
    private_sharers = self.get_users_who_share_private_rooms()
    in_public_rooms = self.get_users_in_public_rooms()

    self.assertEqual(
        self._compress_shared(private_sharers),
        {(searcher, target, room_id), (target, searcher, room_id)},
    )
    self.assertEqual(in_public_rooms, [])

    # With the default setup, user1 finds user2.
    self.assertEqual(count_matches(), 1)

    # Configure a spam checker that does not filter any users.
    checker_wrapper = self.hs.get_spam_checker()

    class AllowAll(object):
        def check_username_for_spam(self, user_profile):
            # Allow all users.
            return False

    checker_wrapper.spam_checker = AllowAll()

    # The results do not change: user1 still finds user2.
    self.assertEqual(count_matches(), 1)

    # Configure a spam checker that filters all users.
    class BlockAll(object):
        def check_username_for_spam(self, user_profile):
            # All users are spammy.
            return True

    checker_wrapper.spam_checker = BlockAll()

    # User1 now gets no search results for user2.
    self.assertEqual(count_matches(), 0)
def test_legacy_spam_checker(self):
    """
    A spam checker that lacks check_username_for_spam leaves results alone.
    """
    searcher = self.register_user("user1", "pass")
    searcher_token = self.login(searcher, "pass")
    target = self.register_user("user2", "pass")
    target_token = self.login(target, "pass")

    def count_matches():
        # Number of directory hits when user1 searches for "user2".
        found = self.get_success(self.handler.search_users(searcher, "user2", 10))
        return len(found["results"])

    # Users are not added to the directory until they join a room, so the
    # search starts out empty.
    self.assertEqual(count_matches(), 0)

    room_id = self.helper.create_room_as(
        searcher, is_public=False, tok=searcher_token
    )
    self.helper.invite(room_id, src=searcher, targ=target, tok=searcher_token)
    self.helper.join(room_id, user=target, tok=target_token)

    # Check we have populated the database correctly.
    private_sharers = self.get_users_who_share_private_rooms()
    in_public_rooms = self.get_users_in_public_rooms()

    self.assertEqual(
        self._compress_shared(private_sharers),
        {(searcher, target, room_id), (target, searcher, room_id)},
    )
    self.assertEqual(in_public_rooms, [])

    # Install a bare object as the spam checker module: it has none of the
    # spam checker methods, including check_username_for_spam.
    checker_wrapper = self.hs.get_spam_checker()
    checker_wrapper.spam_checker = object()

    # The search is unaffected: user1 still finds user2.
    self.assertEqual(count_matches(), 1)
def _compress_shared(self, shared):
"""
Compress a list of users who share rooms dicts to a list of tuples.