diff --git a/extensions/Makefile.am b/extensions/Makefile.am index 0edd0d719..0137b8897 100644 --- a/extensions/Makefile.am +++ b/extensions/Makefile.am @@ -1,7 +1,7 @@ AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/librb/include $(LTDLINCL) AM_LDFLAGS = -module -export-dynamic -avoid-version -no-undefined -shared AM_LDFLAGS += -export-symbols-regex _mheader -LIBS += $(top_srcdir)/ircd/libircd.la +LIBS += $(top_srcdir)/librb/src/librb.la $(top_srcdir)/ircd/libircd.la extensiondir=@moduledir@/extensions @@ -64,6 +64,7 @@ extension_LTLIBRARIES = \ no_kill_services.la \ no_locops.la \ no_oper_invis.la \ + spamfilter_nicks.la \ spy_admin_notice.la \ spy_info_notice.la \ spy_links_notice.la \ diff --git a/extensions/spamfilter_nicks.c b/extensions/spamfilter_nicks.c new file mode 100644 index 000000000..8c18a2010 --- /dev/null +++ b/extensions/spamfilter_nicks.c @@ -0,0 +1,431 @@ +/************************************************************************ + * charybdis: an advanced ircd. extensions/spamfilter_nicks.c + * Copyright (C) 2016 Jason Volk + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "stdinc.h" +#include "modules.h" +#include "hook.h" +#include "client.h" +#include "send.h" +#include "hash.h" +#include "newconf.h" +#include "spamfilter.h" + + +/* Conf items & defaults */ +size_t conf_limit = 5; +size_t conf_nicklen_min = 4; +size_t conf_bloom_size = 1024 * 64; +size_t conf_bloom_bits = 16; +time_t conf_bloom_refresh = 86400; + + +/* Bloom filter hashes */ +static +uint64_t bloom_hash_fnv(const char *const str) +{ + return fnv_hash_upper((const unsigned char *)str, conf_bloom_bits); +} + +static +uint64_t bloom_hash_bernstein(const char *const str) +{ + uint64_t ret = 7681; + for(size_t i = 0; str[i]; i++) + ret = ret * 33ULL + str[i]; + + return ret; +} + +#define NUM_HASHES 2 +uint64_t (*bloom_hashes[NUM_HASHES])(const char *const str) = +{ + bloom_hash_fnv, + bloom_hash_bernstein, +}; + + +/* Bloom filter state */ +uint8_t *bloom[NUM_HASHES]; +uint64_t bloom_salt; +size_t bloom_size; +size_t bloom_members; +time_t bloom_flushed; +struct rb_radixtree *chans; // Channels with MODE_SPAMFILTER that participate in the bloom filter + + +static +void bloom_flush(void) +{ + for(size_t i = 0; i < NUM_HASHES; i++) + memset(bloom[i], 0x0, bloom_size); + + bloom_flushed = rb_current_time(); + bloom_members = 0; +} + +static +void bloom_destroy(void) +{ + for(size_t i = 0; i < NUM_HASHES; i++) + { + rb_free(bloom[i]); + bloom[i] = NULL; + } + + bloom_members = 0; + bloom_size = 0; +} + +static +void bloom_create(const size_t size) +{ + if(!size) + return; + + for(size_t i = 0; i < NUM_HASHES; i++) + bloom[i] = rb_malloc(size); + + bloom_size = size; + bloom_flush(); +} + +static +void bloom_add(const size_t filter, + uint64_t hash) +{ + hash += bloom_salt; + hash %= bloom_size * 8UL; + bloom[filter][hash / 8UL] |= (1U << (hash % 8UL)); +} + +static +int bloom_test(const size_t filter, + uint64_t hash) +{ + hash += bloom_salt; + hash %= bloom_size * 8UL; + const int bit = hash % 8UL; + return (bloom[filter][hash / 8UL] & (1U << bit)) >> bit; +} + +static +void bloom_add_str(const char *const str) +{ + for(size_t i = 0; i < NUM_HASHES; i++) + bloom_add(i, bloom_hashes[i](str)); + + bloom_members++; +} + +static +int bloom_test_str(const char *const str) +{ + unsigned int count = 0; + for(size_t i = 0; i < NUM_HASHES; i++) + count += bloom_test(i, bloom_hashes[i](str)); + + return count >= NUM_HASHES; +} + + +static +int chans_has(const struct Channel *const chptr) +{ + return rb_radixtree_retrieve(chans, chptr->chname) != NULL; +} + +static +int chans_add(struct Channel *const chptr) +{ + if(!rb_radixtree_add(chans, chptr->chname, chptr)) + return 0; + + rb_dlink_node *ptr; + RB_DLINK_FOREACH(ptr, chptr->members.head) + { + const struct membership *const msptr = ptr->data; + bloom_add_str(msptr->client_p->name); + } + + return 1; +} + + +static +int expired(void) +{ + return bloom_flushed + conf_bloom_refresh < rb_current_time(); +} + +static +void reset(void) +{ + if(bloom[0]) + bloom_flush(); + + if(chans) + rb_radixtree_destroy(chans, NULL, NULL); + + chans = rb_radixtree_create("chans", irccasecanon); +} + +static +void resize(const size_t size) +{ + bloom_destroy(); + reset(); + bloom_create(size); +} + + +static +int prob_test_token(const char *const token) +{ + return bloom_test_str(token); +} + + +static +int real_test_token(const char *const token, + struct Channel *const chptr) +{ + struct Client *const client = find_named_client(token); + return client && IsMember(client, chptr); +} + + +static +void false_positive_message(void) +{ + sendto_realops_snomask(SNO_GENERAL, L_ALL, + "spamfilter: Nickname bloom filter false positive (size: %zu members: %zu channels: %u flushed: %lu ago)", + bloom_size, + bloom_members, + rb_radixtree_size(chans), + rb_current_time() - bloom_flushed); +} + + +/* + * Always find the length of any multibyte character to advance past. + * The unicode space characters of concern are only of length 3. + */ +static +int is_delim(const uint8_t *const ptr, + unsigned int *const adv) +{ + /* Some ascii ranges */ + if((ptr[0] >= 0x20 && ptr[0] <= 0x2F) || + (ptr[0] >= 0x3A && ptr[0] <= 0x40) || + (ptr[0] >= 0x5C && ptr[0] <= 0x60) || + (ptr[0] >= 0x7B && ptr[0] <= 0x7F)) + return 1; + + /* Unicode below here */ + const int len = ((ptr[0] & 0x80) == 0x80)+ + ((ptr[0] & 0xC0) == 0xC0)+ + ((ptr[0] & 0xE0) == 0xE0)+ + ((ptr[0] & 0xF0) == 0xF0)+ + ((ptr[0] & 0xF8) == 0xF8)+ + ((ptr[0] & 0xFC) == 0xFC); + + if(len) + *adv += len - 1; + + if(len != 3) + return 0; + + switch((htonl(*(const uint32_t *)ptr) & 0x1F7F7F00U) >> 8) + { + case 0x20000: + case 0x20001: + case 0x20002: + case 0x20003: + case 0x20004: + case 0x20005: + case 0x20006: + case 0x20007: + case 0x20008: + case 0x20009: + case 0x2000A: + case 0x2000B: + case 0x2002F: + case 0x2005F: + case 0x30000: + case 0xf3b3f: + return 1; + + default: + return 0; + } +} + + +static unsigned int +count_nicks(const char *const text, + struct Channel *const chptr) +{ + unsigned int ret = 0; + const size_t len = strlen(text); + + for(unsigned int i = 0, j = 0, k = 0; i + 6 < len; i++) + { + if(!is_delim((const unsigned char *) text+i, &k)) + { + j++; + continue; + } + + if(j >= conf_nicklen_min && j <= NICKLEN) + { + char token[NICKLEN+1]; + rb_strlcpy(token, text+i-j, j+1); + if(prob_test_token(token)) + { + if(rb_likely(real_test_token(token, chptr))) + ret++; + else + false_positive_message(); + } + } + + i += k; + j = 0; + k = 0; + } + + return ret; +} + + +static +void hook_spamfilter_query(hook_data_privmsg_channel *const hook) +{ + if(hook->approved != 0) + return; + + if(!bloom[0]) + return; + + const unsigned int counted = count_nicks(hook->text, hook->chptr); + if(counted < conf_limit) + return; + + static char reason[64]; + snprintf(reason, sizeof(reason), "nicks: counted at least %u names", counted); + hook->reason = reason; + hook->approved = -1; +} + + +static +void hook_channel_join(hook_data_channel_approval *const data) +{ + if(~data->chptr->mode.mode & chmode_table[(uint8_t)MODE_SPAMFILTER].mode_type) + return; + + if(!bloom[0]) + return; + + if(expired()) + reset(); + + if(chans_has(data->chptr)) + bloom_add_str(data->client->name); + else + chans_add(data->chptr); +} + + +static +int conf_spamfilter_nicks_end(struct TopConf *const tc) +{ + if(conf_bloom_size != bloom_size) + resize(conf_bloom_size); + + return 0; +} + + +static void set_conf_limit(void *const val) { conf_limit = *(int *)val; } +static void set_conf_nicklen_min(void *const val) { conf_nicklen_min = *(int *)val; } +static void set_conf_bloom_size(void *const val) { conf_bloom_size = *(int *)val; } +static void set_conf_bloom_bits(void *const val) { conf_bloom_bits = *(int *)val; } +static void set_conf_bloom_refresh(void *const val) { conf_bloom_refresh = *(time_t *)val; } + + +struct ConfEntry conf_spamfilter_nicks[] = +{ + { "limit", CF_INT, set_conf_limit, 0, NULL }, + { "nicklen_min", CF_INT, set_conf_nicklen_min, 0, NULL }, + { "bloom_size", CF_INT, set_conf_bloom_size, 0, NULL }, + { "bloom_bits", CF_INT, set_conf_bloom_bits, 0, NULL }, + { "bloom_refresh", CF_TIME, set_conf_bloom_refresh, 0, NULL }, + { "\0", 0, NULL, 0, NULL } +}; + + +static +int modinit(void) +{ + add_top_conf("spamfilter_nicks", NULL, conf_spamfilter_nicks_end, conf_spamfilter_nicks); + rb_get_random(&bloom_salt, sizeof(bloom_salt)); + resize(conf_bloom_size); + return 0; +} + + +static +void modfini(void) +{ + bloom_destroy(); + rb_radixtree_destroy(chans, NULL, NULL); + remove_top_conf("spamfilter_nicks"); +} + + +mapi_hfn_list_av1 hfnlist[] = +{ + { "spamfilter_query", (hookfn)hook_spamfilter_query }, + { "channel_join", (hookfn)hook_channel_join }, + { NULL, NULL } +}; + +DECLARE_MODULE_AV1 +( + spamfilter_nicks, + modinit, + modfini, + NULL, + NULL, + hfnlist, + "$Revision: 0 $" +);