0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2024-12-28 00:14:07 +01:00

spamfilter: import @jevolk's spamfilter_nicks module

This commit is contained in:
William Pitcock 2016-06-25 23:19:40 -05:00
parent 6727daa33e
commit 1a2426007f
2 changed files with 433 additions and 1 deletions

View file

@ -1,7 +1,7 @@
AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/librb/include $(LTDLINCL)
AM_LDFLAGS = -module -export-dynamic -avoid-version -no-undefined -shared
AM_LDFLAGS += -export-symbols-regex _mheader
LIBS += $(top_srcdir)/ircd/libircd.la
LIBS += $(top_srcdir)/librb/src/librb.la $(top_srcdir)/ircd/libircd.la
extensiondir=@moduledir@/extensions
@ -64,6 +64,7 @@ extension_LTLIBRARIES = \
no_kill_services.la \
no_locops.la \
no_oper_invis.la \
spamfilter_nicks.la \
spy_admin_notice.la \
spy_info_notice.la \
spy_links_notice.la \

View file

@ -0,0 +1,431 @@
/************************************************************************
* charybdis: an advanced ircd. extensions/spamfilter_nicks.c
* Copyright (C) 2016 Jason Volk
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "stdinc.h"
#include "modules.h"
#include "hook.h"
#include "client.h"
#include "send.h"
#include "hash.h"
#include "newconf.h"
#include "spamfilter.h"
/* Conf items & defaults */
size_t conf_limit = 5;
size_t conf_nicklen_min = 4;
size_t conf_bloom_size = 1024 * 64;
size_t conf_bloom_bits = 16;
time_t conf_bloom_refresh = 86400;
/* Bloom filter hashes */
static
uint64_t bloom_hash_fnv(const char *const str)
{
return fnv_hash_upper((const unsigned char *)str, conf_bloom_bits);
}
static
uint64_t bloom_hash_bernstein(const char *const str)
{
uint64_t ret = 7681;
for(size_t i = 0; str[i]; i++)
ret = ret * 33ULL + str[i];
return ret;
}
#define NUM_HASHES 2
uint64_t (*bloom_hashes[NUM_HASHES])(const char *const str) =
{
bloom_hash_fnv,
bloom_hash_bernstein,
};
/* Bloom filter state */
uint8_t *bloom[NUM_HASHES];
uint64_t bloom_salt;
size_t bloom_size;
size_t bloom_members;
time_t bloom_flushed;
struct rb_radixtree *chans; // Channels with MODE_SPAMFILTER that participate in the bloom filter
static
void bloom_flush(void)
{
for(size_t i = 0; i < NUM_HASHES; i++)
memset(bloom[i], 0x0, bloom_size);
bloom_flushed = rb_current_time();
bloom_members = 0;
}
static
void bloom_destroy(void)
{
for(size_t i = 0; i < NUM_HASHES; i++)
{
rb_free(bloom[i]);
bloom[i] = NULL;
}
bloom_members = 0;
bloom_size = 0;
}
static
void bloom_create(const size_t size)
{
if(!size)
return;
for(size_t i = 0; i < NUM_HASHES; i++)
bloom[i] = rb_malloc(size);
bloom_size = size;
bloom_flush();
}
static
void bloom_add(const size_t filter,
uint64_t hash)
{
hash += bloom_salt;
hash %= bloom_size * 8UL;
bloom[filter][hash / 8UL] |= (1U << (hash % 8UL));
}
static
int bloom_test(const size_t filter,
uint64_t hash)
{
hash += bloom_salt;
hash %= bloom_size * 8UL;
const int bit = hash % 8UL;
return (bloom[filter][hash / 8UL] & (1U << bit)) >> bit;
}
static
void bloom_add_str(const char *const str)
{
for(size_t i = 0; i < NUM_HASHES; i++)
bloom_add(i, bloom_hashes[i](str));
bloom_members++;
}
static
int bloom_test_str(const char *const str)
{
unsigned int count = 0;
for(size_t i = 0; i < NUM_HASHES; i++)
count += bloom_test(i, bloom_hashes[i](str));
return count >= NUM_HASHES;
}
static
int chans_has(const struct Channel *const chptr)
{
return rb_radixtree_retrieve(chans, chptr->chname) != NULL;
}
static
int chans_add(struct Channel *const chptr)
{
if(!rb_radixtree_add(chans, chptr->chname, chptr))
return 0;
rb_dlink_node *ptr;
RB_DLINK_FOREACH(ptr, chptr->members.head)
{
const struct membership *const msptr = ptr->data;
bloom_add_str(msptr->client_p->name);
}
return 1;
}
static
int expired(void)
{
return bloom_flushed + conf_bloom_refresh < rb_current_time();
}
static
void reset(void)
{
if(bloom[0])
bloom_flush();
if(chans)
rb_radixtree_destroy(chans, NULL, NULL);
chans = rb_radixtree_create("chans", irccasecanon);
}
static
void resize(const size_t size)
{
bloom_destroy();
reset();
bloom_create(size);
}
static
int prob_test_token(const char *const token)
{
return bloom_test_str(token);
}
static
int real_test_token(const char *const token,
struct Channel *const chptr)
{
struct Client *const client = find_named_client(token);
return client && IsMember(client, chptr);
}
static
void false_positive_message(void)
{
sendto_realops_snomask(SNO_GENERAL, L_ALL,
"spamfilter: Nickname bloom filter false positive (size: %zu members: %zu channels: %u flushed: %lu ago)",
bloom_size,
bloom_members,
rb_radixtree_size(chans),
rb_current_time() - bloom_flushed);
}
/*
* Always find the length of any multibyte character to advance past.
* The unicode space characters of concern are only of length 3.
*/
static
int is_delim(const uint8_t *const ptr,
unsigned int *const adv)
{
/* Some ascii ranges */
if((ptr[0] >= 0x20 && ptr[0] <= 0x2F) ||
(ptr[0] >= 0x3A && ptr[0] <= 0x40) ||
(ptr[0] >= 0x5C && ptr[0] <= 0x60) ||
(ptr[0] >= 0x7B && ptr[0] <= 0x7F))
return 1;
/* Unicode below here */
const int len = ((ptr[0] & 0x80) == 0x80)+
((ptr[0] & 0xC0) == 0xC0)+
((ptr[0] & 0xE0) == 0xE0)+
((ptr[0] & 0xF0) == 0xF0)+
((ptr[0] & 0xF8) == 0xF8)+
((ptr[0] & 0xFC) == 0xFC);
if(len)
*adv += len - 1;
if(len != 3)
return 0;
switch((htonl(*(const uint32_t *)ptr) & 0x1F7F7F00U) >> 8)
{
case 0x20000:
case 0x20001:
case 0x20002:
case 0x20003:
case 0x20004:
case 0x20005:
case 0x20006:
case 0x20007:
case 0x20008:
case 0x20009:
case 0x2000A:
case 0x2000B:
case 0x2002F:
case 0x2005F:
case 0x30000:
case 0xf3b3f:
return 1;
default:
return 0;
}
}
static unsigned int
count_nicks(const char *const text,
struct Channel *const chptr)
{
unsigned int ret = 0;
const size_t len = strlen(text);
for(unsigned int i = 0, j = 0, k = 0; i + 6 < len; i++)
{
if(!is_delim((const unsigned char *) text+i, &k))
{
j++;
continue;
}
if(j >= conf_nicklen_min && j <= NICKLEN)
{
char token[NICKLEN+1];
rb_strlcpy(token, text+i-j, j+1);
if(prob_test_token(token))
{
if(rb_likely(real_test_token(token, chptr)))
ret++;
else
false_positive_message();
}
}
i += k;
j = 0;
k = 0;
}
return ret;
}
static
void hook_spamfilter_query(hook_data_privmsg_channel *const hook)
{
if(hook->approved != 0)
return;
if(!bloom[0])
return;
const unsigned int counted = count_nicks(hook->text, hook->chptr);
if(counted < conf_limit)
return;
static char reason[64];
snprintf(reason, sizeof(reason), "nicks: counted at least %u names", counted);
hook->reason = reason;
hook->approved = -1;
}
static
void hook_channel_join(hook_data_channel_approval *const data)
{
if(~data->chptr->mode.mode & chmode_table[(uint8_t)MODE_SPAMFILTER].mode_type)
return;
if(!bloom[0])
return;
if(expired())
reset();
if(chans_has(data->chptr))
bloom_add_str(data->client->name);
else
chans_add(data->chptr);
}
static
int conf_spamfilter_nicks_end(struct TopConf *const tc)
{
if(conf_bloom_size != bloom_size)
resize(conf_bloom_size);
return 0;
}
static void set_conf_limit(void *const val) { conf_limit = *(int *)val; }
static void set_conf_nicklen_min(void *const val) { conf_nicklen_min = *(int *)val; }
static void set_conf_bloom_size(void *const val) { conf_bloom_size = *(int *)val; }
static void set_conf_bloom_bits(void *const val) { conf_bloom_bits = *(int *)val; }
static void set_conf_bloom_refresh(void *const val) { conf_bloom_refresh = *(time_t *)val; }
struct ConfEntry conf_spamfilter_nicks[] =
{
{ "limit", CF_INT, set_conf_limit, 0, NULL },
{ "nicklen_min", CF_INT, set_conf_nicklen_min, 0, NULL },
{ "bloom_size", CF_INT, set_conf_bloom_size, 0, NULL },
{ "bloom_bits", CF_INT, set_conf_bloom_bits, 0, NULL },
{ "bloom_refresh", CF_TIME, set_conf_bloom_refresh, 0, NULL },
{ "\0", 0, NULL, 0, NULL }
};
static
int modinit(void)
{
add_top_conf("spamfilter_nicks", NULL, conf_spamfilter_nicks_end, conf_spamfilter_nicks);
rb_get_random(&bloom_salt, sizeof(bloom_salt));
resize(conf_bloom_size);
return 0;
}
static
void modfini(void)
{
bloom_destroy();
rb_radixtree_destroy(chans, NULL, NULL);
remove_top_conf("spamfilter_nicks");
}
mapi_hfn_list_av1 hfnlist[] =
{
{ "spamfilter_query", (hookfn)hook_spamfilter_query },
{ "channel_join", (hookfn)hook_channel_join },
{ NULL, NULL }
};
DECLARE_MODULE_AV1
(
spamfilter_nicks,
modinit,
modfini,
NULL,
NULL,
hfnlist,
"$Revision: 0 $"
);