mirror of
https://github.com/matrix-construct/construct
synced 2024-12-28 00:14:07 +01:00
spamfilter: import @jevolk's spamfilter_nicks module
This commit is contained in:
parent
6727daa33e
commit
1a2426007f
2 changed files with 433 additions and 1 deletions
|
@ -1,7 +1,7 @@
|
|||
AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/librb/include $(LTDLINCL)
|
||||
AM_LDFLAGS = -module -export-dynamic -avoid-version -no-undefined -shared
|
||||
AM_LDFLAGS += -export-symbols-regex _mheader
|
||||
LIBS += $(top_srcdir)/ircd/libircd.la
|
||||
LIBS += $(top_srcdir)/librb/src/librb.la $(top_srcdir)/ircd/libircd.la
|
||||
|
||||
extensiondir=@moduledir@/extensions
|
||||
|
||||
|
@ -64,6 +64,7 @@ extension_LTLIBRARIES = \
|
|||
no_kill_services.la \
|
||||
no_locops.la \
|
||||
no_oper_invis.la \
|
||||
spamfilter_nicks.la \
|
||||
spy_admin_notice.la \
|
||||
spy_info_notice.la \
|
||||
spy_links_notice.la \
|
||||
|
|
431
extensions/spamfilter_nicks.c
Normal file
431
extensions/spamfilter_nicks.c
Normal file
|
@ -0,0 +1,431 @@
|
|||
/************************************************************************
|
||||
* charybdis: an advanced ircd. extensions/spamfilter_nicks.c
|
||||
* Copyright (C) 2016 Jason Volk
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
|
||||
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
||||
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "stdinc.h"
|
||||
#include "modules.h"
|
||||
#include "hook.h"
|
||||
#include "client.h"
|
||||
#include "send.h"
|
||||
#include "hash.h"
|
||||
#include "newconf.h"
|
||||
#include "spamfilter.h"
|
||||
|
||||
|
||||
/*
 * Configuration items and their built-in defaults.
 * Each one can be overridden through the "spamfilter_nicks" conf block.
 */

/* A message is rejected once at least this many member nicks are counted. */
size_t conf_limit = 5;

/* Tokens shorter than this many bytes are never tested as nicknames. */
size_t conf_nicklen_min = 4;

/* Size in bytes of each individual bloom filter array. */
size_t conf_bloom_size = 64 * 1024;

/* Passed as the second argument to fnv_hash_upper(). */
size_t conf_bloom_bits = 16;

/*
 * Maximum age of the filter before it is flushed and rebuilt
 * (compared against rb_current_time(); presumably seconds -- confirm).
 */
time_t conf_bloom_refresh = 24 * 60 * 60;
|
||||
|
||||
|
||||
/* Bloom filter hashes */
|
||||
static
|
||||
uint64_t bloom_hash_fnv(const char *const str)
|
||||
{
|
||||
return fnv_hash_upper((const unsigned char *)str, conf_bloom_bits);
|
||||
}
|
||||
|
||||
/*
 * Second bloom hash: a multiply-by-33 (Bernstein style) rolling hash
 * seeded with 7681.
 *
 * str: NUL-terminated string to hash.
 * Returns the 64-bit hash; arithmetic wraps modulo 2^64.
 *
 * Bytes are read as unsigned so that values >= 0x80 contribute 128..255
 * on every platform instead of being sign-extended where plain char is
 * signed.
 *
 * NOTE(review): unlike bloom_hash_fnv (which calls fnv_hash_upper), this
 * hash does not fold case, so a nick mentioned with different letter case
 * hashes differently here -- confirm whether that is intended.
 */
static
uint64_t bloom_hash_bernstein(const char *const str)
{
	uint64_t ret = 7681;

	for(const unsigned char *p = (const unsigned char *)str; *p; p++)
		ret = ret * 33ULL + *p;

	return ret;
}
|
||||
|
||||
#define NUM_HASHES 2
|
||||
uint64_t (*bloom_hashes[NUM_HASHES])(const char *const str) =
|
||||
{
|
||||
bloom_hash_fnv,
|
||||
bloom_hash_bernstein,
|
||||
};
|
||||
|
||||
|
||||
/* Bloom filter state */
|
||||
uint8_t *bloom[NUM_HASHES];
|
||||
uint64_t bloom_salt;
|
||||
size_t bloom_size;
|
||||
size_t bloom_members;
|
||||
time_t bloom_flushed;
|
||||
struct rb_radixtree *chans; // Channels with MODE_SPAMFILTER that participate in the bloom filter
|
||||
|
||||
|
||||
static
|
||||
void bloom_flush(void)
|
||||
{
|
||||
for(size_t i = 0; i < NUM_HASHES; i++)
|
||||
memset(bloom[i], 0x0, bloom_size);
|
||||
|
||||
bloom_flushed = rb_current_time();
|
||||
bloom_members = 0;
|
||||
}
|
||||
|
||||
static
|
||||
void bloom_destroy(void)
|
||||
{
|
||||
for(size_t i = 0; i < NUM_HASHES; i++)
|
||||
{
|
||||
rb_free(bloom[i]);
|
||||
bloom[i] = NULL;
|
||||
}
|
||||
|
||||
bloom_members = 0;
|
||||
bloom_size = 0;
|
||||
}
|
||||
|
||||
static
|
||||
void bloom_create(const size_t size)
|
||||
{
|
||||
if(!size)
|
||||
return;
|
||||
|
||||
for(size_t i = 0; i < NUM_HASHES; i++)
|
||||
bloom[i] = rb_malloc(size);
|
||||
|
||||
bloom_size = size;
|
||||
bloom_flush();
|
||||
}
|
||||
|
||||
static
|
||||
void bloom_add(const size_t filter,
|
||||
uint64_t hash)
|
||||
{
|
||||
hash += bloom_salt;
|
||||
hash %= bloom_size * 8UL;
|
||||
bloom[filter][hash / 8UL] |= (1U << (hash % 8UL));
|
||||
}
|
||||
|
||||
static
|
||||
int bloom_test(const size_t filter,
|
||||
uint64_t hash)
|
||||
{
|
||||
hash += bloom_salt;
|
||||
hash %= bloom_size * 8UL;
|
||||
const int bit = hash % 8UL;
|
||||
return (bloom[filter][hash / 8UL] & (1U << bit)) >> bit;
|
||||
}
|
||||
|
||||
static
|
||||
void bloom_add_str(const char *const str)
|
||||
{
|
||||
for(size_t i = 0; i < NUM_HASHES; i++)
|
||||
bloom_add(i, bloom_hashes[i](str));
|
||||
|
||||
bloom_members++;
|
||||
}
|
||||
|
||||
static
|
||||
int bloom_test_str(const char *const str)
|
||||
{
|
||||
unsigned int count = 0;
|
||||
for(size_t i = 0; i < NUM_HASHES; i++)
|
||||
count += bloom_test(i, bloom_hashes[i](str));
|
||||
|
||||
return count >= NUM_HASHES;
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
int chans_has(const struct Channel *const chptr)
|
||||
{
|
||||
return rb_radixtree_retrieve(chans, chptr->chname) != NULL;
|
||||
}
|
||||
|
||||
static
|
||||
int chans_add(struct Channel *const chptr)
|
||||
{
|
||||
if(!rb_radixtree_add(chans, chptr->chname, chptr))
|
||||
return 0;
|
||||
|
||||
rb_dlink_node *ptr;
|
||||
RB_DLINK_FOREACH(ptr, chptr->members.head)
|
||||
{
|
||||
const struct membership *const msptr = ptr->data;
|
||||
bloom_add_str(msptr->client_p->name);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
int expired(void)
|
||||
{
|
||||
return bloom_flushed + conf_bloom_refresh < rb_current_time();
|
||||
}
|
||||
|
||||
static
|
||||
void reset(void)
|
||||
{
|
||||
if(bloom[0])
|
||||
bloom_flush();
|
||||
|
||||
if(chans)
|
||||
rb_radixtree_destroy(chans, NULL, NULL);
|
||||
|
||||
chans = rb_radixtree_create("chans", irccasecanon);
|
||||
}
|
||||
|
||||
/*
 * Rebuild the filter with a new per-array size in bytes.
 * The old arrays are freed, the channel tree is reset, and new arrays are
 * allocated; all previously recorded nicknames are lost. With a size of
 * zero no arrays are allocated (see bloom_create()).
 */
static
void resize(const size_t size)
{
	/* Free first so that reset() sees bloom[0] == NULL and skips the flush */
	bloom_destroy();
	reset();
	bloom_create(size);
}
|
||||
|
||||
|
||||
/*
 * Probabilistic check: returns non-zero when the bloom filter reports the
 * token as possibly present. A non-zero answer can be a false positive.
 */
static
int prob_test_token(const char *const token)
{
	const int maybe = bloom_test_str(token);

	return maybe;
}
|
||||
|
||||
|
||||
/*
 * Authoritative check: returns 1 when the token names an existing client
 * (per find_named_client()) that IsMember() of the given channel,
 * 0 otherwise.
 */
static
int real_test_token(const char *const token,
                    struct Channel *const chptr)
{
	struct Client *const client = find_named_client(token);
	if(!client)
		return 0;

	return IsMember(client, chptr) != 0;
}
|
||||
|
||||
|
||||
static
|
||||
void false_positive_message(void)
|
||||
{
|
||||
sendto_realops_snomask(SNO_GENERAL, L_ALL,
|
||||
"spamfilter: Nickname bloom filter false positive (size: %zu members: %zu channels: %u flushed: %lu ago)",
|
||||
bloom_size,
|
||||
bloom_members,
|
||||
rb_radixtree_size(chans),
|
||||
rb_current_time() - bloom_flushed);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Decide whether the byte sequence at `ptr` is a token delimiter.
 *
 * ptr: points at the first byte of a character inside a NUL-terminated
 *      buffer.
 * adv: when ptr[0] is a multibyte lead byte (or any byte with the high
 *      bit set), the number of leading 1 bits minus one is ADDED to *adv,
 *      whether or not the character is a delimiter. For a UTF-8 lead byte
 *      that is the count of continuation bytes to skip. Left untouched
 *      for ASCII.
 *
 * Returns 1 for a delimiter, 0 otherwise.
 *
 * Delimiters are the ASCII ranges 0x20-0x2F, 0x3A-0x40, 0x5C-0x60 and
 * 0x7B-0x7F, plus these three-byte UTF-8 characters: U+2000..U+200B,
 * U+202F, U+205F, U+3000 and U+FEFF.
 *
 * The sequence is decoded byte by byte (no unaligned 32-bit load), and a
 * three-byte sequence is only considered when both following bytes are
 * valid continuation bytes (10xxxxxx). The check short-circuits, so
 * nothing past a terminating NUL is ever read.
 */
static
int is_delim(const uint8_t *const ptr,
             unsigned int *const adv)
{
	const uint8_t lead = ptr[0];

	/* Some ascii ranges */
	if((lead >= 0x20 && lead <= 0x2F) ||
	   (lead >= 0x3A && lead <= 0x40) ||
	   (lead >= 0x5C && lead <= 0x60) ||
	   (lead >= 0x7B && lead <= 0x7F))
		return 1;

	/* Unicode below here: count the leading 1 bits of the lead byte (at most 6) */
	unsigned int len = 0;
	for(unsigned int mask = 0x80; len < 6 && (lead & mask); mask >>= 1)
		len++;

	if(len)
		*adv += len - 1;

	/* The unicode space characters of concern are only of length 3 */
	if(len != 3)
		return 0;

	if((ptr[1] & 0xC0) != 0x80 || (ptr[2] & 0xC0) != 0x80)
		return 0;

	/* Decode 1110xxxx 10yyyyyy 10zzzzzz into the code point */
	const uint32_t cp = ((uint32_t)(lead & 0x0F) << 12) |
	                    ((uint32_t)(ptr[1] & 0x3F) << 6) |
	                    (uint32_t)(ptr[2] & 0x3F);

	if(cp >= 0x2000 && cp <= 0x200B)
		return 1;

	switch(cp)
	{
		case 0x202F:
		case 0x205F:
		case 0x3000:
		case 0xFEFF:
			return 1;

		default:
			return 0;
	}
}
|
||||
|
||||
|
||||
static unsigned int
|
||||
count_nicks(const char *const text,
|
||||
struct Channel *const chptr)
|
||||
{
|
||||
unsigned int ret = 0;
|
||||
const size_t len = strlen(text);
|
||||
|
||||
for(unsigned int i = 0, j = 0, k = 0; i + 6 < len; i++)
|
||||
{
|
||||
if(!is_delim((const unsigned char *) text+i, &k))
|
||||
{
|
||||
j++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if(j >= conf_nicklen_min && j <= NICKLEN)
|
||||
{
|
||||
char token[NICKLEN+1];
|
||||
rb_strlcpy(token, text+i-j, j+1);
|
||||
if(prob_test_token(token))
|
||||
{
|
||||
if(rb_likely(real_test_token(token, chptr)))
|
||||
ret++;
|
||||
else
|
||||
false_positive_message();
|
||||
}
|
||||
}
|
||||
|
||||
i += k;
|
||||
j = 0;
|
||||
k = 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
void hook_spamfilter_query(hook_data_privmsg_channel *const hook)
|
||||
{
|
||||
if(hook->approved != 0)
|
||||
return;
|
||||
|
||||
if(!bloom[0])
|
||||
return;
|
||||
|
||||
const unsigned int counted = count_nicks(hook->text, hook->chptr);
|
||||
if(counted < conf_limit)
|
||||
return;
|
||||
|
||||
static char reason[64];
|
||||
snprintf(reason, sizeof(reason), "nicks: counted at least %u names", counted);
|
||||
hook->reason = reason;
|
||||
hook->approved = -1;
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
void hook_channel_join(hook_data_channel_approval *const data)
|
||||
{
|
||||
if(~data->chptr->mode.mode & chmode_table[(uint8_t)MODE_SPAMFILTER].mode_type)
|
||||
return;
|
||||
|
||||
if(!bloom[0])
|
||||
return;
|
||||
|
||||
if(expired())
|
||||
reset();
|
||||
|
||||
if(chans_has(data->chptr))
|
||||
bloom_add_str(data->client->name);
|
||||
else
|
||||
chans_add(data->chptr);
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
int conf_spamfilter_nicks_end(struct TopConf *const tc)
|
||||
{
|
||||
if(conf_bloom_size != bloom_size)
|
||||
resize(conf_bloom_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static void set_conf_limit(void *const val) { conf_limit = *(int *)val; }
|
||||
static void set_conf_nicklen_min(void *const val) { conf_nicklen_min = *(int *)val; }
|
||||
static void set_conf_bloom_size(void *const val) { conf_bloom_size = *(int *)val; }
|
||||
static void set_conf_bloom_bits(void *const val) { conf_bloom_bits = *(int *)val; }
|
||||
static void set_conf_bloom_refresh(void *const val) { conf_bloom_refresh = *(time_t *)val; }
|
||||
|
||||
|
||||
/*
 * Items accepted inside the "spamfilter_nicks" conf block (this table is
 * handed to add_top_conf() in modinit).
 * Each row gives the item name, its value type and the setter that stores
 * the value. The two trailing fields are unused here (0 / NULL); their
 * meaning is defined by struct ConfEntry -- see newconf.h.
 * The final row with an empty name and NULL setter terminates the table.
 */
struct ConfEntry conf_spamfilter_nicks[] =
{
	{ "limit", CF_INT, set_conf_limit, 0, NULL },
	{ "nicklen_min", CF_INT, set_conf_nicklen_min, 0, NULL },
	{ "bloom_size", CF_INT, set_conf_bloom_size, 0, NULL },
	{ "bloom_bits", CF_INT, set_conf_bloom_bits, 0, NULL },
	{ "bloom_refresh", CF_TIME, set_conf_bloom_refresh, 0, NULL },
	{ "\0", 0, NULL, 0, NULL }
};
|
||||
|
||||
|
||||
static
|
||||
int modinit(void)
|
||||
{
|
||||
add_top_conf("spamfilter_nicks", NULL, conf_spamfilter_nicks_end, conf_spamfilter_nicks);
|
||||
rb_get_random(&bloom_salt, sizeof(bloom_salt));
|
||||
resize(conf_bloom_size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
void modfini(void)
|
||||
{
|
||||
bloom_destroy();
|
||||
rb_radixtree_destroy(chans, NULL, NULL);
|
||||
remove_top_conf("spamfilter_nicks");
|
||||
}
|
||||
|
||||
|
||||
/*
 * Hook bindings for this module: each row pairs a hook name with the
 * handler attached to it. The { NULL, NULL } row terminates the list.
 * The list is passed to DECLARE_MODULE_AV1 below.
 */
mapi_hfn_list_av1 hfnlist[] =
{
	{ "spamfilter_query", (hookfn)hook_spamfilter_query },
	{ "channel_join", (hookfn)hook_channel_join },
	{ NULL, NULL }
};
|
||||
|
||||
/*
 * Module declaration: module name, init and fini functions, two unused
 * slots (NULL; presumably the command and hook-id lists -- confirm
 * against modules.h), the hook function list, and the version string.
 */
DECLARE_MODULE_AV1
(
	spamfilter_nicks,
	modinit,
	modfini,
	NULL,
	NULL,
	hfnlist,
	"$Revision: 0 $"
);
|
Loading…
Reference in a new issue