0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2024-05-29 00:03:45 +02:00

ircd:Ⓜ️ Transitional improvements to mxid grammars & tools.

This commit is contained in:
Jason Volk 2017-12-12 13:13:06 -07:00
parent 1ddf0f546a
commit e0f64a7ca2
2 changed files with 485 additions and 247 deletions

View file

@ -27,41 +27,55 @@
namespace ircd::m
{
struct id;
IRCD_M_EXCEPTION(error, INVALID_MXID, http::BAD_REQUEST)
IRCD_M_EXCEPTION(INVALID_MXID, BAD_SIGIL, http::BAD_REQUEST)
IRCD_OVERLOAD(generate)
struct id;
bool my(const id &);
}
//
// Interface to a string representing an mxid. The m::id itself is just a
// string_view over some existing data. m::id::buf is an m::id with an
// internal array providing the buffer.
//
/// (Appendix 4.2) Common Identifier Format
///
/// The Matrix protocol uses a common format to assign unique identifiers to
/// a number of entities, including users, events and rooms. Each identifier
/// takes the form: `&localpart:domain` where & represents a 'sigil' character;
/// domain is the server name of the homeserver which allocated the identifier,
/// and localpart is an identifier allocated by that homeserver. The precise
/// grammar defining the allowable format of an identifier depends on the type
/// of identifier.
///
/// This structure is an interface to a string representing an mxid. The m::id
/// itself is just a string_view over some existing data. m::id::buf is an
/// m::id with an internal array providing the buffer.
///
struct ircd::m::id
:string_view
{
struct event;
struct user;
struct room;
struct alias;
struct room_alias;
struct group;
struct origin;
template<class T, size_t SIZE = 256> struct buf;
struct parser;
struct generator;
enum sigil :char;
template<class T, size_t SIZE = 256> struct buf;
template<class it> struct input;
template<class it> struct output;
struct parser;
struct printer;
struct validator;
protected:
struct parser static const parser;
struct printer static const printer;
struct validator static const validator;
public:
// Extract elements
string_view local() const { return split(*this, ':').first; }
string_view host() const { return split(*this, ':').second; }
string_view name() const { return lstrip(local(), at(0)); }
string_view local() const;
string_view host() const;
string_view name() const;
string_view hostname() const;
uint16_t hostport() const;
@ -75,15 +89,109 @@ struct ircd::m::id
id() = default;
};
/// (4.2) The sigil characters
enum ircd::m::id::sigil
:char
{
EVENT = '$',
USER = '@',
ROOM = '!',
ALIAS = '#',
USER = '@', ///< User ID (4.2.1)
EVENT = '$', ///< Event ID (4.2.2)
ROOM = '!', ///< Room ID (4.2.2)
ROOM_ALIAS = '#', ///< Room alias (4.2.3)
GROUP = '+', ///< Group ID (experimental)
ORIGIN = ':', ///< Origin ID (experimental)
};
/// (Appendix 4.2.1) User Identifiers
///
/// Users within Matrix are uniquely identified by their Matrix user ID. The
/// user ID is namespaced to the homeserver which allocated the account and
/// has the form: `@localpart:domain` The localpart of a user ID is an opaque
/// identifier for that user. It MUST NOT be empty, and MUST contain only the
/// characters a-z, 0-9, ., _, =, -, and /. The domain of a user ID is the
/// server name of the homeserver which allocated the account. The length of
/// a user ID, including the @ sigil and the domain, MUST NOT exceed 255
/// characters.
///
struct ircd::m::id::user
:ircd::m::id
{
using buf = m::id::buf<user>;
template<class... args> user(args&&... a) :m::id{USER, std::forward<args>(a)...} {}
user() = default;
};
/// (Appendix 4.2.2) Room IDs and Event IDs
///
/// An event has exactly one event ID. An event ID has the format:
/// `$opaque_id:domain` The domain of an event ID is the server name of the
/// homeserver which created the event. The domain is used only for namespacing
/// to avoid the risk of clashes of identifiers between different homeservers.
/// There is no implication that the event in question is still available at
/// the corresponding homeserver. Event IDs are case-sensitive. They are not
/// meant to be human readable.
///
struct ircd::m::id::event
:ircd::m::id
{
using buf = m::id::buf<event>;
template<class... args> event(args&&... a) :m::id{EVENT, std::forward<args>(a)...} {}
event() = default;
};
/// (Appendix 4.2.2) Room IDs and Event IDs
///
/// A room has exactly one room ID. A room ID has the format:
/// `!opaque_id:domain` The domain of a room ID is the server name of the
/// homeserver which created the room. The domain is used only for namespacing
/// to avoid the risk of clashes of identifiers between different homeservers.
/// There is no implication that the room in question is still available at
/// the corresponding homeserver. Room IDs are case-sensitive. They are not
/// meant to be human readable.
///
struct ircd::m::id::room
:ircd::m::id
{
using buf = m::id::buf<room>;
template<class... args> room(args&&... a) :m::id{ROOM, std::forward<args>(a)...} {}
room() = default;
};
/// (Appendix 4.2.3) Room Aliases
/// A room may have zero or more aliases. A room alias has the format:
/// `#room_alias:domain` The domain of a room alias is the server name of the
/// homeserver which created the alias. Other servers may contact this
/// homeserver to look up the alias. Room aliases MUST NOT exceed 255 bytes
/// (including the # sigil and the domain).
///
struct ircd::m::id::room_alias
:ircd::m::id
{
using buf = m::id::buf<room_alias>;
template<class... args> room_alias(args&&... a) :m::id{ROOM_ALIAS, std::forward<args>(a)...} {}
room_alias() = default;
};
/// Group ID (EXPERIMENTAL)
///
struct ircd::m::id::group
:ircd::m::id
{
using buf = m::id::buf<group>;
template<class... args> group(args&&... a) :m::id{GROUP, std::forward<args>(a)...} {}
group() = default;
};
/// Group ID (EXPERIMENTAL)
///
struct ircd::m::id::origin
:ircd::m::id
{
using buf = m::id::buf<origin>;
template<class... args> origin(args&&... a) :m::id{ORIGIN, std::forward<args>(a)...} {}
origin() = default;
};
// Utilities
namespace ircd::m
{
id::sigil sigil(const char &c);
@ -98,43 +206,7 @@ namespace ircd::m
void validate(const id::sigil &, const string_view &); // valid() | throws
}
//
// convenience typedefs
//
struct ircd::m::id::event
:ircd::m::id
{
using buf = m::id::buf<event>;
template<class... args> event(args&&... a) :m::id{EVENT, std::forward<args>(a)...} {}
event() = default;
};
struct ircd::m::id::user
:ircd::m::id
{
using buf = m::id::buf<user>;
template<class... args> user(args&&... a) :m::id{USER, std::forward<args>(a)...} {}
user() = default;
};
struct ircd::m::id::room
:ircd::m::id
{
using buf = m::id::buf<room>;
template<class... args> room(args&&... a) :m::id{ROOM, std::forward<args>(a)...} {}
room() = default;
};
struct ircd::m::id::alias
:ircd::m::id
{
using buf = m::id::buf<alias>;
template<class... args> alias(args&&... a) :m::id{ALIAS, std::forward<args>(a)...} {}
alias() = default;
};
/// ID object backed by an internal buffer of wost-case size.
/// ID object backed by an internal buffer of default worst-case size.
///
template<class T,
size_t MAX = 256>
@ -193,26 +265,3 @@ struct ircd::m::id::buf
return *this;
}
};
inline uint16_t
ircd::m::id::hostport()
const try
{
const auto port
{
split(host(), ':').second
};
return port? lex_cast<uint16_t>(port) : 8448;
}
catch(const std::exception &e)
{
return 8448;
}
inline ircd::string_view
ircd::m::id::hostname()
const
{
return split(host(), ':').first;
}

View file

@ -22,27 +22,41 @@
#include <ircd/spirit.h>
#include <ircd/m/m.h>
namespace spirit = boost::spirit;
namespace qi = spirit::qi;
namespace ascii = qi::ascii;
using namespace ircd;
namespace ircd::m
{
namespace spirit = boost::spirit;
namespace qi = spirit::qi;
namespace karma = spirit::karma;
namespace ascii = qi::ascii;
using qi::lit;
using qi::string;
using qi::char_;
using qi::short_;
using qi::ushort_;
using qi::int_;
using qi::long_;
using qi::repeat;
using qi::omit;
using qi::raw;
using qi::attr;
using qi::eps;
using qi::attr_cast;
using qi::lit;
using qi::string;
using qi::char_;
using qi::short_;
using qi::ushort_;
using qi::int_;
using qi::long_;
using qi::repeat;
using qi::omit;
using qi::raw;
using qi::attr;
using qi::eps;
using qi::attr_cast;
using karma::lit;
using karma::char_;
using karma::long_;
using karma::double_;
using karma::bool_;
using karma::maxwidth;
using karma::eps;
using karma::attr_cast;
[[noreturn]] void failure(const qi::expectation_failure<const char *> &, const string_view &);
}
template<class it>
struct grammar
struct ircd::m::id::input
:qi::grammar<it, spirit::unused_type>
{
template<class R = spirit::unused_type, class... S> using rule = qi::rule<it, R, S...>;
@ -50,13 +64,80 @@ struct grammar
rule<> NUL { lit('\0') ,"nul" };
rule<> SP { lit('\x20') ,"space" };
rule<> HT { lit('\x09') ,"horizontal tab" };
rule<> ws { SP | HT ,"whitespace" };
rule<> CR { lit('\x0D') ,"carriage return" };
rule<> LF { lit('\x0A') ,"line feed" };
rule<> CRLF { CR >> LF ,"carriage return, line feed" };
rule<> ws { SP | HT ,"whitespace" };
rule<> illegal { NUL | CR | LF ,"illegal" };
// Sigils
const rule<> event_id_sigil { lit(char(ircd::m::id::EVENT)) ,"event_id sigil" };
const rule<> user_id_sigil { lit(char(ircd::m::id::USER)) ,"user_id sigil" };
const rule<> room_id_sigil { lit(char(ircd::m::id::ROOM)) ,"room_id sigil" };
const rule<> room_alias_sigil { lit(char(ircd::m::id::ROOM_ALIAS)) ,"room_alias sigil" };
const rule<> group_id_sigil { lit(char(ircd::m::id::GROUP)) ,"group_id sigil" };
const rule<> origin_sigil { lit(char(ircd::m::id::ORIGIN)) ,"origin sigil" };
const rule<enum sigil> sigil
{
event_id_sigil |
user_id_sigil |
room_id_sigil |
room_alias_sigil |
group_id_sigil |
origin_sigil
,"sigil"
};
// character of a localpart; must not contain ':' because that's the terminator
const rule<> localpart_char
{
char_ - ':'
,"localpart character"
};
// a localpart is zero or more localpart characters
const rule<> localpart
{
*(localpart_char)
,"localpart"
};
// character of a non-historical user_id localpart
const rule<> user_id_char
{
char_("a-z/_=.\x2D") // x2d is '-'
,"user_id character"
};
// a user_id localpart is 1 or more user_id localpart characters
const rule<> user_id_localpart
{
+user_id_char
,"user_id localpart"
};
// a prefix is a sigil and a localpart; user_id prefix
const rule<> user_id_prefix
{
user_id_sigil >> user_id_localpart
,"user_id prefix"
};
// a prefix is a sigil and a localpart; proper invert of user_id prefix
const rule<> non_user_id_prefix
{
((!user_id_sigil) > sigil) >> localpart
,"non user_id prefix"
};
// a prefix is a sigil and a localpart
const rule<> prefix
{
user_id_prefix | non_user_id_prefix
,"prefix"
};
//TODO: XXX-----------------
//TODO: XXX start an ircd::net grammar; move to net::
/*
rule<string_view> authority
{
@ -89,48 +170,55 @@ struct grammar
,"dns name"
};
//TODO: /XXX-----------------
/// (Appendix 4.1) Server Name
/// A homeserver is uniquely identified by its server name. This value
/// is used in a number of identifiers, as described below. The server
/// name represents the address at which the homeserver in question can
/// be reached by other homeservers. The complete grammar is:
/// `server_name = dns_name [ ":" port]`
/// `dns_name = host`
/// `port = *DIGIT`
/// where host is as defined by RFC3986, section 3.2.2. Examples of valid
/// server names are:
/// `matrix.org`
/// `matrix.org:8888`
/// `1.2.3.4` (IPv4 literal)
/// `1.2.3.4:1234` (IPv4 literal with explicit port)
/// `[1234:5678::abcd]` (IPv6 literal)
/// `[1234:5678::abcd]:5678` (IPv6 literal with explicit port)
const rule<> server_name
{
dns_name >> -(':' >> port)
,"server name"
};
const rule<> sigil
{
lit(char(m::id::EVENT)) |
lit(char(m::id::USER)) |
lit(char(m::id::ROOM)) |
lit(char(m::id::ALIAS))
,"sigil"
};
const rule<> localpart
{
sigil >> *(char_ - ':')
,"localpart"
};
const rule<> mxid
{
localpart >> ':' >> server_name
prefix >> ':' >> server_name
,"mxid"
};
grammar()
:grammar::base_type{rule<>{}}
input()
:input::base_type{rule<>{}}
{}
};
template<class it>
struct ircd::m::id::output
:karma::grammar<it, spirit::unused_type>
{
template<class T = spirit::unused_type> using rule = karma::rule<it, T>;
output()
:output::base_type{rule<>{}}
{}
};
struct ircd::m::id::parser
:grammar<const char *>
:input<const char *>
{
struct validator;
const rule<string_view> mxid_view
{
raw[eps > mxid]
};
string_view operator()(const id::sigil &, const string_view &id) const;
string_view operator()(const string_view &id) const;
}
@ -139,95 +227,179 @@ const ircd::m::id::parser;
ircd::string_view
ircd::m::id::parser::operator()(const id::sigil &sigil,
const string_view &id)
const
const try
{
const rule<string_view> view_mxid
{
raw[&lit(char(sigil)) > mxid]
,"mxid"
};
string_view out;
const char *start{id.data()};
const char *const stop{id.data() + id.size()};
qi::parse(start, stop, raw[mxid_view], out);
qi::parse(start, stop, eps > view_mxid, out);
return out;
}
catch(const qi::expectation_failure<const char *> &e)
{
failure(e, "mxid");
}
ircd::string_view
ircd::m::id::parser::operator()(const string_view &id)
const
const try
{
static const rule<string_view> view_mxid
{
raw[mxid]
};
string_view out;
const char *start{id.data()};
const char *const stop{id.data() + id.size()};
qi::parse(start, stop, raw[mxid_view], out);
qi::parse(start, stop, eps > view_mxid, out);
return out;
}
struct ircd::m::id::parser::validator
:grammar<const char *>
catch(const qi::expectation_failure<const char *> &e)
{
const rule<> valid
{
mxid
};
failure(e, "mxid");
}
struct ircd::m::id::validator
:input<const char *>
{
void operator()(const id::sigil &sigil, const string_view &id) const;
void operator()(const string_view &id) const;
}
const validator;
const ircd::m::id::validator;
void
ircd::m::id::parser::validator::operator()(const string_view &id)
ircd::m::id::validator::operator()(const string_view &id)
const try
{
const char *start{id.data()};
const char *const stop{id.data() + id.size()};
qi::parse(start, stop, eps > valid);
qi::parse(start, stop, eps > mxid);
}
catch(const qi::expectation_failure<const char *> &e)
{
const auto rule
{
ircd::string(e.what_)
};
throw INVALID_MXID
{
"Not a valid mxid because of an invalid %s.",
between(rule, '<', '>')
};
failure(e, "mxid");
}
void
ircd::m::id::parser::validator::operator()(const id::sigil &sigil,
const string_view &id)
ircd::m::id::validator::operator()(const id::sigil &sigil,
const string_view &id)
const try
{
const rule<string_view> valid_mxid
{
&lit(char(sigil)) > mxid
,"mxid"
};
const char *start{id.data()};
const char *const stop{id.data() + id.size()};
qi::parse(start, stop, eps > valid);
qi::parse(start, stop, eps > valid_mxid);
}
catch(const qi::expectation_failure<const char *> &e)
{
const auto rule
{
ircd::string(e.what_)
};
throw INVALID_MXID
{
"Not a valid '%s' mxid because of an invalid %s.",
reflect(sigil),
between(rule, '<', '>')
};
failure(e, reflect(sigil));
}
///////////////////////////////////////////////////////////////////////////////
//
// m/id.h
//
struct ircd::m::id::generator
//TODO: abstract this pattern with ircd::json::printer in ircd/spirit.h
struct ircd::m::id::printer
:output<const char *>
{
static string_view random_timebased(const id::sigil &, const mutable_buffer &);
static string_view random_prefixed(const id::sigil &, const string_view &prefix, const mutable_buffer &);
};
template<class generator,
class attribute>
bool operator()(char *&out, char *const &stop, generator&& g, attribute&& a) const
{
const auto throws{[&out, &stop]
{
throw INVALID_MXID
{
"Failed to print attribute '%s' generator '%s' (%zd bytes in buffer)",
demangle<decltype(a)>(),
demangle<decltype(g)>(),
size_t(stop - out)
};
}};
const auto gg
{
maxwidth(size_t(stop - out))[std::forward<generator>(g)] | eps[throws]
};
return karma::generate(out, gg, std::forward<attribute>(a));
}
template<class generator>
bool operator()(char *&out, char *const &stop, generator&& g) const
{
const auto throws{[&out, &stop]
{
throw INVALID_MXID
{
"Failed to print generator '%s' (%zd bytes in buffer)",
demangle<decltype(g)>(),
size_t(stop - out)
};
}};
const auto gg
{
maxwidth(size_t(stop - out))[std::forward<generator>(g)] | eps[throws]
};
return karma::generate(out, gg);
}
template<class... args>
bool operator()(mutable_buffer &out, args&&... a) const
{
return operator()(buffer::begin(out), buffer::end(out), std::forward<args>(a)...);
}
}
const ircd::m::id::printer;
ircd::string_view
ircd::m::id::printer::random_prefixed(const id::sigil &sigil,
const string_view &prefix,
const mutable_buffer &buf)
{
using buffer::data;
const auto len
{
fmt::sprintf(buf, "%c%s%u", char(sigil), prefix, rand::integer())
};
return { data(buf), size_t(len) };
}
ircd::string_view
ircd::m::id::printer::random_timebased(const id::sigil &sigil,
const mutable_buffer &buf)
{
using buffer::data;
using buffer::size;
const auto utime(microtime());
const auto len
{
snprintf(data(buf), size(buf), "%c%zd%06d", char(sigil), utime.first, utime.second)
};
return { data(buf), size_t(len) };
}
//
// id
//
ircd::m::id::id(const string_view &id)
:string_view{id}
@ -245,7 +417,6 @@ ircd::m::id::id(const id::sigil &sigil,
ircd::m::id::id(const id::sigil &sigil,
const mutable_buffer &buf,
const string_view &id)
try
:string_view
{[&sigil, &buf, &id]
{
@ -259,20 +430,6 @@ try
}()}
{
}
catch(const qi::expectation_failure<const char *> &e)
{
const auto rule
{
ircd::string(e.what_)
};
throw INVALID_MXID
{
"Not a valid '%s' mxid because of an invalid %s.",
reflect(sigil),
between(rule, '<', '>')
};
}
ircd::m::id::id(const enum sigil &sigil,
const mutable_buffer &buf,
@ -280,6 +437,8 @@ ircd::m::id::id(const enum sigil &sigil,
const string_view &host)
:string_view{[&]() -> string_view
{
//TODO: output grammar
using buffer::data;
using buffer::size;
@ -331,18 +490,20 @@ ircd::m::id::id(const enum sigil &sigil,
const string_view &host)
:string_view{[&]
{
//TODO: output grammar
char name[64]; switch(sigil)
{
case sigil::USER:
generator::random_prefixed(sigil::USER, "guest", name);
printer::random_prefixed(sigil::USER, "guest", name);
break;
case sigil::ALIAS:
generator::random_prefixed(sigil::ALIAS, "", name);
case sigil::ROOM_ALIAS:
printer::random_prefixed(sigil::ROOM_ALIAS, "", name);
break;
default:
generator::random_timebased(sigil, name);
printer::random_timebased(sigil, name);
break;
};
@ -356,35 +517,59 @@ ircd::m::id::id(const enum sigil &sigil,
{
}
ircd::string_view
ircd::m::id::generator::random_prefixed(const id::sigil &sigil,
const string_view &prefix,
const mutable_buffer &buf)
uint16_t
ircd::m::id::hostport()
const try
{
using buffer::data;
const auto len
//TODO: grammar
const auto port
{
fmt::sprintf(buf, "%c%s%u", char(sigil), prefix, rand::integer())
split(host(), ':').second
};
return { data(buf), size_t(len) };
return port? lex_cast<uint16_t>(port) : 8448;
}
catch(const std::exception &e)
{
return 8448;
}
ircd::string_view
ircd::m::id::generator::random_timebased(const id::sigil &sigil,
const mutable_buffer &buf)
ircd::m::id::hostname()
const
{
using buffer::data;
using buffer::size;
//TODO: grammar
return rsplit(host(), ':').first;
}
const auto utime(microtime());
const auto len
{
snprintf(data(buf), size(buf), "%c%zd%06d", char(sigil), utime.first, utime.second)
};
ircd::string_view
ircd::m::id::name()
const
{
//TODO: grammar
return lstrip(local(), at(0));
}
return { data(buf), size_t(len) };
ircd::string_view
ircd::m::id::host()
const
{
//TODO: grammar
return split(*this, ':').second;
}
ircd::string_view
ircd::m::id::local()
const
{
//TODO: grammar
return split(*this, ':').first;
}
bool
ircd::m::my(const id &id)
{
return my_host(id.host());
}
void
@ -392,7 +577,7 @@ ircd::m::validate(const id::sigil &sigil,
const string_view &id)
try
{
validator(sigil, id);
id::validator(sigil, id);
}
catch(const std::exception &e)
{
@ -409,7 +594,7 @@ ircd::m::valid(const id::sigil &sigil,
const string_view &id)
noexcept try
{
validator(sigil, id);
id::validator(sigil, id);
return true;
}
catch(...)
@ -421,18 +606,14 @@ bool
ircd::m::valid_local(const id::sigil &sigil,
const string_view &id)
{
const auto parts(split(id, ':'));
const auto &local(parts.first);
static const auto test
{
&lit(char(sigil)) > m::id::parser.prefix
};
// local requires a sigil plus at least one character
if(local.size() < 2)
return false;
// local requires the correct sigil type
if(!startswith(local, sigil))
return false;
return true;
const char *start{id.data()};
const char *const stop{id.data() + id.size()};
return qi::parse(start, stop, test);
}
bool
@ -449,16 +630,9 @@ catch(const std::out_of_range &e)
bool
ircd::m::valid_sigil(const char &c)
{
switch(c)
{
case id::EVENT:
case id::USER:
case id::ALIAS:
case id::ROOM:
return true;
}
return false;
const char *start{&c};
const char *const stop{start + 1};
return qi::parse(start, stop, id::parser.sigil);
}
enum ircd::m::id::sigil
@ -469,21 +643,19 @@ try
}
catch(const std::out_of_range &e)
{
throw BAD_SIGIL("sigil undefined");
throw BAD_SIGIL("no sigil provided");
}
enum ircd::m::id::sigil
ircd::m::sigil(const char &c)
{
switch(c)
{
case '$': return id::EVENT;
case '@': return id::USER;
case '#': return id::ALIAS;
case '!': return id::ROOM;
}
id::sigil ret;
const char *start{&c};
const char *const stop{start + 1};
if(!qi::parse(start, stop, id::parser.sigil, ret))
throw BAD_SIGIL("'%c' is not a valid sigil", c);
throw BAD_SIGIL("'%c' is not a valid sigil", c);
return ret;
}
const char *
@ -491,11 +663,28 @@ ircd::m::reflect(const enum id::sigil &c)
{
switch(c)
{
case id::EVENT: return "EVENT";
case id::USER: return "USER";
case id::ALIAS: return "ALIAS";
case id::ROOM: return "ROOM";
case id::EVENT: return "EVENT";
case id::USER: return "USER";
case id::ROOM: return "ROOM";
case id::ROOM_ALIAS: return "ROOM_ALIAS";
case id::GROUP: return "GROUP";
case id::ORIGIN: return "ORIGIN";
}
return "?????";
}
void
ircd::m::failure(const qi::expectation_failure<const char *> &e,
const string_view &goal)
{
auto rule
{
ircd::string(e.what_)
};
throw INVALID_MXID
{
"Not a valid %s because of an invalid %s.", goal, between(rule, '<', '>')
};
}