2018-02-04 03:22:01 +01:00
|
|
|
// Matrix Construct
|
|
|
|
//
|
|
|
|
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
|
|
|
// Copyright (C) 2016-2018 Jason Volk <jason@zemos.net>
|
|
|
|
//
|
|
|
|
// Permission to use, copy, modify, and/or distribute this software for any
|
|
|
|
// purpose with or without fee is hereby granted, provided that the above
|
|
|
|
// copyright notice and this permission notice is present in all copies. The
|
|
|
|
// full license for this software is available in the LICENSE file.
|
2017-12-12 21:14:47 +01:00
|
|
|
|
2020-08-13 06:37:17 +02:00
|
|
|
namespace ircd::rfc3986::parser
|
2020-02-02 01:20:35 +01:00
|
|
|
{
|
|
|
|
using namespace ircd::spirit;
|
2020-08-13 06:37:17 +02:00
|
|
|
}
|
2020-02-02 01:20:35 +01:00
|
|
|
|
2019-06-14 04:06:05 +02:00
|
|
|
decltype(ircd::rfc3986::parser::sub_delims)
|
|
|
|
ircd::rfc3986::parser::sub_delims
|
|
|
|
{
|
|
|
|
lit('!') | lit('$') | lit('&') | lit('\'') |
|
|
|
|
lit('(') | lit(')') | lit('*') | lit('+') |
|
|
|
|
lit(',') | lit(';') | lit('=')
|
|
|
|
,"sub-delims"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::gen_delims)
|
|
|
|
ircd::rfc3986::parser::gen_delims
|
|
|
|
{
|
|
|
|
lit(':') | lit('/') | lit('?') | lit('#') |
|
|
|
|
lit('[') | lit(']') | lit('@')
|
|
|
|
,"gen-delims"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::unreserved)
|
|
|
|
ircd::rfc3986::parser::unreserved
|
|
|
|
{
|
|
|
|
ascii::alpha | ascii::digit |
|
|
|
|
lit('-') | lit('.') | lit('_') | lit('~')
|
2020-03-29 03:57:26 +02:00
|
|
|
,"unreserved"
|
2019-06-14 04:06:05 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::reserved)
|
|
|
|
ircd::rfc3986::parser::reserved
|
|
|
|
{
|
|
|
|
gen_delims | sub_delims
|
|
|
|
,"reserved"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::pct_encoded)
|
|
|
|
ircd::rfc3986::parser::pct_encoded
|
|
|
|
{
|
|
|
|
lit('%') >> repeat(2)[ascii::xdigit]
|
|
|
|
,"pct-encoded"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::pchar)
|
|
|
|
ircd::rfc3986::parser::pchar
|
|
|
|
{
|
|
|
|
unreserved | pct_encoded | sub_delims | lit(':') | lit('@')
|
|
|
|
,"pchar"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::query)
|
|
|
|
ircd::rfc3986::parser::query
|
|
|
|
{
|
|
|
|
*(pchar | lit('/') | lit('?'))
|
|
|
|
,"query"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::fragment)
|
|
|
|
ircd::rfc3986::parser::fragment
|
|
|
|
{
|
|
|
|
*(pchar | lit('/') | lit('?'))
|
|
|
|
,"fragment"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::segment_nz_nc)
|
|
|
|
ircd::rfc3986::parser::segment_nz_nc
|
|
|
|
{
|
|
|
|
+(unreserved | pct_encoded | sub_delims | lit('@'))
|
|
|
|
,"segment-nz-nc"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::segment_nz)
|
|
|
|
ircd::rfc3986::parser::segment_nz
|
|
|
|
{
|
|
|
|
+pchar
|
|
|
|
,"segment-nz"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::segment)
|
|
|
|
ircd::rfc3986::parser::segment
|
|
|
|
{
|
|
|
|
*pchar
|
|
|
|
,"segment"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::path_abempty)
|
|
|
|
ircd::rfc3986::parser::path_abempty
|
|
|
|
{
|
|
|
|
*(lit('/') >> segment)
|
|
|
|
,"path-abempty"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::path_noscheme)
|
|
|
|
ircd::rfc3986::parser::path_noscheme
|
|
|
|
{
|
|
|
|
segment_nz_nc >> *(lit('/') >> segment)
|
|
|
|
,"path-noscheme"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::path_rootless)
|
|
|
|
ircd::rfc3986::parser::path_rootless
|
|
|
|
{
|
|
|
|
segment_nz >> *(lit('/') >> segment)
|
|
|
|
,"path-rootless"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::path_absolute)
|
|
|
|
ircd::rfc3986::parser::path_absolute
|
|
|
|
{
|
|
|
|
lit('/') >> -(path_rootless)
|
|
|
|
,"path-absolute"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::path)
|
|
|
|
ircd::rfc3986::parser::path
|
|
|
|
{
|
|
|
|
-(path_abempty | path_absolute | path_noscheme | path_rootless)
|
|
|
|
,"path"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::reg_name)
|
|
|
|
ircd::rfc3986::parser::reg_name
|
|
|
|
{
|
|
|
|
*(unreserved | pct_encoded | sub_delims)
|
|
|
|
,"reg-name"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::userinfo)
|
|
|
|
ircd::rfc3986::parser::userinfo
|
|
|
|
{
|
|
|
|
*(unreserved | pct_encoded | sub_delims | lit(':'))
|
|
|
|
,"userinfo"
|
|
|
|
};
|
|
|
|
|
2019-02-26 21:58:51 +01:00
|
|
|
decltype(ircd::rfc3986::parser::port)
|
|
|
|
ircd::rfc3986::parser::port
|
2017-12-12 21:14:47 +01:00
|
|
|
{
|
2019-02-26 21:58:51 +01:00
|
|
|
ushort_
|
|
|
|
,"port number"
|
|
|
|
};
|
2017-12-12 21:14:47 +01:00
|
|
|
|
2019-02-26 21:58:51 +01:00
|
|
|
decltype(ircd::rfc3986::parser::ip4_octet)
|
|
|
|
ircd::rfc3986::parser::ip4_octet
|
2017-12-12 21:14:47 +01:00
|
|
|
{
|
2019-02-26 21:58:51 +01:00
|
|
|
repeat(1,3)[char_("0-9")]
|
|
|
|
,"IPv4 octet"
|
|
|
|
};
|
2017-12-12 21:14:47 +01:00
|
|
|
|
2019-03-25 20:07:06 +01:00
|
|
|
decltype(ircd::rfc3986::parser::ip4_address)
|
|
|
|
ircd::rfc3986::parser::ip4_address
|
|
|
|
{
|
|
|
|
repeat(3)[ip4_octet >> '.'] >> ip4_octet
|
|
|
|
,"IPv4 address"
|
|
|
|
};
|
|
|
|
|
2019-02-26 21:58:51 +01:00
|
|
|
decltype(ircd::rfc3986::parser::ip4_literal)
|
|
|
|
ircd::rfc3986::parser::ip4_literal
|
|
|
|
{
|
2019-03-25 20:07:06 +01:00
|
|
|
ip4_address
|
2019-02-26 21:58:51 +01:00
|
|
|
,"IPv4 literal"
|
|
|
|
};
|
|
|
|
|
2019-03-25 20:07:06 +01:00
|
|
|
decltype(ircd::rfc3986::parser::ip4_remote)
|
|
|
|
ircd::rfc3986::parser::ip4_remote
|
|
|
|
{
|
2022-06-29 07:36:46 +02:00
|
|
|
ip4_literal >> -(':' >> port)
|
2019-03-25 20:07:06 +01:00
|
|
|
,"IPv4 remote"
|
|
|
|
};
|
|
|
|
|
2019-02-26 21:58:51 +01:00
|
|
|
decltype(ircd::rfc3986::parser::ip6_char)
|
|
|
|
ircd::rfc3986::parser::ip6_char
|
|
|
|
{
|
|
|
|
char_("0-9a-fA-F")
|
|
|
|
,"IPv6 character"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::ip6_h16)
|
|
|
|
ircd::rfc3986::parser::ip6_h16
|
|
|
|
{
|
|
|
|
repeat(1,4)[ip6_char]
|
|
|
|
,"IPv6 hexdigit"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::ip6_piece)
|
|
|
|
ircd::rfc3986::parser::ip6_piece
|
|
|
|
{
|
|
|
|
ip6_h16 >> ':'
|
|
|
|
,"IPv6 address piece"
|
|
|
|
};
|
|
|
|
|
|
|
|
// This is reversed from the BNF in the RFC otherwise it requires
|
|
|
|
// backtracking during the repeat[]; parsers are adjusted accordingly.
|
|
|
|
decltype(ircd::rfc3986::parser::ip6_ipiece)
|
|
|
|
ircd::rfc3986::parser::ip6_ipiece
|
|
|
|
{
|
|
|
|
':' >> ip6_h16
|
|
|
|
,"IPv6 address piece"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::ip6_ls32)
|
|
|
|
ircd::rfc3986::parser::ip6_ls32
|
|
|
|
{
|
2019-03-25 20:07:06 +01:00
|
|
|
(ip6_h16 >> ':' >> ip6_h16) | ip4_address
|
2019-02-26 21:58:51 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
/// https://tools.ietf.org/html/rfc3986 Appendix A
|
|
|
|
decltype(ircd::rfc3986::parser::ip6_addr)
|
|
|
|
ircd::rfc3986::parser::ip6_addr
|
|
|
|
{
|
|
|
|
{ repeat(6)[ip6_piece] >> ip6_ls32 },
|
|
|
|
{ lit("::") >> repeat(5)[ip6_piece] >> ip6_ls32 },
|
|
|
|
{ ip6_h16 >> lit("::") >> repeat(4)[ip6_piece] >> ip6_ls32 },
|
|
|
|
{ ip6_h16 >> repeat(0,1)[ip6_ipiece] >> lit("::") >> repeat(3)[ip6_piece] >> ip6_ls32 },
|
|
|
|
{ ip6_h16 >> repeat(0,2)[ip6_ipiece] >> lit("::") >> repeat(2)[ip6_piece] >> ip6_ls32 },
|
|
|
|
{ ip6_h16 >> repeat(0,3)[ip6_ipiece] >> lit("::") >> ip6_piece >> ip6_ls32 },
|
|
|
|
{ ip6_h16 >> repeat(0,4)[ip6_ipiece] >> lit("::") >> ip6_ls32 },
|
|
|
|
{ ip6_h16 >> repeat(0,5)[ip6_ipiece] >> lit("::") >> -ip6_h16 },
|
|
|
|
{ lit("::") >> -ip6_h16 },
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::ip6_address)
|
|
|
|
ircd::rfc3986::parser::ip6_address
|
|
|
|
{
|
|
|
|
ip6_addr[0] | ip6_addr[1] | ip6_addr[2] |
|
|
|
|
ip6_addr[3] | ip6_addr[4] | ip6_addr[5] |
|
|
|
|
ip6_addr[6] | ip6_addr[7] | ip6_addr[8]
|
|
|
|
,"IPv6 address"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::ip6_literal)
|
|
|
|
ircd::rfc3986::parser::ip6_literal
|
|
|
|
{
|
|
|
|
'[' >> ip6_address >> ']'
|
2019-03-25 20:07:06 +01:00
|
|
|
,"IPv6 literal"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::ip6_remote)
|
|
|
|
ircd::rfc3986::parser::ip6_remote
|
|
|
|
{
|
2022-06-29 07:36:46 +02:00
|
|
|
ip6_literal >> -(':' >> port)
|
2019-03-25 20:07:06 +01:00
|
|
|
,"IPv6 literal"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::ip_address)
|
|
|
|
ircd::rfc3986::parser::ip_address
|
|
|
|
{
|
2019-04-17 00:27:09 +02:00
|
|
|
ip4_address | ip6_address
|
2019-03-25 20:07:06 +01:00
|
|
|
,"IP address"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::ip_literal)
|
|
|
|
ircd::rfc3986::parser::ip_literal
|
|
|
|
{
|
|
|
|
ip6_literal | ip4_literal
|
|
|
|
,"IP literal"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::ip_remote)
|
|
|
|
ircd::rfc3986::parser::ip_remote
|
|
|
|
{
|
2022-06-29 07:36:46 +02:00
|
|
|
ip_literal >> -(':' >> port)
|
2019-03-25 20:07:06 +01:00
|
|
|
,"IP literal"
|
2019-02-26 21:58:51 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::hostname)
|
|
|
|
ircd::rfc3986::parser::hostname
|
|
|
|
{
|
|
|
|
char_("A-Za-z0-9") >> *(char_("A-Za-z0-9\x2D")) // x2D is '-'
|
|
|
|
,"hostname"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::domain)
|
|
|
|
ircd::rfc3986::parser::domain
|
|
|
|
{
|
|
|
|
hostname % '.'
|
|
|
|
,"domain"
|
|
|
|
};
|
|
|
|
|
2019-03-25 20:07:06 +01:00
|
|
|
decltype(ircd::rfc3986::parser::hostport)
|
|
|
|
ircd::rfc3986::parser::hostport
|
|
|
|
{
|
2022-06-29 07:36:46 +02:00
|
|
|
domain >> -(':' >> port)
|
2019-03-25 20:07:06 +01:00
|
|
|
,"hostport"
|
|
|
|
};
|
|
|
|
|
2019-02-26 21:58:51 +01:00
|
|
|
decltype(ircd::rfc3986::parser::host)
|
|
|
|
ircd::rfc3986::parser::host
|
|
|
|
{
|
2019-04-17 00:27:09 +02:00
|
|
|
ip_address | domain
|
2019-02-26 21:58:51 +01:00
|
|
|
,"host"
|
|
|
|
};
|
|
|
|
|
2019-03-25 20:07:06 +01:00
|
|
|
decltype(ircd::rfc3986::parser::host_literal)
|
|
|
|
ircd::rfc3986::parser::host_literal
|
|
|
|
{
|
|
|
|
ip_literal | domain
|
|
|
|
,"host literal"
|
|
|
|
};
|
|
|
|
|
2019-02-26 21:58:51 +01:00
|
|
|
decltype(ircd::rfc3986::parser::remote)
|
|
|
|
ircd::rfc3986::parser::remote
|
|
|
|
{
|
2019-03-25 20:07:06 +01:00
|
|
|
ip_remote | hostport
|
2019-02-26 21:58:51 +01:00
|
|
|
,"remote"
|
|
|
|
};
|
2017-12-12 21:14:47 +01:00
|
|
|
|
2019-06-14 04:06:05 +02:00
|
|
|
decltype(ircd::rfc3986::parser::authority)
|
|
|
|
ircd::rfc3986::parser::authority
|
|
|
|
{
|
|
|
|
-(userinfo >> lit('@')) >> remote
|
|
|
|
,"authority"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::scheme)
|
|
|
|
ircd::rfc3986::parser::scheme
|
|
|
|
{
|
|
|
|
ascii::alpha >> *(ascii::alnum | lit('+') | lit('-') | lit('.'))
|
|
|
|
,"scheme"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::hier_part)
|
|
|
|
ircd::rfc3986::parser::hier_part
|
|
|
|
{
|
|
|
|
-((lit("//") >> authority >> path_abempty) | path_absolute | path_rootless)
|
|
|
|
,"hier_part"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::relative_part)
|
|
|
|
ircd::rfc3986::parser::relative_part
|
|
|
|
{
|
|
|
|
-((lit("//") >> authority >> path_abempty) | path_absolute | path_noscheme)
|
|
|
|
,"relative-part"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::relative_ref)
|
|
|
|
ircd::rfc3986::parser::relative_ref
|
|
|
|
{
|
|
|
|
relative_part >> -(lit('?') >> query) >> -(lit('#') >> fragment)
|
|
|
|
,"relative-ref"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::absolute_uri)
|
|
|
|
ircd::rfc3986::parser::absolute_uri
|
|
|
|
{
|
|
|
|
scheme >> lit(':') >> hier_part >> -(lit('?') >> query)
|
|
|
|
,"absolute-URI"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::uri)
|
|
|
|
ircd::rfc3986::parser::uri
|
|
|
|
{
|
|
|
|
scheme >> lit(':') >> hier_part >> -(lit('?') >> query) >> -(lit('#') >> fragment)
|
|
|
|
,"URI"
|
|
|
|
};
|
|
|
|
|
|
|
|
decltype(ircd::rfc3986::parser::uri_ref)
|
|
|
|
ircd::rfc3986::parser::uri_ref
|
|
|
|
{
|
|
|
|
uri | relative_ref
|
|
|
|
,"URI-reference"
|
|
|
|
};
|
|
|
|
|
2019-06-14 05:44:51 +02:00
|
|
|
//
|
|
|
|
// uri decompose
|
|
|
|
//
|
|
|
|
|
2020-08-13 06:37:17 +02:00
|
|
|
#pragma GCC visibility push(internal)
|
2019-06-14 05:44:51 +02:00
|
|
|
BOOST_FUSION_ADAPT_STRUCT
|
|
|
|
(
|
|
|
|
ircd::rfc3986::uri,
|
|
|
|
( decltype(ircd::rfc3986::uri::scheme), scheme )
|
|
|
|
( decltype(ircd::rfc3986::uri::user), user )
|
|
|
|
( decltype(ircd::rfc3986::uri::remote), remote )
|
|
|
|
( decltype(ircd::rfc3986::uri::path), path )
|
|
|
|
( decltype(ircd::rfc3986::uri::query), query )
|
|
|
|
( decltype(ircd::rfc3986::uri::fragment), fragment )
|
|
|
|
)
|
2020-08-13 06:37:17 +02:00
|
|
|
#pragma GCC visibility pop
|
2019-06-14 05:44:51 +02:00
|
|
|
|
2022-06-15 05:37:49 +02:00
|
|
|
namespace ircd::rfc3986::parser
|
2019-06-14 05:44:51 +02:00
|
|
|
{
|
2022-06-15 05:37:49 +02:00
|
|
|
static const expr uri_parse
|
|
|
|
{
|
2019-06-14 05:44:51 +02:00
|
|
|
raw[parser::scheme] >> lit("://")
|
|
|
|
>> -raw[parser::userinfo >> lit('@')]
|
|
|
|
>> raw[parser::remote]
|
|
|
|
>> raw[parser::path_abempty]
|
|
|
|
>> -raw[lit('?') >> parser::query]
|
|
|
|
>> -raw[lit('#') >> parser::fragment]
|
2022-06-15 05:37:49 +02:00
|
|
|
,"uri"
|
|
|
|
};
|
|
|
|
|
|
|
|
static const rule<rfc3986::uri> parse_uri
|
|
|
|
{
|
|
|
|
expect[uri_parse]
|
|
|
|
,"uri"
|
|
|
|
};
|
|
|
|
}
|
2019-06-14 05:44:51 +02:00
|
|
|
|
2022-06-11 23:13:19 +02:00
|
|
|
/// Decompose `input` into this object's members by running parser::parse_uri
/// over it through ircd::parse<error>. Afterwards the delimiter characters
/// are trimmed from the user, query and fragment members.
ircd::rfc3986::uri::uri(const string_view &input)
{
	const char *cursor{begin(input)};
	const char *const last{end(input)};
	ircd::parse<error>(cursor, last, parser::parse_uri, *this);

	//TODO: XXX Can this go?
	user = rstrip(user, '@');
	query = lstrip(query, '?');
	fragment = lstrip(fragment, '#');
}
|
|
|
|
|
2019-06-14 04:06:05 +02:00
|
|
|
//
|
2020-03-29 03:33:34 +02:00
|
|
|
// uri decoding
|
2019-06-14 04:06:05 +02:00
|
|
|
//
|
|
|
|
|
2022-06-17 06:50:22 +02:00
|
|
|
namespace ircd::rfc3986::parser::decoder
|
2020-03-29 03:33:34 +02:00
|
|
|
{
|
|
|
|
template<class R = unused_type,
|
|
|
|
class... S>
|
2022-06-17 06:50:22 +02:00
|
|
|
struct [[gnu::visibility("internal")]] rule
|
|
|
|
:qi::rule<const char *, R, S...>
|
|
|
|
{
|
|
|
|
using qi::rule<const char *, R, S...>::rule;
|
|
|
|
};
|
2020-03-29 03:33:34 +02:00
|
|
|
|
2022-06-17 06:50:22 +02:00
|
|
|
static const auto is_safe
|
2020-03-29 03:33:34 +02:00
|
|
|
{
|
2022-08-02 21:19:05 +02:00
|
|
|
[](const int8_t val, auto &c, bool &pass)
|
2020-03-29 03:33:34 +02:00
|
|
|
{
|
2022-06-17 06:50:22 +02:00
|
|
|
pass = (val > 0x1F) | (val < 0x00);
|
|
|
|
attr_at<0>(c) = val;
|
|
|
|
}
|
|
|
|
};
|
2020-03-29 03:33:34 +02:00
|
|
|
|
2022-08-02 21:19:05 +02:00
|
|
|
const rule<char()> decode_char_safe
|
2020-03-29 03:33:34 +02:00
|
|
|
{
|
2022-08-02 21:19:05 +02:00
|
|
|
lit('%') > qi::int_parser<uint8_t, 16, 2, 2>{}[is_safe]
|
2020-03-29 03:33:34 +02:00
|
|
|
,"url decodable character"
|
|
|
|
};
|
|
|
|
|
2022-08-02 21:19:05 +02:00
|
|
|
const expr decode_char
|
2020-03-29 03:33:34 +02:00
|
|
|
{
|
2022-08-02 21:19:05 +02:00
|
|
|
lit('%') >> qi::int_parser<uint8_t, 16, 2, 2>{}
|
2022-06-17 06:50:22 +02:00
|
|
|
,"url decodable character"
|
|
|
|
};
|
2020-04-14 02:06:28 +02:00
|
|
|
|
2022-06-17 06:50:22 +02:00
|
|
|
const expr unreserved_char
|
|
|
|
{
|
|
|
|
//char_("A-Za-z0-9._~!$+*'(),-")
|
2020-10-21 06:34:00 +02:00
|
|
|
//NOTE: allow any non-control character here. No reason for trouble with
|
2020-04-14 02:06:28 +02:00
|
|
|
//NOTE: already-decoded inputs unless some other grammar expects it.
|
2020-10-21 06:34:00 +02:00
|
|
|
(~ascii::cntrl) - '%'
|
2020-03-29 03:33:34 +02:00
|
|
|
,"url unreserved characters"
|
|
|
|
};
|
|
|
|
|
2022-06-17 06:50:22 +02:00
|
|
|
const rule<mutable_buffer> decode_safe
|
2020-03-29 03:33:34 +02:00
|
|
|
{
|
2022-06-17 06:50:22 +02:00
|
|
|
*(unreserved_char | decode_char_safe)
|
|
|
|
,"url safe decode"
|
2020-03-29 03:33:34 +02:00
|
|
|
};
|
|
|
|
|
2022-06-17 06:50:22 +02:00
|
|
|
const rule<mutable_buffer> decode_unsafe
|
2020-03-29 03:33:34 +02:00
|
|
|
{
|
2022-06-17 06:50:22 +02:00
|
|
|
*((char_ - '%') | decode_char)
|
|
|
|
,"url unsafe decode"
|
2020-03-29 03:33:34 +02:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Percent-decode `url` into `buf` using the unfiltered decoder grammar and
/// the non-throwing ircd::parse overload. At most min(size(url), size(buf))
/// input characters are examined. The returned view covers the bytes written
/// at the front of `buf`.
ircd::const_buffer
ircd::rfc3986::decode_unsafe(const mutable_buffer &buf,
                             const string_view &url)
noexcept
{
	const char *cursor{url.data()};
	const char *const last
	{
		cursor + std::min(size(url), size(buf))
	};

	// Output starts out as an empty window at the front of buf.
	mutable_buffer out
	{
		data(buf), size_t(0)
	};

	const bool ok
	{
		ircd::parse(std::nothrow, cursor, last, parser::decoder::decode_unsafe, out)
	};

	assert(size(out) <= size(url));
	return string_view
	{
		data(out), size(out)
	};
}
|
|
|
|
|
|
|
|
/// Percent-decode `url` into `buf` using the safe decoder grammar via
/// ircd::parse<decoding_error>. At most min(size(url), size(buf)) input
/// characters are examined. The returned view covers the bytes written at
/// the front of `buf`.
ircd::string_view
ircd::rfc3986::decode(const mutable_buffer &buf,
                      const string_view &url)
{
	const char *cursor{url.data()};
	const char *const last
	{
		cursor + std::min(size(url), size(buf))
	};

	// Output starts out as an empty window at the front of buf.
	mutable_buffer out
	{
		data(buf), size_t(0)
	};

	const bool ok
	{
		ircd::parse<decoding_error>(cursor, last, parser::decoder::decode_safe, out)
	};

	assert(size(out) <= size(url));
	return string_view
	{
		data(out), size(out)
	};
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// uri encoding
|
|
|
|
//
|
|
|
|
|
2022-06-17 06:50:22 +02:00
|
|
|
namespace ircd::rfc3986::parser::encoder
|
2020-03-29 03:33:34 +02:00
|
|
|
{
|
|
|
|
template<class R = unused_type,
|
|
|
|
class... S>
|
2022-06-17 06:50:22 +02:00
|
|
|
struct [[gnu::visibility("internal")]] rule
|
|
|
|
:karma::rule<char *, R, S...>
|
|
|
|
{
|
|
|
|
using karma::rule<char *, R, S...>::rule;
|
|
|
|
};
|
2020-03-29 03:33:34 +02:00
|
|
|
|
|
|
|
const rule<char()> unreserved
|
|
|
|
{
|
|
|
|
char_("A-Za-z0-9._~-")
|
|
|
|
,"url unencoded"
|
|
|
|
};
|
|
|
|
|
2020-12-22 17:51:30 +01:00
|
|
|
const rule<string_view> encode
|
2020-03-29 03:33:34 +02:00
|
|
|
{
|
2020-11-08 12:47:27 +01:00
|
|
|
*(unreserved | (lit('%') << karma::right_align(2, '0')[karma::upper[karma::hex]]))
|
2020-03-29 03:33:34 +02:00
|
|
|
,"url encode"
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2018-12-07 01:27:05 +01:00
|
|
|
/// Encode `members` into `out` as `key=value` pairs joined by '&', with each
/// key and value passed through encode(). Throws panic for a member whose
/// value is neither flagged serial nor of type json::STRING. Returns the
/// completed portion of the output window.
ircd::string_view
ircd::rfc3986::encode(const mutable_buffer &out,
                      const json::members &members)
{
	window_buffer buf{out};

	// Writes one "key=value" pair at the current window position.
	const auto write_pair{[&buf](const json::member &member)
	{
		assert(type(member.first) == json::STRING);

		const bool encodable
		{
			member.second.serial || type(member.second) == json::STRING
		};

		if(unlikely(!encodable))
			throw panic
			{
				"Cannot encode non-serial json::member type '%s'",
				reflect(type(member.second))
			};

		consume(buf, size(encode(buf, member.first)));
		consume(buf, copy(buf, '='));
		consume(buf, size(encode(buf, member.second)));
	}};

	for(auto it(begin(members)); it != end(members); ++it)
	{
		// Every pair after the first is preceded by a separator.
		if(it != begin(members))
			consume(buf, copy(buf, '&'));

		write_pair(*it);
	}

	return buf.completed();
}
|
|
|
|
|
2017-12-12 21:14:47 +01:00
|
|
|
/// Percent-encode `url` into `buf_` with the encoder grammar. A local copy
/// of the buffer is handed to ircd::generate; the result spans from the start
/// of `buf_` to wherever that copy's data pointer ends up.
ircd::string_view
ircd::rfc3986::encode(const mutable_buffer &buf_,
                      const string_view &url)
{
	mutable_buffer cursor{buf_};

	const bool ok
	{
		ircd::generate(cursor, parser::encoder::encode, url)
	};

	return string_view
	{
		data(buf_), data(cursor)
	};
}
|
|
|
|
|
2020-03-29 03:33:34 +02:00
|
|
|
//
|
|
|
|
// general interface
|
|
|
|
//
|
2018-10-03 02:19:50 +02:00
|
|
|
|
2022-06-15 05:37:49 +02:00
|
|
|
namespace ircd::rfc3986::parser
|
2019-03-13 18:50:16 +01:00
|
|
|
{
|
2022-06-15 05:37:49 +02:00
|
|
|
[[gnu::visibility("internal")]]
|
|
|
|
extern const rule<string_view>
|
|
|
|
host_literal_parse,
|
|
|
|
host_non_literal_parse,
|
|
|
|
host_alternative_parse,
|
2020-05-19 05:35:02 +02:00
|
|
|
host_parse;
|
|
|
|
|
2022-06-15 05:37:49 +02:00
|
|
|
[[gnu::visibility("internal")]]
|
|
|
|
extern const rule<uint16_t>
|
2020-05-19 05:35:02 +02:00
|
|
|
port_parse;
|
2020-05-14 22:00:23 +02:00
|
|
|
}
|
2019-03-25 20:07:06 +01:00
|
|
|
|
2022-06-15 05:37:49 +02:00
|
|
|
decltype(ircd::rfc3986::parser::host_literal_parse)
|
|
|
|
ircd::rfc3986::parser::host_literal_parse
|
2020-05-14 22:00:23 +02:00
|
|
|
{
|
|
|
|
parser::ip6_address
|
|
|
|
};
|
2019-03-25 20:07:06 +01:00
|
|
|
|
2022-06-15 05:37:49 +02:00
|
|
|
decltype(ircd::rfc3986::parser::host_non_literal_parse)
|
|
|
|
ircd::rfc3986::parser::host_non_literal_parse
|
2020-05-14 22:00:23 +02:00
|
|
|
{
|
|
|
|
parser::ip6_address | parser::ip4_address | parser::domain
|
|
|
|
};
|
2020-05-14 19:33:54 +02:00
|
|
|
|
2022-06-15 05:37:49 +02:00
|
|
|
decltype(ircd::rfc3986::parser::host_alternative_parse)
|
|
|
|
ircd::rfc3986::parser::host_alternative_parse
|
2020-05-14 22:00:23 +02:00
|
|
|
{
|
2022-06-15 05:37:49 +02:00
|
|
|
(&lit('[') > raw[host_literal_parse]) | raw[host_non_literal_parse]
|
2020-05-14 22:00:23 +02:00
|
|
|
,"host"
|
|
|
|
};
|
2019-03-13 18:50:16 +01:00
|
|
|
|
2022-06-15 05:37:49 +02:00
|
|
|
decltype(ircd::rfc3986::parser::host_parse)
|
|
|
|
ircd::rfc3986::parser::host_parse
|
2020-05-14 22:00:23 +02:00
|
|
|
{
|
2022-06-15 05:37:49 +02:00
|
|
|
expect[host_alternative_parse >> omit[&lit(':') | eoi]]
|
2020-05-14 22:00:23 +02:00
|
|
|
,"host"
|
|
|
|
};
|
|
|
|
|
2022-06-15 05:37:49 +02:00
|
|
|
decltype(ircd::rfc3986::parser::port_parse)
|
|
|
|
ircd::rfc3986::parser::port_parse
|
2020-05-15 01:36:50 +02:00
|
|
|
{
|
2022-06-15 05:37:49 +02:00
|
|
|
omit[host_alternative_parse] >> (omit[lit(':')] >> port) >> eoi
|
2020-05-15 01:36:50 +02:00
|
|
|
,"port"
|
|
|
|
};
|
|
|
|
|
2020-05-14 22:00:23 +02:00
|
|
|
/// Extract the host portion of `str` by running parser::host_parse through
/// ircd::parse<error>. The result is asserted to be non-empty.
ircd::string_view
ircd::rfc3986::host(const string_view &str)
{
	const char *cursor{str.data()};
	const char *const last{cursor + str.size()};

	string_view result;
	ircd::parse<error>(cursor, last, parser::host_parse, result);
	assert(!result.empty());
	return result;
}
|
|
|
|
|
|
|
|
uint16_t
|
|
|
|
ircd::rfc3986::port(const string_view &str)
|
|
|
|
{
|
2022-06-15 05:37:49 +02:00
|
|
|
uint16_t ret(0);
|
2019-03-13 18:50:16 +01:00
|
|
|
const char *start(str.data()), *const stop(start + str.size());
|
2022-06-15 05:37:49 +02:00
|
|
|
ircd::parse<error>(start, stop, parser::port_parse, ret);
|
2019-03-13 18:50:16 +01:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2019-06-14 04:06:05 +02:00
|
|
|
//
|
|
|
|
// validators
|
|
|
|
//
|
|
|
|
|
2018-10-03 02:19:50 +02:00
|
|
|
bool
|
|
|
|
ircd::rfc3986::valid_remote(std::nothrow_t,
|
|
|
|
const string_view &str)
|
|
|
|
{
|
2022-06-11 23:12:41 +02:00
|
|
|
if(likely(str.size() <= DOMAIN_MAX + 6))
|
|
|
|
return valid(std::nothrow, parser::remote, str);
|
2019-03-13 18:31:11 +01:00
|
|
|
|
2022-06-11 23:12:41 +02:00
|
|
|
return false;
|
2018-10-03 02:19:50 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
ircd::rfc3986::valid_remote(const string_view &str)
|
|
|
|
{
|
2022-06-11 23:12:41 +02:00
|
|
|
if(unlikely(str.size() > DOMAIN_MAX + 6))
|
2019-03-13 18:31:11 +01:00
|
|
|
throw error
|
|
|
|
{
|
|
|
|
"String length %zu exceeds maximum of %zu characters",
|
|
|
|
size(str),
|
|
|
|
DOMAIN_MAX + 6
|
|
|
|
};
|
|
|
|
|
2019-03-25 20:07:06 +01:00
|
|
|
valid(parser::remote, str);
|
2018-10-03 02:19:50 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
2019-03-25 20:07:06 +01:00
|
|
|
ircd::rfc3986::valid_domain(std::nothrow_t,
|
|
|
|
const string_view &str)
|
2018-10-03 02:19:50 +02:00
|
|
|
{
|
2022-06-11 23:12:41 +02:00
|
|
|
if(likely(str.size() <= DOMAIN_MAX))
|
|
|
|
return valid(std::nothrow, parser::domain, str);
|
2019-03-13 18:31:11 +01:00
|
|
|
|
2022-06-11 23:12:41 +02:00
|
|
|
return false;
|
2018-10-03 02:19:50 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2019-03-25 20:07:06 +01:00
|
|
|
ircd::rfc3986::valid_domain(const string_view &str)
|
2018-10-03 02:19:50 +02:00
|
|
|
{
|
2022-06-11 23:12:41 +02:00
|
|
|
if(unlikely(str.size() > DOMAIN_MAX))
|
2019-03-13 18:31:11 +01:00
|
|
|
throw error
|
|
|
|
{
|
|
|
|
"String length %zu exceeds maximum of %zu characters",
|
|
|
|
size(str),
|
|
|
|
DOMAIN_MAX
|
|
|
|
};
|
|
|
|
|
2019-03-25 20:07:06 +01:00
|
|
|
valid(parser::domain, str);
|
2018-10-03 02:19:50 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
2019-03-25 20:07:06 +01:00
|
|
|
ircd::rfc3986::valid_host(std::nothrow_t,
|
|
|
|
const string_view &str)
|
2018-10-03 02:19:50 +02:00
|
|
|
{
|
2022-06-11 23:12:41 +02:00
|
|
|
if(likely(str.size() <= DOMAIN_MAX))
|
|
|
|
return valid(std::nothrow, parser::host, str);
|
2019-03-13 18:31:11 +01:00
|
|
|
|
2022-06-11 23:12:41 +02:00
|
|
|
return false;
|
2018-10-03 02:19:50 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2019-03-25 20:07:06 +01:00
|
|
|
ircd::rfc3986::valid_host(const string_view &str)
|
2018-10-03 02:19:50 +02:00
|
|
|
{
|
2022-06-11 23:12:41 +02:00
|
|
|
if(unlikely(str.size() > DOMAIN_MAX))
|
2019-03-13 18:31:11 +01:00
|
|
|
throw error
|
|
|
|
{
|
|
|
|
"String length %zu exceeds maximum of %zu characters",
|
|
|
|
size(str),
|
|
|
|
DOMAIN_MAX
|
|
|
|
};
|
|
|
|
|
2019-03-25 20:07:06 +01:00
|
|
|
valid(parser::host, str);
|
2018-10-03 02:19:50 +02:00
|
|
|
}
|
|
|
|
|
2019-03-25 04:46:10 +01:00
|
|
|
bool
|
2019-03-25 20:07:06 +01:00
|
|
|
ircd::rfc3986::valid_hostname(std::nothrow_t,
|
|
|
|
const string_view &str)
|
2019-03-25 04:46:10 +01:00
|
|
|
{
|
2022-06-11 23:12:41 +02:00
|
|
|
if(likely(str.size() <= HOSTNAME_MAX))
|
|
|
|
return valid(std::nothrow, parser::hostname, str);
|
2019-03-25 04:46:10 +01:00
|
|
|
|
2022-06-11 23:12:41 +02:00
|
|
|
return false;
|
2019-03-25 04:46:10 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2019-03-25 20:07:06 +01:00
|
|
|
ircd::rfc3986::valid_hostname(const string_view &str)
|
2019-03-25 04:46:10 +01:00
|
|
|
{
|
2022-06-11 23:12:41 +02:00
|
|
|
if(unlikely(str.size() > HOSTNAME_MAX))
|
2019-03-25 20:07:06 +01:00
|
|
|
throw error
|
|
|
|
{
|
|
|
|
"String length %zu exceeds maximum of %zu characters",
|
|
|
|
size(str),
|
|
|
|
HOSTNAME_MAX
|
|
|
|
};
|
2019-03-25 04:46:10 +01:00
|
|
|
|
2019-03-25 20:07:06 +01:00
|
|
|
valid(parser::hostname, str);
|
2019-03-25 04:46:10 +01:00
|
|
|
}
|
|
|
|
|
2018-10-03 02:19:50 +02:00
|
|
|
bool
|
2019-03-25 20:07:06 +01:00
|
|
|
ircd::rfc3986::valid(std::nothrow_t,
|
|
|
|
const parser::rule<> &rule,
|
|
|
|
const string_view &str)
|
2018-10-03 02:19:50 +02:00
|
|
|
{
|
2019-04-17 00:27:09 +02:00
|
|
|
const parser::rule<> only_rule
|
2018-10-03 02:19:50 +02:00
|
|
|
{
|
2022-06-17 06:50:22 +02:00
|
|
|
rule >> parser::eoi
|
2018-10-03 02:19:50 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
const char *start(str.data()), *const stop(start + str.size());
|
2022-06-11 23:11:36 +02:00
|
|
|
return ircd::parse(std::nothrow, start, stop, only_rule);
|
2018-10-03 02:19:50 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2019-03-25 20:07:06 +01:00
|
|
|
ircd::rfc3986::valid(const parser::rule<> &rule,
|
|
|
|
const string_view &str)
|
2018-10-03 02:19:50 +02:00
|
|
|
{
|
2019-04-17 00:27:09 +02:00
|
|
|
const parser::rule<> only_rule
|
2018-10-03 02:19:50 +02:00
|
|
|
{
|
2022-06-17 06:50:22 +02:00
|
|
|
parser::eps > (rule >> parser::eoi)
|
2018-10-03 02:19:50 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
const char *start(str.data()), *const stop(start + str.size());
|
2022-06-17 06:50:22 +02:00
|
|
|
ircd::parse<error>(start, stop, only_rule);
|
2018-10-03 02:19:50 +02:00
|
|
|
}
|