diff --git a/include/ircd/rfc3986.h b/include/ircd/rfc3986.h
index 326fd72e0..2f4c4f273 100644
--- a/include/ircd/rfc3986.h
+++ b/include/ircd/rfc3986.h
@@ -30,6 +30,7 @@ namespace ircd::rfc3986
 	string_view encode(const mutable_buffer &, const string_view &url);
 	string_view encode(const mutable_buffer &, const json::members &);
 	string_view decode(const mutable_buffer &, const string_view &url);
+	const_buffer decode_unsafe(const mutable_buffer &, const string_view &url);
 
 	// extractor suite
 	uint16_t port(const string_view &remote); // get portnum from valid remote
diff --git a/ircd/rfc3986.cc b/ircd/rfc3986.cc
index 27978a93d..5004b362d 100644
--- a/ircd/rfc3986.cc
+++ b/ircd/rfc3986.cc
@@ -23,60 +23,6 @@ __attribute__((visibility("hidden")))
 	using namespace ircd::spirit;
 }}}
 
-struct ircd::rfc3986::encoder
-:karma::grammar
-{
-	[[noreturn]] void throw_illegal()
-	{
-		throw encoding_error
-		{
-			"Generator Protection: urlencode"
-		};
-	}
-
-	karma::rule url_encoding
-	{
-		*(karma::char_("A-Za-z0-9") | (karma::lit('%') << karma::hex))
-		,"url encoding"
-	};
-
-	encoder(): encoder::base_type{url_encoding} {}
-}
-const ircd::rfc3986::encoder;
-
-struct ircd::rfc3986::decoder
-:qi::grammar
-{
-	template using rule = qi::rule;
-
-	rule<> url_illegal
-	{
-		char_(0x00, 0x1f)
-		,"url illegal"
-	};
-
-	rule url_encodable
-	{
-		char_("A-Za-z0-9")
-		,"url encodable character"
-	};
-
-	rule urlencoded_character
-	{
-		'%' > qi::uint_parser{}
-		,"urlencoded character"
-	};
-
-	rule url_decode
-	{
-		*((char_ - '%') | urlencoded_character)
-		,"urldecode"
-	};
-
-	decoder(): decoder::base_type { url_decode } {}
-}
-const ircd::rfc3986::decoder;
-
 decltype(ircd::rfc3986::parser::sub_delims)
 ircd::rfc3986::parser::sub_delims
 {
@@ -472,9 +418,149 @@ ircd::rfc3986::uri::uri(const string_view &input)
 }
 
 //
-// general interface
+// uri decoding
 //
 
+struct ircd::rfc3986::decoder
+:qi::grammar
+{
+	template
+	using rule = qi::rule;
+
+	[[noreturn]] static void throw_unsafe()
+	{
+		throw decoding_error
+		{
+			"Unsafe characters in decoding."
+		};
+	}
+
+	const rule decode_char
+	{
+		lit('%') > qi::uint_parser{}
+		,"url decodable character"
+	};
+
+	const rule unreserved_char
+	{
+		// unreserved characters and !$+*'(),
+		char_("A-Za-z0-9._~!$+*'(),-")
+		,"url unreserved characters"
+	};
+
+	const rule decode_unsafe
+	{
+		*((char_ - '%') | decode_char)
+		,"url unsafe decode"
+	};
+
+	rule decode_safe
+	{
+		rule{}
+		,"url safe decode"
+	};
+
+	decoder()
+	:decoder::base_type{decode_safe}
+	{
+		//TODO: XXX this never reports failure to throw; it just stops parsing
+		decode_safe %= *(unreserved_char | decode_char[_pass = (local::_1 > 0x1F)]);
+	}
+}
+const ircd::rfc3986::decoder;
+
+ircd::const_buffer
+ircd::rfc3986::decode_unsafe(const mutable_buffer &buf,
+                             const string_view &url)
+try
+{
+	const char *start(url.data()), *const stop
+	{
+		start + std::min(size(url), size(buf))
+	};
+
+	mutable_buffer mb
+	{
+		data(buf), size_t(0)
+	};
+
+	const bool ok
+	{
+		qi::parse(start, stop, decoder.decode_unsafe, mb)
+	};
+
+	assert(size(mb) <= size(url));
+	return string_view
+	{
+		data(mb), size(mb)
+	};
+}
+catch(const qi::expectation_failure &e)
+{
+	throw expectation_failure{e};
+}
+
+ircd::string_view
+ircd::rfc3986::decode(const mutable_buffer &buf,
+                      const string_view &url)
+try
+{
+	const char *start(url.data()), *const stop
+	{
+		start + std::min(size(url), size(buf))
+	};
+
+	mutable_buffer mb
+	{
+		data(buf), size_t(0)
+	};
+
+	const bool ok
+	{
+		qi::parse(start, stop, decoder.decode_safe, mb)
+	};
+
+	assert(size(mb) <= size(url));
+	return string_view
+	{
+		data(mb), size(mb)
+	};
+}
+catch(const qi::expectation_failure &e)
+{
+	throw expectation_failure{e};
+}
+
+//
+// uri encoding
+//
+
+struct ircd::rfc3986::encoder
+:karma::grammar
+{
+	template
+	using rule = karma::rule;
+
+	const rule unreserved
+	{
+		char_("A-Za-z0-9._~-")
+		,"url unencoded"
+	};
+
+	const rule encode
+	{
+		*(unreserved | (lit('%') << karma::right_align(2, '0')[karma::hex]))
+		,"url encode"
+	};
+
+	encoder()
+	:encoder::base_type{encode}
+	{}
+}
+const ircd::rfc3986::encoder;
+
 ircd::string_view
 ircd::rfc3986::encode(const mutable_buffer &out,
                       const json::members &members)
@@ -514,49 +600,20 @@ ircd::rfc3986::encode(const mutable_buffer &buf,
                       const string_view &url)
 {
 	char *out(data(buf));
-	karma::generate(out, maxwidth(size(buf))[encoder], url);
+	const bool ok
+	{
+		karma::generate(out, maxwidth(size(buf))[encoder], url)
+	};
+
 	return string_view
 	{
 		data(buf), size_t(std::distance(data(buf), out))
 	};
 }
 
-ircd::string_view
-ircd::rfc3986::decode(const mutable_buffer &buf,
-                      const string_view &url)
-try
-{
-	const char *start(url.data()), *const stop
-	{
-		start + std::min(size(url), size(buf))
-	};
-
-	mutable_buffer mb
-	{
-		data(buf), size_t(0)
-	};
-
-	qi::parse(start, stop, eps > decoder, mb);
-	return string_view
-	{
-		data(mb), size(mb)
-	};
-}
-catch(const qi::expectation_failure &e)
-{
-	const auto rule
-	{
-		ircd::string(e.what_)
-	};
-
-	throw decoding_error
-	{
-		"I require a valid urlencoded %s. You sent %zu invalid chars starting with `%s'.",
-		between(rule, "<", ">"),
-		size_t(e.last - e.first),
-		string_view{e.first, e.last}
-	};
-}
+//
+// general interface
+//
 
 ircd::string_view
 ircd::rfc3986::host(const string_view &str)