From fcc654699dd59d08226a1efc46ea5f66178ce274 Mon Sep 17 00:00:00 2001 From: Jason Volk Date: Fri, 4 Nov 2016 14:46:42 -0700 Subject: [PATCH] ircd::locale: Add char16_t conversion overloads for unterminated strings. ircd::js: Use sized conversions when calling ircd::locale. --- include/ircd/js/native.h | 2 +- include/ircd/locale.h | 8 +++ ircd/locale.cc | 102 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 109 insertions(+), 3 deletions(-) diff --git a/include/ircd/js/native.h b/include/ircd/js/native.h index 553e7aa4d..0dd0b48b5 100644 --- a/include/ircd/js/native.h +++ b/include/ircd/js/native.h @@ -56,6 +56,6 @@ ircd::js::native_external_copy(const char *const &s, const size_t &len) { auto buf(std::make_unique<char16_t[]>(len + 1)); - ircd::locale::char16::conv(s, buf.get(), len + 1); + ircd::locale::char16::conv(s, len, buf.get(), len + 1); return buf; } diff --git a/include/ircd/locale.h b/include/ircd/locale.h index a717600bf..a87f0cde0 100644 --- a/include/ircd/locale.h +++ b/include/ircd/locale.h @@ -35,10 +35,18 @@ namespace char16 { char conv(const char16_t &); char16_t conv(const char &); + + size_t conv(const char16_t *const &, const size_t &len, char *const &buf, const size_t &max); + size_t conv(const char *const &, const size_t &len, char16_t *const &buf, const size_t &max); // uint8_t = max*2 + size_t conv(const char16_t *const &, char *const &buf, const size_t &max); size_t conv(const char *const &, char16_t *const &buf, const size_t &max); // uint8_t = max*2 + + std::string conv(const char16_t *const &, const size_t &len); std::string conv(const char16_t *const &); std::string conv(const std::u16string &); + + std::u16string conv(const char *const &, const size_t &len); std::u16string conv(const char *const &); std::u16string conv(const std::string &); } diff --git a/ircd/locale.cc b/ircd/locale.cc index 70c7e5417..dee2eed90 100644 --- a/ircd/locale.cc +++ b/ircd/locale.cc @@ -59,6 +59,23 @@ ircd::locale::char16::conv(const char *const &s) } #endif 
+#ifdef HAVE_CODECVT +std::u16string +ircd::locale::char16::conv(const char *const &s, + const size_t &len) +{ + static std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter; + return s && len? converter.from_bytes(s, s + len) : std::u16string{}; +} +#else +std::u16string +ircd::locale::char16::conv(const char *const &s, + const size_t &len) +{ + return boost::locale::conv::utf_to_utf<char16_t>(s, s + len); +} +#endif + #ifdef HAVE_CODECVT std::string ircd::locale::char16::conv(const std::u16string &s) @@ -89,6 +106,23 @@ ircd::locale::char16::conv(const char16_t *const &s) } #endif +#ifdef HAVE_CODECVT +std::string +ircd::locale::char16::conv(const char16_t *const &s, + const size_t &len) +{ + static std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter; + return s && len? converter.to_bytes(s, s + len) : std::string{}; +} +#else +std::string +ircd::locale::char16::conv(const char16_t *const &s, + const size_t &len) +{ + return boost::locale::conv::utf_to_utf<char>(s, s + len); +} +#endif + #ifdef HAVE_CODECVT size_t ircd::locale::char16::conv(const char16_t *const &str, @@ -104,8 +138,8 @@ ircd::locale::char16::conv(const char16_t *const &str, #else size_t ircd::locale::char16::conv(const char16_t *const &str, - char *const &buf, - const size_t &max) + char *const &buf, + const size_t &max) { //TODO: optimize const auto s(boost::locale::conv::utf_to_utf<char>(str)); @@ -113,6 +147,32 @@ ircd::locale::char16::conv(const char16_t *const &str, } #endif +#ifdef HAVE_CODECVT +size_t +ircd::locale::char16::conv(const char16_t *const &str, + const size_t &len, + char *const &buf, + const size_t &max) +{ + static std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter; + + //TODO: optimize + const auto s(converter.to_bytes(str, str + len)); + return rb_strlcpy(buf, s.c_str(), max); +} +#else +size_t +ircd::locale::char16::conv(const char16_t *const &str, + const size_t &len, + char *const &buf, + const size_t &max) +{ + //TODO: optimize + const auto s(boost::locale::conv::utf_to_utf<char>(str, str + len)); + return rb_strlcpy(buf, 
s.c_str(), max); +} +#endif + #ifdef HAVE_CODECVT size_t ircd::locale::char16::conv(const char *const &str, @@ -149,6 +209,44 @@ ircd::locale::char16::conv(const char *const &str, } #endif +#ifdef HAVE_CODECVT +size_t +ircd::locale::char16::conv(const char *const &str, + const size_t &len, + char16_t *const &buf, + const size_t &max) +{ + static std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter; + + if(unlikely(!max)) + return 0; + + //TODO: optimize + const auto s(converter.from_bytes(str, str + len)); + const auto cpsz(std::min(s.size(), size_t(max - 1))); + memcpy(buf, s.data(), cpsz * 2); + buf[cpsz] = char16_t(0); + return cpsz; +} +#else +size_t +ircd::locale::char16::conv(const char *const &str, + const size_t &len, + char16_t *const &buf, + const size_t &max) +{ + if(unlikely(!max)) + return 0; + + //TODO: optimize + const auto s(boost::locale::conv::utf_to_utf<char16_t>(str, str + len)); + const auto cpsz(std::min(s.size(), size_t(max - 1))); + memcpy(buf, s.data(), cpsz * 2); + buf[cpsz] = char16_t(0); + return cpsz; +} +#endif + char16_t ircd::locale::char16::conv(const char &c) {